#!/bin/bash
#
# Generates monitoring and alerting configuration files for the Novalon
# website under ./monitoring (relative to the current working directory):
#   - prometheus.yml          Prometheus scrape configuration
#   - grafana-dashboard.json  Grafana dashboard definition
#   - alerts.yml              Prometheus alerting rules
#
# Existing files with those names are overwritten.
#
# Usage: run from the directory that should contain the monitoring/ folder.

set -euo pipefail

readonly MONITORING_DIR="monitoring"

echo "🔍 配置监控和告警..."

# Create the monitoring configuration directory.
mkdir -p -- "${MONITORING_DIR}"

# Write the Prometheus configuration.
# NOTE(review): this config has no `rule_files` entry, so the alerts.yml
# written below is not referenced by it — confirm how the rules get loaded.
cat > "${MONITORING_DIR}/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'novalon-website'
    static_configs:
      - targets: ['localhost:3000']
    metrics_path: '/api/health'
EOF

# Write the Grafana dashboard definition.
# The \" sequences below are JSON string escapes and must stay as they are.
cat > "${MONITORING_DIR}/grafana-dashboard.json" << 'EOF'
{
  "dashboard": {
    "title": "Novalon Website Monitoring",
    "panels": [
      {
        "title": "HTTP Requests",
        "targets": [
          {
            "expr": "rate(http_requests_total[5m])"
          }
        ]
      },
      {
        "title": "Response Time",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))"
          }
        ]
      },
      {
        "title": "Error Rate",
        "targets": [
          {
            "expr": "rate(http_requests_total{status=~\"5..\"}[5m])"
          }
        ]
      }
    ]
  }
}
EOF

# Write the alerting rules.
# The here-doc delimiter is quoted, so the shell copies the text verbatim:
# {{ $value }} is left for Prometheus templating, and the PromQL label
# matchers use plain double quotes (a backslash before them would end up
# literally in the expression, because these `expr` values are unquoted
# YAML scalars).
cat > "${MONITORING_DIR}/alerts.yml" << 'EOF'
groups:
  - name: novalon-website
    rules:
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value }}"

      - alert: HighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time detected"
          description: "95th percentile response time is {{ $value }}s"

      - alert: ServiceDown
        expr: up{job="novalon-website"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service is down"
          description: "Novalon website service is not responding"
EOF

echo "✅ 监控和告警配置完成!"
echo "📊 Prometheus配置: ${MONITORING_DIR}/prometheus.yml"
echo "📈 Grafana仪表板: ${MONITORING_DIR}/grafana-dashboard.json"
echo "🚨 告警规则: ${MONITORING_DIR}/alerts.yml"