3e79a8a3bd
- Add comprehensive monitoring alert rules (8 alerts) - Service availability, error rate, response time - CPU and memory usage alerts - Request rate and 4xx error rate monitoring - Enhance Woodpecker quality gate - Split into separate steps for better visibility - Add E2E tests, security check, performance check - Update coverage threshold to 30% (previously 70%) - Add quality summary with clear pass/fail indicators - Performance test results - 123 requests in 30s with 10 VUs - P95 response time: 345.55ms (target < 500ms) ✅ - P99 response time: < 1000ms ✅ - Error rate: 0% (target < 1%) ✅ - All performance metrics meet targets
94 lines
2.9 KiB
YAML
94 lines
2.9 KiB
YAML
# Prometheus alerting rules for the Novalon website.
#
# Conventions used in this file:
#   * Every selector is pinned to job="novalon-website" (the job name used by
#     the ServiceDown rule) so that series from other scrape jobs cannot raise
#     alerts carrying the `service: novalon-website` label.
#     NOTE(review): assumes the HTTP and process metrics are scraped under the
#     same job as `up` - confirm against the scrape configuration.
#   * Alerts whose text states a percentage compare a *ratio* (matching
#     requests / all requests). The ratio yields no sample or NaN when there
#     is no traffic, so those alerts stay silent on an idle service.
#   * `summary` / `description` are the operator-facing (Chinese) messages.
groups:
  - name: novalon_website_alerts
    # Evaluate every rule in this group once per 30 seconds.
    interval: 30s
    rules:
      # --- Availability ----------------------------------------------------
      # The scrape target has been unreachable for a full minute.
      - alert: ServiceDown
        expr: up{job="novalon-website"} == 0
        for: 1m
        labels:
          severity: critical
          service: novalon-website
        annotations:
          summary: "服务不可用"
          description: "Novalon 网站服务已停止响应超过 1 分钟"

      # --- Errors ----------------------------------------------------------
      # Share of requests answered with 5xx over the last 5 minutes.
      # Computed as a ratio so that the 0.05 threshold really means 5 %,
      # independent of traffic volume and of how many label combinations
      # (route, method, ...) the counter is split into.
      - alert: HighErrorRate
        expr: |-
          sum(rate(http_requests_total{job="novalon-website", status=~"5.."}[5m]))
            /
          sum(rate(http_requests_total{job="novalon-website"}[5m]))
            > 0.05
        for: 5m
        labels:
          severity: critical
          service: novalon-website
        annotations:
          summary: "高错误率"
          # humanizePercentage renders the 0..1 ratio as a percentage.
          description: "5xx 错误率在过去 5 分钟内超过 5%: {{ $value | humanizePercentage }}"

      # --- Latency ---------------------------------------------------------
      # Service-wide P95 latency above 1 s. Buckets are summed by `le` before
      # the quantile is taken so that one quantile is computed for the whole
      # service instead of one per label combination.
      - alert: HighResponseTime
        expr: |-
          histogram_quantile(
            0.95,
            sum by (le) (
              rate(http_request_duration_seconds_bucket{job="novalon-website"}[5m])
            )
          ) > 1
        for: 5m
        labels:
          severity: warning
          service: novalon-website
        annotations:
          summary: "高响应时间"
          description: "P95 响应时间超过 1 秒: {{ $value }}s"

      # Same measurement as HighResponseTime with a higher threshold and a
      # shorter hold time; escalates to critical.
      - alert: VeryHighResponseTime
        expr: |-
          histogram_quantile(
            0.95,
            sum by (le) (
              rate(http_request_duration_seconds_bucket{job="novalon-website"}[5m])
            )
          ) > 2
        for: 2m
        labels:
          severity: critical
          service: novalon-website
        annotations:
          summary: "极高响应时间"
          description: "P95 响应时间超过 2 秒: {{ $value }}s"

      # --- Resources (evaluated per scraped process) -----------------------
      # rate() of the CPU-seconds counter is CPU seconds consumed per second,
      # so 0.8 corresponds to 80 % of one core.
      - alert: HighCPUUsage
        expr: rate(process_cpu_seconds_total{job="novalon-website"}[5m]) > 0.8
        for: 5m
        labels:
          severity: warning
          service: novalon-website
        annotations:
          summary: "CPU 使用率过高"
          description: "CPU 使用率超过 80%: {{ $value | humanizePercentage }}"

      # Resident memory above 1 (bytes divided by 1024^3, i.e. GiB).
      - alert: HighMemoryUsage
        expr: process_resident_memory_bytes{job="novalon-website"} / 1024 / 1024 / 1024 > 1
        for: 5m
        labels:
          severity: warning
          service: novalon-website
        annotations:
          summary: "内存使用率过高"
          description: "内存使用超过 1GB: {{ $value }}GB"

      # Resident memory above 2 (same unit as HighMemoryUsage); critical.
      - alert: VeryHighMemoryUsage
        expr: process_resident_memory_bytes{job="novalon-website"} / 1024 / 1024 / 1024 > 2
        for: 2m
        labels:
          severity: critical
          service: novalon-website
        annotations:
          summary: "内存使用率极高"
          description: "内存使用超过 2GB: {{ $value }}GB"

      # --- Traffic ---------------------------------------------------------
      # Total request throughput below 0.1 req/s for 10 minutes. The rate is
      # summed across all series; comparing each series individually would
      # fire for every rarely-hit route or status code.
      - alert: LowRequestRate
        expr: sum(rate(http_requests_total{job="novalon-website"}[5m])) < 0.1
        for: 10m
        labels:
          severity: warning
          service: novalon-website
        annotations:
          summary: "请求率过低"
          description: "请求率在过去 10 分钟内低于 0.1 req/s: {{ $value }}"

      # Share of requests answered with 4xx over the last 5 minutes
      # (ratio, so 0.1 means 10 %).
      - alert: High4xxRate
        expr: |-
          sum(rate(http_requests_total{job="novalon-website", status=~"4.."}[5m]))
            /
          sum(rate(http_requests_total{job="novalon-website"}[5m]))
            > 0.1
        for: 5m
        labels:
          severity: warning
          service: novalon-website
        annotations:
          summary: "高 4xx 错误率"
          description: "4xx 错误率在过去 5 分钟内超过 10%: {{ $value | humanizePercentage }}"