diff --git a/.woodpecker.yml b/.woodpecker.yml index a63a677..ac0b324 100644 --- a/.woodpecker.yml +++ b/.woodpecker.yml @@ -415,15 +415,14 @@ steps: WECHAT_WEBHOOK: from_secret: wechat_webhook commands: + - BRANCH="${CI_COMMIT_BRANCH:-unknown}" + - COMMIT="${CI_COMMIT_SHA:0:7}" + - MESSAGE=$(echo "${CI_COMMIT_MESSAGE:-no message}" | tr '\n' ' ' | sed 's/"/\\"/g') + - AUTHOR="${CI_COMMIT_AUTHOR:-unknown}" + - PIPELINE_NUMBER="${CI_PIPELINE_NUMBER:-0}" + - REPO_ID="${CI_REPO_ID:-1}" + - TIMESTAMP=$(date "+%Y-%m-%d %H:%M:%S") - | - BRANCH="${CI_COMMIT_BRANCH:-unknown}" - COMMIT="${CI_COMMIT_SHA:0:7}" - MESSAGE=$(echo "${CI_COMMIT_MESSAGE:-no message}" | tr '\n' ' ') - AUTHOR="${CI_COMMIT_AUTHOR:-unknown}" - PIPELINE_NUMBER="${CI_PIPELINE_NUMBER:-0}" - REPO_ID="${CI_REPO_ID:-1}" - TIMESTAMP=$(date "+%Y-%m-%d %H:%M:%S") - cat > /tmp/payload.json < /tmp/payload.json < /dev/null; then + echo "✅ Git LFS 已安装" + git lfs version +else + echo "❌ Git LFS 未安装" +fi + +if [ -f ".gitattributes" ]; then + echo "✅ .gitattributes 文件存在" + cat .gitattributes +else + echo "❌ .gitattributes 文件不存在(项目未使用LFS)" +fi + +echo "" +echo "📋 问题2: 环境变量检查" +echo "----------------------------------------" +echo "当前环境变量:" +echo " CI_COMMIT_BRANCH: ${CI_COMMIT_BRANCH:-未设置}" +echo " CI_COMMIT_SHA: ${CI_COMMIT_SHA:-未设置}" +echo " CI_COMMIT_MESSAGE: ${CI_COMMIT_MESSAGE:-未设置}" +echo " CI_COMMIT_AUTHOR: ${CI_COMMIT_AUTHOR:-未设置}" +echo " CI_PIPELINE_NUMBER: ${CI_PIPELINE_NUMBER:-未设置}" +echo " CI_REPO_ID: ${CI_REPO_ID:-未设置}" + +echo "" +echo "📋 问题3: Woodpecker CI 配置验证" +echo "----------------------------------------" +if command -v python3 &> /dev/null; then + echo "运行 Python 诊断脚本..." 
+ python3 diagnose-woodpecker.py 2>/dev/null || echo "诊断脚本执行失败" +else + echo "⚠️ Python3 未安装,跳过配置验证" +fi + +echo "" +echo "==========================================" +echo "诊断完成" +echo "==========================================" diff --git a/docs/deployment/CICD_PREVENTION_GUIDE.md b/docs/deployment/CICD_PREVENTION_GUIDE.md new file mode 100644 index 0000000..20e78b8 --- /dev/null +++ b/docs/deployment/CICD_PREVENTION_GUIDE.md @@ -0,0 +1,241 @@ +# CI/CD 问题预防机制与快速修复指南 + +## 📋 已识别的问题与解决方案 + +### 问题1: Git LFS 执行失败 + +**根本原因**: +- Woodpecker CI 的 Git 插件默认启用 LFS 支持 +- 项目未使用 Git LFS,但 CI 仍尝试执行 `git lfs fetch` 和 `git lfs checkout` + +**解决方案**: +```yaml +clone: + git: + image: woodpeckerci/plugin-git + settings: + depth: 1 + partial: false + lfs: false # 禁用 LFS +``` + +**验证方法**: +```bash +# 检查项目是否使用 LFS +ls -la .gitattributes # 应该不存在或无 LFS 配置 +git lfs env # 应该返回 "Git LFS not configured" + +# 检查 CI 配置 +grep "lfs: false" .woodpecker.yml +``` + +--- + +### 问题2: 企业微信通知变量丢失 + +**根本原因**: +- Shell 脚本中的 heredoc 块内变量展开时机问题 +- 多行命令块导致环境变量未正确传递 + +**解决方案**: +```yaml +commands: + # 将变量赋值移到单独的命令行 + - BRANCH="${CI_COMMIT_BRANCH:-unknown}" + - COMMIT="${CI_COMMIT_SHA:0:7}" + - MESSAGE=$(echo "${CI_COMMIT_MESSAGE:-no message}" | tr '\n' ' ' | sed 's/"/\\"/g') + - AUTHOR="${CI_COMMIT_AUTHOR:-unknown}" + - PIPELINE_NUMBER="${CI_PIPELINE_NUMBER:-0}" + - REPO_ID="${CI_REPO_ID:-1}" + - TIMESTAMP=$(date "+%Y-%m-%d %H:%M:%S") + # heredoc 只用于生成 JSON + - | + cat > /tmp/payload.json < **构建状态**: 成功\n\n**项目信息**\n> 分支: \`${BRANCH}\`\n> 提交: \`${COMMIT}\`\n> 作者: ${AUTHOR}\n\n**提交信息**\n> ${MESSAGE}\n\n**操作**\n> [查看构建详情](https://ci.f.novalon.cn/repos/${REPO_ID}/pipeline/${PIPELINE_NUMBER})\n\n---\n> 时间: ${TIMESTAMP}\n> Pipeline #${PIPELINE_NUMBER}" + } + } + EOF + - curl -X POST "$WECHAT_WEBHOOK" -H 'Content-Type: application/json' -d @/tmp/payload.json +``` + +**验证方法**: +```bash +# 本地测试企业微信通知 +export WECHAT_WEBHOOK='your_webhook_url' +./scripts/test-wechat-notify.sh + +# 检查变量展开 +echo "BRANCH: 
${CI_COMMIT_BRANCH:-unknown}" +echo "COMMIT: ${CI_COMMIT_SHA:0:7}" +``` + +--- + +## 🔍 持续监控机制 + +### 1. 自动化监控脚本 + +运行监控脚本: +```bash +chmod +x scripts/monitoring/cicd-monitor.sh +./scripts/monitoring/cicd-monitor.sh +``` + +### 2. 定时监控(Cron) + +添加到 crontab: +```bash +# 每小时运行一次监控 +0 * * * * cd /path/to/novalon-website && ./scripts/monitoring/cicd-monitor.sh + +# 每天凌晨2点清理旧日志 +0 2 * * * find /path/to/novalon-website/logs/cicd-monitor -name "*.log" -mtime +7 -delete +``` + +### 3. 监控指标 + +| 指标 | 正常值 | 异常处理 | +|------|--------|----------| +| Git LFS 配置 | 禁用 | 检查 `.woodpecker.yml` | +| YAML 语法 | 通过 | 运行 `yamllint .woodpecker.yml` | +| 环境变量展开 | 正确 | 检查通知脚本格式 | +| Secrets 配置 | 完整 | 在 Woodpecker CI 中配置 | +| 健康检查 | 已配置 | 检查部署步骤 | + +--- + +## 🚨 快速故障排查流程 + +### Step 1: 识别问题类型 + +```bash +# 运行诊断脚本 +./diagnose-cicd-issues.sh +``` + +### Step 2: 检查 CI 日志 + +访问: https://ci.f.novalon.cn/repos/1/pipeline/[PIPELINE_NUMBER] + +关键检查点: +- ✅ Clone 步骤是否成功 +- ✅ 环境变量是否正确传递 +- ✅ 通知是否发送成功 + +### Step 3: 本地验证 + +```bash +# 验证 Git LFS +git lfs env + +# 验证 YAML 语法 +yamllint .woodpecker.yml + +# 测试企业微信通知 +WECHAT_WEBHOOK='your_webhook' ./scripts/test-wechat-notify.sh +``` + +### Step 4: 修复并验证 + +1. 修改配置文件 +2. 提交并推送到测试分支 +3. 观察 CI 执行结果 +4. 验证通知是否正常 + +--- + +## 📊 预防措施清单 + +### 配置层面 + +- [x] 禁用 Git LFS(项目未使用) +- [x] 修复环境变量展开格式 +- [x] 配置健康检查和回滚机制 +- [x] 使用 Secret 管理敏感信息 +- [ ] 添加 npm 缓存(优化性能) +- [ ] 配置分支保护规则 + +### 监控层面 + +- [x] 创建监控脚本 +- [x] 建立日志记录机制 +- [ ] 配置告警通知 +- [ ] 集成到 CI/CD 流程 + +### 文档层面 + +- [x] 问题预防机制文档 +- [x] 快速修复指南 +- [x] 故障排查流程 +- [ ] 定期更新最佳实践 + +--- + +## 🎯 后续优化建议 + +### 高优先级(本周) + +1. **添加 npm 缓存** + ```yaml + steps: + lint: + image: node:20-alpine + commands: + - npm ci + cache: + mount: + - node_modules + - .npm + ``` + +2. **配置分支保护规则** + - main 分支:禁止直接推送 + - release/** 分支:需要 PR 审核 + - dev 分支:需要 CI 检查通过 + +3. **添加部署告警** + - 连续失败 3 次发送告警 + - 部署超时发送告警 + - 健康检查失败发送告警 + +### 中优先级(本月) + +1. **容器镜像安全扫描** + - 使用 Trivy 扫描镜像漏洞 + - 发现 Critical 漏洞阻止部署 + +2. 
**集成 APM 监控**
+   - 使用 Sentry 监控应用性能
+   - 自动上报错误和性能指标
+
+3. **优化测试策略**
+   - 并行执行 E2E 测试
+   - 减少测试时间 30-50%
+
+---
+
+## 📝 变更记录
+
+| 日期 | 变更内容 | 负责人 |
+|------|---------|--------|
+| 2026-03-29 | 禁用 Git LFS | 张翔 |
+| 2026-03-29 | 修复企业微信通知变量展开 | 张翔 |
+| 2026-03-29 | 创建监控脚本 | 张翔 |
+| 2026-03-29 | 建立预防机制文档 | 张翔 |
+
+---
+
+## 🔗 相关文档
+
+- [Woodpecker CI 官方文档](https://woodpecker-ci.org/)
+- [Git LFS 文档](https://git-lfs.github.com/)
+- [Shell 变量展开](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html)
+- [YAML 语法检查](https://yamllint.readthedocs.io/)
+
+---
+
+**最后更新**: 2026-03-29
+**维护人员**: 张翔
diff --git a/scripts/monitoring/cicd-monitor.sh b/scripts/monitoring/cicd-monitor.sh
new file mode 100755
index 0000000..db3fa21
--- /dev/null
+++ b/scripts/monitoring/cicd-monitor.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+#
+# CI/CD configuration monitor.
+#
+# Statically inspects .woodpecker.yml (LFS setting, YAML syntax, WeChat
+# notification, secrets, branch triggers, deployment safeguards) and records
+# a few repository size metrics. Every finding is printed and appended to a
+# timestamped log file under ./logs/cicd-monitor.
+#
+# Usage (from the repository root):
+#   ./scripts/monitoring/cicd-monitor.sh
+#
+# Exit status: 0 when all checks ran (warnings do not fail the run),
+# 1 when .woodpecker.yml is missing.
+
+# -u catches misspelled variable names. pipefail is deliberately not enabled:
+# the report pipelines below (yamllint | tee, du | cut, find | wc) are
+# best-effort and must not abort the remaining checks.
+set -eu
+
+readonly CI_CONFIG=".woodpecker.yml"
+readonly SEPARATOR="=========================================="
+
+echo "$SEPARATOR"
+echo "CI/CD 持续监控脚本"
+echo "$SEPARATOR"
+echo ""
+
+MONITOR_DIR="./logs/cicd-monitor"
+mkdir -p "$MONITOR_DIR"
+
+TIMESTAMP=$(date "+%Y%m%d_%H%M%S")
+LOG_FILE="$MONITOR_DIR/monitor_${TIMESTAMP}.log"
+
+echo "监控日志: $LOG_FILE"
+echo ""
+
+# Print a timestamped message and append it to the log file.
+# Arguments: $1 - message text (may be empty)
+log() {
+    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1-}" | tee -a "$LOG_FILE"
+}
+
+# Log a section banner: blank line, separator, title, separator.
+# Arguments: $1 - section title
+section() {
+    log ""
+    log "$SEPARATOR"
+    log "$1"
+    log "$SEPARATOR"
+}
+
+# Succeed when the CI config contains the given text.
+# Arguments: $1 - fixed string to search for (not a regex)
+config_has() {
+    grep -qF -- "$1" "$CI_CONFIG"
+}
+
+log "开始 CI/CD 监控..."
+
+# Every check below greps the CI config; without it each grep would error
+# out and be misreported as "not configured".
+if [ ! -f "$CI_CONFIG" ]; then
+    log "❌ 未找到 $CI_CONFIG,请在项目根目录运行此脚本"
+    exit 1
+fi
+
+section "1. 检查 Git LFS 配置"
+
+if [ -f ".gitattributes" ]; then
+    log "✅ .gitattributes 存在"
+    log "内容:"
+    tee -a "$LOG_FILE" < .gitattributes
+else
+    log "✅ .gitattributes 不存在(项目未使用LFS)"
+fi
+
+if config_has "lfs: false"; then
+    log "✅ Woodpecker CI 配置已禁用 LFS"
+else
+    log "⚠️ Woodpecker CI 配置未禁用 LFS"
+fi
+
+section "2. 检查 Woodpecker CI 配置语法"
+
+if command -v yamllint > /dev/null 2>&1; then
+    if yamllint "$CI_CONFIG" > /dev/null 2>&1; then
+        log "✅ YAML 语法正确"
+    else
+        log "❌ YAML 语法错误"
+        yamllint "$CI_CONFIG" | tee -a "$LOG_FILE"
+    fi
+else
+    log "⚠️ yamllint 未安装,跳过语法检查"
+fi
+
+section "3. 检查企业微信通知配置"
+
+if config_has "WECHAT_WEBHOOK"; then
+    log "✅ 企业微信通知已配置"
+
+    # Single quotes on purpose: the ${...} text is searched for literally.
+    if config_has 'BRANCH="${CI_COMMIT_BRANCH'; then
+        log "✅ 环境变量展开格式正确"
+    else
+        log "⚠️ 环境变量展开格式可能有问题"
+    fi
+else
+    log "⚠️ 企业微信通知未配置"
+fi
+
+section "4. 检查 Secrets 配置"
+
+REQUIRED_SECRETS=(
+    "registry_password"
+    "ssh_private_key"
+    "wechat_webhook"
+)
+
+for secret in "${REQUIRED_SECRETS[@]}"; do
+    if config_has "from_secret: $secret"; then
+        log "✅ Secret '$secret' 已配置"
+    else
+        log "❌ Secret '$secret' 未配置"
+    fi
+done
+
+section "5. 检查分支保护规则"
+
+BRANCHES=("main" "dev" "release" "release/**")
+
+# Each "branch:" key plus the five lines after it, collected once because
+# only the branch name varies per iteration. Empty when there is no such key.
+BRANCH_RULES=$(grep -A 5 "branch:" "$CI_CONFIG" || true)
+
+for branch in "${BRANCHES[@]}"; do
+    # -F matches the name literally; as a regex the "*" in "release/**" is a
+    # repetition operator, so the pattern would accept any "release" text.
+    # This is still a substring heuristic, not a YAML-aware check.
+    if grep -qF -- "$branch" <<< "$BRANCH_RULES"; then
+        log "✅ 分支 '$branch' 已配置触发规则"
+    else
+        log "⚠️ 分支 '$branch' 未配置触发规则"
+    fi
+done
+
+section "6. 检查部署配置"
+
+if config_has "deploy-production"; then
+    log "✅ 生产部署步骤已配置"
+
+    if config_has "Health check"; then
+        log "✅ 健康检查已配置"
+    else
+        log "⚠️ 健康检查未配置"
+    fi
+
+    if config_has "rolling back"; then
+        log "✅ 回滚机制已配置"
+    else
+        log "⚠️ 回滚机制未配置"
+    fi
+else
+    log "⚠️ 生产部署步骤未配置"
+fi
+
+section "7. 性能指标检查"
+
+if [ -d "node_modules" ]; then
+    NODE_MODULES_SIZE=$(du -sh node_modules | cut -f1)
+    log "node_modules 大小: $NODE_MODULES_SIZE"
+fi
+
+if [ -d "dist" ]; then
+    DIST_SIZE=$(du -sh dist | cut -f1)
+    log "dist 目录大小: $DIST_SIZE"
+fi
+
+# Count files in the git object store; report 0 when .git/objects is absent
+# instead of letting find print an error.
+if [ -d ".git/objects" ]; then
+    GIT_OBJECTS=$(find .git/objects -type f | wc -l | tr -d ' ')
+else
+    GIT_OBJECTS=0
+fi
+log "Git 对象数量: $GIT_OBJECTS"
+
+section "监控完成"
+log ""
+log "📊 监控报告已保存到: $LOG_FILE"
+log ""
+
+echo ""
+echo "💡 建议操作:"
+echo " 1. 定期运行此监控脚本(建议每小时一次)"
+echo " 2. 将日志文件纳入版本控制"
+echo " 3. 设置告警机制(如连续3次失败则发送通知)"
+echo ""