#!/bin/bash
# Production connection-timeout diagnosis script.
# Purpose: systematically investigate connection timeouts against the
# production (novalon.cn), Git (git.f.novalon.cn) and CI (ci.f.novalon.cn) hosts.
#
# Usage: <script> [--report]
#   --report  write the complete diagnosis to a log file under /tmp and print
#             the first 50 lines as a summary.
#
# NOTE: errexit is enabled, so every probe that is *expected* to fail while
# diagnosing an outage is explicitly guarded; an unguarded failure would abort
# the script before the result could be reported.

set -e

# Colour definitions (ANSI escape sequences, interpreted by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers: each prints "$1" prefixed with a coloured severity tag.
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Print a horizontal rule between report sections.
separator() {
    echo "======================================================================"
}

# Resolve host "$1" with a single nslookup call.
# Prints the second field of the last "Address" line of the nslookup output
# (the same parsing the script has always used) and returns non-zero when the
# lookup itself fails or nslookup is unavailable.
resolve_ip() {
    local lookup_output
    lookup_output=$(nslookup "$1" 2>/dev/null) || return 1
    printf '%s\n' "$lookup_output" | grep "Address" | tail -1 | awk '{print $2}'
}

# Probe TCP port "$2" on host "$1" with a 5-second timeout.
# The verdict comes from nc's exit status rather than its human-readable
# output, whose wording differs between netcat implementations and locales.
check_tcp() {
    local host="$1"
    local port="$2"
    if nc -z -w 5 "$host" "$port" > /dev/null 2>&1; then
        log_success "$host:$port 连接成功"
    else
        log_error "$host:$port 连接超时"
    fi
}

# Send an HTTPS HEAD request to host "$1" and report whether any HTTP status
# line came back. stderr is discarded so curl's own error text (which may
# contain the word "HTTP") cannot be mistaken for a response.
check_http_head() {
    local host="$1"
    if curl -sI --connect-timeout 10 --max-time 15 "https://$host" 2>/dev/null | grep -q "^HTTP/"; then
        log_success "$host HTTP响应正常"
    else
        log_error "$host HTTP响应超时"
    fi
}

# Trace the route to host "$1" (at most 15 hops, first 20 output lines).
# Availability is checked up front: the exit status of `traceroute | head`
# is head's, so it cannot be used to detect a missing traceroute binary.
trace_route() {
    local host="$1"
    log_info "追踪到 $host 的路由:"
    if command -v traceroute > /dev/null 2>&1; then
        traceroute -m 15 "$host" 2>&1 | head -20
    else
        log_warning "traceroute命令不可用"
    fi
}

# 1. Collect error information: local connectivity, DNS, TCP and HTTP probes.
# Returns 1 (which aborts the script under errexit) when the local network
# check fails; all other probe failures are logged and the phase continues.
# Sets the globals PROD_IP, GIT_IP, CI_IP and HTTP_CODE.
collect_error_info() {
    separator
    log_info "Phase 1.1: 收集错误信息"
    separator

    echo ""
    log_info "测试基本网络连接..."

    # Local network check.
    if ping -c 3 8.8.8.8 > /dev/null 2>&1; then
        log_success "本地网络正常"
    else
        log_error "本地网络异常"
        return 1
    fi

    echo ""
    log_info "测试DNS解析..."

    # Production server domain.
    if PROD_IP=$(resolve_ip novalon.cn); then
        log_success "novalon.cn DNS解析成功"
        log_info "生产服务器IP: $PROD_IP"
    else
        log_error "novalon.cn DNS解析失败"
    fi

    # Git server.
    if GIT_IP=$(resolve_ip git.f.novalon.cn); then
        log_success "git.f.novalon.cn DNS解析成功"
        log_info "Git服务器IP: $GIT_IP"
    else
        log_error "git.f.novalon.cn DNS解析失败"
    fi

    # CI server.
    if CI_IP=$(resolve_ip ci.f.novalon.cn); then
        log_success "ci.f.novalon.cn DNS解析成功"
        log_info "CI服务器IP: $CI_IP"
    else
        log_error "ci.f.novalon.cn DNS解析失败"
    fi

    echo ""
    log_info "测试TCP连接..."

    # Without nc every probe would be misreported as a timeout, so say so once.
    if command -v nc > /dev/null 2>&1; then
        check_tcp novalon.cn 80          # HTTP port
        check_tcp novalon.cn 443         # HTTPS port
        check_tcp git.f.novalon.cn 443   # Git server
        check_tcp ci.f.novalon.cn 443    # CI server
    else
        log_warning "nc命令不可用，跳过TCP连接测试"
    fi

    echo ""
    log_info "测试HTTP响应..."

    # Production HTTP status. curl exits non-zero on a timeout -- exactly the
    # case being diagnosed -- so the failure is absorbed here instead of letting
    # errexit kill the script; curl still prints "000" via -w in that case, and
    # "000" is also substituted when nothing was printed (e.g. curl missing).
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 10 --max-time 15 https://novalon.cn 2>/dev/null) \
        || HTTP_CODE=${HTTP_CODE:-000}
    case "$HTTP_CODE" in
        200|301|302)
            log_success "novalon.cn HTTP响应正常 (HTTP $HTTP_CODE)"
            ;;
        *)
            log_error "novalon.cn HTTP响应异常 (HTTP $HTTP_CODE)"
            ;;
    esac

    check_http_head git.f.novalon.cn   # Git server
    check_http_head ci.f.novalon.cn    # CI server
}

# 2. Check recent changes: local Git state, recent commits, remote reachability.
# Skips the whole phase (returning 0) when the current directory is not inside
# a Git work tree. Sets the global CURRENT_BRANCH.
check_recent_changes() {
    separator
    log_info "Phase 1.2: 检查最近的变更"
    separator

    echo ""
    log_info "检查本地Git状态..."

    if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
        log_warning "当前目录不是Git仓库，跳过Git检查"
        return 0
    fi

    # Current branch. `git branch --show-current` is not available in older Git
    # releases, so fall back to rev-parse, and finally to an empty name.
    CURRENT_BRANCH=$(git branch --show-current 2>/dev/null) \
        || CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) \
        || CURRENT_BRANCH=""
    log_info "当前分支: $CURRENT_BRANCH"

    # Uncommitted changes.
    if git status --porcelain | grep -q .; then
        log_warning "存在未提交的更改:"
        git status --short
    else
        log_success "工作区干净"
    fi

    # Recent commits (git log fails in a repository that has no commits yet).
    echo ""
    log_info "最近5次提交:"
    git log --oneline -5 || log_warning "无法读取提交历史"

    # Remote repository reachability.
    echo ""
    log_info "检查远程仓库连接..."

    if git remote -v | grep -q "git.f.novalon.cn"; then
        log_info "远程仓库: git.f.novalon.cn"

        # Try to contact the remote.
        if git ls-remote --heads origin > /dev/null 2>&1; then
            log_success "远程仓库连接成功"
        else
            log_error "远程仓库连接失败"
        fi
    fi
}

# 3. Trace the data flow: network routes, firewall rules and proxy settings.
trace_data_flow() {
    separator
    log_info "Phase 1.3: 追踪数据流"
    separator

    echo ""
    log_info "网络路由追踪..."

    trace_route novalon.cn

    echo ""
    trace_route git.f.novalon.cn

    echo ""
    log_info "检查防火墙规则..."

    # Firewall status. The sudo calls are guarded so that a denied or failed
    # sudo does not abort the remaining diagnosis.
    if command -v ufw > /dev/null 2>&1; then
        sudo ufw status verbose || log_warning "无法读取ufw状态"
    elif command -v firewall-cmd > /dev/null 2>&1; then
        sudo firewall-cmd --list-all || log_warning "无法读取firewalld状态"
    else
        log_warning "未检测到防火墙"
    fi

    echo ""
    log_info "检查网络代理设置..."
    env | grep -i proxy || log_info "未设置网络代理"
}

# 4. Analyse system resources: DNS configuration, interfaces, load, disk space.
analyze_system_resources() {
    separator
    log_info "Phase 1.4: 分析系统资源"
    separator

    echo ""
    log_info "检查DNS配置..."
    if [ -r /etc/resolv.conf ]; then
        cat /etc/resolv.conf
    else
        log_warning "无法读取 /etc/resolv.conf"
    fi

    echo ""
    log_info "检查网络接口..."
    # Prefer ifconfig; fall back to `ip addr show` when ifconfig is missing or
    # prints nothing that matches, and warn if neither tool yields output.
    { command -v ifconfig > /dev/null 2>&1 && ifconfig | grep -E "^[a-z]|inet "; } \
        || ip addr show 2>/dev/null \
        || log_warning "无法获取网络接口信息"

    echo ""
    log_info "检查系统负载..."
    uptime

    echo ""
    log_info "检查磁盘空间..."
    # grep exits non-zero when nothing matches; do not let that abort the script.
    df -h | grep -E "Filesystem|/$|/home" || log_warning "未找到匹配的文件系统"
}

# Run the four diagnosis phases in order, separated by blank lines.
run_all_phases() {
    collect_error_info
    echo ""
    check_recent_changes
    echo ""
    trace_data_flow
    echo ""
    analyze_system_resources
}

# Generate the diagnosis report.
# Writes the output of all phases to /tmp/production-diagnosis-<timestamp>.log,
# then prints the report location and its first 50 lines. Returns the exit
# status of the diagnosis run (non-zero if a phase aborted), or 1 if the
# report file could not be created. Sets the global REPORT_FILE.
generate_report() {
    separator
    log_info "生成诊断报告"
    separator

    REPORT_FILE="/tmp/production-diagnosis-$(date +%Y%m%d_%H%M%S).log"

    # Create the file exclusively (noclobber) and owner-only (umask 077): the
    # name is predictable and /tmp is shared, so a plain truncating redirect
    # could be steered through a pre-planted file or symlink.
    if ! ( umask 077; set -o noclobber; : > "$REPORT_FILE" ) 2> /dev/null; then
        log_error "无法创建诊断报告: $REPORT_FILE"
        return 1
    fi

    # Run the phases in a subshell that has errexit on, while errexit is off in
    # this shell: a phase that aborts then ends only the subshell, and the
    # location of the partial report can still be announced below.
    local status=0
    set +e
    (
        set -e
        echo "生产环境连接超时诊断报告"
        echo "生成时间: $(date)"
        echo ""
        run_all_phases
    ) >> "$REPORT_FILE" 2>&1
    status=$?
    set -e

    if [ "$status" -eq 0 ]; then
        log_success "诊断报告已生成: $REPORT_FILE"
    else
        log_warning "诊断提前中止 (退出码 $status)，部分报告已保存: $REPORT_FILE"
    fi

    echo ""
    log_info "报告摘要:"
    head -50 "$REPORT_FILE"

    return "$status"
}

# Entry point. "$1" = "--report" writes a report file; anything else runs the
# phases directly on the terminal.
main() {
    log_info "开始生产环境连接超时诊断"

    if [ "$1" = "--report" ]; then
        generate_report
    else
        run_all_phases
    fi
}

main "$@"