#!/bin/bash
#
# Remote server diagnostic script.
# Purpose: diagnose connection-timeout problems on a production server.
#
# Usage:
#   <script>          interactive menu
#   <script> --full   run every check once, then print the fix suggestions
#
# errexit (`set -e`) is deliberately NOT enabled: this is a diagnostic tool,
# and a failing probe (service down, tool missing, grep finding nothing) is a
# finding to report, not a reason to abort the remaining checks.
set -u

# ANSI colour sequences used by the log helpers.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m'

# URL probed by check_application and the ports grepped for in socket listings.
# The trailing ([[:space:]]|$) keeps ":80" from also matching ":8080".
readonly APP_URL='http://localhost:3000'
readonly WATCHED_PORTS_RE=':(80|443|3000)([[:space:]]|$)'

# Usage percentage above which memory/disk are reported as exhausted.
readonly USAGE_LIMIT=90

#######################################
# Print one coloured, tagged log line to stdout.
# Arguments: $1 - colour sequence, $2 - tag, $3 - message
#######################################
_log() {
  printf '%s[%s]%s %s\n' "$1" "$2" "$NC" "${3:-}"
}

log_info() { _log "$BLUE" "INFO" "${1:-}"; }
log_success() { _log "$GREEN" "SUCCESS" "${1:-}"; }
log_warning() { _log "$YELLOW" "WARNING" "${1:-}"; }
log_error() { _log "$RED" "ERROR" "${1:-}"; }

# Print a horizontal rule between report sections.
separator() {
  echo "======================================================================"
}

# Return 0 when command $1 is available on PATH.
have() {
  command -v "$1" >/dev/null 2>&1
}

#######################################
# Check that every named command exists.
# Arguments: command names
# Outputs:   a warning naming the first missing command
# Returns:   0 if all exist, 1 otherwise
#######################################
require_cmd() {
  local cmd
  for cmd in "$@"; do
    if ! have "$cmd"; then
      log_warning "未找到命令: ${cmd}"
      return 1
    fi
  done
  return 0
}

#######################################
# Numeric "greater than" test that accepts decimals (no dependency on bc).
# Arguments: $1 - value, $2 - limit
# Returns:   0 if value > limit, 1 if not, 2 if either is not a number
#######################################
exceeds_threshold() {
  local value=${1:-} limit=${2:-}
  local -r number_re='^[0-9]+([.][0-9]+)?$'
  [[ $value =~ $number_re && $limit =~ $number_re ]] || return 2
  awk -v v="$value" -v l="$limit" 'BEGIN { exit !(v > l) }'
}

#######################################
# Report whether a usage percentage is above the limit.
# Arguments: $1 - resource label, $2 - usage percentage,
#            $3 - limit (optional, defaults to USAGE_LIMIT)
# Outputs:   one log line (error / success / warning when value is unusable)
#######################################
report_usage() {
  local label=${1:-} value=${2:-} limit=${3:-$USAGE_LIMIT}
  local rc=0
  exceeds_threshold "$value" "$limit" || rc=$?
  case "$rc" in
    0) log_error "${label}使用率过高: ${value}%" ;;
    1) log_success "${label}使用正常: ${value}%" ;;
    *) log_warning "无法获取${label}使用率" ;;
  esac
}

#######################################
# Grep a socket listing, trying netstat first and ss as fallback.
# Arguments: $1 - flags understood by both tools (e.g. -tlnp, -an)
#            $2 - extended regex to search for
# Outputs:   matching lines
# Returns:   0 if any tool produced a match, 1 otherwise
#######################################
grep_sockets() {
  local flags=$1 pattern=$2 tool
  for tool in netstat ss; do
    have "$tool" || continue
    if "$tool" "$flags" | grep -E -- "$pattern"; then
      return 0
    fi
  done
  return 1
}

# 1. System resources: CPU, memory, disk, load, plus threshold checks.
check_system_resources() {
  local mem_pct disk_pct

  separator
  log_info "检查系统资源"
  separator

  echo ""
  log_info "CPU和内存使用情况:"
  if require_cmd top; then
    top -bn1 | head -20
  fi

  echo ""
  log_info "磁盘使用情况:"
  df -h

  echo ""
  log_info "内存详情:"
  if require_cmd free; then
    free -h
  fi

  echo ""
  log_info "系统负载:"
  uptime

  # LC_ALL=C pins the "Mem:" row label, which is translated in other locales.
  mem_pct=$(LC_ALL=C free 2>/dev/null \
    | awk '/^Mem:/ && $2 > 0 { printf "%.1f", $3 / $2 * 100 }')
  # -P guarantees one line per filesystem even with long device names.
  disk_pct=$(df -P / 2>/dev/null \
    | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

  report_usage "内存" "$mem_pct"
  report_usage "磁盘" "$disk_pct"
}

# 2. Docker: service status, container list, resource usage, recent logs.
check_docker() {
  local -r log_lines=50
  local running stopped id name

  separator
  log_info "检查Docker容器"
  separator

  require_cmd docker || return 0

  echo ""
  log_info "Docker服务状态:"
  if have systemctl; then
    systemctl status docker --no-pager | head -20
  fi

  # Every later step talks to the daemon; report once instead of failing
  # repeatedly when it is unreachable.
  if ! docker ps -q >/dev/null 2>&1; then
    log_error "无法连接Docker守护进程,跳过容器检查"
    return 0
  fi

  echo ""
  log_info "运行中的容器:"
  docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"

  echo ""
  log_info "容器资源使用:"
  docker stats --no-stream

  running=$(docker ps -q | wc -l)
  stopped=$(docker ps -a -f "status=exited" -q | wc -l)
  log_info "运行中容器: $running"
  log_info "已停止容器: $stopped"

  if ((stopped > 0)); then
    log_warning "发现已停止的容器:"
    docker ps -a -f "status=exited" --format "table {{.Names}}\t{{.Status}}"
  fi

  echo ""
  log_info "检查容器日志(最近${log_lines}行):"
  while IFS= read -r id; do
    [[ -n $id ]] || continue
    name=$(docker inspect --format '{{.Name}}' "$id" 2>/dev/null)
    name=${name#/} # docker reports names with a leading slash
    echo ""
    log_info "容器: $name"
    docker logs --tail "$log_lines" "$id" 2>&1
  done < <(docker ps -q)
}

# 3. Nginx: service status, processes, listening sockets, config test, logs.
check_nginx() {
  separator
  log_info "检查Nginx"
  separator

  echo ""
  log_info "Nginx服务状态:"
  if have systemctl; then
    systemctl status nginx --no-pager | head -20
  fi

  echo ""
  log_info "Nginx进程:"
  # The [n] bracket keeps grep from matching its own command line.
  ps aux | grep '[n]ginx' || log_warning "未发现Nginx进程"

  echo ""
  log_info "Nginx监听端口:"
  grep_sockets -tlnp 'nginx' || log_warning "未发现Nginx监听端口"

  echo ""
  log_info "Nginx配置测试:"
  if require_cmd nginx; then
    nginx -t || log_error "Nginx配置测试失败"
  fi

  echo ""
  log_info "Nginx错误日志(最近50行):"
  tail -50 /var/log/nginx/error.log 2>/dev/null \
    || log_warning "未找到Nginx错误日志"

  echo ""
  log_info "Nginx访问日志(最近20行):"
  tail -20 /var/log/nginx/access.log 2>/dev/null \
    || log_warning "未找到Nginx访问日志"
}

# 4. Application: Node.js / PM2 processes, listening ports, local HTTP probe.
check_application() {
  local http_code

  separator
  log_info "检查应用服务"
  separator

  echo ""
  log_info "Node.js进程:"
  ps aux | grep '[n]ode' || log_warning "未发现Node.js进程"

  echo ""
  log_info "PM2进程(如果使用):"
  if have pm2; then
    pm2 list
    pm2 logs --lines 20 --nostream
  else
    log_info "未使用PM2"
  fi

  echo ""
  log_info "检查端口占用:"
  grep_sockets -tlnp "$WATCHED_PORTS_RE" || log_warning "未发现相关监听端口"

  echo ""
  log_info "测试本地应用连接:"
  if require_cmd curl; then
    # --max-time bounds the whole request: a server that accepts the
    # connection but never answers must not hang the diagnosis. curl writes
    # 000 as the status code when no HTTP response was received.
    http_code=$(curl -s -o /dev/null -I --connect-timeout 5 --max-time 10 \
      -w '%{http_code}' "$APP_URL" 2>/dev/null)
    if [[ $http_code =~ ^[1-5][0-9][0-9]$ ]]; then
      log_success "应用服务响应正常 (HTTP ${http_code})"
    else
      log_error "应用服务无响应"
    fi
  fi
}

# 5. Firewall and network: firewall rules, connections on watched ports,
#    tail of the system journal.
check_network() {
  separator
  log_info "检查防火墙和网络"
  separator

  echo ""
  log_info "防火墙状态:"
  if have ufw; then
    ufw status verbose
  elif have firewall-cmd; then
    firewall-cmd --list-all
  else
    log_info "未检测到防火墙"
  fi

  echo ""
  log_info "网络连接状态:"
  grep_sockets -an "$WATCHED_PORTS_RE" | head -20

  echo ""
  log_info "系统日志(最近错误):"
  if require_cmd journalctl; then
    journalctl -xe --no-pager | tail -50
  fi
}

# 6. Print remediation commands for the problems the checks can reveal.
suggest_fixes() {
  separator
  log_info "快速修复建议"
  separator

  echo ""
  log_warning "根据诊断结果,建议执行以下操作:"
  echo ""
  cat <<'EOF'
1. 如果Docker容器停止:
   docker-compose -f /path/to/docker-compose.prod.yml up -d
   docker-compose -f /path/to/docker-compose.prod.yml restart

2. 如果Nginx异常:
   sudo systemctl restart nginx
   sudo nginx -t  # 测试配置

3. 如果应用服务异常:
   docker logs <container_name>  # 查看日志
   docker restart <container_name>  # 重启容器

4. 如果资源耗尽:
   # 清理Docker镜像
   docker system prune -a -f
   # 清理日志
   sudo journalctl --vacuum-time=3d
   # 重启服务
   sudo systemctl restart docker

5. 查看详细日志:
   docker logs -f <container_name>  # 实时查看容器日志
   tail -f /var/log/nginx/error.log  # 实时查看Nginx错误日志
   journalctl -u docker -f  # 实时查看Docker服务日志
EOF
}

# Run every check in order, then print the fix suggestions.
full_diagnosis() {
  log_info "开始完整诊断..."
  check_system_resources
  echo ""
  check_docker
  echo ""
  check_nginx
  echo ""
  check_application
  echo ""
  check_network
  echo ""
  suggest_fixes
}

#######################################
# Interactive menu. Loops until the user picks 0 (exits the script with
# status 0) or stdin reaches end-of-file (returns 0), so piped or closed
# input ends the menu cleanly instead of looping or failing.
#######################################
main_menu() {
  local choice

  while true; do
    # Only clear a real terminal; redirected output should stay intact.
    if [[ -t 1 ]]; then
      clear 2>/dev/null || true
    fi

    separator
    echo "远程服务器诊断工具"
    separator
    echo ""
    cat <<'EOF'
1. 检查系统资源
2. 检查Docker容器
3. 检查Nginx
4. 检查应用服务
5. 检查防火墙和网络
6. 显示修复建议
7. 完整诊断
0. 退出
EOF
    echo ""

    if ! read -r -p "请选择操作 (0-7): " choice; then
      echo ""
      return 0
    fi

    case "$choice" in
      1) check_system_resources ;;
      2) check_docker ;;
      3) check_nginx ;;
      4) check_application ;;
      5) check_network ;;
      6) suggest_fixes ;;
      7) full_diagnosis ;;
      0)
        log_info "退出程序"
        exit 0
        ;;
      *) log_error "无效选择" ;;
    esac

    echo ""
    read -r -p "按回车键继续..." _ || return 0
  done
}

#######################################
# Entry point.
# Arguments: $1 - optional "--full" to run the complete diagnosis
#                 non-interactively; anything else opens the menu
#######################################
main() {
  log_info "远程服务器诊断工具启动"

  if [[ "${1:-}" == "--full" ]]; then
    full_diagnosis
  else
    main_menu
  fi
}

main "$@"