commit 52b62f06a35db0747590a879f7367f8f1d5c54c9
Author: woozu-shin
Date:   Sun Jun 8 15:53:03 2025 +0900

    [NO-ISSUE] Initialize project

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cbdfb56
--- /dev/null
+++ b/README.md
@@ -0,0 +1,87 @@
+# Docker Monitoring Stack
+
+이 프로젝트는 Docker 컨테이너의 CPU, 메모리, 네트워크, 디스크 사용량을 모니터링하기 위한 Prometheus + Grafana 스택입니다.
+
+## 구성 요소
+
+- **Prometheus**: 메트릭 수집 및 저장
+- **Grafana**: 시각화 대시보드
+- **cAdvisor**: Docker 컨테이너 메트릭 수집
+- **Node Exporter**: 시스템 메트릭 수집
+
+## 사용 방법
+
+### 1. 스택 시작
+
+```bash
+./start.sh
+```
+
+또는 직접 docker-compose 사용:
+
+```bash
+docker-compose up -d
+```
+
+### 2. 접속 정보
+
+- **Grafana**: http://localhost:8883
+  - 사용자명: `admin`
+  - 비밀번호: `admin`
+
+- **Prometheus**: http://localhost:8882
+- **cAdvisor**: http://localhost:8881
+- **Node Exporter**: http://localhost:9100
+
+### 3. 대시보드
+
+Grafana에 자동으로 프로비저닝되는 대시보드:
+- Docker Container Monitoring
+  - 컨테이너 CPU 사용률
+  - 컨테이너 메모리 사용량
+  - 네트워크 I/O
+  - 디스크 I/O
+
+### 4. 스택 중지
+
+```bash
+docker-compose down
+```
+
+### 5. 로그 확인
+
+```bash
+docker-compose logs -f
+```
+
+## 파일 구조
+
+```
+├── docker-compose.yml          # 메인 컨테이너 설정
+├── prometheus/
+│   └── prometheus.yml          # Prometheus 설정
+├── grafana/
+│   └── provisioning/
+│       ├── datasources/
+│       │   └── datasource.yml  # Prometheus 데이터소스 설정
+│       └── dashboards/
+│           ├── dashboard.yml   # 대시보드 프로비저닝 설정
+│           └── docker-dashboard.json  # Docker 모니터링 대시보드
+├── start.sh                    # 시작 스크립트
+└── README.md                   # 이 파일
+```
+
+## 주요 메트릭
+
+- `container_cpu_usage_seconds_total`: 컨테이너 CPU 사용시간
+- `container_memory_usage_bytes`: 컨테이너 메모리 사용량
+- `container_network_receive_bytes_total`: 네트워크 수신 바이트
+- `container_network_transmit_bytes_total`: 네트워크 송신 바이트
+- `container_fs_reads_bytes_total`: 디스크 읽기 바이트
+- `container_fs_writes_bytes_total`: 디스크 쓰기 바이트
+
+## 문제 해결
+
+1. **포트 충돌**: 다른 서비스에서 포트를 사용 중인 경우 docker-compose.yml의 포트를 변경하세요.
+2. **권한 문제**: cAdvisor가 Docker 메트릭에 접근하려면 권한이 필요합니다.
+3. **데이터 지속성**: Docker 볼륨을 사용하여 Prometheus와 Grafana 데이터가 보존됩니다.
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..629f787
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,73 @@
+version: '3.8'
+
+services:
+  # Prometheus: scrapes and stores metrics (UI published on host port 8882).
+  prometheus:
+    image: prom/prometheus:latest
+    container_name: prometheus
+    ports:
+      - "8882:9090"
+    volumes:
+      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
+      - prometheus_data:/prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--storage.tsdb.retention.time=200h'
+      - '--web.enable-lifecycle'
+    restart: unless-stopped
+
+  # Grafana: dashboards (UI published on host port 8883); provisioning is mounted from ./grafana/provisioning.
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grafana
+    ports:
+      - "8883:3000"
+    volumes:
+      - grafana_data:/var/lib/grafana
+      - ./grafana/provisioning:/etc/grafana/provisioning
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      # Override by exporting GF_SECURITY_ADMIN_PASSWORD before starting the stack; falls back to "admin".
+      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
+    restart: unless-stopped
+
+  # cAdvisor: per-container metrics (published on host port 8881).
+  cadvisor:
+    image: gcr.io/cadvisor/cadvisor:latest
+    container_name: cadvisor
+    ports:
+      - "8881:8080"
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+      - /dev/disk/:/dev/disk:ro
+    privileged: true
+    devices:
+      - /dev/kmsg
+    restart: unless-stopped
+
+  # Node Exporter: host metrics (published on host port 9100).
+  node_exporter:
+    image: prom/node-exporter:latest
+    container_name: node_exporter
+    ports:
+      - "9100:9100"
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.rootfs=/rootfs'
+      - '--path.sysfs=/host/sys'
+      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
+    restart: unless-stopped
+
+volumes:
+  prometheus_data:
+  grafana_data:
\ No newline at end of file
diff --git a/grafana/provisioning/dashboards/dashboard.yml b/grafana/provisioning/dashboards/dashboard.yml
new file mode 100644
index 0000000..a6422e3
--- /dev/null
+++ 
b/grafana/provisioning/dashboards/dashboard.yml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+  - name: 'docker-monitoring'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
\ No newline at end of file
diff --git a/grafana/provisioning/dashboards/docker-dashboard.json b/grafana/provisioning/dashboards/docker-dashboard.json
new file mode 100644
index 0000000..21ce3fe
--- /dev/null
+++ b/grafana/provisioning/dashboards/docker-dashboard.json
@@ -0,0 +1,445 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "Prometheus",
+      "fill": 1,
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "rate(container_cpu_usage_seconds_total{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}[1m]) * 100",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Container CPU Usage",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "percent",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "Prometheus",
+      "fill": 1,
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "container_memory_usage_bytes{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Container Memory Usage",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "bytes",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "Prometheus",
+      "fill": 1,
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 0,
+        "y": 9
+      },
+      "id": 3,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "rate(container_network_receive_bytes_total{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}[1m])",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}} - RX",
+          "refId": "A"
+        },
+        {
+          "expr": "rate(container_network_transmit_bytes_total{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}[1m])",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}} - TX",
+          "refId": "B"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Container Network I/O",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "Bps",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": "Prometheus",
+      "fill": 1,
+      "gridPos": {
+        "h": 9,
+        "w": 12,
+        "x": 12,
+        "y": 9
+      },
+      "id": 4,
+      "legend": {
+        "avg": false,
+        "current": false,
+        "max": false,
+        "min": false,
+        "show": true,
+        "total": false,
+        "values": false
+      },
+      "lines": true,
+      "linewidth": 1,
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 2,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "rate(container_fs_reads_bytes_total{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}[1m])",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}} - Read",
+          "refId": "A"
+        },
+        {
+          "expr": "rate(container_fs_writes_bytes_total{name=~\"$include_containers\",name!~\"$exclude_containers\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}[1m])",
+          "format": "time_series",
+          "intervalFactor": 1,
+          "legendFormat": "{{name}} - Write",
+          "refId": "B"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeRegions": [],
+      "timeShift": null,
+      "title": "Container Disk I/O",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "format": "Bps",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        },
+        {
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    }
+  ],
+  "schemaVersion": 16,
+  "style": "dark",
+  "tags": ["docker"],
+  "templating": {
+    "list": [
+      {
+        "allValue": ".*",
+        "current": {
+          "text": "All",
+          "value": [
+            "$__all"
+          ]
+        },
+        "datasource": "Prometheus",
+        "definition": "label_values(container_cpu_usage_seconds_total{name=~\".+\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}, name)",
+        "hide": 0,
+        "includeAll": true,
+        "label": "Include Containers",
+        "multi": true,
+        "name": "include_containers",
+        "options": [],
+        "query": "label_values(container_cpu_usage_seconds_total{name=~\".+\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}, name)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      },
+      {
+        "allValue": "",
+        "current": {
+          "text": "None",
+          "value": [
+            ""
+          ]
+        },
+        "datasource": "Prometheus",
+        "definition": "label_values(container_cpu_usage_seconds_total{name=~\".+\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}, name)",
+        "hide": 0,
+        "includeAll": false,
+        "label": "Exclude Containers",
+        "multi": true,
+        "name": "exclude_containers",
+        "options": [],
+        "query": "label_values(container_cpu_usage_seconds_total{name=~\".+\",id=~\"/docker/[0-9a-f]+$\",job=\"cadvisor\"}, name)",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "sort": 1,
+        "tagValuesQuery": "",
+        "tags": [],
+        "tagsQuery": "",
+        "type": "query",
+        "useTags": false
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": ["1s", "2s", "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"],
+    "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
+  },
+  "timezone": "",
+  "title": "Docker Container Monitoring",
+  "uid": "docker-monitoring",
+  "version": 1
+}
\ No newline at end of file
diff --git a/grafana/provisioning/datasources/datasource.yml b/grafana/provisioning/datasources/datasource.yml
new file mode 100644
index 0000000..bb37f13
--- /dev/null
+++ b/grafana/provisioning/datasources/datasource.yml
@@ -0,0 +1,11 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    orgId: 1
+    url: http://prometheus:9090
+    basicAuth: false
+    isDefault: true
+    editable: true
\ No newline at end of file
diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml
new file mode 100644
index 0000000..799dce5
--- /dev/null
+++ b/prometheus/prometheus.yml
@@ -0,0 +1,24 @@
+global:
+  scrape_interval: 2s
+  evaluation_interval: 2s
+
+rule_files:
+  # - "first_rules.yml"
+  # - "second_rules.yml"
+
+scrape_configs:
+  - job_name: 'prometheus'
+    scrape_interval: 2s
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'node'
+    scrape_interval: 2s
+    static_configs:
+      - targets: ['node_exporter:9100']
+
+  # cAdvisor is scraped by this single job; the Grafana dashboard queries filter on job="cadvisor".
+  - job_name: 'cadvisor'
+    scrape_interval: 1s
+    static_configs:
+      - targets: ['cadvisor:8080']
\ No newline at end of file
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..b97aa93
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Starts (or restarts) the monitoring stack defined in docker-compose.yml.
+
+# Run from the script's own directory so docker-compose.yml is found
+# regardless of the caller's working directory.
+cd "$(dirname "$0")" || exit 1
+
+echo "Starting Docker monitoring stack..."
+echo "This will start Prometheus, Grafana, cAdvisor, and Node Exporter"
+
+# Recreate the services; do not report success if startup failed.
+docker compose down
+if ! docker compose up -d; then
+  echo "Failed to start the monitoring stack." >&2
+  exit 1
+fi
+
+echo ""
+echo "Services started successfully!"
+echo ""
+echo "Access URLs:"
+echo "- Grafana: http://localhost:8883 (admin/admin)"
+echo "- Prometheus: http://localhost:8882"
+echo "- cAdvisor: http://localhost:8881"
+echo "- Node Exporter: http://localhost:9100"
+echo ""
+echo "To stop the services, run: docker compose down"
+echo "To view logs, run: docker compose logs -f"