diff --git a/compose/monitoring/logging/DOCKER-LOGS-DASHBOARD.md b/compose/monitoring/logging/DOCKER-LOGS-DASHBOARD.md new file mode 100644 index 0000000..c1a0a62 --- /dev/null +++ b/compose/monitoring/logging/DOCKER-LOGS-DASHBOARD.md @@ -0,0 +1,235 @@ +# Docker Logs Dashboard - Grafana + +A comprehensive dashboard for viewing all Docker container logs via Loki. + +## Features + +### 📊 Panels Included + +1. **Docker Container Logs** (Main Panel) + - Real-time log streaming from all containers + - Filter by container, image, or search term + - Expandable log details + - Sortable (ascending/descending) + +2. **Log Volume by Container** + - Stacked bar chart showing log activity over time + - Helps identify chatty containers + - Per-container breakdown + +3. **Error Logs by Container** + - Time series of ERROR/EXCEPTION/FATAL/PANIC logs + - Automatically detects error patterns + - Useful for monitoring application health + +4. **Total Logs by Container** + - Bar gauge showing total log lines per container + - Color-coded thresholds (green → yellow → red) + - Based on selected time range + +5. **Statistics Panels** + - **Active Containers**: Count of containers that logged within the selected time range + - **Total Log Lines**: Sum of all logs in time range + - **Total Errors**: Count of log lines matching the error pattern in time range + - **Log Rate**: Logs per second (current rate) + +## Access the Dashboard + +1. Open Grafana: **https://logs.fig.systems** +2. 
Navigate to: **Dashboards** → **Loki** folder → **Docker Logs - All Containers** + +Or use direct link: +``` +https://logs.fig.systems/d/docker-logs-all +``` + +## Using the Filters + +### Container Filter +- Select specific containers to view +- Multi-select supported +- Default: "All" (shows all containers) + +Example: Select `traefik`, `loki`, `grafana` to view only those + +### Image Filter +- Filter by Docker image name +- Multi-select supported +- Useful for viewing all containers of same image + +Example: Filter by `grafana/loki:*` to see all Loki containers + +### Search Filter +- Free-text search with regex support +- Searches within log message content +- Case-sensitive by default; prefix the pattern with `(?i)` for case-insensitive matching + +Examples: +- `error` - Find logs containing "error" +- `(?i)started` - Case-insensitive "started" +- `HTTP [45][0-9]{2}` - HTTP 4xx/5xx errors +- `user.*login.*failed` - Failed login attempts + +## Time Range Selection + +Use Grafana's time picker (top right) to select: +- Last 5 minutes +- Last 15 minutes +- Last 1 hour (default) +- Last 24 hours +- Custom range + +## Auto-Refresh + +Dashboard auto-refreshes every **10 seconds** by default. + +Change refresh rate in top-right dropdown: +- 5s (very fast) +- 10s (default) +- 30s +- 1m +- 5m +- Off + +## LogQL Query Examples + +The dashboard uses these queries. You can modify panels or create new ones: + +### All logs from a container +```logql +{job="docker_all", container="traefik"} +``` + +### Errors only +```logql +{job="docker_all"} |~ "(?i)(error|exception|fatal|panic)" +``` + +### HTTP status codes +```logql +{job="docker_all", container="traefik"} | json | line_format "{{.status}} {{.method}} {{.path}}" +``` + +### Rate of logs +```logql +rate({job="docker_all"}[5m]) +``` + +### Count errors per container +```logql +sum by (container) (count_over_time({job="docker_all"} |~ "(?i)error" [1h])) +``` + +## Tips & Tricks + +### 1. 
Find Noisy Containers +- Use "Log Volume by Container" panel +- Tall bars indicate a high log volume +- Consider adjusting log levels for those containers + +### 2. Debug Application Issues +1. Set time range to when issue occurred +2. Filter to specific container +3. Search for error keywords +4. Expand log details for full context + +### 3. Monitor in Real-Time +1. Set time range to "Last 5 minutes" +2. Enable auto-refresh (5s or 10s) +3. Open "Docker Container Logs" panel +4. Watch logs stream live + +### 4. Export Logs +- Click on any log line +- Click "Copy" icon to copy log text +- Or use Loki API directly for bulk export + +### 5. Create Alerts +In Grafana, you can create alerts based on log patterns: +- Alert if errors exceed threshold +- Alert if specific pattern detected +- Alert if container stops logging (might be down) + +## Troubleshooting + +### No logs showing +1. Check Promtail is running: `docker ps | grep promtail` +2. Verify Loki datasource in Grafana is configured +3. Check time range (logs might be older/newer) +4. Verify containers are actually logging + +### Slow dashboard +- Narrow time range (use last 15m instead of 24h) +- Use container filter to reduce data +- Increase refresh interval to 30s or 1m + +### Missing containers +Your current Promtail config captures ALL Docker containers automatically. +If a container is missing, check: +1. Container is running: `docker ps` +2. Container has logs: `docker logs your-container` +3. Promtail can access Docker socket + +## Advanced Customization + +### Add a New Panel + +1. Click "Add Panel" in dashboard +2. Select "Logs" visualization +3. Use query: + ```logql + {job="docker_all", container="your-container"} + ``` +4. Configure options (time display, wrapping, etc.) +5. Save dashboard + +### Modify Existing Panels + +1. Click panel title → Edit +2. Modify LogQL query +3. Adjust visualization options +4. Save changes + +### Export Dashboard + +1. Dashboard settings (gear icon) +2. JSON Model +3. Copy JSON +4. 
Save to file for backup + +## Integration with Other Tools + +### View in Explore +- Click "Explore" on any panel +- Opens Loki Explore interface +- More advanced querying options +- Better for ad-hoc investigation + +### Share Dashboard +1. Click share icon (next to title) +2. Get shareable link +3. Or export snapshot + +### Embed in Other Apps +Use Grafana's embedding features to show logs in: +- Homarr dashboard +- Custom web apps +- Monitoring tools + +## Related Resources + +- [LogQL Documentation](https://grafana.com/docs/loki/latest/logql/) +- [Grafana Dashboards Guide](https://grafana.com/docs/grafana/latest/dashboards/) +- [Loki Best Practices](https://grafana.com/docs/loki/latest/best-practices/) + +## Support + +For issues with: +- **Dashboard**: Edit and customize as needed +- **Loki**: Check `/home/eduardo_figueroa/homelab/compose/monitoring/logging/` +- **Missing logs**: Verify Promtail configuration + +Dashboard file location: +``` +/home/eduardo_figueroa/homelab/compose/monitoring/logging/grafana-provisioning/dashboards/docker-logs.json +``` diff --git a/compose/monitoring/logging/grafana-provisioning/dashboards/docker-logs.json b/compose/monitoring/logging/grafana-provisioning/dashboards/docker-logs.json new file mode 100644 index 0000000..76a6148 --- /dev/null +++ b/compose/monitoring/logging/grafana-provisioning/dashboards/docker-logs.json @@ -0,0 +1,703 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "All Docker container logs in real-time", + "gridPos": { + "h": 24, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + 
"options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "{job=\"docker_all\", container=~\"$container\", image=~\"$image\"} |~ \"$search\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Docker Container Logs", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Log volume per container over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 50, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (container) (count_over_time({job=\"docker_all\", container=~\"$container\", 
image=~\"$image\"} |~ \"$search\" [$__interval]))", + "legendFormat": "{{container}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Log Volume by Container", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Count of ERROR level logs by container", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 3, + "options": { + "legend": { + "calcs": ["last"], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (container) (count_over_time({job=\"docker_all\", container=~\"$container\"} |~ \"(?i)(error|exception|fatal|panic)\" [$__interval]))", + "legendFormat": "{{container}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Error Logs by Container", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Total log lines per container", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1000 + }, + { + "color": "red", + "value": 10000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 4, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum by (container) (count_over_time({job=\"docker_all\", container=~\"$container\", image=~\"$image\"} |~ \"$search\" [$__range]))", + "legendFormat": "{{container}}", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Total Logs by Container (Time Range)", + "type": "bargauge" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Statistics about container logging", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 40 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(count by (container) (count_over_time({job=\"docker_all\"} [$__range])))", + "legendFormat": 
"Active Containers", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Active Containers", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Total log entries in selected time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10000 + }, + { + "color": "red", + "value": 100000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 40 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(count_over_time({job=\"docker_all\", container=~\"$container\", image=~\"$image\"} |~ \"$search\" [$__range]))", + "legendFormat": "Total Logs", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Total Log Lines", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Total errors in selected time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 40 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": 
["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(count_over_time({job=\"docker_all\", container=~\"$container\"} |~ \"(?i)(error|exception|fatal|panic)\" [$__range]))", + "legendFormat": "Errors", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Total Errors", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "description": "Logs per second rate", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "logs/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 40 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "textMode": "auto" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(rate({job=\"docker_all\", container=~\"$container\", image=~\"$image\"} |~ \"$search\" [$__rate_interval]))", + "legendFormat": "Rate", + "queryType": "instant", + "refId": "A" + } + ], + "title": "Log Rate", + "type": "stat" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "dark", + "tags": ["docker", "logs", "loki"], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Loki", + "value": "Loki" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + 
"type": "datasource" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "definition": "label_values(container)", + "hide": 0, + "includeAll": true, + "label": "Container", + "multi": true, + "name": "container", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(container)" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "loki", + "uid": "${datasource}" + }, + "definition": "label_values(image)", + "hide": 0, + "includeAll": true, + "label": "Image", + "multi": true, + "name": "image", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(image)" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "description": "Search within log messages (regex supported)", + "hide": 0, + "label": "Search", + "name": "search", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h"] + }, + "timezone": "", + "title": "Docker Logs - All Containers", + "uid": "docker-logs-all", + "version": 1, + "weekStart": "" +}