Add observability stack: ServiceMonitors, Tempo, OTel API instrumentation, dashboards
- Add ServiceMonitors for Traefik, ArgoCD, and Longhorn - Enable cert-manager ServiceMonitor via helm values - Deploy Grafana Tempo for distributed tracing (single-binary, Longhorn PVC) - Add Tempo datasource with trace-to-logs and trace-to-metrics correlation - Instrument API with OpenTelemetry SDK (Prometheus metrics + OTLP traces) - Replace console.log with pino structured logging + pino-http middleware - Add Grafana dashboards for Traefik, API overview, and PostgreSQL (CNPG)
This commit is contained in:
@@ -21,7 +21,12 @@ spec:
|
|||||||
image: ghcr.io/lazorgurl/homelab-api:latest
|
image: ghcr.io/lazorgurl/homelab-api:latest
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 4000
|
- containerPort: 4000
|
||||||
|
name: http
|
||||||
|
- containerPort: 9464
|
||||||
|
name: metrics
|
||||||
env:
|
env:
|
||||||
|
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||||
|
value: http://tempo.observability.svc:4318
|
||||||
- name: DATABASE_URL
|
- name: DATABASE_URL
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
|
|||||||
@@ -6,3 +6,4 @@ resources:
|
|||||||
- ingress.yaml
|
- ingress.yaml
|
||||||
- ghcr-pull-secret-sealed.yaml
|
- ghcr-pull-secret-sealed.yaml
|
||||||
- api-secrets-sealed.yaml
|
- api-secrets-sealed.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
|
|||||||
@@ -8,5 +8,10 @@ spec:
|
|||||||
- port: 80
|
- port: 80
|
||||||
targetPort: 4000
|
targetPort: 4000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
- port: 9464
|
||||||
|
targetPort: 9464
|
||||||
|
protocol: TCP
|
||||||
|
name: metrics
|
||||||
selector:
|
selector:
|
||||||
app: api
|
app: api
|
||||||
|
|||||||
14
apps/api/k8s/base/servicemonitor.yaml
Normal file
14
apps/api/k8s/base/servicemonitor.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# ServiceMonitor for the API: selects Services labelled app=api and scrapes
# their port named "metrics" at /metrics every 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: api
  labels: { app: api }
spec:
  selector:
    matchLabels: { app: api }
  endpoints:
    - port: metrics
      path: /metrics
      interval: 30s
|
||||||
@@ -10,13 +10,22 @@
|
|||||||
"test": "echo \"no tests yet\""
|
"test": "echo \"no tests yet\""
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@opentelemetry/auto-instrumentations-node": "^0.71.0",
|
||||||
|
"@opentelemetry/exporter-prometheus": "^0.213.0",
|
||||||
|
"@opentelemetry/exporter-trace-otlp-http": "^0.213.0",
|
||||||
|
"@opentelemetry/resources": "^2.6.0",
|
||||||
|
"@opentelemetry/sdk-node": "^0.213.0",
|
||||||
|
"@opentelemetry/semantic-conventions": "^1.40.0",
|
||||||
|
"cors": "^2.8.5",
|
||||||
"express": "^4.21.0",
|
"express": "^4.21.0",
|
||||||
"cors": "^2.8.5"
|
"pino": "^10.3.1",
|
||||||
|
"pino-http": "^11.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/express": "^5.0.0",
|
|
||||||
"@types/cors": "^2.8.17",
|
"@types/cors": "^2.8.17",
|
||||||
|
"@types/express": "^5.0.0",
|
||||||
"@types/node": "^22.10.0",
|
"@types/node": "^22.10.0",
|
||||||
|
"@types/pino-http": "^6.1.0",
|
||||||
"tsup": "^8.3.0",
|
"tsup": "^8.3.0",
|
||||||
"tsx": "^4.19.0",
|
"tsx": "^4.19.0",
|
||||||
"typescript": "^5.7.0"
|
"typescript": "^5.7.0"
|
||||||
|
|||||||
@@ -1,11 +1,17 @@
|
|||||||
|
import "./instrumentation";
|
||||||
|
|
||||||
import express from "express";
|
import express from "express";
|
||||||
import cors from "cors";
|
import cors from "cors";
|
||||||
|
import pino from "pino";
|
||||||
|
import pinoHttp from "pino-http";
|
||||||
|
|
||||||
|
const logger = pino({ name: "api" });
|
||||||
const app = express();
|
const app = express();
|
||||||
const port = process.env.PORT || 4000;
|
const port = process.env.PORT || 4000;
|
||||||
|
|
||||||
app.use(cors());
|
app.use(cors());
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
|
app.use(pinoHttp({ logger }));
|
||||||
|
|
||||||
app.get("/health", (_req, res) => {
|
app.get("/health", (_req, res) => {
|
||||||
res.json({ status: "ok", timestamp: new Date().toISOString() });
|
res.json({ status: "ok", timestamp: new Date().toISOString() });
|
||||||
@@ -16,5 +22,5 @@ app.get("/api", (_req, res) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
app.listen(port, () => {
|
app.listen(port, () => {
|
||||||
console.log(`API server running on port ${port}`);
|
logger.info(`API server running on port ${port}`);
|
||||||
});
|
});
|
||||||
|
|||||||
36
apps/api/src/instrumentation.ts
Normal file
36
apps/api/src/instrumentation.ts
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import { NodeSDK } from "@opentelemetry/sdk-node";
|
||||||
|
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
||||||
|
import { PrometheusExporter } from "@opentelemetry/exporter-prometheus";
|
||||||
|
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
||||||
|
import { resourceFromAttributes } from "@opentelemetry/resources";
|
||||||
|
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
||||||
|
|
||||||
|
// Pull-based metrics endpoint: the exporter listens on port 9464.
const prometheusExporter = new PrometheusExporter({ port: 9464 });

// Base OTLP/HTTP collector endpoint used when the environment does not set one.
const DEFAULT_OTLP_ENDPOINT = "http://tempo.observability.svc:4318";

// Signal-specific path that OTLP/HTTP trace receivers listen on.
const OTLP_TRACES_PATH = "/v1/traces";

/**
 * Turn a base OTLP endpoint into the full traces URL.
 *
 * The `url` option of OTLPTraceExporter is used verbatim, so a base endpoint
 * such as "http://host:4318" must have "/v1/traces" appended or spans are
 * posted to "/". A value that already ends in "/v1/traces" is kept as is, so
 * either form of OTEL_EXPORTER_OTLP_ENDPOINT works.
 *
 * @param {string} endpoint - Base endpoint or full traces URL.
 * @returns {string} URL ending in "/v1/traces".
 */
function resolveTracesUrl(endpoint) {
  const base = endpoint.replace(/\/+$/, "");
  return base.endsWith(OTLP_TRACES_PATH) ? base : `${base}${OTLP_TRACES_PATH}`;
}

// `||` rather than `??` on purpose: an empty or whitespace-only variable is
// not a usable endpoint and should fall back to the default as well.
const otlpEndpoint =
  process.env.OTEL_EXPORTER_OTLP_ENDPOINT?.trim() || DEFAULT_OTLP_ENDPOINT;

const traceExporter = new OTLPTraceExporter({
  url: resolveTracesUrl(otlpEndpoint),
});

const sdk = new NodeSDK({
  resource: resourceFromAttributes({
    [ATTR_SERVICE_NAME]: "api",
  }),
  metricReader: prometheusExporter,
  traceExporter,
  instrumentations: [
    getNodeAutoInstrumentations({
      // Filesystem instrumentation is switched off.
      "@opentelemetry/instrumentation-fs": { enabled: false },
    }),
  ],
});

sdk.start();

// On SIGTERM, shut the SDK down before exiting; the exit code reports
// whether shutdown succeeded (0) or failed (1).
process.on("SIGTERM", () => {
  sdk.shutdown().then(
    () => process.exit(0),
    () => process.exit(1),
  );
});
|
||||||
@@ -8,6 +8,7 @@ resources:
|
|||||||
- appsets/platform.yaml
|
- appsets/platform.yaml
|
||||||
- appsets/apps.yaml
|
- appsets/apps.yaml
|
||||||
- appsets/previews.yaml
|
- appsets/previews.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
patches:
|
patches:
|
||||||
- target:
|
- target:
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
|||||||
44
infra/kubernetes/argocd/servicemonitor.yaml
Normal file
44
infra/kubernetes/argocd/servicemonitor.yaml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# ServiceMonitors for the Argo CD components.
#
# The selectors must match the labels of the Services that actually expose a
# port named "metrics". NOTE(review): the label values below assume the
# upstream Argo CD install manifests, where the API server metrics Service is
# labelled argocd-server-metrics and the application controller metrics
# Service is labelled argocd-metrics - confirm against the deployed Services.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-server
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: argocd
spec:
  selector:
    matchLabels:
      # The plain argocd-server Service has no "metrics" port.
      app.kubernetes.io/name: argocd-server-metrics
  endpoints:
    - port: metrics
      interval: 30s
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-repo-server
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: argocd
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: argocd-repo-server
  endpoints:
    - port: metrics
      interval: 30s
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: argocd-application-controller
  namespace: argocd
  labels:
    app.kubernetes.io/part-of: argocd
spec:
  selector:
    matchLabels:
      # Controller metrics are exposed through the argocd-metrics Service.
      app.kubernetes.io/name: argocd-metrics
  endpoints:
    - port: metrics
      interval: 30s
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
# Grafana dashboard "API Overview", shipped as a ConfigMap carrying the
# grafana_dashboard label. Panels: request rate, 5xx error rate, request
# duration quantiles, and p95 DB query duration.
apiVersion: v1
kind: ConfigMap
metadata:
  name: api-overview-dashboard
  namespace: observability
  labels:
    grafana_dashboard: "1"
data:
  api-overview.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "id": null,
      "links": [],
      "panels": [
        {
          "title": "HTTP Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "targets": [
            {
              "expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"api\"}[5m])) by (http_route)",
              "legendFormat": "{{ http_route }}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "HTTP Error Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "targets": [
            {
              "expr": "sum(rate(http_server_request_duration_seconds_count{service_name=\"api\", http_status_code=~\"5..\"}[5m])) by (http_route)",
              "legendFormat": "{{ http_route }}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "HTTP Request Duration (p50 / p95 / p99)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "targets": [
            {
              "expr": "histogram_quantile(0.50, sum(rate(http_server_request_duration_seconds_bucket{service_name=\"api\"}[5m])) by (le))",
              "legendFormat": "p50"
            },
            {
              "expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket{service_name=\"api\"}[5m])) by (le))",
              "legendFormat": "p95"
            },
            {
              "expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket{service_name=\"api\"}[5m])) by (le))",
              "legendFormat": "p99"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        },
        {
          "title": "DB Query Duration",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(db_client_operation_duration_bucket{service_name=\"api\"}[5m])) by (le, db_operation_name))",
              "legendFormat": "p95 {{ db_operation_name }}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        }
      ],
      "schemaVersion": 39,
      "tags": ["homelab", "api", "otel"],
      "templating": { "list": [] },
      "time": { "from": "now-6h", "to": "now" },
      "title": "API Overview",
      "uid": "api-overview"
    }
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
# Grafana dashboard "PostgreSQL (CloudNativePG)", shipped as a ConfigMap
# carrying the grafana_dashboard label. Panels: active connections,
# transactions per second, replication lag, and database size.
#
# The replication lag panel is labelled and formatted in seconds:
# NOTE(review): cnpg_pg_replication_lag is described by the CloudNativePG
# default monitoring queries as lag behind the primary in seconds, not bytes -
# confirm if custom queries override it.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-dashboard
  namespace: observability
  labels:
    grafana_dashboard: "1"
data:
  postgres.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "id": null,
      "links": [],
      "panels": [
        {
          "title": "Active Connections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "targets": [
            {
              "expr": "cnpg_backends_total",
              "legendFormat": "{{ pod }}"
            }
          ]
        },
        {
          "title": "Transactions per Second",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "targets": [
            {
              "expr": "rate(cnpg_pg_stat_database_xact_commit{datname=\"homelab\"}[5m])",
              "legendFormat": "commits {{ pod }}"
            },
            {
              "expr": "rate(cnpg_pg_stat_database_xact_rollback{datname=\"homelab\"}[5m])",
              "legendFormat": "rollbacks {{ pod }}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "ops" }
          }
        },
        {
          "title": "Replication Lag (seconds)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "targets": [
            {
              "expr": "cnpg_pg_replication_lag",
              "legendFormat": "{{ pod }}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "s" }
          }
        },
        {
          "title": "Database Size",
          "type": "stat",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "targets": [
            {
              "expr": "cnpg_pg_database_size_bytes{datname=\"homelab\"}",
              "legendFormat": "{{ pod }}"
            }
          ],
          "fieldConfig": {
            "defaults": { "unit": "bytes" }
          }
        }
      ],
      "schemaVersion": 39,
      "tags": ["homelab", "postgres", "cnpg"],
      "templating": { "list": [] },
      "time": { "from": "now-6h", "to": "now" },
      "title": "PostgreSQL (CloudNativePG)",
      "uid": "postgres-cnpg"
    }
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
# Grafana dashboard "Traefik", shipped as a ConfigMap carrying the
# grafana_dashboard label. Panels: entrypoint request rate, 4xx/5xx error
# rate, request duration quantiles, and open connections.
apiVersion: v1
kind: ConfigMap
metadata:
  name: traefik-dashboard
  namespace: observability
  labels:
    grafana_dashboard: "1"
data:
  traefik.json: |
    {
      "annotations": { "list": [] },
      "editable": true,
      "graphTooltip": 1,
      "id": null,
      "links": [],
      "panels": [
        {
          "title": "Request Rate",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
          "targets": [
            {
              "expr": "sum(rate(traefik_entrypoint_requests_total[5m])) by (entrypoint)",
              "legendFormat": "{{ entrypoint }}"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Error Rate (4xx/5xx)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
          "targets": [
            {
              "expr": "sum(rate(traefik_entrypoint_requests_total{code=~\"4..\"}[5m]))",
              "legendFormat": "4xx"
            },
            {
              "expr": "sum(rate(traefik_entrypoint_requests_total{code=~\"5..\"}[5m]))",
              "legendFormat": "5xx"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "reqps" } }
        },
        {
          "title": "Request Duration (p50 / p95 / p99)",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
          "targets": [
            {
              "expr": "histogram_quantile(0.50, sum(rate(traefik_entrypoint_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p50"
            },
            {
              "expr": "histogram_quantile(0.95, sum(rate(traefik_entrypoint_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p95"
            },
            {
              "expr": "histogram_quantile(0.99, sum(rate(traefik_entrypoint_request_duration_seconds_bucket[5m])) by (le))",
              "legendFormat": "p99"
            }
          ],
          "fieldConfig": { "defaults": { "unit": "s" } }
        },
        {
          "title": "Open Connections",
          "type": "timeseries",
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
          "targets": [
            {
              "expr": "sum(traefik_entrypoint_open_connections) by (entrypoint)",
              "legendFormat": "{{ entrypoint }}"
            }
          ]
        }
      ],
      "schemaVersion": 39,
      "tags": ["homelab", "traefik", "ingress"],
      "templating": { "list": [] },
      "time": { "from": "now-6h", "to": "now" },
      "title": "Traefik",
      "uid": "traefik"
    }
|
||||||
@@ -15,3 +15,22 @@ data:
|
|||||||
url: http://loki.observability.svc:3100
|
url: http://loki.observability.svc:3100
|
||||||
jsonData:
|
jsonData:
|
||||||
maxLines: 1000
|
maxLines: 1000
|
||||||
|
derivedFields:
|
||||||
|
- datasourceUid: tempo
|
||||||
|
matcherRegex: '"traceID":"(\w+)"'
|
||||||
|
name: TraceID
|
||||||
|
url: "$${__value.raw}"
|
||||||
|
- name: Tempo
|
||||||
|
type: tempo
|
||||||
|
uid: tempo
|
||||||
|
access: proxy
|
||||||
|
url: http://tempo.observability.svc:3100
|
||||||
|
jsonData:
|
||||||
|
tracesToLogs:
|
||||||
|
datasourceUid: loki
|
||||||
|
filterByTraceID: true
|
||||||
|
filterBySpanID: false
|
||||||
|
tracesToMetrics:
|
||||||
|
datasourceUid: prometheus
|
||||||
|
serviceMap:
|
||||||
|
datasourceUid: prometheus
|
||||||
|
|||||||
@@ -2,5 +2,9 @@ apiVersion: kustomize.config.k8s.io/v1beta1
|
|||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
- application.yaml
|
- application.yaml
|
||||||
|
- grafana-admin-sealed.yaml
|
||||||
- grafana-datasources.yaml
|
- grafana-datasources.yaml
|
||||||
- dashboards/cluster-overview.yaml
|
- dashboards/cluster-overview.yaml
|
||||||
|
- dashboards/traefik.yaml
|
||||||
|
- dashboards/api-overview.yaml
|
||||||
|
- dashboards/postgres.yaml
|
||||||
|
|||||||
@@ -4,3 +4,4 @@ resources:
|
|||||||
- kube-prometheus-stack/
|
- kube-prometheus-stack/
|
||||||
- loki/
|
- loki/
|
||||||
- promtail/
|
- promtail/
|
||||||
|
- tempo/
|
||||||
|
|||||||
41
infra/kubernetes/observability/tempo/application.yaml
Normal file
41
infra/kubernetes/observability/tempo/application.yaml
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# Argo CD Application that installs the grafana/tempo Helm chart (1.12.0)
# into the observability namespace with automated sync.
# NOTE(review): nesting of retention/resources/persistence follows the chart's
# values layout - confirm against the committed file.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tempo
  namespace: argocd
spec:
  project: default
  source:
    repoURL: https://grafana.github.io/helm-charts
    chart: tempo
    targetRevision: 1.12.0
    helm:
      valuesObject:
        tempo:
          # OTLP receivers: gRPC on 4317, HTTP on 4318.
          receivers:
            otlp:
              protocols:
                grpc: { endpoint: "0.0.0.0:4317" }
                http: { endpoint: "0.0.0.0:4318" }
          retention: 168h
          resources:
            requests: { memory: 256Mi, cpu: 100m }
            limits: { memory: 1Gi }
        persistence:
          enabled: true
          storageClassName: longhorn
          size: 10Gi
  destination:
    server: https://kubernetes.default.svc
    namespace: observability
  syncPolicy:
    automated: { prune: true, selfHeal: true }
    syncOptions:
      - CreateNamespace=true
|
||||||
4
infra/kubernetes/observability/tempo/kustomization.yaml
Normal file
4
infra/kubernetes/observability/tempo/kustomization.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
# Kustomize entry point for the Tempo directory; its only resource is the
# Argo CD Application manifest.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: [application.yaml]
|
||||||
@@ -15,6 +15,9 @@ spec:
|
|||||||
valuesObject:
|
valuesObject:
|
||||||
crds:
|
crds:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
prometheus:
|
||||||
|
servicemonitor:
|
||||||
|
enabled: true
|
||||||
destination:
|
destination:
|
||||||
server: https://kubernetes.default.svc
|
server: https://kubernetes.default.svc
|
||||||
namespace: cert-manager
|
namespace: cert-manager
|
||||||
|
|||||||
@@ -3,3 +3,4 @@ kind: Kustomization
|
|||||||
resources:
|
resources:
|
||||||
- application.yaml
|
- application.yaml
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
|
|||||||
15
infra/kubernetes/platform/longhorn/servicemonitor.yaml
Normal file
15
infra/kubernetes/platform/longhorn/servicemonitor.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# ServiceMonitor for Longhorn: selects Services labelled app=longhorn-manager
# and scrapes their port named "manager" at /metrics every 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: longhorn
  namespace: longhorn-system
  labels: { app: longhorn }
spec:
  selector:
    matchLabels: { app: longhorn-manager }
  endpoints:
    - port: manager
      path: /metrics
      interval: 30s
|
||||||
@@ -8,3 +8,4 @@ resources:
|
|||||||
- ingressroute-grafana.yaml
|
- ingressroute-grafana.yaml
|
||||||
- ingressroute-longhorn.yaml
|
- ingressroute-longhorn.yaml
|
||||||
- certificate-internal.yaml
|
- certificate-internal.yaml
|
||||||
|
- servicemonitor.yaml
|
||||||
|
|||||||
15
infra/kubernetes/platform/traefik/servicemonitor.yaml
Normal file
15
infra/kubernetes/platform/traefik/servicemonitor.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# ServiceMonitor for Traefik: selects Services labelled
# app.kubernetes.io/name=traefik and scrapes their port named "metrics" at
# /metrics every 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: traefik
  namespace: kube-system
  labels: { app: traefik }
spec:
  selector:
    matchLabels: { app.kubernetes.io/name: traefik }
  endpoints:
    - port: metrics
      path: /metrics
      interval: 30s
|
||||||
1781
pnpm-lock.yaml
generated
1781
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user