Add observability stack: ServiceMonitors, Tempo, OTel API instrumentation, dashboards
- Add ServiceMonitors for Traefik, ArgoCD, and Longhorn
- Enable cert-manager ServiceMonitor via helm values
- Deploy Grafana Tempo for distributed tracing (single-binary, Longhorn PVC)
- Add Tempo datasource with trace-to-logs and trace-to-metrics correlation
- Instrument API with OpenTelemetry SDK (Prometheus metrics + OTLP traces)
- Replace console.log with pino structured logging + pino-http middleware
- Add Grafana dashboards for Traefik, API overview, and PostgreSQL (CNPG)
This commit is contained in:
@@ -21,7 +21,12 @@ spec:
|
||||
image: ghcr.io/lazorgurl/homelab-api:latest
|
||||
ports:
|
||||
- containerPort: 4000
|
||||
name: http
|
||||
- containerPort: 9464
|
||||
name: metrics
|
||||
env:
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: http://tempo.observability.svc:4318
|
||||
- name: DATABASE_URL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
|
||||
@@ -6,3 +6,4 @@ resources:
|
||||
- ingress.yaml
|
||||
- ghcr-pull-secret-sealed.yaml
|
||||
- api-secrets-sealed.yaml
|
||||
- servicemonitor.yaml
|
||||
|
||||
@@ -8,5 +8,10 @@ spec:
|
||||
- port: 80
|
||||
targetPort: 4000
|
||||
protocol: TCP
|
||||
name: http
|
||||
- port: 9464
|
||||
targetPort: 9464
|
||||
protocol: TCP
|
||||
name: metrics
|
||||
selector:
|
||||
app: api
|
||||
|
||||
14
apps/api/k8s/base/servicemonitor.yaml
Normal file
14
apps/api/k8s/base/servicemonitor.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# ServiceMonitor for the API: tells the Prometheus Operator which Service
# to scrape and how.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: api
  labels:
    app: api
spec:
  # Selects Services carrying the label app=api.
  selector:
    matchLabels:
      app: api
  endpoints:
    # "metrics" refers to a Service port by name, not by number.
    - port: metrics
      interval: 30s
      path: /metrics
|
||||
@@ -10,13 +10,22 @@
|
||||
"test": "echo \"no tests yet\""
|
||||
},
|
||||
"dependencies": {
|
||||
"@opentelemetry/auto-instrumentations-node": "^0.71.0",
|
||||
"@opentelemetry/exporter-prometheus": "^0.213.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": "^0.213.0",
|
||||
"@opentelemetry/resources": "^2.6.0",
|
||||
"@opentelemetry/sdk-node": "^0.213.0",
|
||||
"@opentelemetry/semantic-conventions": "^1.40.0",
|
||||
"cors": "^2.8.5",
|
||||
"express": "^4.21.0",
|
||||
"cors": "^2.8.5"
|
||||
"pino": "^10.3.1",
|
||||
"pino-http": "^11.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/express": "^5.0.0",
|
||||
"@types/cors": "^2.8.17",
|
||||
"@types/express": "^5.0.0",
|
||||
"@types/node": "^22.10.0",
|
||||
"@types/pino-http": "^6.1.0",
|
||||
"tsup": "^8.3.0",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.7.0"
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
import "./instrumentation";
|
||||
|
||||
import express from "express";
|
||||
import cors from "cors";
|
||||
import pino from "pino";
|
||||
import pinoHttp from "pino-http";
|
||||
|
||||
const logger = pino({ name: "api" });
|
||||
const app = express();
|
||||
const port = process.env.PORT || 4000;
|
||||
|
||||
app.use(cors());
|
||||
app.use(express.json());
|
||||
app.use(pinoHttp({ logger }));
|
||||
|
||||
app.get("/health", (_req, res) => {
|
||||
res.json({ status: "ok", timestamp: new Date().toISOString() });
|
||||
@@ -16,5 +22,5 @@ app.get("/api", (_req, res) => {
|
||||
});
|
||||
|
||||
app.listen(port, () => {
|
||||
console.log(`API server running on port ${port}`);
|
||||
logger.info(`API server running on port ${port}`);
|
||||
});
|
||||
|
||||
36
apps/api/src/instrumentation.ts
Normal file
36
apps/api/src/instrumentation.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { NodeSDK } from "@opentelemetry/sdk-node";
|
||||
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
||||
import { PrometheusExporter } from "@opentelemetry/exporter-prometheus";
|
||||
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
||||
import { resourceFromAttributes } from "@opentelemetry/resources";
|
||||
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
||||
|
||||
// OpenTelemetry bootstrap for the API process: serves Prometheus metrics on
// port 9464 and exports traces over OTLP/HTTP.

// Collector base endpoint used when OTEL_EXPORTER_OTLP_ENDPOINT is unset/empty.
const DEFAULT_OTLP_ENDPOINT = "http://tempo.observability.svc:4318";
// Signal-specific path that OTLP/HTTP receivers expose for traces.
const OTLP_TRACES_PATH = "/v1/traces";

/**
 * Builds the full OTLP/HTTP traces URL from a collector endpoint.
 *
 * The exporter's `url` option is used verbatim (no path is appended), whereas
 * OTEL_EXPORTER_OTLP_ENDPOINT conventionally holds only the base endpoint.
 * Accepts either form so that both "http://host:4318" and
 * "http://host:4318/v1/traces" resolve to the same traces URL.
 *
 * @param endpoint Base collector endpoint or full traces URL.
 * @returns URL ending in "/v1/traces".
 */
function resolveTracesUrl(endpoint: string): string {
  const base = endpoint.trim().replace(/\/+$/, "");
  return base.endsWith(OTLP_TRACES_PATH) ? base : `${base}${OTLP_TRACES_PATH}`;
}

const prometheusExporter = new PrometheusExporter({ port: 9464 });

const traceExporter = new OTLPTraceExporter({
  url: resolveTracesUrl(
    // `||` rather than `??` on purpose: an empty string is not a usable
    // endpoint and should fall back to the default as well.
    process.env.OTEL_EXPORTER_OTLP_ENDPOINT?.trim() || DEFAULT_OTLP_ENDPOINT,
  ),
});

const sdk = new NodeSDK({
  resource: resourceFromAttributes({
    [ATTR_SERVICE_NAME]: "api",
  }),
  metricReader: prometheusExporter,
  traceExporter,
  instrumentations: [
    getNodeAutoInstrumentations({
      // Filesystem instrumentation is explicitly turned off.
      "@opentelemetry/instrumentation-fs": { enabled: false },
    }),
  ],
});

sdk.start();

// On SIGTERM, shut the SDK down and then exit: status 0 if shutdown
// resolved, 1 if it rejected.
process.on("SIGTERM", () => {
  sdk.shutdown().then(
    () => process.exit(0),
    () => process.exit(1),
  );
});
|
||||
Reference in New Issue
Block a user