Remove mock data from harness and add agent credential healthchecks
All checks were successful
CI / lint-and-test (push) Successful in 25s
Deploy Production / deploy (push) Successful in 59s
CI / build (push) Successful in 1m11s

Strip all seed/mock data (fake tasks, models, usage entries, agent configs)
so the dashboard starts clean and populates from real API state. Add
/api/agents/health endpoint that validates each agent's provider credentials
and CLI availability.
This commit is contained in:
Julia McGhee
2026-03-21 19:42:53 +00:00
parent 9a40240bd2
commit df1111da15
4 changed files with 140 additions and 176 deletions

View File

@@ -0,0 +1,131 @@
import { NextResponse } from "next/server";
import { getAllAgentConfigs, AGENT_RUNTIMES, AgentConfig } from "@/lib/agents";
import { getRawCredentialsByProvider, Provider } from "@/lib/credentials";
// Environment-variable name each provider conventionally uses for its API key.
// NOTE(review): this map is not referenced anywhere in this file and is not
// exported — confirm a caller exists (e.g. for surfacing "set $VAR" hints in
// the UI), otherwise it should be removed as dead code.
const PROVIDER_ENV_VARS: Record<string, string> = {
anthropic: "ANTHROPIC_API_KEY",
openai: "OPENAI_API_KEY",
google: "GOOGLE_API_KEY",
openrouter: "OPENROUTER_API_KEY",
"opencode-zen": "OPENCODE_ZEN_API_KEY",
};
// Upper bound for each credential-validation request. Without a timeout a
// single unresponsive provider API would hang the entire health endpoint,
// because GET awaits every agent check via Promise.all.
const VALIDATION_TIMEOUT_MS = 10_000;

/**
 * Per-provider credential validators. Each issues a cheap, read-only
 * "list models" request and reports whether the credential was accepted
 * (HTTP 2xx). Providers without an entry here (e.g. opencode-zen) are not
 * validated; checkAgent leaves credentialValid as null for them.
 */
const PROVIDER_VALIDATION: Record<string, (token: string, baseUrl?: string) => Promise<boolean>> = {
  async anthropic(token) {
    const res = await fetch("https://api.anthropic.com/v1/models", {
      headers: { "x-api-key": token, "anthropic-version": "2023-06-01" },
      signal: AbortSignal.timeout(VALIDATION_TIMEOUT_MS),
    });
    return res.ok;
  },
  // baseUrl supports OpenAI-compatible gateways; falls back to api.openai.com.
  async openai(token, baseUrl) {
    const res = await fetch(`${baseUrl || "https://api.openai.com"}/v1/models`, {
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(VALIDATION_TIMEOUT_MS),
    });
    return res.ok;
  },
  // Google authenticates via query param rather than a header.
  async google(token) {
    const res = await fetch(
      `https://generativelanguage.googleapis.com/v1beta/models?key=${token}`,
      { signal: AbortSignal.timeout(VALIDATION_TIMEOUT_MS) }
    );
    return res.ok;
  },
  async openrouter(token) {
    const res = await fetch("https://openrouter.ai/api/v1/models", {
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(VALIDATION_TIMEOUT_MS),
    });
    return res.ok;
  },
};
/**
 * Health report for a single configured agent, as returned by
 * GET /api/agents/health.
 */
export interface AgentHealthStatus {
// Identity of the agent config being checked.
agentId: string;
agentName: string;
runtime: string;
provider: string;
modelId: string;
// True when at least one stored credential exists for the agent's provider.
credentialConfigured: boolean;
credentialValid: boolean | null; // null = not checked (no credential)
cliInstalled: boolean | null; // null = not checked
// Populated when credential validation threw (network failure, etc.).
error?: string;
}
/**
 * Check whether a CLI command is resolvable on PATH.
 *
 * Uses execFileSync with an argument array instead of interpolating the
 * command name into a shell string (`which ${command}` via execSync), so an
 * unusual command name cannot be interpreted as shell syntax.
 *
 * NOTE: `which` is POSIX-only; on Windows this will always report false.
 *
 * @param command - executable name to resolve (e.g. "claude").
 * @returns true when the command resolves on PATH, false otherwise.
 */
async function checkCliInstalled(command: string): Promise<boolean> {
  try {
    const { execFileSync } = require("node:child_process");
    execFileSync("which", [command], { stdio: "ignore" });
    return true;
  } catch {
    // `which` exits non-zero when not found; also covers `which` missing.
    return false;
  }
}
/**
 * Build the full health status for a single agent config:
 * 1. Is the runtime's CLI installed?
 * 2. Is a credential stored for the agent's provider?
 * 3. Does that credential pass the provider's validation call (when a
 *    validator exists for the provider)?
 *
 * Never throws: failures are folded into the returned status so one broken
 * agent cannot reject the Promise.all in GET.
 */
async function checkAgent(config: AgentConfig): Promise<AgentHealthStatus> {
  const status: AgentHealthStatus = {
    agentId: config.id,
    agentName: config.name,
    runtime: config.runtime,
    provider: config.provider,
    modelId: config.modelId,
    credentialConfigured: false,
    credentialValid: null,
    cliInstalled: null,
  };

  // Check CLI. Guard against an unknown runtime id: previously
  // AGENT_RUNTIMES[config.runtime] could be undefined and reading
  // .cliCommand would throw a TypeError that escaped to the caller.
  const runtime = AGENT_RUNTIMES[config.runtime];
  if (runtime) {
    try {
      status.cliInstalled = await checkCliInstalled(runtime.cliCommand);
    } catch {
      status.cliInstalled = false;
    }
  } else {
    status.cliInstalled = false;
    status.error = `Unknown runtime: ${config.runtime}`;
  }

  // Check credential exists.
  const creds = getRawCredentialsByProvider(config.provider as Provider);
  status.credentialConfigured = creds.length > 0;
  if (!status.credentialConfigured) {
    return status;
  }

  // Validate the first stored credential against the provider API.
  const validator = PROVIDER_VALIDATION[config.provider];
  if (validator) {
    try {
      status.credentialValid = await validator(creds[0].token, creds[0].baseUrl);
    } catch (err) {
      status.credentialValid = false;
      status.error = err instanceof Error ? err.message : "Validation failed";
    }
  } else {
    // No validator for this provider (e.g. opencode-zen) — just confirm credential exists
    status.credentialValid = null;
  }
  return status;
}
/**
 * GET /api/agents/health
 *
 * Runs a health check for every configured agent in parallel and returns
 * the per-agent statuses plus an aggregate summary. An agent counts as
 * healthy only when its credential exists, validated successfully, and its
 * runtime CLI is installed; it counts as misconfigured when any of those
 * checks definitively failed; anything else (e.g. provider with no
 * validator) is unchecked.
 */
export async function GET() {
  const configs = getAllAgentConfigs();
  if (configs.length === 0) {
    // Nothing configured — short-circuit with an empty report.
    return NextResponse.json({
      agents: [],
      summary: { total: 0, healthy: 0, misconfigured: 0, unchecked: 0 },
    });
  }

  const agents = await Promise.all(configs.map(checkAgent));

  // Single pass over the results; the two conditions are mutually exclusive.
  let healthy = 0;
  let misconfigured = 0;
  for (const agent of agents) {
    const allGood =
      agent.credentialConfigured &&
      agent.credentialValid === true &&
      agent.cliInstalled === true;
    const anyBad =
      !agent.credentialConfigured ||
      agent.credentialValid === false ||
      agent.cliInstalled === false;
    if (allGood) {
      healthy += 1;
    } else if (anyBad) {
      misconfigured += 1;
    }
  }
  const unchecked = agents.length - healthy - misconfigured;

  return NextResponse.json({
    agents,
    summary: { total: agents.length, healthy, misconfigured, unchecked },
  });
}

View File

@@ -72,85 +72,6 @@ function useIsMobile() {
return mobile; return mobile;
} }
// ─── MOCK DATA ────────────────────────────────────────────────────────────────
const MOCK_TASKS: Task[] = [
{
id: "task-001", slug: "pubsub-pipeline-migration",
goal: "Replace CDC replication with Pub/Sub → GCS → BigQuery pipeline",
status: "running", iteration: 3, maxIterations: 6,
startedAt: Date.now() - 1000 * 60 * 23, project: "Hypixel",
evals: {
cost: { value: -38, unit: "%", label: "Cost Δ", pass: false, target: "<-40%" },
latency: { value: 22, unit: "s", label: "P99 Latency", pass: true, target: "<30s" },
tests: { value: 97, unit: "%", label: "Test Pass", pass: true, target: "100%" },
},
iterations: [
{ n: 1, status: "failed", diagnosis: "Schema mismatch on UGC event table — BQ partition column incompatible" },
{ n: 2, status: "failed", diagnosis: "Cost reduction insufficient — Pub/Sub fan-out creating duplicate messages" },
{ n: 3, status: "running", diagnosis: null },
],
},
{
id: "task-002", slug: "haiku-moderation-tier2",
goal: "Implement tiered UGC image moderation with Haiku classifier for tier-2 content",
status: "completed", iteration: 4, maxIterations: 6,
startedAt: Date.now() - 1000 * 60 * 60 * 3, completedAt: Date.now() - 1000 * 60 * 40,
project: "Hypixel",
pr: { number: 1847, title: "feat: tiered UGC moderation with Haiku classifier", status: "open" },
evals: {
accuracy: { value: 94.2, unit: "%", label: "Accuracy", pass: true, target: ">92%" },
throughput: { value: 312, unit: "rps", label: "Throughput", pass: true, target: ">200rps" },
tests: { value: 100, unit: "%", label: "Test Pass", pass: true, target: "100%" },
},
iterations: [
{ n: 1, status: "failed", diagnosis: "Classifier confidence threshold too low — 23% false positive rate" },
{ n: 2, status: "failed", diagnosis: "Rate limiting on Haiku API at burst load — throughput degraded" },
{ n: 3, status: "failed", diagnosis: "Accuracy marginal — prompt engineering needed for edge cases" },
{ n: 4, status: "passed", diagnosis: null },
],
},
{
id: "task-003", slug: "neurotype-job-cancellation",
goal: "Implement cancellable background jobs with rate limiting and dynamic prioritisation",
status: "pending", iteration: 0, maxIterations: 5,
startedAt: null, project: "Neurotype",
evals: {}, iterations: [],
},
];
const MOCK_KNOWLEDGE: KnowledgeDoc[] = [
{ path: "docs/architecture/bigquery-pipeline.md", title: "BigQuery Pipeline Architecture", verificationStatus: "stale", lastUpdated: "2026-03-18", project: "Hypixel", preview: "Documents the Pub/Sub → GCS → BigQuery replacement for CDC replication. Original CDC pattern caused billing spike due to per-row streaming inserts at scale." },
{ path: "docs/architecture/ugc-moderation.md", title: "UGC Moderation Tiering", verificationStatus: "verified", lastUpdated: "2026-03-20", project: "Hypixel", preview: "Three-tier classification: Haiku (fast, high-volume), Sonnet (complex cases), human review (appeals). Accuracy targets per tier defined." },
{ path: "docs/architecture/neurotype-job-system.md", title: "Background Job Processing", verificationStatus: "verified", lastUpdated: "2026-03-19", project: "Neurotype", preview: "Postgres-native job queue with cancellation tokens, rate limiting per clinical workflow type, and dynamic priority lanes." },
{ path: "docs/beliefs.md", title: "Core Beliefs", verificationStatus: "verified", lastUpdated: "2026-03-15", project: "Global", preview: "Invariants: no BQ streaming inserts, NHS audit trail on all clinical state mutations, Haiku only for non-PII classification." },
{ path: "decisions/2026-03-20-haiku-tier2-iter3.md", title: "Haiku Tier-2: Iter 3 Failure", verificationStatus: "decision", lastUpdated: "2026-03-20", project: "Hypixel", preview: "Accuracy 92.1% — marginally below target. Root cause: edge cases in animated content. Fix: few-shot examples in system prompt." },
{ path: "decisions/2026-03-19-pubsub-iter2.md", title: "Pub/Sub Migration: Iter 2 Failure", verificationStatus: "decision", lastUpdated: "2026-03-19", project: "Hypixel", preview: "Fan-out producing 2.3x message duplication on retry. Root cause: missing dedup window in Dataflow job." },
];
const MOCK_PROJECTS: Project[] = [
{
id: "proj-001", name: "Hypixel",
workspaces: [
{ name: "hypixel-api", repo: "github.com/org/hypixel-api" },
{ name: "hypixel-web", repo: "github.com/org/hypixel-web" },
{ name: "hypixel-infra", repo: "github.com/org/hypixel-infra" },
],
},
{
id: "proj-002", name: "Neurotype",
workspaces: [
{ name: "neurotype-backend", repo: "github.com/org/neurotype-backend" },
{ name: "neurotype-dashboard", repo: "github.com/org/neurotype-dashboard" },
],
},
{
id: "proj-003", name: "Homelab",
workspaces: [
{ name: "homelab", repo: "github.com/lazorgurl/homelab" },
],
},
];
function elapsed(ms: number) { function elapsed(ms: number) {
const s = Math.floor(ms / 1000); const s = Math.floor(ms / 1000);
@@ -1843,10 +1764,15 @@ function TopBar({ activeTab, setActiveTab, tasks, mobile }: { activeTab: string;
export default function HarnessDashboard() { export default function HarnessDashboard() {
const [activeTab, setActiveTab] = useState("LOOPS"); const [activeTab, setActiveTab] = useState("LOOPS");
const [tasks, setTasks] = useState<Task[]>(MOCK_TASKS); const [tasks, setTasks] = useState<Task[]>([]);
const mobile = useIsMobile(); const mobile = useIsMobile();
const [projects, setProjects] = useState<Project[]>(MOCK_PROJECTS); const [projects, setProjects] = useState<Project[]>([]);
const [knowledgeDocs, setKnowledgeDocs] = useState<KnowledgeDoc[]>([]);
useEffect(() => {
fetch("/api/tasks").then(r => r.json()).then(setTasks).catch(() => {});
}, []);
const handleNewTask = (form: TaskForm) => { const handleNewTask = (form: TaskForm) => {
const proj = projects.find(p => p.id === form.projectId); const proj = projects.find(p => p.id === form.projectId);
@@ -1878,9 +1804,9 @@ export default function HarnessDashboard() {
{activeTab === "LOOPS" && <LoopsTab tasks={tasks} setTasks={setTasks} mobile={mobile} />} {activeTab === "LOOPS" && <LoopsTab tasks={tasks} setTasks={setTasks} mobile={mobile} />}
{activeTab === "PROJECTS" && <ProjectsTab projects={projects} setProjects={setProjects} mobile={mobile} />} {activeTab === "PROJECTS" && <ProjectsTab projects={projects} setProjects={setProjects} mobile={mobile} />}
{activeTab === "MODELS" && <ModelsTab mobile={mobile} />} {activeTab === "MODELS" && <ModelsTab mobile={mobile} />}
{activeTab === "KNOWLEDGE" && <KnowledgeTab docs={MOCK_KNOWLEDGE} mobile={mobile} />} {activeTab === "KNOWLEDGE" && <KnowledgeTab docs={knowledgeDocs} mobile={mobile} />}
{activeTab === "HISTORY" && <HistoryTab tasks={tasks} mobile={mobile} />} {activeTab === "HISTORY" && <HistoryTab tasks={tasks} mobile={mobile} />}
{activeTab === "NEW TASK" && <NewTaskTab onSubmit={handleNewTask} mobile={mobile} projects={projects} knowledgeDocs={MOCK_KNOWLEDGE} />} {activeTab === "NEW TASK" && <NewTaskTab onSubmit={handleNewTask} mobile={mobile} projects={projects} knowledgeDocs={knowledgeDocs} />}
</div> </div>
{mobile && <BottomNav activeTab={activeTab} setActiveTab={setActiveTab} tasks={tasks} />} {mobile && <BottomNav activeTab={activeTab} setActiveTab={setActiveTab} tasks={tasks} />}

View File

@@ -77,61 +77,6 @@ export function deleteAgentConfig(id: string): boolean {
return configs.delete(id); return configs.delete(id);
} }
// ─── SEED DATA ──────────────────────────────────────────────
const SEED_CONFIGS: AgentConfig[] = [
{
id: "agent-claude-opus",
name: "Claude Code · Opus 4",
runtime: "claude-code",
modelId: "claude-opus-4-20250514",
provider: "anthropic",
},
{
id: "agent-claude-sonnet",
name: "Claude Code · Sonnet 4",
runtime: "claude-code",
modelId: "claude-sonnet-4-20250514",
provider: "anthropic",
},
{
id: "agent-codex-o3",
name: "Codex · o3",
runtime: "codex",
modelId: "o3",
provider: "openai",
},
{
id: "agent-codex-o4mini",
name: "Codex · o4-mini",
runtime: "codex",
modelId: "o4-mini",
provider: "openai",
},
{
id: "agent-opencode-sonnet",
name: "OpenCode · Sonnet 4",
runtime: "opencode",
modelId: "claude-sonnet-4-20250514",
provider: "anthropic",
},
{
id: "agent-opencode-gemini",
name: "OpenCode · Gemini 2.5 Pro",
runtime: "opencode",
modelId: "gemini-2.5-pro",
provider: "google",
},
];
function seedAgents() {
if (configs.size > 0) return;
for (const c of SEED_CONFIGS) {
configs.set(c.id, c);
}
}
seedAgents();
// ─── CLI BUILDER ──────────────────────────────────────────── // ─── CLI BUILDER ────────────────────────────────────────────
// Builds the shell command to invoke an agent headlessly. // Builds the shell command to invoke an agent headlessly.

View File

@@ -114,41 +114,3 @@ export function getUsageSummary(): ModelUsageSummary[] {
return Array.from(grouped.values()).sort((a, b) => b.totalCost - a.totalCost); return Array.from(grouped.values()).sort((a, b) => b.totalCost - a.totalCost);
} }
// ─── SEED DATA ──────────────────────────────────────────────
// Pre-populate with well-known models and pricing
const SEED_MODELS: Omit<CuratedModel, "enabled">[] = [
{ id: "claude-opus-4-20250514", name: "Claude Opus 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.015, costPer1kOutput: 0.075 },
{ id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.003, costPer1kOutput: 0.015 },
{ id: "claude-haiku-4-20250514", name: "Claude Haiku 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.0008, costPer1kOutput: 0.004 },
{ id: "gpt-4o", name: "GPT-4o", provider: "openai", contextWindow: 128000, costPer1kInput: 0.0025, costPer1kOutput: 0.01 },
{ id: "gpt-4o-mini", name: "GPT-4o Mini", provider: "openai", contextWindow: 128000, costPer1kInput: 0.00015,costPer1kOutput: 0.0006 },
{ id: "o3", name: "o3", provider: "openai", contextWindow: 200000, costPer1kInput: 0.01, costPer1kOutput: 0.04 },
{ id: "o4-mini", name: "o4 Mini", provider: "openai", contextWindow: 200000, costPer1kInput: 0.0011, costPer1kOutput: 0.0044 },
{ id: "gemini-2.5-pro", name: "Gemini 2.5 Pro", provider: "google", contextWindow: 1048576,costPer1kInput: 0.00125,costPer1kOutput: 0.01 },
{ id: "gemini-2.5-flash", name: "Gemini 2.5 Flash", provider: "google", contextWindow: 1048576,costPer1kInput: 0.00015,costPer1kOutput: 0.0006 },
];
const SEED_USAGE: Omit<ModelUsageEntry, "timestamp">[] = [
{ modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 1, inputTokens: 48200, outputTokens: 12400, durationMs: 34000 },
{ modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 2, inputTokens: 52100, outputTokens: 15800, durationMs: 41000 },
{ modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 3, inputTokens: 61300, outputTokens: 18200, durationMs: 45000 },
{ modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 4, inputTokens: 55000, outputTokens: 14600, durationMs: 38000 },
{ modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1, inputTokens: 85400, outputTokens: 28900, durationMs: 92000 },
{ modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 2, inputTokens: 91200, outputTokens: 31400, durationMs: 98000 },
{ modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 3, inputTokens: 78600, outputTokens: 22100, durationMs: 85000 },
{ modelId: "gpt-4o", provider: "openai", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1, inputTokens: 42000, outputTokens: 9800, durationMs: 28000 },
];
export function seedData() {
if (curatedModels.size > 0) return; // already seeded
for (const m of SEED_MODELS) {
curatedModels.set(m.id, { ...m, enabled: true });
}
const now = Date.now();
for (let i = 0; i < SEED_USAGE.length; i++) {
usageLog.push({ ...SEED_USAGE[i], timestamp: now - (SEED_USAGE.length - i) * 1000 * 60 * 30 });
}
}
seedData();