Add harness app: agent orchestrator with cluster deployment
- Next.js app for orchestrating coding agent benchmarks (Claude Code, Codex, OpenCode) - Dockerfile installs git, gh CLI, and agent CLIs for headless execution - K8s deployment with workspace volume, sealed credentials for Claude + OpenCode - Traefik IngressRoute at harness.coreworlds.io with internal-only middleware + TLS - CI pipeline path filter for harness builds - Fix OpenCode runtime flags (subcommand-based headless mode)
This commit is contained in:
3
.github/workflows/ci.yaml
vendored
3
.github/workflows/ci.yaml
vendored
@@ -27,6 +27,9 @@ jobs:
|
|||||||
api:
|
api:
|
||||||
- 'apps/api/**'
|
- 'apps/api/**'
|
||||||
- 'packages/**'
|
- 'packages/**'
|
||||||
|
harness:
|
||||||
|
- 'apps/harness/**'
|
||||||
|
- 'packages/**'
|
||||||
|
|
||||||
lint-and-test:
|
lint-and-test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
37
apps/harness/Dockerfile
Normal file
37
apps/harness/Dockerfile
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# Multi-stage build for the harness Next.js app (standalone output).
FROM node:20-alpine AS base

# ---- deps: install node_modules from the manifest only, so this layer is
# reused until package.json changes.
FROM base AS deps
RUN apk add --no-cache libc6-compat
WORKDIR /app
COPY package.json ./
RUN npm install

# ---- builder: compile the app against the installed dependencies.
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .
RUN npm run build

# ---- runner: runtime image containing the standalone server and agent CLIs.
FROM base AS runner
WORKDIR /app
ENV NODE_ENV=production

# System tools needed by agent executors
RUN apk add --no-cache git github-cli

# Agent CLIs (installed globally before dropping to non-root)
# NOTE(review): confirm `opencode` is the intended npm package name for the
# OpenCode CLI.
RUN npm install -g @anthropic-ai/claude-code @openai/codex opencode

RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

# Workspace directory for git worktrees (ephemeral)
RUN mkdir -p /data/harness && chown nextjs:nodejs /data/harness

COPY --from=builder /app/public ./public
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

USER nextjs
EXPOSE 3100
ENV PORT=3100
# The standalone server.js listens on $HOSTNAME. Container runtimes set that
# variable to the container/pod name, so pin it to all interfaces explicitly.
ENV HOSTNAME=0.0.0.0
CMD ["node", "server.js"]
|
||||||
67
apps/harness/k8s/base/deployment.yaml
Normal file
67
apps/harness/k8s/base/deployment.yaml
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
---
# Harness orchestrator: single-replica Deployment serving HTTP on port 3100.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: harness
  labels:
    app: harness
spec:
  replicas: 1
  selector:
    matchLabels:
      app: harness
  template:
    metadata:
      labels:
        app: harness
    spec:
      imagePullSecrets:
        - name: ghcr-pull-secret
      volumes:
        # Scratch space for the workspace mount; capped at 2Gi.
        - name: workspace
          emptyDir:
            sizeLimit: 2Gi
        # Agent credentials, sourced from Secrets.
        - name: claude-credentials
          secret:
            secretName: harness-claude-credentials
        - name: opencode-credentials
          secret:
            secretName: harness-opencode-credentials
      containers:
        - name: harness
          image: ghcr.io/lazorgurl/homelab-harness:latest
          ports:
            - containerPort: 3100
          env:
            # Each path below matches the mountPath of a volume mount.
            - name: HARNESS_WORK_DIR
              value: /data/harness
            - name: CLAUDE_CONFIG_DIR
              value: /secrets/claude
            - name: OPENCODE_CONFIG_DIR
              value: /secrets/opencode
          volumeMounts:
            - name: workspace
              mountPath: /data/harness
            - name: claude-credentials
              mountPath: /secrets/claude
              readOnly: true
            - name: opencode-credentials
              mountPath: /secrets/opencode
              readOnly: true
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              memory: 1Gi
          # Both probes hit the same health endpoint; liveness starts later
          # and polls less often than readiness.
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3100
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3100
            initialDelaySeconds: 15
            periodSeconds: 20
|
||||||
13
apps/harness/k8s/base/harness-claude-credentials-sealed.yaml
Normal file
13
apps/harness/k8s/base/harness-claude-credentials-sealed.yaml
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,13 @@
|
|||||||
|
---
|
||||||
|
apiVersion: bitnami.com/v1alpha1
|
||||||
|
kind: SealedSecret
|
||||||
|
metadata:
|
||||||
|
name: harness-opencode-credentials
|
||||||
|
namespace: apps
|
||||||
|
spec:
|
||||||
|
encryptedData:
|
||||||
|
auth.json: AgBNDx8eNC6AiMRrZn0JZIJACWL7Wg/JhbeuPiNdsLOpnc3db7vrI+25AjIwk7f+EMh1XKDf9QMbPJAyC3/ZiO3hJ45JJjuAmb/QYH9c+Zgnsms/VhurMz5pYvaN04B5J6lzJusILjU2sqQjaHL5ARPh1jrqrXnk+pRY/WG4vZVGrVZ/J9rvswfQuXwPdpD2KBCia3rR44WgpBxRT+bIQso2FFWYCLTdRPz7HFH+jSuFTEA0MujWZj4vCyf8w+5kZ1fwWBze2pAuj3iTLl3+TX0TMJhS7G/wARbxEYxrSntBCK6LsAByn7Ul400rcbOLugPbe9QFJrnxyjvjVeoQrjP2x1yIYWo8UHy5iExCVR+wkTD8EDQceqkqZ4KGoIa5GZpqdRMl20PjhPXfvX2XgTJjyOL2uhszRD/8z/WPVEM2gDSdmI7KUGdxmnPcqEyS6cVwp0DuSoaCWmN3GxS8EvrQVnlLQNK6RWsibGmmYwt1O7PxE4T+8CEcRwfUkdXRtVqMURnr9aIvAhl+judMkxPAdh68s6L8WehHAbPyYBeA29FVKO3JsXhMoFfQGugCxxBvPH50GOHh0Ncxdvz2wzH/of+QP4vmkddV7JbQMMruLSEzF90pIk7pLDR0Vhd9OxehKeAeAHot7DqH21VG8UnqUn+NZstCCDtB57IY5JakSrcE1+pPSMR13a5PQ/lNYWjgFT5HVF/cGMYoUG74zo2BzgJK4k3S1yDvTANxqeQnO+ybxITVh4Azo1WE151t2Fsh2SmpKsADBwuNFQprJRz1OxZYphNMNnI7KlSua1+KlljkotFItLwEEmsLgO3/zm0HIRNPbZHzX4/d5+jSIb72QdVLiiVPSM4KtkbFVbuoHPdnsqwf9pzTIwSIGkTj6EBqECIBACaytwAQUf3ZmvAKXCTa34CxyXlfzQHgc7Hyzv+1u9csO49H+P7I8iMqr43NpUliDz9mcu+0964209DpBsaVNQBp9GUB3dnrraDG/bpVOzwljgZnwaV0WKfJSvk2uoMkKOZXEIqvX1A=
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
name: harness-opencode-credentials
|
||||||
|
namespace: apps
|
||||||
7
apps/harness/k8s/base/kustomization.yaml
Normal file
7
apps/harness/k8s/base/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
---
# Base manifests for the harness app: workload, service, and the two sealed
# credential secrets.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - deployment.yaml
  - service.yaml
  - harness-claude-credentials-sealed.yaml
  - harness-opencode-credentials-sealed.yaml
|
||||||
12
apps/harness/k8s/base/service.yaml
Normal file
12
apps/harness/k8s/base/service.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
---
# Cluster-internal Service: port 80 forwards to container port 3100 on pods
# labelled app=harness.
apiVersion: v1
kind: Service
metadata:
  name: harness
spec:
  type: ClusterIP
  selector:
    app: harness
  ports:
    - protocol: TCP
      port: 80
      targetPort: 3100
|
||||||
15
apps/harness/k8s/overlays/preview/kustomization.yaml
Normal file
15
apps/harness/k8s/overlays/preview/kustomization.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
---
# Preview overlay: the base manifests with the Deployment replica count
# patched to 1.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../../base
patches:
  - patch: |
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        name: harness
      spec:
        replicas: 1
    target:
      kind: Deployment
      name: harness
|
||||||
19
apps/harness/k8s/overlays/production/kustomization.yaml
Normal file
19
apps/harness/k8s/overlays/production/kustomization.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
---
# Production overlay: the base manifests with the Deployment replica count
# patched to 1 and an image override for the harness image.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../../base
images:
  - name: ghcr.io/lazorgurl/homelab-harness
    newName: ghcr.io/lazorgurl/homelab-harness
    newTag: latest
patches:
  - target:
      kind: Deployment
      name: harness
    patch: |
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        name: harness
      spec:
        replicas: 1
|
||||||
6
apps/harness/next-env.d.ts
vendored
Normal file
6
apps/harness/next-env.d.ts
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
/// <reference types="next" />
|
||||||
|
/// <reference types="next/image-types/global" />
|
||||||
|
/// <reference path="./.next/types/routes.d.ts" />
|
||||||
|
|
||||||
|
// NOTE: This file should not be edited
|
||||||
|
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
|
||||||
6
apps/harness/next.config.js
Normal file
6
apps/harness/next.config.js
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
/** @type {import('next').NextConfig} */
|
||||||
|
const nextConfig = {
|
||||||
|
output: "standalone",
|
||||||
|
};
|
||||||
|
|
||||||
|
module.exports = nextConfig;
|
||||||
24
apps/harness/package.json
Normal file
24
apps/harness/package.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"name": "@homelab/harness",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "next dev --port 3100",
|
||||||
|
"build": "next build",
|
||||||
|
"start": "next start",
|
||||||
|
"lint": "next lint",
|
||||||
|
"test": "echo \"no tests yet\""
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"next": "^15.1.0",
|
||||||
|
"react": "^19.0.0",
|
||||||
|
"react-dom": "^19.0.0",
|
||||||
|
"yaml": "^2.7.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^22.10.0",
|
||||||
|
"@types/react": "^19.0.0",
|
||||||
|
"@types/react-dom": "^19.0.0",
|
||||||
|
"typescript": "^5.7.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
52
apps/harness/src/app/api/agents/route.ts
Normal file
52
apps/harness/src/app/api/agents/route.ts
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import {
|
||||||
|
getAllAgentConfigs,
|
||||||
|
upsertAgentConfig,
|
||||||
|
deleteAgentConfig,
|
||||||
|
AGENT_RUNTIMES,
|
||||||
|
AgentConfig,
|
||||||
|
} from "@/lib/agents";
|
||||||
|
|
||||||
|
/** Returns every stored agent config together with the AGENT_RUNTIMES values. */
export async function GET() {
  const configs = getAllAgentConfigs();
  const runtimes = Object.values(AGENT_RUNTIMES);
  return NextResponse.json({ configs, runtimes });
}
|
||||||
|
|
||||||
|
/**
 * Creates or updates an agent config.
 *
 * Expects a JSON object with `runtime`, `modelId` and `provider`; `id`, `name`,
 * `maxTokens` and `env` are optional. Responds 400 when the body is not a JSON
 * object, when a required field is missing, or when `runtime` is not one of
 * AGENT_RUNTIMES' own keys. Otherwise responds 201 with whatever
 * upsertAgentConfig returns.
 */
export async function POST(request: NextRequest) {
  // request.json() rejects on malformed input; map that (and a literal `null`
  // body) to a 400 instead of letting it surface as an unhandled 500.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== "object") {
    return NextResponse.json(
      { error: "request body must be a JSON object" },
      { status: 400 }
    );
  }

  if (!body.runtime || !body.modelId || !body.provider) {
    return NextResponse.json(
      { error: "runtime, modelId, and provider are required" },
      { status: 400 }
    );
  }

  // Own-property check: a plain index lookup would also accept inherited keys
  // such as "constructor" or "toString".
  const knownRuntime =
    typeof body.runtime === "string" &&
    Object.prototype.hasOwnProperty.call(AGENT_RUNTIMES, body.runtime) &&
    Boolean(AGENT_RUNTIMES[body.runtime as keyof typeof AGENT_RUNTIMES]);
  if (!knownRuntime) {
    return NextResponse.json(
      { error: `runtime must be one of: ${Object.keys(AGENT_RUNTIMES).join(", ")}` },
      { status: 400 }
    );
  }

  const config: AgentConfig = {
    // Callers may supply an id to update an existing config; otherwise a
    // timestamp-based id is generated.
    id: body.id || `agent-${Date.now()}`,
    name: body.name || `${body.runtime} · ${body.modelId}`,
    runtime: body.runtime,
    modelId: body.modelId,
    provider: body.provider,
    maxTokens: body.maxTokens,
    env: body.env,
  };

  return NextResponse.json(upsertAgentConfig(config), { status: 201 });
}
|
||||||
|
|
||||||
|
/** Deletes the agent config named by the `id` query parameter (400 if absent). */
export async function DELETE(request: NextRequest) {
  const configId = request.nextUrl.searchParams.get("id");
  if (!configId) {
    return NextResponse.json({ error: "id required" }, { status: 400 });
  }

  deleteAgentConfig(configId);
  return NextResponse.json({ ok: true });
}
|
||||||
9
apps/harness/src/app/api/health/route.ts
Normal file
9
apps/harness/src/app/api/health/route.ts
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
import { NextResponse } from "next/server";
|
||||||
|
|
||||||
|
/** Health endpoint: reports a static "ok" status plus the current ISO timestamp. */
export async function GET() {
  const timestamp = new Date().toISOString();
  const payload = { status: "ok", service: "harness", timestamp };
  return NextResponse.json(payload);
}
|
||||||
54
apps/harness/src/app/api/models/curated/route.ts
Normal file
54
apps/harness/src/app/api/models/curated/route.ts
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import {
|
||||||
|
getCuratedModels,
|
||||||
|
getEnabledModels,
|
||||||
|
upsertCuratedModel,
|
||||||
|
removeCuratedModel,
|
||||||
|
toggleModelEnabled,
|
||||||
|
updateModelCost,
|
||||||
|
CuratedModel,
|
||||||
|
} from "@/lib/model-store";
|
||||||
|
|
||||||
|
/** Lists curated models; `?enabled=true` restricts the list to enabled ones. */
export async function GET(request: NextRequest) {
  const enabledFlag = request.nextUrl.searchParams.get("enabled");
  if (enabledFlag === "true") {
    return NextResponse.json(getEnabledModels());
  }
  return NextResponse.json(getCuratedModels());
}
|
||||||
|
|
||||||
|
/**
 * Mutates the curated model list. The JSON body selects one of three modes:
 *
 * - `{ action: "toggle", id }` calls toggleModelEnabled (404 when it returns
 *   a falsy value).
 * - `{ action: "update-cost", id, costPer1kInput, costPer1kOutput }` calls
 *   updateModelCost (404 when it returns a falsy value).
 * - anything else is treated as an upsert and requires `id` and `provider`;
 *   responds 201 with whatever upsertCuratedModel returns.
 *
 * Responds 400 when the body is not a JSON object or when an upsert is
 * missing `id` or `provider`.
 */
export async function POST(request: NextRequest) {
  // request.json() rejects on malformed input; map that (and a literal `null`
  // body) to a 400 instead of letting it surface as an unhandled 500.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== "object") {
    return NextResponse.json({ error: "request body must be a JSON object" }, { status: 400 });
  }

  if (body.action === "toggle" && body.id) {
    const result = toggleModelEnabled(body.id);
    if (!result) return NextResponse.json({ error: "not found" }, { status: 404 });
    return NextResponse.json(result);
  }

  if (body.action === "update-cost" && body.id) {
    const result = updateModelCost(body.id, body.costPer1kInput, body.costPer1kOutput);
    if (!result) return NextResponse.json({ error: "not found" }, { status: 404 });
    return NextResponse.json(result);
  }

  if (!body.id || !body.provider) {
    return NextResponse.json({ error: "id and provider are required" }, { status: 400 });
  }

  const model: CuratedModel = {
    id: body.id,
    name: body.name || body.id,
    provider: body.provider,
    // Only an absent/null flag defaults to enabled; an explicit false is kept.
    enabled: body.enabled ?? true,
    contextWindow: body.contextWindow,
    costPer1kInput: body.costPer1kInput,
    costPer1kOutput: body.costPer1kOutput,
  };

  return NextResponse.json(upsertCuratedModel(model), { status: 201 });
}
|
||||||
|
|
||||||
|
/** Removes the curated model named by the `id` query parameter (400 if absent). */
export async function DELETE(request: NextRequest) {
  const modelId = request.nextUrl.searchParams.get("id");
  if (!modelId) {
    return NextResponse.json({ error: "id required" }, { status: 400 });
  }

  removeCuratedModel(modelId);
  return NextResponse.json({ ok: true });
}
|
||||||
7
apps/harness/src/app/api/models/route.ts
Normal file
7
apps/harness/src/app/api/models/route.ts
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
import { NextResponse } from "next/server";
|
||||||
|
import { fetchAllModels } from "@/lib/model-providers";
|
||||||
|
|
||||||
|
/** Responds with the result of fetchAllModels() as JSON. */
export async function GET() {
  return NextResponse.json(await fetchAllModels());
}
|
||||||
9
apps/harness/src/app/api/models/usage/route.ts
Normal file
9
apps/harness/src/app/api/models/usage/route.ts
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
import { NextResponse } from "next/server";
|
||||||
|
import { getUsageSummary, getUsageLog } from "@/lib/model-store";
|
||||||
|
|
||||||
|
/** Responds with the usage summary and the usage log in a single payload. */
export async function GET() {
  const summary = getUsageSummary();
  const log = getUsageLog();
  return NextResponse.json({ summary, log });
}
|
||||||
31
apps/harness/src/app/api/orchestrator/route.ts
Normal file
31
apps/harness/src/app/api/orchestrator/route.ts
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import {
|
||||||
|
startOrchestrator,
|
||||||
|
stopOrchestrator,
|
||||||
|
isRunning,
|
||||||
|
currentRunningTaskId,
|
||||||
|
} from "@/lib/orchestrator";
|
||||||
|
|
||||||
|
/** Reports whether the orchestrator is running and the id of its current task. */
export async function GET() {
  const running = isRunning();
  const currentTaskId = currentRunningTaskId();
  return NextResponse.json({ running, currentTaskId });
}
|
||||||
|
|
||||||
|
/**
 * Starts or stops the orchestrator.
 *
 * Expects a JSON object `{ action: "start" | "stop" }`. Responds 400 when the
 * body is not a JSON object or the action is anything else.
 */
export async function POST(request: NextRequest) {
  // request.json() rejects on malformed input; map that (and a literal `null`
  // body) to a 400 instead of letting it surface as an unhandled 500.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== "object") {
    return NextResponse.json({ error: "request body must be a JSON object" }, { status: 400 });
  }

  const action = body.action as string;

  if (action === "start") {
    startOrchestrator();
    return NextResponse.json({ ok: true, running: true });
  }

  if (action === "stop") {
    stopOrchestrator();
    return NextResponse.json({ ok: true, running: false });
  }

  return NextResponse.json({ error: "Unknown action. Use 'start' or 'stop'" }, { status: 400 });
}
|
||||||
13
apps/harness/src/app/api/repos/search/route.ts
Normal file
13
apps/harness/src/app/api/repos/search/route.ts
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { searchRepos } from "@/lib/repo-search";
|
||||||
|
|
||||||
|
/**
 * Repository search. Queries shorter than two characters (or a missing `q`)
 * return an empty list without calling searchRepos.
 */
export async function GET(request: NextRequest) {
  const term = request.nextUrl.searchParams.get("q") ?? "";
  const matches = term.length >= 2 ? await searchRepos(term) : [];
  return NextResponse.json(matches);
}
|
||||||
63
apps/harness/src/app/api/settings/credentials/route.ts
Normal file
63
apps/harness/src/app/api/settings/credentials/route.ts
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import {
|
||||||
|
getAllCredentials,
|
||||||
|
getCredentialsByKind,
|
||||||
|
upsertCredential,
|
||||||
|
deleteCredential,
|
||||||
|
Credential,
|
||||||
|
GIT_PROVIDERS,
|
||||||
|
AI_PROVIDERS,
|
||||||
|
} from "@/lib/credentials";
|
||||||
|
|
||||||
|
const VALID_PROVIDERS = [...GIT_PROVIDERS, ...AI_PROVIDERS];
|
||||||
|
|
||||||
|
/**
 * Lists credentials. `?kind=git` or `?kind=ai` filters by kind; any other
 * value (or none) returns everything from getAllCredentials.
 */
export async function GET(request: NextRequest) {
  const kind = request.nextUrl.searchParams.get("kind");
  switch (kind) {
    case "git":
    case "ai":
      return NextResponse.json(getCredentialsByKind(kind));
    default:
      return NextResponse.json(getAllCredentials());
  }
}
|
||||||
|
|
||||||
|
/**
 * Creates or updates a credential.
 *
 * Expects a JSON object with `provider`, `label` and `token`; `id` and
 * `baseUrl` are optional. Responds 400 when the body is not a JSON object, a
 * required field is missing, or `provider` is not in VALID_PROVIDERS.
 * Otherwise responds 201 with whatever upsertCredential returns.
 */
export async function POST(request: NextRequest) {
  // request.json() rejects on malformed input; map that (and a literal `null`
  // body) to a 400 instead of letting it surface as an unhandled 500.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== "object") {
    return NextResponse.json(
      { error: "request body must be a JSON object" },
      { status: 400 }
    );
  }

  if (!body.provider || !body.token || !body.label) {
    return NextResponse.json(
      { error: "provider, label, and token are required" },
      { status: 400 }
    );
  }

  if (!VALID_PROVIDERS.includes(body.provider)) {
    return NextResponse.json(
      { error: `provider must be one of: ${VALID_PROVIDERS.join(", ")}` },
      { status: 400 }
    );
  }

  const cred: Credential = {
    // Callers may supply an id to update an existing credential; otherwise a
    // timestamp-based id is generated.
    id: body.id || `cred-${Date.now()}`,
    provider: body.provider,
    label: body.label,
    token: body.token,
    baseUrl: body.baseUrl,
  };

  // NOTE(review): the response echoes upsertCredential's return value —
  // confirm it does not include the raw token.
  const saved = upsertCredential(cred);
  return NextResponse.json(saved, { status: 201 });
}
|
||||||
|
|
||||||
|
/**
 * Deletes the credential named by the `id` query parameter.
 * 400 when `id` is absent, 404 when deleteCredential reports nothing removed.
 */
export async function DELETE(request: NextRequest) {
  const credentialId = request.nextUrl.searchParams.get("id");
  if (!credentialId) {
    return NextResponse.json({ error: "id is required" }, { status: 400 });
  }

  return deleteCredential(credentialId)
    ? NextResponse.json({ ok: true })
    : NextResponse.json({ error: "not found" }, { status: 404 });
}
|
||||||
27
apps/harness/src/app/api/tasks/[id]/route.ts
Normal file
27
apps/harness/src/app/api/tasks/[id]/route.ts
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { getTask, updateTask } from "@/lib/store";
|
||||||
|
|
||||||
|
/** Returns the task with the route's `id`, or a 404 when getTask finds none. */
export async function GET(
  _request: NextRequest,
  { params }: { params: Promise<{ id: string }> },
) {
  const routeParams = await params;
  const found = getTask(routeParams.id);
  return found
    ? NextResponse.json(found)
    : NextResponse.json({ error: "Task not found" }, { status: 404 });
}
|
||||||
|
|
||||||
|
/**
 * Applies a partial update to the task with the route's `id`.
 *
 * The body must be a JSON object (not an array, `null`, or a scalar);
 * anything else yields 400. Responds 404 when updateTask returns a falsy
 * value, otherwise responds with the updated task.
 */
export async function PATCH(
  request: NextRequest,
  { params }: { params: Promise<{ id: string }> },
) {
  const { id } = await params;

  // request.json() rejects on malformed input; map that and any non-object
  // payload to a 400 rather than forwarding it to updateTask.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== "object" || Array.isArray(body)) {
    return NextResponse.json({ error: "request body must be a JSON object" }, { status: 400 });
  }

  // NOTE(review): the body is forwarded unfiltered — confirm updateTask
  // restricts which task fields a client may overwrite.
  const updated = updateTask(id, body);
  if (!updated) {
    return NextResponse.json({ error: "Task not found" }, { status: 404 });
  }
  return NextResponse.json(updated);
}
|
||||||
27
apps/harness/src/app/api/tasks/[id]/start/route.ts
Normal file
27
apps/harness/src/app/api/tasks/[id]/start/route.ts
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { getTask } from "@/lib/store";
|
||||||
|
import { startOrchestrator } from "@/lib/orchestrator";
|
||||||
|
|
||||||
|
/**
 * Requests execution of a pending task by making sure the orchestrator runs.
 * 404 when the task does not exist, 400 when its status is not "pending".
 */
export async function POST(
  _request: NextRequest,
  { params }: { params: Promise<{ id: string }> },
) {
  const taskId = (await params).id;
  const task = getTask(taskId);

  if (!task) {
    return NextResponse.json({ error: "Task not found" }, { status: 404 });
  }

  const isPending = task.status === "pending";
  if (!isPending) {
    const error = `Task is ${task.status}, not pending`;
    return NextResponse.json({ error }, { status: 400 });
  }

  // The task itself is not passed along: the orchestrator is only started.
  startOrchestrator();

  const message = "Orchestrator started, task will be picked up";
  return NextResponse.json({ ok: true, message });
}
|
||||||
32
apps/harness/src/app/api/tasks/[id]/stop/route.ts
Normal file
32
apps/harness/src/app/api/tasks/[id]/stop/route.ts
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { getTask } from "@/lib/store";
|
||||||
|
import { cancelTask } from "@/lib/orchestrator";
|
||||||
|
|
||||||
|
/**
 * Requests cancellation of a running task.
 * 404 when the task does not exist; 400 when its status is not "running" or
 * when cancelTask returns a falsy value.
 */
export async function POST(
  _request: NextRequest,
  { params }: { params: Promise<{ id: string }> },
) {
  const taskId = (await params).id;
  const task = getTask(taskId);

  if (!task) {
    return NextResponse.json({ error: "Task not found" }, { status: 404 });
  }

  const isRunningTask = task.status === "running";
  if (!isRunningTask) {
    const error = `Task is ${task.status}, not running`;
    return NextResponse.json({ error }, { status: 400 });
  }

  if (!cancelTask(taskId)) {
    const error = "Task is not the currently executing task";
    return NextResponse.json({ error }, { status: 400 });
  }

  const message = "Task cancellation requested";
  return NextResponse.json({ ok: true, message });
}
|
||||||
45
apps/harness/src/app/api/tasks/route.ts
Normal file
45
apps/harness/src/app/api/tasks/route.ts
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { getAllTasks, createTask } from "@/lib/store";
|
||||||
|
import { getAgentConfig } from "@/lib/agents";
|
||||||
|
import { Task, TaskSpec } from "@/lib/types";
|
||||||
|
|
||||||
|
/** Responds with every task from the store. */
export async function GET() {
  const tasks = getAllTasks();
  return NextResponse.json(tasks);
}
|
||||||
|
|
||||||
|
/**
 * Creates a new pending task from a TaskSpec.
 *
 * Expects a JSON object with `slug`, `goal` and `agentId`; `project` and
 * `maxIterations` are optional. Responds 400 when the body is not a JSON
 * object, a required field is missing, or `agentId` does not match a stored
 * agent config. Otherwise responds 201 with whatever createTask returns.
 */
export async function POST(request: NextRequest) {
  // request.json() rejects on malformed input; map that (and a literal `null`
  // body) to a 400 instead of letting it surface as an unhandled 500.
  const spec: TaskSpec | null = await request.json().catch(() => null);
  if (spec === null || typeof spec !== "object") {
    return NextResponse.json({ error: "request body must be a JSON object" }, { status: 400 });
  }

  if (!spec.slug || !spec.goal) {
    return NextResponse.json({ error: "slug and goal are required" }, { status: 400 });
  }

  if (!spec.agentId) {
    return NextResponse.json({ error: "agentId is required" }, { status: 400 });
  }

  const agentConfig = getAgentConfig(spec.agentId);
  if (!agentConfig) {
    return NextResponse.json(
      { error: `Agent config not found: ${spec.agentId}` },
      { status: 400 },
    );
  }

  const task: Task = {
    id: `task-${Date.now()}`,
    slug: spec.slug,
    goal: spec.goal,
    project: spec.project || "—",
    status: "pending",
    iteration: 0,
    // A missing or zero maxIterations falls back to 6.
    maxIterations: spec.maxIterations || 6,
    startedAt: null,
    evals: {},
    iterations: [],
    spec,
  };

  const created = createTask(task);
  return NextResponse.json(created, { status: 201 });
}
|
||||||
18
apps/harness/src/app/layout.tsx
Normal file
18
apps/harness/src/app/layout.tsx
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import type { Metadata } from "next";
|
||||||
|
|
||||||
|
export const metadata: Metadata = {
|
||||||
|
title: "Harness — Agent Orchestrator",
|
||||||
|
description: "Autonomous coding agent loop orchestrator and dashboard",
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Root layout: an English-language document whose body has no margin or padding. */
export default function RootLayout(props: { children: React.ReactNode }) {
  const { children } = props;
  const bodyStyle = { margin: 0, padding: 0 };

  return (
    <html lang="en">
      <body style={bodyStyle}>{children}</body>
    </html>
  );
}
|
||||||
5
apps/harness/src/app/page.tsx
Normal file
5
apps/harness/src/app/page.tsx
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
import HarnessDashboard from "@/components/harness-dashboard";
|
||||||
|
|
||||||
|
/** Index route: renders the harness dashboard and nothing else. */
export default function Page() {
  const dashboard = <HarnessDashboard />;
  return dashboard;
}
|
||||||
1889
apps/harness/src/components/harness-dashboard.tsx
Normal file
1889
apps/harness/src/components/harness-dashboard.tsx
Normal file
File diff suppressed because it is too large
Load Diff
541
apps/harness/src/components/harness-design-system.tsx
Normal file
541
apps/harness/src/components/harness-design-system.tsx
Normal file
@@ -0,0 +1,541 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useState, useRef, useEffect } from "react";
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// HARNESS DESIGN SYSTEM
|
||||||
|
// Import this file and destructure what you need.
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
// ─── TOKENS ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export const tokens = {
  /** Colour palette. */
  color: {
    // Backgrounds, darkest to lightest:
    // page root, card / panel, nested surface, hover state / divider fill.
    bg0: "#060a0f",
    bg1: "#0d1117",
    bg2: "#111827",
    bg3: "#1f2937",

    // Borders: structural (topbar, panel edges), interactive (buttons,
    // inputs), focus rings.
    border0: "#1f2937",
    border1: "#374151",
    border2: "#4b5563",

    // Text: primary (headings, active labels), secondary (body,
    // descriptions), muted (metadata, timestamps), faintest (placeholders,
    // dividers).
    text0: "#f9fafb",
    text1: "#9ca3af",
    text2: "#4b5563",
    text3: "#374151",

    // Semantic signal colours. Each `*Dim` entry is the border / background
    // tint that pairs with the colour above it.
    // pass: success, running, online
    pass: "#00ff9f",
    passDim: "#064e3b",
    // fail: failure, error
    fail: "#f87171",
    failDim: "#7f1d1d",
    // warn: stale, warning
    warn: "#f59e0b",
    warnDim: "#78350f",
    // info: completed, informational
    info: "#7dd3fc",
    infoDim: "#0c4a6e",
    // purple: decision records, AI-authored
    purple: "#a78bfa",
    purpleDim: "#3b0764",
    // muted: pending, disabled, unknown
    muted: "#6b7280",

    // Brand accent (same value as `pass`) and its glow shadow.
    accent: "#00ff9f",
    accentGlow: "0 0 8px #00ff9f",
  },

  /** Font stacks. */
  font: {
    mono: "'Courier New', 'Lucida Console', monospace",
    sans: "'IBM Plex Sans', 'Helvetica Neue', sans-serif",
  },

  /** Font sizes, in px. */
  size: {
    xs: 13,
    sm: 14,
    md: 15,
    base: 16,
    lg: 18,
    xl: 26,
    xxl: 34,
  },

  /** Letter spacing. */
  tracking: {
    tight: "0.02em",
    normal: "0.08em",
    wide: "0.12em",
    wider: "0.15em",
  },

  /** Spacing in px on a 4pt grid (there is no step 7). */
  space: {
    1: 4,
    2: 8,
    3: 12,
    4: 16,
    5: 20,
    6: 24,
    8: 32,
  } as Record<number, number>,

  /** Border radius, intentionally minimal (tool aesthetic). */
  radius: {
    none: 0,
    sm: 2,
  },

  /** CSS transition shorthands. */
  transition: {
    fast: "all 0.1s ease",
    normal: "all 0.15s ease",
  },

  /** Minimum touch-target size. */
  touch: { min: 44 },
};
|
||||||
|
|
||||||
|
// ─── STATUS CONFIG ───────────────────────────────────────────
|
||||||
|
// Single source of truth for all status variants
|
||||||
|
|
||||||
|
// Builds one status entry; only the "running" variant shows the pulsing dot.
const statusVariant = (label: string, color: string, dim: string, dot = false) => ({
  label,
  color,
  dim,
  dot,
});

/** Display label, foreground colour, tint colour and dot flag for each status key. */
export const STATUS: Record<string, { label: string; color: string; dim: string; dot: boolean }> = {
  running: statusVariant("RUNNING", tokens.color.pass, tokens.color.passDim, true),
  completed: statusVariant("COMPLETED", tokens.color.info, tokens.color.infoDim),
  pending: statusVariant("PENDING", tokens.color.muted, tokens.color.bg3),
  failed: statusVariant("FAILED", tokens.color.fail, tokens.color.failDim),
  passed: statusVariant("PASSED", tokens.color.pass, tokens.color.passDim),
  stale: statusVariant("STALE", tokens.color.warn, tokens.color.warnDim),
  verified: statusVariant("VERIFIED", tokens.color.pass, tokens.color.passDim),
  decision: statusVariant("DECISION", tokens.color.purple, tokens.color.purpleDim),
  open: statusVariant("OPEN", tokens.color.info, tokens.color.infoDim),
};
|
||||||
|
|
||||||
|
// ─── PRIMITIVE COMPONENTS ────────────────────────────────────
|
||||||
|
|
||||||
|
// Label — all-caps mono metadata tag
|
||||||
|
/**
 * Uppercase monospace tag. `color` defaults to the muted text colour; `style`
 * is spread last, so it overrides any of the defaults.
 */
export function Label({ children, color, style }: { children: React.ReactNode; color?: string; style?: React.CSSProperties }) {
  const labelStyle: React.CSSProperties = {
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.xs,
    letterSpacing: tokens.tracking.wide,
    color: color || tokens.color.text2,
    textTransform: "uppercase",
    ...style,
  };

  return <span style={labelStyle}>{children}</span>;
}
|
||||||
|
|
||||||
|
// Mono — inline monospace text, body weight
|
||||||
|
/**
 * Inline monospace text. `size` falls back to the base font size and `color`
 * to the secondary text colour; `style` is spread last and wins.
 */
export function Mono({ children, size, color, style }: { children: React.ReactNode; size?: number; color?: string; style?: React.CSSProperties }) {
  const monoStyle: React.CSSProperties = {
    fontFamily: tokens.font.mono,
    fontSize: size || tokens.size.base,
    color: color || tokens.color.text1,
    ...style,
  };

  return <span style={monoStyle}>{children}</span>;
}
|
||||||
|
|
||||||
|
// Divider — 1px-high horizontal rule that fills the width of its parent.
// flexShrink is 0 so the rule keeps its height inside flex layouts.
export function Divider({ style }: { style?: React.CSSProperties }) {
  const ruleStyle: React.CSSProperties = {
    height: 1,
    background: tokens.color.border0,
    width: "100%",
    flexShrink: 0,
    ...style,
  };

  return <div style={ruleStyle} />;
}
|
||||||
|
|
||||||
|
// StatusBadge — RUNNING / FAILED / VERIFIED etc.
// Looks the status up in STATUS; a status with no entry is rendered with
// muted colours and its own name upper-cased ("UNKNOWN" when it is empty).
// Entries flagged with `dot` get a small pulsing indicator before the label.
export function StatusBadge({ status, style }: { status: string; style?: React.CSSProperties }) {
  const known = STATUS[status];
  const s = known
    ? known
    : { label: (status || "UNKNOWN").toUpperCase(), color: tokens.color.muted, dim: tokens.color.bg3, dot: false };

  const badgeStyle: React.CSSProperties = {
    display: "inline-flex",
    alignItems: "center",
    gap: 5,
    padding: "3px 8px",
    border: `1px solid ${s.dim}`,
    // Appends "44" to the dim colour string to form the background value.
    background: s.dim + "44",
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.xs,
    letterSpacing: tokens.tracking.wide,
    color: s.color,
    whiteSpace: "nowrap",
    ...style,
  };

  const pulse = s.dot && (
    <span
      style={{
        width: 5,
        height: 5,
        borderRadius: "50%",
        background: s.color,
        boxShadow: `0 0 6px ${s.color}`,
        display: "inline-block",
        animation: "hpulse 2s infinite",
      }}
    />
  );

  return (
    <span style={badgeStyle}>
      {pulse}
      {s.label}
    </span>
  );
}
|
||||||
|
|
||||||
|
// Panel — bordered surface container; `style` overrides the base look.
export function Panel({ children, style }: { children: React.ReactNode; style?: React.CSSProperties }) {
  const surface: React.CSSProperties = {
    background: tokens.color.bg1,
    border: `1px solid ${tokens.color.border0}`,
    ...style,
  };

  return <div style={surface}>{children}</div>;
}
|
||||||
|
|
||||||
|
// PanelHeader — fixed-height (44px) labelled top edge of a panel.
// The label sits on the left; optional children are laid out in a flex row
// on the right.
export function PanelHeader({ label, children, style }: { label: string; children?: React.ReactNode; style?: React.CSSProperties }) {
  const barStyle: React.CSSProperties = {
    display: "flex",
    alignItems: "center",
    justifyContent: "space-between",
    padding: `0 ${tokens.space[4]}px`,
    borderBottom: `1px solid ${tokens.color.border0}`,
    height: 44,
    flexShrink: 0,
    ...style,
  };

  const actions = children && (
    <div style={{ display: "flex", gap: tokens.space[2] }}>{children}</div>
  );

  return (
    <div style={barStyle}>
      <Label color={tokens.color.text2}>{label}</Label>
      {actions}
    </div>
  );
}
|
||||||
|
|
||||||
|
// Btn — outlined button with four colour variants.
// Hovering an enabled button switches the text to tokens.color.text0; a
// disabled button is dimmed and shows the "not-allowed" cursor.
export function Btn({ children, variant = "default", onClick, style, disabled }: {
  children: React.ReactNode;
  variant?: "primary" | "danger" | "default" | "ghost";
  onClick?: (e: React.MouseEvent) => void;
  style?: React.CSSProperties;
  disabled?: boolean;
}) {
  const [hovered, setHovered] = useState(false);

  // Border and resting text colour per variant.
  const palette = {
    primary: { border: tokens.color.accent, color: tokens.color.accent },
    danger: { border: tokens.color.fail, color: tokens.color.fail },
    default: { border: tokens.color.border1, color: tokens.color.text1 },
    ghost: { border: "transparent", color: tokens.color.text2 },
  };
  const v = palette[variant];
  const highlighted = hovered && !disabled;

  const buttonStyle: React.CSSProperties = {
    background: "transparent",
    border: `1px solid ${v.border}`,
    color: highlighted ? tokens.color.text0 : v.color,
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.sm,
    letterSpacing: tokens.tracking.wide,
    padding: `${tokens.space[2]}px ${tokens.space[3]}px`,
    minHeight: tokens.touch.min,
    cursor: disabled ? "not-allowed" : "pointer",
    transition: tokens.transition.fast,
    borderRadius: 0,
    opacity: disabled ? 0.4 : 1,
    display: "inline-flex",
    alignItems: "center",
    justifyContent: "center",
    ...style,
  };

  return (
    <button
      onClick={onClick}
      disabled={disabled}
      onMouseEnter={() => setHovered(true)}
      onMouseLeave={() => setHovered(false)}
      style={buttonStyle}
    >
      {children}
    </button>
  );
}
|
||||||
|
|
||||||
|
// Input — controlled single-line text field.
// The border switches to tokens.color.border2 while the field has focus.
export function Input({ value, onChange, placeholder, style }: {
  value: string;
  onChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  placeholder?: string;
  style?: React.CSSProperties;
}) {
  const [focused, setFocused] = useState(false);
  const borderColor = focused ? tokens.color.border2 : tokens.color.border0;

  const fieldStyle: React.CSSProperties = {
    background: tokens.color.bg0,
    border: `1px solid ${borderColor}`,
    color: tokens.color.text0,
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.lg,
    padding: `${tokens.space[3]}px ${tokens.space[3]}px`,
    minHeight: tokens.touch.min,
    outline: "none",
    transition: tokens.transition.fast,
    borderRadius: 0,
    width: "100%",
    boxSizing: "border-box",
    ...style,
  };

  return (
    <input
      value={value}
      onChange={onChange}
      placeholder={placeholder}
      onFocus={() => setFocused(true)}
      onBlur={() => setFocused(false)}
      style={fieldStyle}
    />
  );
}
|
||||||
|
|
||||||
|
// Textarea — controlled multi-line text field (4 rows unless `rows` is
// given), resizable vertically. The border switches to tokens.color.border2
// while the field has focus.
export function Textarea({ value, onChange, placeholder, rows = 4, style }: {
  value: string;
  onChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
  placeholder?: string;
  rows?: number;
  style?: React.CSSProperties;
}) {
  const [focused, setFocused] = useState(false);
  const borderColor = focused ? tokens.color.border2 : tokens.color.border0;

  const areaStyle: React.CSSProperties = {
    background: tokens.color.bg0,
    border: `1px solid ${borderColor}`,
    color: tokens.color.text0,
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.lg,
    padding: `${tokens.space[3]}px ${tokens.space[3]}px`,
    outline: "none",
    transition: tokens.transition.fast,
    resize: "vertical",
    width: "100%",
    boxSizing: "border-box",
    borderRadius: 0,
    ...style,
  };

  return (
    <textarea
      value={value}
      onChange={onChange}
      placeholder={placeholder}
      rows={rows}
      onFocus={() => setFocused(true)}
      onBlur={() => setFocused(false)}
      style={areaStyle}
    />
  );
}
|
||||||
|
|
||||||
|
// EvalPip — metric readout tile: caption, large value with unit, and the
// target underneath. The border and value use the pass colours when `pass`
// is true and the fail colours otherwise.
export function EvalPip({ label, value, unit, pass, target }: {
  label: string; value: number | string; unit: string; pass: boolean; target: string;
}) {
  const edgeColor = pass ? tokens.color.passDim : tokens.color.failDim;
  const valueColor = pass ? tokens.color.pass : tokens.color.fail;

  const tileStyle: React.CSSProperties = {
    display: "flex",
    flexDirection: "column",
    gap: tokens.space[1],
    padding: `${tokens.space[3]}px ${tokens.space[3]}px`,
    background: tokens.color.bg0,
    border: `1px solid ${edgeColor}`,
    minWidth: 96,
    flexShrink: 0,
  };
  const valueStyle: React.CSSProperties = {
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.xl,
    color: valueColor,
    fontWeight: "bold",
    lineHeight: 1,
  };

  return (
    <div style={tileStyle}>
      <Label color={tokens.color.text2}>{label}</Label>
      <span style={valueStyle}>
        {value}<span style={{ fontSize: tokens.size.sm, marginLeft: 2 }}>{unit}</span>
      </span>
      <Label color={tokens.color.text3}>target {target}</Label>
    </div>
  );
}
|
||||||
|
|
||||||
|
// IterDot — small bar showing the status of iteration `n`.
// passed/completed, failed and running each get their own colour; any other
// status uses tokens.color.bg3. A missing status is drawn at 25% opacity and
// a running one gets a glow.
export function IterDot({ status, n }: { status?: string; n: number }) {
  let color: string = tokens.color.bg3;
  if (status === "passed" || status === "completed") {
    color = tokens.color.pass;
  } else if (status === "failed") {
    color = tokens.color.fail;
  } else if (status === "running") {
    color = tokens.color.warn;
  }

  const pipStyle: React.CSSProperties = {
    width: 18,
    height: 5,
    background: color,
    opacity: status ? 1 : 0.25,
    boxShadow: status === "running" ? `0 0 6px ${color}` : "none",
    transition: tokens.transition.normal,
    flexShrink: 0,
  };

  return <div title={`Iter ${n}: ${status || "pending"}`} style={pipStyle} />;
}
|
||||||
|
|
||||||
|
// PathCrumb — renders a "/"-separated path with the final segment in
// tokens.color.text1 and every earlier segment and separator in
// tokens.color.text3.
export function PathCrumb({ path, style }: { path: string; style?: React.CSSProperties }) {
  const segments = path.split("/");
  const lastIndex = segments.length - 1;

  return (
    <span style={{ fontFamily: tokens.font.mono, fontSize: tokens.size.sm, ...style }}>
      {segments.map((segment, idx) => {
        const isLeaf = idx === lastIndex;
        return (
          <span key={idx}>
            <span style={{ color: isLeaf ? tokens.color.text1 : tokens.color.text3 }}>{segment}</span>
            {!isLeaf && <span style={{ color: tokens.color.text3, margin: "0 2px" }}>/</span>}
          </span>
        );
      })}
    </span>
  );
}
|
||||||
|
|
||||||
|
// SearchableDropdown — filterable option picker
//
// DropdownOption describes one selectable entry: `value` is what is reported
// through onChange, `label` is the text shown, and the optional `detail` is a
// secondary line that the search box also matches against.
export interface DropdownOption {
  value: string;
  label: string;
  detail?: string;
}

// Renders a trigger box and, while open, a search input plus the filtered
// option list.
// - Single mode (multi = false): `value` is a string ("" means nothing is
//   selected). Picking an option reports it, closes the list and clears the
//   search query.
// - Multi mode: `value` is a string[]. Picking an option toggles its
//   membership and the list stays open; selected entries render as tags with
//   an "x" that removes them.
// The prop types allow `value` to be a string or an array regardless of
// `multi`, so the value is normalised to an array up front instead of being
// cast; a mismatched shape therefore no longer throws.
export function SearchableDropdown({ options, value, onChange, placeholder, style, multi = false }: {
  options: DropdownOption[];
  value: string | string[];
  onChange: (value: string | string[]) => void;
  placeholder?: string;
  style?: React.CSSProperties;
  multi?: boolean;
}) {
  const [open, setOpen] = useState(false);
  const [query, setQuery] = useState("");
  const ref = useRef<HTMLDivElement>(null);

  // Close the list when a mousedown lands outside this component.
  useEffect(() => {
    const handler = (e: MouseEvent) => {
      if (ref.current && !ref.current.contains(e.target as Node)) setOpen(false);
    };
    document.addEventListener("mousedown", handler);
    return () => document.removeEventListener("mousedown", handler);
  }, []);

  // Lower-case the query once rather than for every option.
  const needle = query.toLowerCase();
  const filtered = options.filter(o =>
    o.label.toLowerCase().includes(needle) ||
    (o.detail && o.detail.toLowerCase().includes(needle))
  );

  // Currently selected values as an array, whatever shape `value` has.
  const selected: string[] = Array.isArray(value) ? value : value ? [value] : [];

  const selectedLabels = selected
    .map(v => options.find(o => o.value === v)?.label)
    .filter(Boolean)
    .join(", ");

  const toggle = (optValue: string) => {
    if (multi) {
      const next = selected.includes(optValue)
        ? selected.filter(v => v !== optValue)
        : [...selected, optValue];
      onChange(next);
    } else {
      onChange(optValue);
      setOpen(false);
      setQuery("");
    }
  };

  const remove = (optValue: string) => {
    if (multi) {
      onChange(selected.filter(v => v !== optValue));
    } else {
      onChange("");
    }
  };

  return (
    <div ref={ref} style={{ position: "relative", width: "100%", ...style }}>
      {/* Selected tags (multi) or trigger */}
      <div
        onClick={() => setOpen(!open)}
        style={{
          background: tokens.color.bg0,
          border: `1px solid ${open ? tokens.color.border2 : tokens.color.border0}`,
          minHeight: tokens.touch.min,
          padding: `${tokens.space[2]}px ${tokens.space[3]}px`,
          display: "flex", alignItems: "center", flexWrap: "wrap", gap: tokens.space[1],
          cursor: "pointer", transition: tokens.transition.fast,
        }}
      >
        {multi && selected.length > 0 ? (
          selected.map(v => {
            const opt = options.find(o => o.value === v);
            return (
              <span key={v} style={{
                display: "inline-flex", alignItems: "center", gap: 4,
                padding: "2px 8px",
                background: tokens.color.bg2,
                border: `1px solid ${tokens.color.border0}`,
                fontFamily: tokens.font.mono, fontSize: tokens.size.sm,
                color: tokens.color.text1,
              }}>
                {opt?.label || v}
                <span
                  // stopPropagation keeps the click from also toggling the list.
                  onClick={(e) => { e.stopPropagation(); remove(v); }}
                  style={{ cursor: "pointer", color: tokens.color.text2, fontSize: tokens.size.xs, marginLeft: 2 }}
                >
                  x
                </span>
              </span>
            );
          })
        ) : !multi && selectedLabels ? (
          <span style={{ fontFamily: tokens.font.mono, fontSize: tokens.size.base, color: tokens.color.text0 }}>
            {selectedLabels}
          </span>
        ) : (
          <span style={{ fontFamily: tokens.font.mono, fontSize: tokens.size.base, color: tokens.color.text3 }}>
            {placeholder || "Select..."}
          </span>
        )}
        <span style={{ marginLeft: "auto", color: tokens.color.text3, fontSize: tokens.size.sm, flexShrink: 0 }}>
          {open ? "▴" : "▾"}
        </span>
      </div>

      {/* Dropdown */}
      {open && (
        <div style={{
          position: "absolute", top: "100%", left: 0, right: 0, zIndex: 50,
          background: tokens.color.bg1,
          border: `1px solid ${tokens.color.border1}`,
          borderTop: "none",
          maxHeight: 240, display: "flex", flexDirection: "column",
        }}>
          <div style={{ padding: tokens.space[2], borderBottom: `1px solid ${tokens.color.border0}` }}>
            <input
              autoFocus
              value={query}
              onChange={e => setQuery(e.target.value)}
              placeholder="Search..."
              style={{
                background: tokens.color.bg0,
                border: `1px solid ${tokens.color.border0}`,
                color: tokens.color.text0,
                fontFamily: tokens.font.mono,
                fontSize: tokens.size.sm,
                padding: `${tokens.space[1]}px ${tokens.space[2]}px`,
                outline: "none", width: "100%", boxSizing: "border-box" as const,
                borderRadius: 0,
              }}
            />
          </div>
          <div style={{ flex: 1, overflowY: "auto" }}>
            {filtered.length === 0 ? (
              <div style={{ padding: `${tokens.space[3]}px ${tokens.space[3]}px` }}>
                <Mono size={tokens.size.sm} color={tokens.color.text3}>No matches</Mono>
              </div>
            ) : (
              filtered.map(opt => {
                const isSelected = selected.includes(opt.value);
                return (
                  <div
                    key={opt.value}
                    onClick={() => toggle(opt.value)}
                    style={{
                      padding: `${tokens.space[2]}px ${tokens.space[3]}px`,
                      cursor: "pointer",
                      background: isSelected ? tokens.color.bg2 : "transparent",
                      borderLeft: `2px solid ${isSelected ? tokens.color.accent : "transparent"}`,
                      display: "flex", flexDirection: "column", gap: 2,
                      minHeight: 36,
                      justifyContent: "center",
                    }}
                  >
                    <div style={{ display: "flex", alignItems: "center", justifyContent: "space-between" }}>
                      <Mono size={tokens.size.sm} color={isSelected ? tokens.color.text0 : tokens.color.text1}>
                        {opt.label}
                      </Mono>
                      {isSelected && <span style={{ color: tokens.color.accent, fontSize: tokens.size.xs }}>✓</span>}
                    </div>
                    {opt.detail && (
                      <Mono size={tokens.size.xs} color={tokens.color.text3}>{opt.detail}</Mono>
                    )}
                  </div>
                );
              })
            )}
          </div>
        </div>
      )}
    </div>
  );
}
|
||||||
|
|
||||||
|
// BackBtn — borderless "‹ label" button for mobile drill-down navigation;
// calls `onBack` when clicked.
export function BackBtn({ onBack, label }: { onBack: () => void; label: string }) {
  const backStyle: React.CSSProperties = {
    background: "none",
    border: "none",
    color: tokens.color.accent,
    fontFamily: tokens.font.mono,
    fontSize: tokens.size.sm,
    letterSpacing: tokens.tracking.wide,
    cursor: "pointer",
    display: "flex",
    alignItems: "center",
    gap: tokens.space[2],
    padding: 0,
    minHeight: tokens.touch.min,
  };

  return (
    <button onClick={onBack} style={backStyle}>
      <span style={{ fontSize: 16, lineHeight: 1 }}>‹</span> {label}
    </button>
  );
}
|
||||||
154
apps/harness/src/lib/agents.ts
Normal file
154
apps/harness/src/lib/agents.ts
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
// Agent runtime definitions and configuration
|
||||||
|
|
||||||
|
// Identifier of a supported agent CLI runtime.
export type AgentRuntime = "claude-code" | "codex" | "opencode";

// Static description of how one agent CLI is invoked headlessly.
export interface AgentRuntimeInfo {
  id: AgentRuntime;
  name: string;
  description: string;
  defaultProviders: string[]; // which AI providers this runtime supports
  cliCommand: string; // base CLI command
  headlessFlag: string; // flag (or subcommand) that selects headless mode
  modelFlag: string; // flag to specify model
  promptFlag: string; // flag to pass the prompt/task; "" means the prompt is a positional argument
}

// Invocation table, keyed by runtime id.
export const AGENT_RUNTIMES: Record<AgentRuntime, AgentRuntimeInfo> = {
  "claude-code": {
    id: "claude-code",
    name: "Claude Code",
    description: "Anthropic's agentic coding CLI. Supports Claude models via Anthropic API or Bedrock.",
    defaultProviders: ["anthropic"],
    cliCommand: "claude",
    headlessFlag: "--print",
    modelFlag: "--model",
    // The claude CLI has no --prompt option; in print mode the prompt is
    // passed positionally (`claude --print --model <id> "<prompt>"`).
    promptFlag: "",
  },
  "codex": {
    id: "codex",
    name: "Codex CLI",
    description: "OpenAI's open-source coding agent. Supports OpenAI models.",
    defaultProviders: ["openai"],
    cliCommand: "codex",
    // NOTE(review): newer Codex CLI releases run non-interactively through
    // the `exec` subcommand — confirm --quiet against the installed version.
    headlessFlag: "--quiet",
    modelFlag: "--model",
    promptFlag: "", // prompt is positional
  },
  "opencode": {
    id: "opencode",
    name: "OpenCode",
    description: "Open-source multi-provider coding agent. Supports Anthropic, OpenAI, Google, OpenRouter.",
    defaultProviders: ["anthropic", "openai", "google", "openrouter", "opencode-zen"],
    cliCommand: "opencode",
    headlessFlag: "run", // subcommand, not a flag
    modelFlag: "--model",
    promptFlag: "", // prompt is positional (like codex)
  },
};
|
||||||
|
|
||||||
|
// ─── AGENT CONFIGURATIONS ────────────────────────────────────
|
||||||
|
|
||||||
|
// A named pairing of an agent runtime with a model and provider.
export interface AgentConfig {
  id: string;
  name: string;
  runtime: AgentRuntime;
  modelId: string;
  provider: string;
  maxTokens?: number;
  env?: Record<string, string>; // additional env vars for the agent process
}

// In-memory store of agent configurations, keyed by config id.
const configs: Map<string, AgentConfig> = new Map();

// Returns every stored configuration, in insertion order.
export function getAllAgentConfigs(): AgentConfig[] {
  const all: AgentConfig[] = [];
  configs.forEach((entry) => {
    all.push(entry);
  });
  return all;
}

// Returns the configuration stored under `id`, or undefined if there is none.
export function getAgentConfig(id: string): AgentConfig | undefined {
  const found = configs.get(id);
  return found;
}

// Stores `config` under its own id, replacing any existing entry, and returns
// the same object.
export function upsertAgentConfig(config: AgentConfig): AgentConfig {
  const { id } = config;
  configs.set(id, config);
  return config;
}

// Removes the configuration stored under `id`; returns whether one existed.
export function deleteAgentConfig(id: string): boolean {
  const removed = configs.delete(id);
  return removed;
}
|
||||||
|
|
||||||
|
// ─── SEED DATA ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Default agent configurations registered when the store starts out empty.
const SEED_CONFIGS: AgentConfig[] = [
  { id: "agent-claude-opus", name: "Claude Code · Opus 4", runtime: "claude-code", modelId: "claude-opus-4-20250514", provider: "anthropic" },
  { id: "agent-claude-sonnet", name: "Claude Code · Sonnet 4", runtime: "claude-code", modelId: "claude-sonnet-4-20250514", provider: "anthropic" },
  { id: "agent-codex-o3", name: "Codex · o3", runtime: "codex", modelId: "o3", provider: "openai" },
  { id: "agent-codex-o4mini", name: "Codex · o4-mini", runtime: "codex", modelId: "o4-mini", provider: "openai" },
  { id: "agent-opencode-sonnet", name: "OpenCode · Sonnet 4", runtime: "opencode", modelId: "claude-sonnet-4-20250514", provider: "anthropic" },
  { id: "agent-opencode-gemini", name: "OpenCode · Gemini 2.5 Pro", runtime: "opencode", modelId: "gemini-2.5-pro", provider: "google" },
];

// Copies the seed configurations into the store, but only if it is empty.
function seedAgents() {
  if (configs.size === 0) {
    SEED_CONFIGS.forEach((seed) => {
      configs.set(seed.id, seed);
    });
  }
}

// Seed once at module load.
seedAgents();
|
||||||
|
|
||||||
|
// ─── CLI BUILDER ────────────────────────────────────────────
|
||||||
|
// Builds the argv used to invoke an agent headlessly: element 0 is the CLI
// command and the rest are its arguments, in the order
//   <command> [headless flag/subcommand] [model flag, model] [prompt flag] <prompt>
// The result is an argument array, not a shell string, so the prompt needs no
// quoting. When the runtime's promptFlag is empty the prompt is appended as a
// positional argument.
//
// For the opencode runtime the model is sent as "<provider>/<modelId>", the
// format its --model option expects; a modelId that already contains "/" is
// passed through unchanged.
//
// `workDir` is not used here; the executor in this change passes it to the
// child process as its cwd instead.
export function buildAgentCommand(config: AgentConfig, prompt: string, workDir: string): string[] {
  const runtime = AGENT_RUNTIMES[config.runtime];
  const args = [runtime.cliCommand];

  if (runtime.headlessFlag) args.push(runtime.headlessFlag);

  if (runtime.modelFlag && config.modelId) {
    const needsProviderPrefix =
      config.runtime === "opencode" && Boolean(config.provider) && !config.modelId.includes("/");
    const model = needsProviderPrefix ? `${config.provider}/${config.modelId}` : config.modelId;
    args.push(runtime.modelFlag, model);
  }

  if (runtime.promptFlag) {
    args.push(runtime.promptFlag, prompt);
  } else {
    // positional prompt
    args.push(prompt);
  }

  return args;
}
|
||||||
57
apps/harness/src/lib/credentials.ts
Normal file
57
apps/harness/src/lib/credentials.ts
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
// Every provider a credential can belong to: git hosts first, then AI APIs.
export type Provider =
  | "github" | "gitlab"
  | "anthropic" | "openai" | "openrouter" | "google" | "opencode-zen";

export const GIT_PROVIDERS: Provider[] = ["github", "gitlab"];
export const AI_PROVIDERS: Provider[] = ["anthropic", "openai", "openrouter", "google", "opencode-zen"];

export interface Credential {
  id: string;
  provider: Provider;
  label: string;
  token: string;
  baseUrl?: string; // for self-hosted GitLab or custom endpoints
}

// In-memory store. Will be replaced with encrypted persistent storage.
const credentials: Map<string, Credential> = new Map();

// Hides the middle of a token: tokens of 8 characters or fewer are replaced
// entirely, longer ones keep only their first and last 4 characters.
function maskToken(token: string): string {
  if (token.length <= 8) return "••••••••";
  return token.slice(0, 4) + "••••" + token.slice(-4);
}

// Returns a copy of `cred` whose token is masked; the stored object is left
// untouched.
function withMaskedToken(cred: Credential): Credential {
  return { ...cred, token: maskToken(cred.token) };
}

// Lists every credential with its token masked.
export function getAllCredentials(): Credential[] {
  return Array.from(credentials.values()).map(c => withMaskedToken(c));
}

// Lists the credentials of all git providers or all AI providers, with
// tokens masked.
export function getCredentialsByKind(kind: "git" | "ai"): Credential[] {
  const providers = kind === "git" ? GIT_PROVIDERS : AI_PROVIDERS;
  return Array.from(credentials.values())
    .filter(c => providers.includes(c.provider))
    .map(c => withMaskedToken(c));
}

// Returns the stored credential for `id` with its real (unmasked) token, or
// undefined when there is none.
export function getCredential(id: string): Credential | undefined {
  return credentials.get(id);
}

// Lists the credentials of one provider with tokens masked, like the other
// list accessors. Callers that need the real token must use
// getRawCredentialsByProvider.
export function getCredentialsByProvider(provider: Provider): Credential[] {
  return Array.from(credentials.values())
    .filter(c => c.provider === provider)
    .map(c => withMaskedToken(c));
}

// Lists the credentials of one provider with their real (unmasked) tokens.
export function getRawCredentialsByProvider(provider: Provider): Credential[] {
  return Array.from(credentials.values()).filter(c => c.provider === provider);
}

// Stores `cred` under its id, replacing any existing entry, and returns a
// masked copy of it.
export function upsertCredential(cred: Credential): Credential {
  credentials.set(cred.id, cred);
  return withMaskedToken(cred);
}

// Removes the credential stored under `id`; returns whether one existed.
export function deleteCredential(id: string): boolean {
  return credentials.delete(id);
}
|
||||||
99
apps/harness/src/lib/evaluator.ts
Normal file
99
apps/harness/src/lib/evaluator.ts
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
import { Task, Eval } from "./types";
|
||||||
|
import { hasDiff, getDiffStats } from "./git-ops";
|
||||||
|
|
||||||
|
// Outcome of evaluating one agent run against a task's criteria.
export interface EvalResult {
  // Per-criterion results, keyed by criterion label.
  evals: Record<string, Eval>;
  // True when no criterion failed.
  allPassed: boolean;
  // Human-readable summary; lists each failed criterion when any failed.
  diagnosis: string;
  // Value returned by getDiffStats for the work directory.
  diffStats: string;
}
|
||||||
|
|
||||||
|
// Evaluates one criterion against the facts gathered from an agent run.
//
// Supported target forms:
//   exitCode:<N>      passes when the exit code equals N
//   contains:<text>   passes when the agent output contains text
//   filesChanged:>0   passes when the git diff has changes
// Any other target yields a failing result whose value is "unknown".
function evaluateCriterion(
  criterion: { label: string; target: string },
  context: { exitCode: number; agentOutput: string; hasChanges: boolean },
): Eval {
  const { label, target } = criterion;

  // Every branch returns the same shape; only value, unit and pass vary.
  const verdict = (value: number | string, unit: string, pass: boolean): Eval => ({
    label,
    value,
    unit,
    pass,
    target,
  });

  const exitSpec = /^exitCode:(\d+)$/.exec(target);
  if (exitSpec !== null) {
    const wanted = Number(exitSpec[1]);
    return verdict(context.exitCode, "exit code", context.exitCode === wanted);
  }

  const textSpec = /^contains:(.+)$/.exec(target);
  if (textSpec !== null) {
    const present = context.agentOutput.includes(textSpec[1]);
    return verdict(present ? "found" : "not found", "", present);
  }

  if (target === "filesChanged:>0") {
    return verdict(context.hasChanges ? ">0" : "0", "files", context.hasChanges);
  }

  // Unknown target — always fail
  return verdict("unknown", "", false);
}
|
||||||
|
|
||||||
|
// Evaluates an agent run against every criterion in `task.spec.criteria`.
//
// Reads the diff state of `workDir` (hasDiff, then getDiffStats), scores each
// criterion with evaluateCriterion, and returns the per-label results, an
// overall pass flag, a diagnosis string and the diff stats.
// `opts.iterationNumber` is accepted but not read here.
export async function evaluate(opts: {
  task: Task;
  iterationNumber: number;
  agentOutput: string;
  exitCode: number;
  workDir: string;
}): Promise<EvalResult> {
  const { task, agentOutput, exitCode, workDir } = opts;

  const hasChanges = await hasDiff(workDir);
  const diffStats = await getDiffStats(workDir);

  const evals: Record<string, Eval> = {};
  const failures: string[] = [];

  for (const criterion of task.spec.criteria) {
    const outcome = evaluateCriterion(criterion, { exitCode, agentOutput, hasChanges });
    evals[criterion.label] = outcome;
    if (outcome.pass) continue;
    failures.push(`${criterion.label}: expected ${criterion.target}, got ${outcome.value}`);
  }

  const allPassed = failures.length === 0;

  let diagnosis: string;
  if (allPassed) {
    diagnosis = "All criteria passed.";
  } else {
    const bullets = failures.map((f) => `- ${f}`).join("\n");
    diagnosis = `Failed criteria:\n${bullets}`;
  }

  return { evals, allPassed, diagnosis, diffStats };
}
|
||||||
158
apps/harness/src/lib/executor.ts
Normal file
158
apps/harness/src/lib/executor.ts
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
import { spawn, ChildProcess } from "node:child_process";
|
||||||
|
import { getAgentConfig, buildAgentCommand, AGENT_RUNTIMES } from "./agents";
|
||||||
|
import { getRawCredentialsByProvider, Provider } from "./credentials";
|
||||||
|
import { ExecutionResult } from "./types";
|
||||||
|
|
||||||
|
// Default time allowed for an agent run: 10 minutes, in milliseconds.
const DEFAULT_TIMEOUT_MS = 1000 * 60 * 10;

// Environment variable that carries the API key for each AI provider.
const PROVIDER_ENV_VARS: Record<string, string> = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  google: "GOOGLE_API_KEY",
  openrouter: "OPENROUTER_API_KEY",
  "opencode-zen": "OPENCODE_ZEN_API_KEY",
};

// Builds a fresh pair of best-effort regexes that capture the number after
// "input tokens" / "output tokens" in CLI output (commas allowed in the
// number).
const tokenUsagePatterns = (): { input: RegExp; output: RegExp } => ({
  input: /input[_\s]tokens?[:\s]+(\d[\d,]*)/i,
  output: /output[_\s]tokens?[:\s]+(\d[\d,]*)/i,
});

// Token extraction regexes per runtime; all three currently share the same
// patterns.
const TOKEN_PATTERNS: Record<string, { input: RegExp; output: RegExp }> = {
  "claude-code": tokenUsagePatterns(),
  codex: tokenUsagePatterns(),
  opencode: tokenUsagePatterns(),
};
|
||||||
|
|
||||||
|
// Extracts a token count from `text` using the first capture group of
// `pattern`, ignoring thousands separators. Returns 0 when the pattern does
// not match.
function parseTokenCount(text: string, pattern: RegExp): number {
  const hit = text.match(pattern);
  return hit ? parseInt(hit[1].split(",").join(""), 10) : 0;
}
|
||||||
|
|
||||||
|
export async function executeAgent(opts: {
|
||||||
|
agentId: string;
|
||||||
|
prompt: string;
|
||||||
|
workDir: string;
|
||||||
|
timeoutMs?: number;
|
||||||
|
signal?: AbortSignal;
|
||||||
|
}): Promise<ExecutionResult> {
|
||||||
|
const config = getAgentConfig(opts.agentId);
|
||||||
|
if (!config) {
|
||||||
|
throw new Error(`Agent config not found: ${opts.agentId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const args = buildAgentCommand(config, opts.prompt, opts.workDir);
|
||||||
|
const command = args[0];
|
||||||
|
const commandArgs = args.slice(1);
|
||||||
|
|
||||||
|
// Build environment with credentials
|
||||||
|
const env: NodeJS.ProcessEnv = { ...process.env };
|
||||||
|
|
||||||
|
// Set API keys — OpenCode is multi-provider so inject all available keys;
|
||||||
|
// other runtimes only need their configured provider's key.
|
||||||
|
const providersToInject =
|
||||||
|
config.runtime === "opencode"
|
||||||
|
? Object.keys(PROVIDER_ENV_VARS)
|
||||||
|
: [config.provider];
|
||||||
|
|
||||||
|
for (const provider of providersToInject) {
|
||||||
|
const envVar = PROVIDER_ENV_VARS[provider];
|
||||||
|
if (!envVar) continue;
|
||||||
|
const creds = getRawCredentialsByProvider(provider as Provider);
|
||||||
|
if (creds.length > 0) {
|
||||||
|
env[envVar] = creds[0].token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set GitHub token for git operations within agent
|
||||||
|
const ghCreds = getRawCredentialsByProvider("github" as Provider);
|
||||||
|
if (ghCreds.length > 0) {
|
||||||
|
env.GITHUB_TOKEN = ghCreds[0].token;
|
||||||
|
env.GH_TOKEN = ghCreds[0].token;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add any custom env from agent config
|
||||||
|
if (config.env) {
|
||||||
|
Object.assign(env, config.env);
|
||||||
|
}
|
||||||
|
|
||||||
|
const timeout = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
return new Promise<ExecutionResult>((resolve) => {
|
||||||
|
const child: ChildProcess = spawn(command, commandArgs, {
|
||||||
|
cwd: opts.workDir,
|
||||||
|
env,
|
||||||
|
stdio: ["ignore", "pipe", "pipe"],
|
||||||
|
});
|
||||||
|
|
||||||
|
let stdout = "";
|
||||||
|
let stderr = "";
|
||||||
|
let killed = false;
|
||||||
|
|
||||||
|
child.stdout!.on("data", (chunk: Buffer) => {
|
||||||
|
stdout += chunk.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
child.stderr!.on("data", (chunk: Buffer) => {
|
||||||
|
stderr += chunk.toString();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Timeout
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
killed = true;
|
||||||
|
child.kill("SIGTERM");
|
||||||
|
setTimeout(() => child.kill("SIGKILL"), 5000);
|
||||||
|
}, timeout);
|
||||||
|
|
||||||
|
// Cancellation via AbortSignal
|
||||||
|
const onAbort = () => {
|
||||||
|
killed = true;
|
||||||
|
child.kill("SIGTERM");
|
||||||
|
setTimeout(() => child.kill("SIGKILL"), 5000);
|
||||||
|
};
|
||||||
|
opts.signal?.addEventListener("abort", onAbort, { once: true });
|
||||||
|
|
||||||
|
child.on("close", (code: number | null) => {
|
||||||
|
clearTimeout(timer);
|
||||||
|
opts.signal?.removeEventListener("abort", onAbort);
|
||||||
|
|
||||||
|
const durationMs = Date.now() - startTime;
|
||||||
|
const combined = stdout + "\n" + stderr;
|
||||||
|
const runtime = AGENT_RUNTIMES[config.runtime];
|
||||||
|
const patterns = TOKEN_PATTERNS[runtime.id] ?? TOKEN_PATTERNS["claude-code"];
|
||||||
|
|
||||||
|
resolve({
|
||||||
|
exitCode: code ?? 1,
|
||||||
|
stdout,
|
||||||
|
stderr,
|
||||||
|
durationMs,
|
||||||
|
inputTokens: parseTokenCount(combined, patterns.input),
|
||||||
|
outputTokens: parseTokenCount(combined, patterns.output),
|
||||||
|
killed,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
child.on("error", (err: Error) => {
|
||||||
|
clearTimeout(timer);
|
||||||
|
opts.signal?.removeEventListener("abort", onAbort);
|
||||||
|
|
||||||
|
resolve({
|
||||||
|
exitCode: 1,
|
||||||
|
stdout,
|
||||||
|
stderr: stderr + "\n" + err.message,
|
||||||
|
durationMs: Date.now() - startTime,
|
||||||
|
inputTokens: 0,
|
||||||
|
outputTokens: 0,
|
||||||
|
killed: false,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
149
apps/harness/src/lib/git-ops.ts
Normal file
149
apps/harness/src/lib/git-ops.ts
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
import { execFile } from "node:child_process";
|
||||||
|
import { promisify } from "node:util";
|
||||||
|
import { mkdir } from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
|
const exec = promisify(execFile);
|
||||||
|
|
||||||
|
const WORK_DIR = process.env.HARNESS_WORK_DIR || "/tmp/harness";
|
||||||
|
|
||||||
|
/** Directory beneath the harness work dir where bare clones are kept. */
function reposDir(): string {
  const segments = [WORK_DIR, "repos"];
  return path.join(...segments);
}
|
||||||
|
|
||||||
|
/**
 * Root directory for everything that belongs to a single task.
 *
 * @param taskId - Task identifier, used verbatim as a path segment.
 * @returns `<WORK_DIR>/tasks/<taskId>`
 */
export function taskDir(taskId: string): string {
  const tasksRoot = path.join(WORK_DIR, "tasks");
  return path.join(tasksRoot, taskId);
}
|
||||||
|
|
||||||
|
/**
 * Working directory for one iteration of a task.
 *
 * @param taskId - Task identifier.
 * @param iteration - Iteration number; becomes the `iter-<n>` leaf directory.
 * @returns `<taskDir(taskId)>/iter-<iteration>`
 */
export function iterationDir(taskId: string, iteration: number): string {
  const leaf = `iter-${iteration}`;
  return path.join(taskDir(taskId), leaf);
}
|
||||||
|
|
||||||
|
/**
 * Build an HTTPS clone URL that embeds an access token in the userinfo part.
 *
 * @param repo - Repository in `owner/name` form (inserted as-is into the path).
 * @param provider - Hosting provider; selects the host and the token username.
 * @param token - Access token. It is percent-encoded so that characters such as
 *   `@`, `:` or `/` cannot terminate the userinfo section early.
 * @returns The authenticated clone URL ending in `.git`.
 */
export function buildAuthenticatedCloneUrl(
  repo: string,
  provider: "github" | "gitlab",
  token: string,
): string {
  // Typical tokens are URL-safe already, in which case this is a no-op.
  const safeToken = encodeURIComponent(token);

  if (provider === "gitlab") {
    return `https://oauth2:${safeToken}@gitlab.com/${repo}.git`;
  }
  return `https://x-access-token:${safeToken}@github.com/${repo}.git`;
}
|
||||||
|
|
||||||
|
/** Location of the bare clone for `slug`: `<reposDir()>/<slug>.git`. */
function bareClonePath(slug: string): string {
  const dirName = `${slug}.git`;
  return path.join(reposDir(), dirName);
}
|
||||||
|
|
||||||
|
/**
 * Make sure a bare clone of `repoUrl` exists for `slug` and is refreshed.
 *
 * If the clone directory is already a bare repository, its `origin` URL is
 * updated to `repoUrl` (so a rotated token embedded in the URL takes effect)
 * and the remote is fetched. Otherwise a fresh `git clone --bare` is made.
 *
 * Unlike a blanket "fetch, else clone" fallback, a fetch failure on an
 * existing clone is propagated instead of being masked by a clone attempt
 * into a directory that already exists.
 *
 * @param repoUrl - Clone URL (may contain credentials).
 * @param slug - Name used for the clone directory (`<slug>.git`).
 * @returns Absolute or work-dir-relative path of the bare clone.
 * @throws If `git clone`, `git remote set-url` or `git fetch` fails.
 */
export async function ensureBareClone(
  repoUrl: string,
  slug: string,
): Promise<string> {
  const clonePath = bareClonePath(slug);
  await mkdir(reposDir(), { recursive: true });

  // Probe: a missing directory or a non-repository makes this call reject.
  let alreadyCloned = false;
  try {
    const { stdout } = await exec("git", ["rev-parse", "--is-bare-repository"], {
      cwd: clonePath,
    });
    alreadyCloned = stdout.trim() === "true";
  } catch {
    alreadyCloned = false;
  }

  if (!alreadyCloned) {
    await exec("git", ["clone", "--bare", repoUrl, clonePath]);
    return clonePath;
  }

  // Keep the stored remote in sync with the credentials passed in this time.
  await exec("git", ["remote", "set-url", "origin", repoUrl], { cwd: clonePath });
  // NOTE(review): a --bare clone has no fetch refspec configured by default, so
  // this may not move local branch heads — confirm that is acceptable.
  await exec("git", ["fetch", "--all"], { cwd: clonePath });

  return clonePath;
}
|
||||||
|
|
||||||
|
/**
 * Add a git worktree on a newly created branch.
 *
 * @param bareClone - Repository the worktree is attached to (used as cwd).
 * @param worktreePath - Directory for the new worktree; its parent is created.
 * @param branch - Name of the branch created with `-b`.
 * @param base - Optional commit-ish the branch starts from.
 * @throws If `git worktree add` fails (e.g. the branch already exists).
 */
export async function createWorktree(
  bareClone: string,
  worktreePath: string,
  branch: string,
  base?: string,
): Promise<void> {
  const parentDir = path.dirname(worktreePath);
  await mkdir(parentDir, { recursive: true });

  const gitArgs = base
    ? ["worktree", "add", worktreePath, "-b", branch, base]
    : ["worktree", "add", worktreePath, "-b", branch];

  await exec("git", gitArgs, { cwd: bareClone });
}
|
||||||
|
|
||||||
|
/**
 * Force-remove a worktree. Failures are ignored on purpose: this is
 * best-effort cleanup and must never fail the caller.
 *
 * @param bareClone - Repository that owns the worktree (used as cwd).
 * @param worktreePath - Worktree directory to remove.
 */
export async function removeWorktree(
  bareClone: string,
  worktreePath: string,
): Promise<void> {
  const gitArgs = ["worktree", "remove", "--force", worktreePath];
  const execOptions = { cwd: bareClone };

  try {
    await exec("git", gitArgs, execOptions);
  } catch {
    // Intentionally swallowed — nothing useful to do if cleanup fails.
  }
}
|
||||||
|
|
||||||
|
/**
 * Return the trimmed output of `git diff --stat HEAD` for `workDir`.
 * `git diff` only reports tracked paths, so untracked files are not listed.
 *
 * @param workDir - Working tree to inspect.
 * @returns The diffstat text, or an empty string if git fails.
 */
export async function getDiffStats(workDir: string): Promise<string> {
  let statText = "";
  try {
    const result = await exec("git", ["diff", "--stat", "HEAD"], { cwd: workDir });
    statText = result.stdout.trim();
  } catch {
    statText = "";
  }
  return statText;
}
|
||||||
|
|
||||||
|
/**
 * Report whether the working tree differs from HEAD.
 *
 * Uses `git status --porcelain` rather than `git diff HEAD` so that newly
 * created (untracked, non-ignored) files count as a change too — these are
 * exactly the files a later `git add -A` would pick up.
 *
 * @param workDir - Working tree to inspect.
 * @returns `true` if there is any staged, unstaged or untracked change;
 *   `false` if the tree is clean or git fails.
 */
export async function hasDiff(workDir: string): Promise<boolean> {
  try {
    const { stdout } = await exec("git", ["status", "--porcelain"], {
      cwd: workDir,
    });
    return stdout.trim().length > 0;
  } catch {
    // Same contract as before: an unreadable repo is reported as "no diff".
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Stage every change in `workDir` and create a commit.
 * `--allow-empty` means a commit is made even when nothing changed.
 *
 * @param workDir - Working tree to commit in.
 * @param message - Commit message.
 * @throws If `git add` or `git commit` fails.
 */
export async function commitAll(
  workDir: string,
  message: string,
): Promise<void> {
  const inWorkDir = { cwd: workDir };
  const stageArgs = ["add", "-A"];
  const commitArgs = ["commit", "-m", message, "--allow-empty"];

  await exec("git", stageArgs, inWorkDir);
  await exec("git", commitArgs, { cwd: workDir });
}
|
||||||
|
|
||||||
|
/**
 * Push `branch` to the `origin` remote using `--force-with-lease`.
 *
 * @param workDir - Working tree the push is run from.
 * @param branch - Branch (or refspec) handed to `git push origin`.
 * @throws If the push is rejected or git fails.
 */
export async function pushBranch(
  workDir: string,
  branch: string,
): Promise<void> {
  const pushArgs = ["push", "origin", branch, "--force-with-lease"];
  await exec("git", pushArgs, { cwd: workDir });
}
|
||||||
|
|
||||||
|
/**
 * Open a pull request through the GitHub CLI.
 *
 * `gh pr create` does not accept `--json`; on success it prints the URL of
 * the new pull request. The PR number is taken from that URL.
 *
 * @param opts.repo - Target repository in `owner/name` form.
 * @param opts.head - Branch that contains the changes.
 * @param opts.title - Pull request title.
 * @param opts.body - Pull request description.
 * @param opts.token - Token exported to `gh` as `GH_TOKEN`.
 * @returns The pull request number and URL.
 * @throws If `gh` fails or its output contains no pull request URL.
 */
export async function createPullRequest(opts: {
  repo: string;
  head: string;
  title: string;
  body: string;
  token: string;
}): Promise<{ number: number; url: string }> {
  const { stdout } = await exec(
    "gh",
    [
      "pr",
      "create",
      "--repo", opts.repo,
      "--head", opts.head,
      "--title", opts.title,
      "--body", opts.body,
    ],
    {
      env: { ...process.env, GH_TOKEN: opts.token },
    },
  );

  // Output may contain extra lines; pick the first ".../pull/<n>" URL.
  const match = stdout.match(/https?:\/\/\S+\/pull\/(\d+)/);
  if (!match) {
    throw new Error(
      `Could not find a pull request URL in gh output: ${stdout.trim()}`,
    );
  }

  return { number: Number(match[1]), url: match[0] };
}
|
||||||
135
apps/harness/src/lib/model-providers.ts
Normal file
135
apps/harness/src/lib/model-providers.ts
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
import { getRawCredentialsByProvider } from "./credentials";
|
||||||
|
|
||||||
|
/** A model as reported by one of the provider listing endpoints. */
export interface ModelInfo {
  /** Provider-side model identifier. */
  id: string;
  /** Display name; the fetchers fall back to the id when none is given. */
  name: string;
  /** Provider key, e.g. "anthropic", "openai", "openrouter", "google". */
  provider: string;
  /** Context size in tokens, when the provider response includes it. */
  contextWindow?: number;
}
|
||||||
|
|
||||||
|
/**
 * Query every supported provider concurrently and merge the results.
 * A provider whose fetch rejects contributes nothing; it never fails the call.
 *
 * @returns All models, in provider order: Anthropic, OpenAI, OpenRouter, Google.
 */
export async function fetchAllModels(): Promise<ModelInfo[]> {
  const outcomes = await Promise.allSettled([
    fetchAnthropicModels(),
    fetchOpenAIModels(),
    fetchOpenRouterModels(),
    fetchGoogleModels(),
  ]);

  const merged: ModelInfo[] = [];
  for (const outcome of outcomes) {
    if (outcome.status !== "fulfilled") continue;
    for (const model of outcome.value) {
      merged.push(model);
    }
  }
  return merged;
}
|
||||||
|
|
||||||
|
/**
 * List Anthropic models using the first stored credential that works.
 *
 * The listing endpoint is paginated (default page size 20), so the maximum
 * page size is requested to avoid silently dropping models.
 *
 * @returns Models tagged with provider "anthropic", or `[]` when there are no
 *   credentials or every credential fails.
 */
async function fetchAnthropicModels(): Promise<ModelInfo[]> {
  const creds = getRawCredentialsByProvider("anthropic");
  if (creds.length === 0) return [];

  for (const cred of creds) {
    try {
      const res = await fetch("https://api.anthropic.com/v1/models?limit=1000", {
        headers: {
          "x-api-key": cred.token,
          "anthropic-version": "2023-06-01",
        },
      });

      // Rejected credential or server error: try the next one.
      if (!res.ok) continue;

      const data = await res.json();
      return (data.data || []).map((m: { id: string; display_name?: string }) => ({
        id: m.id,
        name: m.display_name || m.id,
        provider: "anthropic",
      }));
    } catch {
      // Network or JSON failure: try the next credential.
      continue;
    }
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * List OpenAI chat/reasoning models using the first credential that works.
 *
 * Only ids starting with `gpt-`, `chatgpt-`, or `o` followed by a digit
 * (o1, o3, o4-mini, ...) are kept. Matching on a bare "o" prefix would also
 * let through unrelated ids such as `omni-moderation-*`.
 *
 * @returns Models tagged with provider "openai", or `[]` when there are no
 *   credentials or every credential fails.
 */
async function fetchOpenAIModels(): Promise<ModelInfo[]> {
  const creds = getRawCredentialsByProvider("openai");
  if (creds.length === 0) return [];

  const isChatModel = (id: string): boolean =>
    id.startsWith("gpt-") || id.startsWith("chatgpt-") || /^o\d/.test(id);

  for (const cred of creds) {
    try {
      // Tolerate a custom base URL written with a trailing slash.
      const baseUrl = (cred.baseUrl || "https://api.openai.com").replace(/\/+$/, "");
      const res = await fetch(`${baseUrl}/v1/models`, {
        headers: { Authorization: `Bearer ${cred.token}` },
      });

      if (!res.ok) continue;

      const data = await res.json();
      return (data.data || [])
        .filter((m: { id: string }) => isChatModel(m.id))
        .map((m: { id: string }) => ({
          id: m.id,
          name: m.id,
          provider: "openai",
        }));
    } catch {
      // Network or JSON failure: try the next credential.
      continue;
    }
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * List OpenRouter models, trying each stored credential in turn until one
 * request succeeds.
 *
 * @returns Models tagged with provider "openrouter" (context window taken from
 *   `context_length`), or `[]` if no credential yields a usable response.
 */
async function fetchOpenRouterModels(): Promise<ModelInfo[]> {
  type OpenRouterModel = { id: string; name?: string; context_length?: number };

  for (const { token } of getRawCredentialsByProvider("openrouter")) {
    try {
      const response = await fetch("https://openrouter.ai/api/v1/models", {
        headers: { Authorization: `Bearer ${token}` },
      });

      if (response.ok) {
        const payload = await response.json();
        const entries = payload.data || [];
        return entries.map((entry: OpenRouterModel) => ({
          id: entry.id,
          name: entry.name || entry.id,
          provider: "openrouter",
          contextWindow: entry.context_length,
        }));
      }
    } catch {
      // Fall through and try the next credential.
    }
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * List Google Gemini models, trying each stored credential in turn until one
 * request succeeds. Only entries whose name contains "gemini" are kept, and
 * the "models/" prefix is removed from the id.
 *
 * @returns Models tagged with provider "google", or `[]` if no credential
 *   yields a usable response.
 */
async function fetchGoogleModels(): Promise<ModelInfo[]> {
  type GoogleModel = { name: string; displayName?: string; inputTokenLimit?: number };

  for (const { token } of getRawCredentialsByProvider("google")) {
    try {
      const endpoint = `https://generativelanguage.googleapis.com/v1beta/models?key=${token}`;
      const response = await fetch(endpoint);

      if (response.ok) {
        const payload = await response.json();
        const entries = payload.models || [];
        return entries
          .filter((entry: { name: string }) => entry.name.includes("gemini"))
          .map((entry: GoogleModel) => ({
            id: entry.name.replace("models/", ""),
            name: entry.displayName || entry.name,
            provider: "google",
            contextWindow: entry.inputTokenLimit,
          }));
      }
    } catch {
      // Fall through and try the next credential.
    }
  }

  return [];
}
|
||||||
154
apps/harness/src/lib/model-store.ts
Normal file
154
apps/harness/src/lib/model-store.ts
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
// Curated model list and usage tracking
|
||||||
|
|
||||||
|
/** A model in the curated list, with optional pricing used for cost summaries. */
export interface CuratedModel {
  /** Model identifier; also the key in the curated-model map. */
  id: string;
  /** Human-readable name. */
  name: string;
  /** Provider key, e.g. "anthropic". */
  provider: string;
  /** Whether the model is currently enabled. */
  enabled: boolean;
  /** Context size in tokens, if known. */
  contextWindow?: number;
  /** USD per 1k input tokens. */
  costPer1kInput?: number;
  /** USD per 1k output tokens. */
  costPer1kOutput?: number;
}
|
||||||
|
|
||||||
|
/** One recorded agent run: which model ran for which task iteration, and what it used. */
export interface ModelUsageEntry {
  /** Identifier of the model that was used. */
  modelId: string;
  /** Provider key of that model. */
  provider: string;
  /** Task the run belonged to. */
  taskId: string;
  /** Slug of that task. */
  taskSlug: string;
  /** Iteration number within the task. */
  iteration: number;
  /** Input tokens consumed. */
  inputTokens: number;
  /** Output tokens produced. */
  outputTokens: number;
  /** Run duration in milliseconds. */
  durationMs: number;
  /** When the entry was recorded (epoch milliseconds, as from `Date.now()`). */
  timestamp: number;
}
|
||||||
|
|
||||||
|
/** Usage totals for one provider/model pair, aggregated over the usage log. */
export interface ModelUsageSummary {
  /** Identifier of the model. */
  modelId: string;
  /** Provider key of the model. */
  provider: string;
  /** Sum of input tokens over all entries. */
  totalInputTokens: number;
  /** Sum of output tokens over all entries. */
  totalOutputTokens: number;
  /** Estimated cost in USD; 0 for entries whose model has no pricing. */
  totalCost: number;
  /** Number of usage entries. */
  totalRequests: number;
  /** Sum of run durations in milliseconds. */
  totalDurationMs: number;
}
|
||||||
|
|
||||||
|
// In-memory stores. Nothing here is persisted: contents live only as long as
// this module instance does.

/** Curated models keyed by model id. */
const curatedModels = new Map<string, CuratedModel>();

/** Append-only list of usage entries, oldest first. */
const usageLog: Array<ModelUsageEntry> = [];
|
||||||
|
|
||||||
|
// ─── CURATED MODELS ─────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Return every curated model, in insertion order. */
export function getCuratedModels(): CuratedModel[] {
  const everyModel = curatedModels.values();
  return Array.from(everyModel);
}
|
||||||
|
|
||||||
|
/** Return only the curated models whose `enabled` flag is set. */
export function getEnabledModels(): CuratedModel[] {
  const enabledModels: CuratedModel[] = [];
  curatedModels.forEach((candidate) => {
    if (candidate.enabled) {
      enabledModels.push(candidate);
    }
  });
  return enabledModels;
}
|
||||||
|
|
||||||
|
/**
 * Insert a curated model, or replace the existing one with the same id.
 *
 * @param model - Model to store; kept by reference.
 * @returns The same `model` object.
 */
export function upsertCuratedModel(model: CuratedModel): CuratedModel {
  const { id } = model;
  curatedModels.set(id, model);
  return model;
}
|
||||||
|
|
||||||
|
/**
 * Remove a curated model.
 *
 * @param id - Identifier of the model to delete.
 * @returns `true` if a model with that id existed and was removed.
 */
export function removeCuratedModel(id: string): boolean {
  const wasPresent = curatedModels.delete(id);
  return wasPresent;
}
|
||||||
|
|
||||||
|
/**
 * Flip the `enabled` flag of a curated model in place.
 *
 * @param id - Identifier of the model to toggle.
 * @returns The updated model, or `undefined` if the id is unknown.
 */
export function toggleModelEnabled(id: string): CuratedModel | undefined {
  const target = curatedModels.get(id);
  if (target) {
    target.enabled = !target.enabled;
    curatedModels.set(id, target);
  }
  return target;
}
|
||||||
|
|
||||||
|
/**
 * Overwrite the pricing of a curated model in place.
 *
 * @param id - Identifier of the model to update.
 * @param costPer1kInput - USD per 1k input tokens.
 * @param costPer1kOutput - USD per 1k output tokens.
 * @returns The updated model, or `undefined` if the id is unknown.
 */
export function updateModelCost(
  id: string,
  costPer1kInput: number,
  costPer1kOutput: number,
): CuratedModel | undefined {
  const target = curatedModels.get(id);
  if (target) {
    target.costPer1kInput = costPer1kInput;
    target.costPer1kOutput = costPer1kOutput;
    curatedModels.set(id, target);
  }
  return target;
}
|
||||||
|
|
||||||
|
// ─── USAGE TRACKING ─────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Append one entry to the in-memory usage log.
 *
 * @param entry - Usage record to store; kept by reference.
 */
export function recordUsage(entry: ModelUsageEntry): void {
  const log = usageLog;
  log.push(entry);
}
|
||||||
|
|
||||||
|
/** Return a shallow copy of the usage log, so callers cannot mutate the store's array. */
export function getUsageLog(): ModelUsageEntry[] {
  const snapshot = usageLog.slice();
  return snapshot;
}
|
||||||
|
|
||||||
|
/**
 * Aggregate the usage log per `provider:modelId` pair.
 *
 * Cost is estimated from the curated model's per-1k-token prices; an entry
 * whose model is not curated, or has no price set, contributes zero cost.
 *
 * @returns One summary per provider/model pair, most expensive first.
 */
export function getUsageSummary(): ModelUsageSummary[] {
  const byModel = new Map<string, ModelUsageSummary>();

  usageLog.forEach((entry) => {
    const pricing = curatedModels.get(entry.modelId);
    const inputCost = pricing?.costPer1kInput
      ? (entry.inputTokens / 1000) * pricing.costPer1kInput
      : 0;
    const outputCost = pricing?.costPer1kOutput
      ? (entry.outputTokens / 1000) * pricing.costPer1kOutput
      : 0;

    const bucketKey = `${entry.provider}:${entry.modelId}`;
    const bucket = byModel.get(bucketKey);

    if (!bucket) {
      // First entry for this pair: start the totals from it.
      byModel.set(bucketKey, {
        modelId: entry.modelId,
        provider: entry.provider,
        totalInputTokens: entry.inputTokens,
        totalOutputTokens: entry.outputTokens,
        totalCost: inputCost + outputCost,
        totalRequests: 1,
        totalDurationMs: entry.durationMs,
      });
      return;
    }

    bucket.totalInputTokens += entry.inputTokens;
    bucket.totalOutputTokens += entry.outputTokens;
    bucket.totalCost += inputCost + outputCost;
    bucket.totalRequests += 1;
    bucket.totalDurationMs += entry.durationMs;
  });

  const summaries = Array.from(byModel.values());
  summaries.sort((a, b) => b.totalCost - a.totalCost);
  return summaries;
}
|
||||||
|
|
||||||
|
// ─── SEED DATA ──────────────────────────────────────────────
|
||||||
|
// Pre-populate with well-known models and pricing
|
||||||
|
|
||||||
|
/**
 * Well-known models with pricing, loaded into the curated list by `seedData`
 * (which marks every one of them enabled). Prices are USD per 1k tokens.
 */
const SEED_MODELS: Omit<CuratedModel, "enabled">[] = [
  // Anthropic
  { id: "claude-opus-4-20250514", name: "Claude Opus 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.015, costPer1kOutput: 0.075 },
  { id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.003, costPer1kOutput: 0.015 },
  // NOTE(review): confirm this id against the provider's model list.
  { id: "claude-haiku-4-20250514", name: "Claude Haiku 4", provider: "anthropic", contextWindow: 200000, costPer1kInput: 0.0008, costPer1kOutput: 0.004 },
  // OpenAI
  { id: "gpt-4o", name: "GPT-4o", provider: "openai", contextWindow: 128000, costPer1kInput: 0.0025, costPer1kOutput: 0.01 },
  { id: "gpt-4o-mini", name: "GPT-4o Mini", provider: "openai", contextWindow: 128000, costPer1kInput: 0.00015, costPer1kOutput: 0.0006 },
  { id: "o3", name: "o3", provider: "openai", contextWindow: 200000, costPer1kInput: 0.01, costPer1kOutput: 0.04 },
  { id: "o4-mini", name: "o4 Mini", provider: "openai", contextWindow: 200000, costPer1kInput: 0.0011, costPer1kOutput: 0.0044 },
  // Google
  { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro", provider: "google", contextWindow: 1048576, costPer1kInput: 0.00125, costPer1kOutput: 0.01 },
  { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash", provider: "google", contextWindow: 1048576, costPer1kInput: 0.00015, costPer1kOutput: 0.0006 },
];
|
||||||
|
|
||||||
|
/**
 * Sample usage entries loaded by `seedData`, which assigns each one a
 * timestamp. Listed oldest first.
 */
const SEED_USAGE: Omit<ModelUsageEntry, "timestamp">[] = [
  // task-002 / haiku-moderation-tier2
  { modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 1, inputTokens: 48200, outputTokens: 12400, durationMs: 34000 },
  { modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 2, inputTokens: 52100, outputTokens: 15800, durationMs: 41000 },
  { modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 3, inputTokens: 61300, outputTokens: 18200, durationMs: 45000 },
  { modelId: "claude-sonnet-4-20250514", provider: "anthropic", taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 4, inputTokens: 55000, outputTokens: 14600, durationMs: 38000 },
  // task-001 / pubsub-pipeline-migration
  { modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1, inputTokens: 85400, outputTokens: 28900, durationMs: 92000 },
  { modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 2, inputTokens: 91200, outputTokens: 31400, durationMs: 98000 },
  { modelId: "claude-opus-4-20250514", provider: "anthropic", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 3, inputTokens: 78600, outputTokens: 22100, durationMs: 85000 },
  { modelId: "gpt-4o", provider: "openai", taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1, inputTokens: 42000, outputTokens: 9800, durationMs: 28000 },
];
|
||||||
|
|
||||||
|
/**
 * Fill the in-memory stores with the seed models and sample usage.
 * Does nothing if the curated list already has entries, so repeated calls
 * are harmless.
 */
export function seedData() {
  const alreadySeeded = curatedModels.size > 0;
  if (alreadySeeded) return;

  SEED_MODELS.forEach((seed) => {
    curatedModels.set(seed.id, { ...seed, enabled: true });
  });

  // Space the sample entries 30 minutes apart, ending 30 minutes before now.
  const now = Date.now();
  const total = SEED_USAGE.length;
  SEED_USAGE.forEach((seed, index) => {
    const timestamp = now - (total - index) * 1000 * 60 * 30;
    usageLog.push({ ...seed, timestamp });
  });
}

// Seed once when the module is first loaded.
seedData();
|
||||||
316
apps/harness/src/lib/orchestrator.ts
Normal file
316
apps/harness/src/lib/orchestrator.ts
Normal file
@@ -0,0 +1,316 @@
|
|||||||
|
import {
|
||||||
|
getTask,
|
||||||
|
updateTask,
|
||||||
|
appendIteration,
|
||||||
|
updateIteration,
|
||||||
|
getFirstPendingTask,
|
||||||
|
getRunningTasks,
|
||||||
|
} from "./store";
|
||||||
|
import { recordUsage } from "./model-store";
|
||||||
|
import { getAgentConfig } from "./agents";
|
||||||
|
import { getRawCredentialsByProvider } from "./credentials";
|
||||||
|
import {
|
||||||
|
ensureBareClone,
|
||||||
|
createWorktree,
|
||||||
|
removeWorktree,
|
||||||
|
iterationDir,
|
||||||
|
buildAuthenticatedCloneUrl,
|
||||||
|
commitAll,
|
||||||
|
pushBranch,
|
||||||
|
createPullRequest,
|
||||||
|
} from "./git-ops";
|
||||||
|
import { executeAgent } from "./executor";
|
||||||
|
import { buildPrompt } from "./prompt-builder";
|
||||||
|
import { evaluate } from "./evaluator";
|
||||||
|
import { Task, Iteration } from "./types";
|
||||||
|
|
||||||
|
/** Delay between checks for a pending task, in milliseconds. */
const POLL_INTERVAL_MS = 2000;

// Module-level orchestrator state. At most one task is processed at a time.

/** Handle of the polling interval, or null while stopped. */
let pollTimer: ReturnType<typeof setInterval> | null = null;

/** Whether the orchestrator has been started and not stopped. */
let running = false;

/** Id of the task being processed right now, or null when idle. */
let currentTaskId: string | null = null;

/** Abort controller used to cancel the task being processed, if any. */
let currentAbort: AbortController | null = null;
|
||||||
|
|
||||||
|
/** Whether the orchestrator is started (it may still be idle between tasks). */
export function isRunning(): boolean {
  const started = running;
  return started;
}
|
||||||
|
|
||||||
|
/** Id of the task currently being processed, or `null` when idle. */
export function currentRunningTaskId(): string | null {
  const activeId = currentTaskId;
  return activeId;
}
|
||||||
|
|
||||||
|
/**
 * Start the orchestrator: recover tasks left "running" by a previous process,
 * then poll for pending tasks immediately and every `POLL_INTERVAL_MS`.
 * Calling it while already started is a no-op.
 *
 * `poll` is async; its promise is always given a rejection handler so that a
 * failure in the store cannot surface as an unhandled rejection.
 */
export function startOrchestrator(): void {
  if (running) return;
  running = true;

  // Mark any crashed running tasks as failed on startup
  recoverCrashedTasks();

  const pollSafely = (): void => {
    poll().catch((err) => {
      console.error("[orchestrator] Poll failed:", err);
    });
  };

  pollTimer = setInterval(() => {
    if (currentTaskId) return; // already processing a task
    pollSafely();
  }, POLL_INTERVAL_MS);

  // Immediate first poll
  pollSafely();
}
|
||||||
|
|
||||||
|
/**
 * Stop polling for new tasks. A task that is already being processed is not
 * interrupted by this call.
 */
export function stopOrchestrator(): void {
  running = false;

  if (!pollTimer) return;
  clearInterval(pollTimer);
  pollTimer = null;
}
|
||||||
|
|
||||||
|
/**
 * Request cancellation of the task currently being processed.
 *
 * @param taskId - Id of the task to cancel.
 * @returns `true` if `taskId` is the active task and an abort was signalled;
 *   `false` if some other task (or none) is active.
 */
export function cancelTask(taskId: string): boolean {
  const isActiveTask = currentTaskId === taskId;
  if (isActiveTask) {
    currentAbort?.abort();
  }
  return isActiveTask;
}
|
||||||
|
|
||||||
|
/**
 * Fail every task the store still reports as running, along with any of its
 * iterations that are still "running". Called on startup, when such tasks can
 * only be leftovers from an interrupted process.
 */
function recoverCrashedTasks(): void {
  for (const stale of getRunningTasks()) {
    const iterations = stale.iterations.map((iter) => {
      if (iter.status !== "running") return iter;
      return {
        ...iter,
        status: "failed" as const,
        diagnosis: "Interrupted — server restarted",
        completedAt: Date.now(),
      };
    });

    updateTask(stale.id, {
      status: "failed",
      iterations,
      completedAt: Date.now(),
    });
  }
}
|
||||||
|
|
||||||
|
/**
 * Pick up the first pending task, if the orchestrator is started and idle,
 * and run it to completion. An error escaping `runTask` marks the task failed.
 * The "current task" state is always cleared afterwards.
 */
async function poll(): Promise<void> {
  const idle = running && !currentTaskId;
  if (!idle) return;

  const next = getFirstPendingTask();
  if (!next) return;

  // Claim the slot synchronously so an overlapping poll sees it as busy.
  currentTaskId = next.id;
  currentAbort = new AbortController();

  try {
    await runTask(next);
  } catch (err) {
    console.error(`[orchestrator] Task ${next.id} failed with error:`, err);
    updateTask(next.id, { status: "failed", completedAt: Date.now() });
  } finally {
    currentTaskId = null;
    currentAbort = null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Run one task end to end: clone the repository, iterate the agent until the
 * evals pass or `task.maxIterations` is reached, then commit, push and open a
 * pull request on success.
 *
 * Fixes relative to the straightforward flow:
 * - The converged commit is pushed from the iteration worktree's HEAD to
 *   `harness/<slug>`. Iterations after the first run on their own
 *   `…-iter-<n>` branch, so pushing the local `harness/<slug>` branch by name
 *   would publish a branch without the agent's changes.
 * - Worktrees are removed in a `finally`, so early exits (cancellation,
 *   failed iteration setup, thrown errors) no longer leak them.
 * - When an iteration reports a fatal result (`null`), the task is marked
 *   failed instead of possibly being left in "running".
 *
 * @param task - The pending task to process.
 */
async function runTask(task: Task): Promise<void> {
  const markFailed = (): void => {
    updateTask(task.id, { status: "failed", completedAt: Date.now() });
  };

  const agentConfig = getAgentConfig(task.spec.agentId);
  if (!agentConfig) {
    markFailed();
    return;
  }

  // Determine git credentials and repo URL
  const gitCreds = getRawCredentialsByProvider("github");
  const gitToken = gitCreds[0]?.token;
  if (!gitToken) {
    markFailed();
    return;
  }

  const repoUrl = buildAuthenticatedCloneUrl(task.project, "github", gitToken);

  updateTask(task.id, {
    status: "running",
    startedAt: Date.now(),
  });

  // Ensure bare clone
  let bareClone: string;
  try {
    bareClone = await ensureBareClone(repoUrl, task.slug);
  } catch (err) {
    console.error(`[orchestrator] Failed to clone repo for task ${task.id}:`, err);
    markFailed();
    return;
  }

  const branchName = `harness/${task.slug}`;

  try {
    let converged = false;

    for (let n = 1; n <= task.maxIterations; n++) {
      if (currentAbort?.signal.aborted) {
        markFailed();
        return;
      }

      const result = await runIteration(task, n, bareClone, branchName);
      if (!result) {
        // Iteration was cancelled or errored fatally. Marking again is
        // harmless if the iteration already failed the task.
        markFailed();
        return;
      }

      if (result.allPassed) {
        converged = true;
        break;
      }
    }

    if (!converged) {
      markFailed();
      return;
    }

    // Push and create PR
    try {
      const lastIterN = getTask(task.id)!.iteration;
      const workDir = iterationDir(task.id, lastIterN);

      await commitAll(workDir, `harness: ${task.goal}`);
      // Push whatever the worktree has checked out to the PR branch name.
      await pushBranch(workDir, `HEAD:refs/heads/${branchName}`);

      const pr = await createPullRequest({
        repo: task.project,
        head: branchName,
        title: `[harness] ${task.goal}`,
        body: `Automated by harness orchestrator.\n\nTask: ${task.slug}\nIterations: ${lastIterN}`,
        token: gitToken,
      });

      updateTask(task.id, {
        status: "completed",
        completedAt: Date.now(),
        pr: { number: pr.number, title: `[harness] ${task.goal}`, status: "open" },
      });
    } catch (err) {
      // The evals passed; a publishing failure does not undo that.
      console.error(`[orchestrator] Failed to create PR for task ${task.id}:`, err);
      updateTask(task.id, {
        status: "completed",
        completedAt: Date.now(),
      });
    }
  } finally {
    // Cleanup worktrees — runs on every exit path once the clone exists.
    const finalTask = getTask(task.id);
    for (const iter of finalTask?.iterations ?? []) {
      await removeWorktree(bareClone, iterationDir(task.id, iter.n));
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Run a single iteration: create a worktree, build the prompt, execute the
 * agent, evaluate the result, and record usage and outcome.
 *
 * If prompt building, agent execution or evaluation throws, the iteration is
 * marked failed before the error is rethrown, so it is not left in "running".
 *
 * @param task - Task being processed.
 * @param n - 1-based iteration number.
 * @param bareClone - Path of the bare clone the worktree is attached to.
 * @param branchName - Base branch name for the task.
 * @returns `{ allPassed }` from the evaluation, or `null` when the worktree
 *   could not be created (task status is not touched in that path) or the run
 *   was cancelled (iteration and task are marked failed).
 */
async function runIteration(
  task: Task,
  n: number,
  bareClone: string,
  branchName: string,
): Promise<{ allPassed: boolean } | null> {
  const iteration: Iteration = {
    n,
    status: "running",
    diagnosis: null,
    startedAt: Date.now(),
  };
  appendIteration(task.id, iteration);

  const workDir = iterationDir(task.id, n);

  try {
    // Every iteration gets a fresh branch cut from the clone's HEAD: the first
    // uses the task branch name, later ones a "-iter-<n>" suffixed name.
    const branchForWorktree = n === 1 ? branchName : `${branchName}-iter-${n}`;
    await createWorktree(bareClone, workDir, branchForWorktree, "HEAD");
  } catch (err) {
    console.error(`[orchestrator] Failed to create worktree for iteration ${n}:`, err);
    updateIteration(task.id, n, {
      status: "failed",
      diagnosis: `Failed to create worktree: ${err}`,
      completedAt: Date.now(),
    });
    return null;
  }

  try {
    // Build prompt with prior iterations
    const currentTask = getTask(task.id)!;
    const priorIterations = currentTask.iterations.filter((i) => i.n < n);
    const prompt = await buildPrompt({
      task: currentTask,
      iterationNumber: n,
      priorIterations,
    });

    // Execute agent
    const execResult = await executeAgent({
      agentId: task.spec.agentId,
      prompt,
      workDir,
      signal: currentAbort?.signal,
    });

    // Killed with the abort signal set means the user cancelled; a kill
    // without it falls through to evaluation.
    if (execResult.killed && currentAbort?.signal.aborted) {
      updateIteration(task.id, n, {
        status: "failed",
        diagnosis: "Cancelled by user",
        completedAt: Date.now(),
      });
      updateTask(task.id, { status: "failed", completedAt: Date.now() });
      return null;
    }

    // Evaluate
    const evalResult = await evaluate({
      task: currentTask,
      iterationNumber: n,
      agentOutput: execResult.stdout,
      exitCode: execResult.exitCode,
      workDir,
    });

    // Record usage
    const agentConfig = getAgentConfig(task.spec.agentId);
    if (agentConfig) {
      recordUsage({
        modelId: agentConfig.modelId,
        provider: agentConfig.provider,
        taskId: task.id,
        taskSlug: task.slug,
        iteration: n,
        inputTokens: execResult.inputTokens,
        outputTokens: execResult.outputTokens,
        durationMs: execResult.durationMs,
        timestamp: Date.now(),
      });
    }

    // Update iteration
    updateIteration(task.id, n, {
      status: evalResult.allPassed ? "passed" : "failed",
      diagnosis: evalResult.diagnosis,
      agentOutput: execResult.stdout.slice(-8000), // keep last 8k chars
      evals: evalResult.evals,
      diffStats: evalResult.diffStats,
      completedAt: Date.now(),
    });

    // Update task-level evals
    updateTask(task.id, { evals: evalResult.evals });

    return { allPassed: evalResult.allPassed };
  } catch (err) {
    // Close out the iteration, then let the caller decide the task's fate.
    updateIteration(task.id, n, {
      status: "failed",
      diagnosis: `Iteration crashed: ${err}`,
      completedAt: Date.now(),
    });
    throw err;
  }
}
|
||||||
94
apps/harness/src/lib/prompt-builder.ts
Normal file
94
apps/harness/src/lib/prompt-builder.ts
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import { readFile, readdir } from "node:fs/promises";
|
||||||
|
import path from "node:path";
|
||||||
|
import { Task, Iteration } from "./types";
|
||||||
|
|
||||||
|
/** Value of `HARNESS_KNOWLEDGE_DIR`; empty string when the variable is unset. */
const KNOWLEDGE_DIR = process.env.HARNESS_KNOWLEDGE_DIR || "";

/** Maximum number of characters of prior agent output included in a prompt. */
const MAX_AGENT_OUTPUT_LENGTH = 4000;

/** Maximum number of prior iterations summarised in a prompt. */
const MAX_PRIOR_ITERATIONS = 3;
|
||||||
|
|
||||||
|
/**
 * Assemble the agent prompt for one iteration of a task.
 *
 * Sections, in order and separated by horizontal rules: task goal, success
 * criteria, constraints, reference material, a summary of recent prior
 * iterations, and closing instructions. Empty sections are omitted.
 *
 * Only the last `MAX_PRIOR_ITERATIONS` iterations are summarised, and only the
 * most recent of them includes agent output. That output is cut to its final
 * `MAX_AGENT_OUTPUT_LENGTH` characters; because the *beginning* is what gets
 * dropped, the truncation marker is placed before the kept text.
 *
 * @param opts.task - Task whose goal and spec drive the prompt.
 * @param opts.iterationNumber - Number of the iteration about to run.
 * @param opts.priorIterations - Earlier iterations, oldest first.
 * @returns The complete prompt text.
 */
export async function buildPrompt(opts: {
  task: Task;
  iterationNumber: number;
  priorIterations: Iteration[];
}): Promise<string> {
  const { task, iterationNumber, priorIterations } = opts;
  const sections: string[] = [];

  // Task goal
  sections.push(`# Task\n\n${task.goal}`);

  // Success criteria
  if (task.spec.criteria.length > 0) {
    const criteriaLines = task.spec.criteria
      .map((c) => `- **${c.label}**: ${c.target}`)
      .join("\n");
    sections.push(`# Success Criteria\n\n${criteriaLines}`);
  }

  // Constraints
  if (task.spec.constraints.length > 0) {
    const constraintLines = task.spec.constraints
      .map((c) => `- ${c}`)
      .join("\n");
    sections.push(`# Constraints\n\n${constraintLines}`);
  }

  // Knowledge references
  const knowledgeContent = await loadKnowledge(task.spec.knowledgeRefs);
  if (knowledgeContent) {
    sections.push(`# Reference Material\n\n${knowledgeContent}`);
  }

  // Prior iterations
  if (priorIterations.length > 0) {
    const recentIterations = priorIterations.slice(-MAX_PRIOR_ITERATIONS);
    const mostRecent = recentIterations[recentIterations.length - 1];

    const priorLines = recentIterations.map((iter) => {
      const parts = [`## Iteration ${iter.n} — ${iter.status}`];

      if (iter.diagnosis) {
        parts.push(`**Diagnosis:** ${iter.diagnosis}`);
      }

      if (iter.evals) {
        const evalSummary = Object.entries(iter.evals)
          .map(([key, ev]) => `- ${key}: ${ev.pass ? "PASS" : "FAIL"} (${ev.value} ${ev.unit}, target: ${ev.target})`)
          .join("\n");
        parts.push(`**Evals:**\n${evalSummary}`);
      }

      // Include truncated agent output only for the most recent iteration
      if (iter === mostRecent && iter.agentOutput) {
        const truncated = iter.agentOutput.length > MAX_AGENT_OUTPUT_LENGTH
          ? "... (truncated)\n" + iter.agentOutput.slice(-MAX_AGENT_OUTPUT_LENGTH)
          : iter.agentOutput;
        parts.push(`**Agent Output (truncated):**\n\`\`\`\n${truncated}\n\`\`\``);
      }

      return parts.join("\n");
    });

    sections.push(`# Prior Iterations\n\n${priorLines.join("\n\n")}`);
  }

  // Instructions
  sections.push(
    `# Instructions\n\n` +
    `This is iteration ${iterationNumber} of ${task.maxIterations}.\n` +
    `Work in the current directory. Make all necessary changes to satisfy the success criteria.\n` +
    `If prior iterations failed, analyze the diagnosis and try a different approach.`,
  );

  return sections.join("\n\n---\n\n");
}
|
||||||
|
|
||||||
|
/**
 * Load the referenced knowledge files and concatenate them as markdown.
 *
 * Each ref is resolved relative to KNOWLEDGE_DIR. Refs that resolve outside
 * that directory (absolute paths, `../` traversal) are ignored so a task spec
 * cannot pull arbitrary files from the host into the prompt. Files that are
 * missing or unreadable are skipped silently (best effort).
 *
 * NOTE(review): symlinks inside the knowledge directory are not resolved, so a
 * link pointing elsewhere would still be followed. Confirm whether that matters.
 *
 * @param refs  File paths relative to the knowledge directory.
 * @returns One `## <ref>` section per readable file, joined by blank lines;
 *          an empty string when KNOWLEDGE_DIR is unset or nothing was loaded.
 */
async function loadKnowledge(refs: string[]): Promise<string> {
  if (!KNOWLEDGE_DIR || refs.length === 0) return "";

  const rootDir = path.resolve(KNOWLEDGE_DIR);
  const parts: string[] = [];

  for (const ref of refs) {
    // ref is a plain relative file path; glob patterns are not expanded.
    const filePath = path.resolve(rootDir, ref);

    // Containment check: the path from rootDir to the target must not climb
    // out of rootDir or land on another root/drive.
    const relative = path.relative(rootDir, filePath);
    const escapesRoot =
      relative === "" ||
      relative === ".." ||
      relative.startsWith(".." + path.sep) ||
      path.isAbsolute(relative);
    if (escapesRoot) continue;

    try {
      const content = await readFile(filePath, "utf-8");
      parts.push(`## ${ref}\n\n${content}`);
    } catch {
      // Skip missing knowledge files
    }
  }

  return parts.join("\n\n");
}
|
||||||
100
apps/harness/src/lib/repo-search.ts
Normal file
100
apps/harness/src/lib/repo-search.ts
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
import { getCredentialsByProvider } from "./credentials";
|
||||||
|
|
||||||
|
/** Provider-neutral description of one repository returned by a search. */
export interface RepoResult {
  /** Hosting service the repository was found on. */
  provider: "github" | "gitlab";
  /** Namespaced name (GitHub `full_name` / GitLab `path_with_namespace`). */
  fullName: string;
  /** Browser URL of the repository. */
  url: string;
  /** Repository description; empty string when the provider has none. */
  description: string;
  /** Default branch; "main" when the provider does not report one. */
  defaultBranch: string;
  /** Whether the repository is private. */
  private: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Search every configured provider for repositories matching `query`.
 *
 * Queries shorter than two characters return no results. GitHub and GitLab
 * are searched concurrently; a provider whose search rejects contributes
 * nothing rather than failing the whole call.
 *
 * @param query  Free-text search term.
 * @returns GitHub results followed by GitLab results.
 */
export async function searchRepos(query: string): Promise<RepoResult[]> {
  const tooShort = !query || query.length < 2;
  if (tooShort) return [];

  const settled = await Promise.allSettled([
    searchGitHub(query),
    searchGitLab(query),
  ]);

  const merged: RepoResult[] = [];
  for (const outcome of settled) {
    if (outcome.status === "fulfilled") {
      merged.push(...outcome.value);
    }
  }
  return merged;
}
|
||||||
|
|
||||||
|
/**
 * Search GitHub repositories with every stored GitHub credential.
 *
 * The same search is issued once per credential, so a repository visible to
 * several credentials would otherwise be listed several times. Results are
 * de-duplicated by repository URL and the first occurrence wins.
 * Credentials whose request fails or returns a non-2xx status are skipped.
 *
 * @param query  Free-text search term (URL-encoded before sending).
 * @returns Unique matching repositories, in first-seen order.
 */
async function searchGitHub(query: string): Promise<RepoResult[]> {
  const creds = getCredentialsByProvider("github");
  if (creds.length === 0) return [];

  const endpoint = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&per_page=10&sort=updated`;
  const byUrl = new Map<string, RepoResult>();

  for (const cred of creds) {
    try {
      const res = await fetch(endpoint, {
        headers: {
          Authorization: `Bearer ${cred.token}`,
          Accept: "application/vnd.github+json",
          "X-GitHub-Api-Version": "2022-11-28",
        },
      });

      if (!res.ok) continue;

      const data = await res.json();
      for (const repo of data.items || []) {
        // Already reported via an earlier credential.
        if (byUrl.has(repo.html_url)) continue;
        byUrl.set(repo.html_url, {
          provider: "github",
          fullName: repo.full_name,
          url: repo.html_url,
          description: repo.description || "",
          defaultBranch: repo.default_branch || "main",
          private: repo.private,
        });
      }
    } catch {
      // skip failed credential
    }
  }

  return Array.from(byUrl.values());
}
|
||||||
|
|
||||||
|
/**
 * Search GitLab projects with every stored GitLab credential.
 *
 * Each credential may point at its own instance via `baseUrl`
 * (default https://gitlab.com). Only projects the token's user is a member of
 * are requested (`membership=true`). Results are de-duplicated by project URL
 * so a project reachable through several credentials is listed once.
 * Credentials whose request fails, returns a non-2xx status, or yields a
 * non-array payload are skipped.
 *
 * @param query  Free-text search term (URL-encoded before sending).
 * @returns Unique matching projects, in first-seen order.
 */
async function searchGitLab(query: string): Promise<RepoResult[]> {
  const creds = getCredentialsByProvider("gitlab");
  if (creds.length === 0) return [];

  const byUrl = new Map<string, RepoResult>();

  for (const cred of creds) {
    // Strip trailing slashes so "https://host/" does not produce "//api/v4".
    const baseUrl = (cred.baseUrl || "https://gitlab.com").replace(/\/+$/, "");
    try {
      const res = await fetch(
        `${baseUrl}/api/v4/projects?search=${encodeURIComponent(query)}&per_page=10&order_by=updated_at&membership=true`,
        {
          headers: {
            "PRIVATE-TOKEN": cred.token,
          },
        },
      );

      if (!res.ok) continue;

      const data = await res.json();
      // A project list is expected; anything else is an unexpected payload.
      if (!Array.isArray(data)) continue;

      for (const project of data) {
        // Already reported via an earlier credential.
        if (byUrl.has(project.web_url)) continue;
        byUrl.set(project.web_url, {
          provider: "gitlab",
          fullName: project.path_with_namespace,
          url: project.web_url,
          description: project.description || "",
          defaultBranch: project.default_branch || "main",
          private: project.visibility === "private",
        });
      }
    } catch {
      // skip failed credential
    }
  }

  return Array.from(byUrl.values());
}
|
||||||
61
apps/harness/src/lib/store.ts
Normal file
61
apps/harness/src/lib/store.ts
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import { Task } from "./types";
|
||||||
|
|
||||||
|
// Process-local task registry keyed by task id. Contents are lost when the
// process exits; the plan is to replace this with persistent storage
// (CloudNativePG) once the orchestrator loop is wired up.
const tasks = new Map<string, Task>();
|
||||||
|
|
||||||
|
/** Return every stored task in insertion order, as a fresh array. */
export function getAllTasks(): Task[] {
  const snapshot: Task[] = [];
  tasks.forEach((task) => {
    snapshot.push(task);
  });
  return snapshot;
}
|
||||||
|
|
||||||
|
/** Look up a task by id; `undefined` when no such task is stored. */
export function getTask(id: string): Task | undefined {
  const found = tasks.get(id);
  return found;
}
|
||||||
|
|
||||||
|
/**
 * Store `task` under its own id, replacing any task already stored there.
 *
 * @returns The same task object that was passed in.
 */
export function createTask(task: Task): Task {
  const { id } = task;
  tasks.set(id, task);
  return task;
}
|
||||||
|
|
||||||
|
/**
 * Shallow-merge `updates` into the stored task and store the result.
 *
 * The stored entry is replaced with a new object; the previous object is not
 * mutated.
 *
 * @returns The merged task, or `undefined` when `id` is unknown.
 */
export function updateTask(id: string, updates: Partial<Task>): Task | undefined {
  const current = tasks.get(id);
  if (!current) {
    return undefined;
  }

  const merged = { ...current, ...updates };
  tasks.set(id, merged);
  return merged;
}
|
||||||
|
|
||||||
|
/** Remove a task by id. Returns `true` when a task was actually removed. */
export function deleteTask(id: string): boolean {
  const removed = tasks.delete(id);
  return removed;
}
|
||||||
|
|
||||||
|
/**
 * Append an iteration to a task and advance the task's current iteration
 * number to `iteration.n`.
 *
 * The stored task object is mutated in place (its `iterations` array is
 * replaced by a new array), so existing references observe the change.
 *
 * @returns The updated task, or `undefined` when `id` is unknown.
 */
export function appendIteration(id: string, iteration: import("./types").Iteration): Task | undefined {
  const task = tasks.get(id);
  if (!task) {
    return undefined;
  }

  task.iterations = task.iterations.concat([iteration]);
  task.iteration = iteration.n;
  tasks.set(id, task);
  return task;
}
|
||||||
|
|
||||||
|
/**
 * Shallow-merge `updates` into every iteration of a task whose `n` equals
 * `iterationN`.
 *
 * The stored task object is mutated in place: its `iterations` array is
 * replaced by a new array in which matching entries are new merged objects
 * and all others are kept as-is. If no iteration matches, the list content is
 * unchanged.
 *
 * @returns The task, or `undefined` when `id` is unknown.
 */
export function updateIteration(
  id: string,
  iterationN: number,
  updates: Partial<import("./types").Iteration>,
): Task | undefined {
  const task = tasks.get(id);
  if (!task) {
    return undefined;
  }

  task.iterations = task.iterations.map((entry) => {
    if (entry.n !== iterationN) {
      return entry;
    }
    return { ...entry, ...updates };
  });
  tasks.set(id, task);
  return task;
}
|
||||||
|
|
||||||
|
/** Return the earliest-inserted task whose status is "pending", if any. */
export function getFirstPendingTask(): Task | undefined {
  const all = Array.from(tasks.values());
  for (let i = 0; i < all.length; i++) {
    if (all[i].status === "pending") {
      return all[i];
    }
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/** Return all tasks whose status is "running", in insertion order. */
export function getRunningTasks(): Task[] {
  const running: Task[] = [];
  tasks.forEach((task) => {
    if (task.status === "running") {
      running.push(task);
    }
  });
  return running;
}
|
||||||
68
apps/harness/src/lib/types.ts
Normal file
68
apps/harness/src/lib/types.ts
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
/** Declarative description of a task to run. */
export interface TaskSpec {
  slug: string;
  /** Goal text of the task. */
  goal: string;
  project: string;
  /** Identifier of the agent to run (presumably selects the CLI; confirm). */
  agentId: string;
  maxIterations: number;
  /** Success criteria, each a label plus its target. */
  criteria: Array<{ label: string; target: string }>;
  /** Free-text constraints. */
  constraints: string[];
  /** Knowledge file references. */
  knowledgeRefs: string[];
}
|
||||||
|
|
||||||
|
/** Result of evaluating one criterion. */
export interface Eval {
  label: string;
  /** Measured value; may be numeric or textual. */
  value: number | string;
  /** Unit accompanying `value`. */
  unit: string;
  /** Whether the criterion passed. */
  pass: boolean;
  /** Target the value was compared against. */
  target: string;
}
|
||||||
|
|
||||||
|
/** One attempt at a task. */
export interface Iteration {
  /** Iteration number within its task. */
  n: number;
  status: "pending" | "running" | "passed" | "failed";
  /** Diagnosis text for the attempt; null when there is none. */
  diagnosis: string | null;
  /** Captured agent output (presumably stdout; may be a tail; confirm). */
  agentOutput?: string;
  /** Eval results keyed by name. */
  evals?: Record<string, Eval>;
  diffStats?: string;
  /** Start timestamp (presumably epoch milliseconds; confirm). */
  startedAt?: number;
  /** Completion timestamp (presumably epoch milliseconds; confirm). */
  completedAt?: number;
}
|
||||||
|
|
||||||
|
/** Outcome of one agent process run. */
export interface ExecutionResult {
  exitCode: number;
  stdout: string;
  stderr: string;
  /** Run duration in milliseconds. */
  durationMs: number;
  inputTokens: number;
  outputTokens: number;
  /** Presumably true when the harness terminated the process; confirm. */
  killed: boolean;
}
|
||||||
|
|
||||||
|
/** A tracked task together with its run state and history. */
export interface Task {
  id: string;
  slug: string;
  goal: string;
  status: "pending" | "running" | "completed" | "failed";
  /** Current iteration number. */
  iteration: number;
  maxIterations: number;
  /** Start timestamp, or null (presumably epoch ms, null until started; confirm). */
  startedAt: number | null;
  completedAt?: number;
  project: string;
  /** Task-level eval results keyed by name. */
  evals: Record<string, Eval>;
  /** Iterations recorded for this task. */
  iterations: Iteration[];
  /** Pull request associated with the task, if any. */
  pr?: {
    number: number;
    title: string;
    status: string;
  };
  /** The spec this task was created from. */
  spec: TaskSpec;
}
|
||||||
|
|
||||||
|
/** Metadata describing one knowledge document. */
export interface KnowledgeDoc {
  path: string;
  title: string;
  verificationStatus: string;
  /** Last-updated marker; the string format is not fixed by this type. */
  lastUpdated: string;
  project: string;
  /** Short preview of the document. */
  preview: string;
}
|
||||||
21
apps/harness/tsconfig.json
Normal file
21
apps/harness/tsconfig.json
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2017",
|
||||||
|
"lib": ["dom", "dom.iterable", "esnext"],
|
||||||
|
"allowJs": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"strict": true,
|
||||||
|
"noEmit": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"module": "esnext",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"jsx": "preserve",
|
||||||
|
"incremental": true,
|
||||||
|
"plugins": [{ "name": "next" }],
|
||||||
|
"paths": { "@/*": ["./src/*"] }
|
||||||
|
},
|
||||||
|
"include": ["**/*.ts", "**/*.tsx", "next-env.d.ts", ".next/types/**/*.ts"],
|
||||||
|
"exclude": ["node_modules"]
|
||||||
|
}
|
||||||
1
apps/harness/tsconfig.tsbuildinfo
Normal file
1
apps/harness/tsconfig.tsbuildinfo
Normal file
File diff suppressed because one or more lines are too long
@@ -36,3 +36,16 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- longhorn.coreworlds.io
|
- longhorn.coreworlds.io
|
||||||
|
---
# TLS certificate for harness.coreworlds.io, issued by the
# letsencrypt-production ClusterIssuer and stored in the harness-tls Secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: harness-tls
  namespace: platform
spec:
  secretName: harness-tls
  issuerRef:
    name: letsencrypt-production
    kind: ClusterIssuer
  dnsNames:
    - harness.coreworlds.io
|
||||||
|
|||||||
22
infra/kubernetes/platform/traefik/ingressroute-harness.yaml
Normal file
22
infra/kubernetes/platform/traefik/ingressroute-harness.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Routes https://harness.coreworlds.io to the harness Service, behind the
# internal-only middleware and terminated with the harness-tls Secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: harness
  namespace: platform
  annotations:
    # NOTE(review): cert-manager's ingress-shim acts on Ingress/Gateway
    # objects, not IngressRoute; the certificate appears to come from the
    # separate harness-tls Certificate resource. Confirm this is still needed.
    cert-manager.io/cluster-issuer: letsencrypt-production
spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`harness.coreworlds.io`)
      kind: Rule
      middlewares:
        - name: internal-only
          namespace: platform
      services:
        # The Service lives in a different namespace (apps) than this route.
        # NOTE(review): this needs cross-namespace references enabled in
        # Traefik's Kubernetes CRD provider. Confirm.
        - name: harness
          namespace: apps
          port: 80
  tls:
    secretName: harness-tls
|
||||||
@@ -7,5 +7,6 @@ resources:
|
|||||||
- ingressroute-argocd.yaml
|
- ingressroute-argocd.yaml
|
||||||
- ingressroute-grafana.yaml
|
- ingressroute-grafana.yaml
|
||||||
- ingressroute-longhorn.yaml
|
- ingressroute-longhorn.yaml
|
||||||
|
- ingressroute-harness.yaml
|
||||||
- certificate-internal.yaml
|
- certificate-internal.yaml
|
||||||
- servicemonitor.yaml
|
- servicemonitor.yaml
|
||||||
|
|||||||
28
pnpm-lock.yaml
generated
28
pnpm-lock.yaml
generated
@@ -70,6 +70,34 @@ importers:
|
|||||||
specifier: ^5.7.0
|
specifier: ^5.7.0
|
||||||
version: 5.9.3
|
version: 5.9.3
|
||||||
|
|
||||||
|
apps/harness:
|
||||||
|
dependencies:
|
||||||
|
next:
|
||||||
|
specifier: ^15.1.0
|
||||||
|
version: 15.5.14(@opentelemetry/api@1.9.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
||||||
|
react:
|
||||||
|
specifier: ^19.0.0
|
||||||
|
version: 19.2.4
|
||||||
|
react-dom:
|
||||||
|
specifier: ^19.0.0
|
||||||
|
version: 19.2.4(react@19.2.4)
|
||||||
|
yaml:
|
||||||
|
specifier: ^2.7.0
|
||||||
|
version: 2.8.2
|
||||||
|
devDependencies:
|
||||||
|
'@types/node':
|
||||||
|
specifier: ^22.10.0
|
||||||
|
version: 22.19.15
|
||||||
|
'@types/react':
|
||||||
|
specifier: ^19.0.0
|
||||||
|
version: 19.2.14
|
||||||
|
'@types/react-dom':
|
||||||
|
specifier: ^19.0.0
|
||||||
|
version: 19.2.3(@types/react@19.2.14)
|
||||||
|
typescript:
|
||||||
|
specifier: ^5.7.0
|
||||||
|
version: 5.9.3
|
||||||
|
|
||||||
apps/web:
|
apps/web:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@tailwindcss/postcss':
|
'@tailwindcss/postcss':
|
||||||
|
|||||||
Reference in New Issue
Block a user