Add harness app: agent orchestrator with cluster deployment
- Next.js app for orchestrating coding agent benchmarks (Claude Code, Codex, OpenCode) - Dockerfile installs git, gh CLI, and agent CLIs for headless execution - K8s deployment with workspace volume, sealed credentials for Claude + OpenCode - Traefik IngressRoute at harness.coreworlds.io with internal-only middleware + TLS - CI pipeline path filter for harness builds - Fix OpenCode runtime flags (subcommand-based headless mode)
This commit is contained in:
154
apps/harness/src/lib/agents.ts
Normal file
154
apps/harness/src/lib/agents.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
// Agent runtime definitions and configuration
|
||||
|
||||
// Identifier of a supported agent CLI runtime.
export type AgentRuntime = "claude-code" | "codex" | "opencode";
|
||||
|
||||
/** Static description of one agent CLI and the pieces needed to build its command line. */
export interface AgentRuntimeInfo {
  /** Runtime identifier. */
  id: AgentRuntime;
  /** Display name. */
  name: string;
  /** Free-text summary of the runtime. */
  description: string;
  /** Which AI providers this runtime supports. */
  defaultProviders: string[];
  /** Base CLI command. */
  cliCommand: string;
  /** Flag (or subcommand) that makes the CLI run headless. */
  headlessFlag: string;
  /** Flag used to specify the model. */
  modelFlag: string;
  /** Flag used to pass the prompt/task; empty when the prompt is positional. */
  promptFlag: string;
}
|
||||
|
||||
/**
 * Registry of supported agent runtimes, keyed by runtime id.
 *
 * Each entry carries the pieces used to assemble a headless command line:
 * the base command, the headless flag/subcommand, the model flag and the
 * prompt flag. An empty `promptFlag` means the prompt is passed as a
 * positional argument.
 */
export const AGENT_RUNTIMES: Record<AgentRuntime, AgentRuntimeInfo> = {
  "claude-code": {
    id: "claude-code",
    name: "Claude Code",
    description: "Anthropic's agentic coding CLI. Supports Claude models via Anthropic API or Bedrock.",
    defaultProviders: ["anthropic"],
    cliCommand: "claude",
    headlessFlag: "--print",
    modelFlag: "--model",
    // The Claude Code CLI has no `--prompt` option: in print mode the prompt
    // is a positional argument (`claude --print --model <id> "<prompt>"`).
    promptFlag: "",
  },
  "codex": {
    id: "codex",
    name: "Codex CLI",
    description: "OpenAI's open-source coding agent. Supports OpenAI models.",
    defaultProviders: ["openai"],
    cliCommand: "codex",
    headlessFlag: "--quiet",
    modelFlag: "--model",
    promptFlag: "", // prompt is positional
  },
  "opencode": {
    id: "opencode",
    name: "OpenCode",
    description: "Open-source multi-provider coding agent. Supports Anthropic, OpenAI, Google, OpenRouter.",
    defaultProviders: ["anthropic", "openai", "google", "openrouter", "opencode-zen"],
    cliCommand: "opencode",
    headlessFlag: "run", // subcommand, not a flag
    modelFlag: "--model",
    promptFlag: "", // prompt is positional (like codex)
  },
};
|
||||
|
||||
// ─── AGENT CONFIGURATIONS ────────────────────────────────────
|
||||
|
||||
/** A concrete, runnable agent: one runtime paired with one model and provider. */
export interface AgentConfig {
  /** Unique key of this configuration. */
  id: string;
  /** Display name. */
  name: string;
  /** Which agent runtime executes this configuration. */
  runtime: AgentRuntime;
  /** Model identifier handed to the runtime. */
  modelId: string;
  /** Provider name associated with the model. */
  provider: string;
  /** Optional token limit. */
  maxTokens?: number;
  /** Additional env vars for the agent process. */
  env?: Record<string, string>;
}
|
||||
|
||||
// In-memory registry of agent configurations, keyed by config id.
const configs: Map<string, AgentConfig> = new Map();
|
||||
|
||||
/** Returns a new array holding every stored agent configuration. */
export function getAllAgentConfigs(): AgentConfig[] {
  return [...configs.values()];
}
|
||||
|
||||
/** Looks up a single agent configuration; yields `undefined` when the id is unknown. */
export function getAgentConfig(id: string): AgentConfig | undefined {
  const found = configs.get(id);
  return found;
}
|
||||
|
||||
/** Stores `config` under its own id, replacing any previous entry, and returns it. */
export function upsertAgentConfig(config: AgentConfig): AgentConfig {
  const { id } = config;
  configs.set(id, config);
  return config;
}
|
||||
|
||||
/** Removes the configuration with the given id; the result is `true` only if an entry existed. */
export function deleteAgentConfig(id: string): boolean {
  const removed = configs.delete(id);
  return removed;
}
|
||||
|
||||
// ─── SEED DATA ──────────────────────────────────────────────
|
||||
|
||||
// Default agent configurations loaded into the registry when it starts out empty.
const SEED_CONFIGS: AgentConfig[] = [
  // Claude Code
  { id: "agent-claude-opus", name: "Claude Code · Opus 4", runtime: "claude-code", modelId: "claude-opus-4-20250514", provider: "anthropic" },
  { id: "agent-claude-sonnet", name: "Claude Code · Sonnet 4", runtime: "claude-code", modelId: "claude-sonnet-4-20250514", provider: "anthropic" },
  // Codex
  { id: "agent-codex-o3", name: "Codex · o3", runtime: "codex", modelId: "o3", provider: "openai" },
  { id: "agent-codex-o4mini", name: "Codex · o4-mini", runtime: "codex", modelId: "o4-mini", provider: "openai" },
  // OpenCode
  { id: "agent-opencode-sonnet", name: "OpenCode · Sonnet 4", runtime: "opencode", modelId: "claude-sonnet-4-20250514", provider: "anthropic" },
  { id: "agent-opencode-gemini", name: "OpenCode · Gemini 2.5 Pro", runtime: "opencode", modelId: "gemini-2.5-pro", provider: "google" },
];
|
||||
|
||||
// Copies the seed configurations into the registry, but only when the registry is still empty.
function seedAgents() {
  if (configs.size === 0) {
    SEED_CONFIGS.forEach((seed) => {
      configs.set(seed.id, seed);
    });
  }
}
|
||||
|
||||
// Seed the registry as a side effect of loading this module.
seedAgents();
|
||||
|
||||
// ─── CLI BUILDER ────────────────────────────────────────────
|
||||
// Builds the shell command to invoke an agent headlessly.
|
||||
|
||||
/**
 * Assembles the argv used to launch an agent headlessly.
 *
 * The result starts with the runtime's CLI command, followed by the headless
 * flag and the model flag + model id (each only when non-empty), and ends with
 * the prompt — preceded by the prompt flag when the runtime defines one,
 * otherwise as a bare positional argument.
 *
 * @param config  Agent configuration selecting the runtime and model.
 * @param prompt  Task text handed to the agent.
 * @param workDir Not used when building the argument list.
 * @returns The command followed by its arguments.
 */
export function buildAgentCommand(config: AgentConfig, prompt: string, workDir: string): string[] {
  const { cliCommand, headlessFlag, modelFlag, promptFlag } = AGENT_RUNTIMES[config.runtime];
  const argv: string[] = [cliCommand];

  if (headlessFlag) {
    argv.push(headlessFlag);
  }
  if (modelFlag && config.modelId) {
    argv.push(modelFlag, config.modelId);
  }

  // Flagged prompt when the runtime has a prompt flag, positional otherwise.
  const promptArgs = promptFlag ? [promptFlag, prompt] : [prompt];
  argv.push(...promptArgs);

  return argv;
}
|
||||
57
apps/harness/src/lib/credentials.ts
Normal file
57
apps/harness/src/lib/credentials.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
/** Every credential provider known to the harness: git hosts first, then AI providers. */
export type Provider =
  | "github"
  | "gitlab"
  | "anthropic"
  | "openai"
  | "openrouter"
  | "google"
  | "opencode-zen";
|
||||
|
||||
// Providers classified as git hosts.
export const GIT_PROVIDERS: Provider[] = ["github", "gitlab"];
|
||||
// Providers classified as AI model providers.
export const AI_PROVIDERS: Provider[] = ["anthropic", "openai", "openrouter", "google", "opencode-zen"];
|
||||
|
||||
/** A stored secret for one provider. */
export interface Credential {
  /** Unique key of this credential. */
  id: string;
  /** Provider the token belongs to. */
  provider: Provider;
  /** Display label. */
  label: string;
  /** The secret token itself. */
  token: string;
  /** For self-hosted GitLab or custom endpoints. */
  baseUrl?: string;
}
|
||||
|
||||
// In-memory store. Will be replaced with encrypted persistent storage.
|
||||
const credentials: Map<string, Credential> = new Map();
|
||||
|
||||
/** Lists every stored credential as a copy whose token has been masked. */
export function getAllCredentials(): Credential[] {
  const masked: Credential[] = [];
  for (const stored of credentials.values()) {
    masked.push({ ...stored, token: maskToken(stored.token) });
  }
  return masked;
}
|
||||
|
||||
/**
 * Lists stored credentials whose provider belongs to the requested group
 * ("git" → GIT_PROVIDERS, anything else → AI_PROVIDERS), with tokens masked.
 */
export function getCredentialsByKind(kind: "git" | "ai"): Credential[] {
  const allowed = kind === "git" ? GIT_PROVIDERS : AI_PROVIDERS;
  const matches: Credential[] = [];
  for (const stored of credentials.values()) {
    if (allowed.includes(stored.provider)) {
      matches.push({ ...stored, token: maskToken(stored.token) });
    }
  }
  return matches;
}
|
||||
|
||||
/** Looks up one credential by id. The stored object is returned as-is, i.e. with its token unmasked. */
export function getCredential(id: string): Credential | undefined {
  const stored = credentials.get(id);
  return stored;
}
|
||||
|
||||
/**
 * Lists the stored credentials for one provider with their tokens masked.
 *
 * This is the display-safe counterpart of `getRawCredentialsByProvider`:
 * it returns copies, so the stored secrets are neither exposed nor mutated.
 * Callers that need the real token must use the raw accessor.
 */
export function getCredentialsByProvider(provider: Provider): Credential[] {
  return Array.from(credentials.values())
    .filter(c => c.provider === provider)
    .map(c => ({ ...c, token: maskToken(c.token) }));
}
|
||||
|
||||
/** Lists the stored credentials for one provider exactly as stored, including the unmasked token. */
export function getRawCredentialsByProvider(provider: Provider): Credential[] {
  const everything = [...credentials.values()];
  return everything.filter((stored) => stored.provider === provider);
}
|
||||
|
||||
/**
 * Stores `cred` under its id (replacing any existing entry) and returns a copy
 * of it with the token masked.
 */
export function upsertCredential(cred: Credential): Credential {
  credentials.set(cred.id, cred);
  const maskedCopy: Credential = { ...cred, token: maskToken(cred.token) };
  return maskedCopy;
}
|
||||
|
||||
/** Removes the credential with the given id; the result is `true` only if an entry existed. */
export function deleteCredential(id: string): boolean {
  const removed = credentials.delete(id);
  return removed;
}
|
||||
|
||||
// Produces a display-safe form of a token: tokens longer than 8 characters keep
// their first and last four characters around a run of bullets; shorter tokens
// are replaced entirely by bullets.
function maskToken(token: string): string {
  if (token.length > 8) {
    const head = token.substring(0, 4);
    const tail = token.substring(token.length - 4);
    return `${head}••••${tail}`;
  }
  return "••••••••";
}
|
||||
99
apps/harness/src/lib/evaluator.ts
Normal file
99
apps/harness/src/lib/evaluator.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import { Task, Eval } from "./types";
|
||||
import { hasDiff, getDiffStats } from "./git-ops";
|
||||
|
||||
/** Outcome of evaluating one agent iteration against a task's criteria. */
export interface EvalResult {
  /** Per-criterion results, keyed by criterion label. */
  evals: Record<string, Eval>;
  /** True when no criterion failed. */
  allPassed: boolean;
  /** Human-readable summary of the outcome. */
  diagnosis: string;
  /** Diff statistics text for the working directory. */
  diffStats: string;
}
|
||||
|
||||
// Simple target DSL:
|
||||
// exitCode:0 — exit code equals value
|
||||
// contains:<text> — agent output contains text
|
||||
// filesChanged:>0 — git diff has changes
|
||||
/**
 * Evaluates a single criterion against the observed run context.
 *
 * Supported target forms:
 *   - `exitCode:N`      — passes when the exit code equals N
 *   - `contains:<text>` — passes when the agent output contains <text>;
 *                         <text> may span several lines
 *   - `filesChanged:>0` — passes when the working tree has changes
 *
 * Any other target yields a failing result whose value is "unknown".
 *
 * @param criterion Label and target expression of the criterion.
 * @param context   Exit code, captured agent output, and change flag of the run.
 * @returns The evaluation record for this criterion.
 */
function evaluateCriterion(
  criterion: { label: string; target: string },
  context: { exitCode: number; agentOutput: string; hasChanges: boolean },
): Eval {
  const { label, target } = criterion;

  // exitCode:N
  const exitMatch = target.match(/^exitCode:(\d+)$/);
  if (exitMatch) {
    const expected = parseInt(exitMatch[1], 10);
    return {
      label,
      value: context.exitCode,
      unit: "exit code",
      pass: context.exitCode === expected,
      target,
    };
  }

  // contains:<text>
  // The `s` (dotAll) flag lets `.` match line breaks, so a multi-line <text>
  // is recognised instead of falling through to the "unknown" branch.
  const containsMatch = target.match(/^contains:(.+)$/s);
  if (containsMatch) {
    const text = containsMatch[1];
    const found = context.agentOutput.includes(text);
    return {
      label,
      value: found ? "found" : "not found",
      unit: "",
      pass: found,
      target,
    };
  }

  // filesChanged:>0
  if (target === "filesChanged:>0") {
    return {
      label,
      value: context.hasChanges ? ">0" : "0",
      unit: "files",
      pass: context.hasChanges,
      target,
    };
  }

  // Unknown target — always fail
  return {
    label,
    value: "unknown",
    unit: "",
    pass: false,
    target,
  };
}
|
||||
|
||||
/**
 * Scores one agent iteration against every criterion in the task spec.
 *
 * The working directory is inspected first (change flag, then diff stats);
 * each criterion is then evaluated and stored under its label. Failing
 * criteria are collected into a bullet-list diagnosis.
 *
 * @returns Per-criterion results, the overall pass flag, the diagnosis text
 *          and the diff statistics.
 */
export async function evaluate(opts: {
  task: Task;
  iterationNumber: number;
  agentOutput: string;
  exitCode: number;
  workDir: string;
}): Promise<EvalResult> {
  const hasChanges = await hasDiff(opts.workDir);
  const diffStats = await getDiffStats(opts.workDir);

  const context = { exitCode: opts.exitCode, agentOutput: opts.agentOutput, hasChanges };
  const evals: Record<string, Eval> = {};
  const failures: string[] = [];

  for (const criterion of opts.task.spec.criteria) {
    const outcome = evaluateCriterion(criterion, context);
    evals[criterion.label] = outcome;
    if (outcome.pass) continue;
    failures.push(`${criterion.label}: expected ${criterion.target}, got ${outcome.value}`);
  }

  const allPassed = failures.length === 0;
  let diagnosis: string;
  if (allPassed) {
    diagnosis = "All criteria passed.";
  } else {
    const bullets = failures.map((f) => `- ${f}`).join("\n");
    diagnosis = `Failed criteria:\n${bullets}`;
  }

  return { evals, allPassed, diagnosis, diffStats };
}
|
||||
158
apps/harness/src/lib/executor.ts
Normal file
158
apps/harness/src/lib/executor.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
import { spawn, ChildProcess } from "node:child_process";
|
||||
import { getAgentConfig, buildAgentCommand, AGENT_RUNTIMES } from "./agents";
|
||||
import { getRawCredentialsByProvider, Provider } from "./credentials";
|
||||
import { ExecutionResult } from "./types";
|
||||
|
||||
const DEFAULT_TIMEOUT_MS = 10 * 60 * 1000; // 10 minutes
|
||||
|
||||
// Maps AI providers to their env var names
|
||||
// Lookup from AI provider name to the environment variable that carries its API key.
const PROVIDER_ENV_VARS: Record<string, string> = Object.fromEntries([
  ["anthropic", "ANTHROPIC_API_KEY"],
  ["openai", "OPENAI_API_KEY"],
  ["google", "GOOGLE_API_KEY"],
  ["openrouter", "OPENROUTER_API_KEY"],
  ["opencode-zen", "OPENCODE_ZEN_API_KEY"],
]);
|
||||
|
||||
// Best-effort token extraction regexes per runtime
|
||||
// Per-runtime regex pair used to pull input/output token counts out of agent
// output. All three runtimes currently share the same pair of patterns; each
// runtime still gets its own RegExp instances.
const TOKEN_PATTERNS: Record<string, { input: RegExp; output: RegExp }> = Object.fromEntries(
  ["claude-code", "codex", "opencode"].map((runtimeId) => [
    runtimeId,
    {
      input: /input[_\s]tokens?[:\s]+(\d[\d,]*)/i,
      output: /output[_\s]tokens?[:\s]+(\d[\d,]*)/i,
    },
  ]),
);
|
||||
|
||||
// Extracts the first capture group of `pattern` from `text` as an integer,
// ignoring thousands separators. Yields 0 when the pattern does not match.
function parseTokenCount(text: string, pattern: RegExp): number {
  const hit = text.match(pattern);
  return hit ? parseInt(hit[1].split(",").join(""), 10) : 0;
}
|
||||
|
||||
/**
 * Runs an agent CLI as a child process and collects its result.
 *
 * The child inherits the harness environment plus provider API keys (all known
 * providers for the OpenCode runtime, otherwise only the configured provider),
 * GitHub tokens when a GitHub credential exists, and any `env` overrides from
 * the agent config (applied last).
 *
 * The process is sent SIGTERM on timeout or when `signal` aborts, followed by
 * SIGKILL five seconds later if it has not closed by then. The returned promise
 * never rejects for process-level failures: spawn errors are reported as
 * `exitCode: 1` with the error message appended to `stderr`.
 *
 * @param opts.agentId   Id of the agent configuration to run.
 * @param opts.prompt    Task text passed to the agent.
 * @param opts.workDir   Working directory of the child process.
 * @param opts.timeoutMs Optional timeout; defaults to DEFAULT_TIMEOUT_MS.
 * @param opts.signal    Optional cancellation signal.
 * @throws Error when no agent configuration exists for `opts.agentId`.
 */
export async function executeAgent(opts: {
  agentId: string;
  prompt: string;
  workDir: string;
  timeoutMs?: number;
  signal?: AbortSignal;
}): Promise<ExecutionResult> {
  const config = getAgentConfig(opts.agentId);
  if (!config) {
    throw new Error(`Agent config not found: ${opts.agentId}`);
  }

  const args = buildAgentCommand(config, opts.prompt, opts.workDir);
  const command = args[0];
  const commandArgs = args.slice(1);

  // Build environment with credentials
  const env: NodeJS.ProcessEnv = { ...process.env };

  // Set API keys — OpenCode is multi-provider so inject all available keys;
  // other runtimes only need their configured provider's key.
  const providersToInject =
    config.runtime === "opencode"
      ? Object.keys(PROVIDER_ENV_VARS)
      : [config.provider];

  for (const provider of providersToInject) {
    const envVar = PROVIDER_ENV_VARS[provider];
    if (!envVar) continue;
    const creds = getRawCredentialsByProvider(provider as Provider);
    if (creds.length > 0) {
      env[envVar] = creds[0].token;
    }
  }

  // Set GitHub token for git operations within agent
  const ghCreds = getRawCredentialsByProvider("github" as Provider);
  if (ghCreds.length > 0) {
    env.GITHUB_TOKEN = ghCreds[0].token;
    env.GH_TOKEN = ghCreds[0].token;
  }

  // Add any custom env from agent config
  if (config.env) {
    Object.assign(env, config.env);
  }

  const timeout = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const startTime = Date.now();

  // An AbortSignal that is already aborted never fires another "abort" event,
  // so a listener alone would miss it. Report the cancellation without spawning.
  if (opts.signal?.aborted) {
    return {
      exitCode: 1,
      stdout: "",
      stderr: "Aborted before start",
      durationMs: 0,
      inputTokens: 0,
      outputTokens: 0,
      killed: true,
    };
  }

  return new Promise<ExecutionResult>((resolve) => {
    const child: ChildProcess = spawn(command, commandArgs, {
      cwd: opts.workDir,
      env,
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    let killed = false;
    let settled = false;
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is reassembled instead of being turned into replacement chars.
    child.stdout!.setEncoding("utf8");
    child.stderr!.setEncoding("utf8");

    child.stdout!.on("data", (chunk: string) => {
      stdout += chunk;
    });

    child.stderr!.on("data", (chunk: string) => {
      stderr += chunk;
    });

    // Shared by timeout and cancellation: SIGTERM now, SIGKILL after 5s.
    const terminate = () => {
      if (killed) return; // escalation already scheduled
      killed = true;
      child.kill("SIGTERM");
      forceKillTimer = setTimeout(() => child.kill("SIGKILL"), 5000);
    };

    // Timeout
    const timer = setTimeout(terminate, timeout);

    // Cancellation via AbortSignal
    opts.signal?.addEventListener("abort", terminate, { once: true });

    // "error" and "close" can both fire for the same child; only the first
    // one settles the promise and releases the timeout and abort listener.
    const settle = (result: ExecutionResult) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      opts.signal?.removeEventListener("abort", terminate);
      resolve(result);
    };

    child.on("close", (code: number | null) => {
      // The process is gone; drop the pending SIGKILL so it neither fires
      // needlessly nor keeps the event loop alive.
      clearTimeout(forceKillTimer);

      const durationMs = Date.now() - startTime;
      const combined = stdout + "\n" + stderr;
      const runtime = AGENT_RUNTIMES[config.runtime];
      const patterns = TOKEN_PATTERNS[runtime.id] ?? TOKEN_PATTERNS["claude-code"];

      settle({
        // `code` is null when the child was terminated by a signal.
        exitCode: code ?? 1,
        stdout,
        stderr,
        durationMs,
        inputTokens: parseTokenCount(combined, patterns.input),
        outputTokens: parseTokenCount(combined, patterns.output),
        killed,
      });
    });

    child.on("error", (err: Error) => {
      settle({
        exitCode: 1,
        stdout,
        stderr: stderr + "\n" + err.message,
        durationMs: Date.now() - startTime,
        inputTokens: 0,
        outputTokens: 0,
        killed: false,
      });
    });
  });
}
|
||||
149
apps/harness/src/lib/git-ops.ts
Normal file
149
apps/harness/src/lib/git-ops.ts
Normal file
@@ -0,0 +1,149 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile.
const exec = promisify(execFile);
|
||||
|
||||
// Root directory for harness working data; taken from HARNESS_WORK_DIR, falling
// back to /tmp/harness when that variable is unset or empty.
const WORK_DIR = process.env.HARNESS_WORK_DIR || "/tmp/harness";
|
||||
|
||||
// Directory under the work root that holds the bare clones.
function reposDir(): string {
  const segments = [WORK_DIR, "repos"];
  return path.join(...segments);
}
|
||||
|
||||
/** Directory under the work root reserved for the given task. */
export function taskDir(taskId: string): string {
  const segments = [WORK_DIR, "tasks", taskId];
  return path.join(...segments);
}
|
||||
|
||||
/** Directory for one iteration of a task: `<task dir>/iter-<iteration>`. */
export function iterationDir(taskId: string, iteration: number): string {
  const parent = taskDir(taskId);
  const leaf = `iter-${iteration}`;
  return path.join(parent, leaf);
}
|
||||
|
||||
/**
 * Builds an HTTPS clone URL with the access token embedded as credentials.
 *
 * The token is percent-encoded before being placed in the userinfo part, so
 * characters such as `@`, `/` or `:` cannot corrupt the URL. Tokens made only
 * of URL-safe characters are left unchanged.
 *
 * @param repo     Repository in "owner/name" form.
 * @param provider Git host; "gitlab" targets gitlab.com, anything else github.com.
 * @param token    Access token for the host.
 * @returns The authenticated clone URL ending in `.git`.
 */
export function buildAuthenticatedCloneUrl(
  repo: string,
  provider: "github" | "gitlab",
  token: string,
): string {
  const safeToken = encodeURIComponent(token);
  // repo format: "owner/name"
  if (provider === "gitlab") {
    return `https://oauth2:${safeToken}@gitlab.com/${repo}.git`;
  }
  return `https://x-access-token:${safeToken}@github.com/${repo}.git`;
}
|
||||
|
||||
// Location of the bare clone for a repository slug: `<repos dir>/<slug>.git`.
function bareClonePath(slug: string): string {
  const dirName = `${slug}.git`;
  return path.join(reposDir(), dirName);
}
|
||||
|
||||
/**
 * Makes sure a bare clone of the repository exists locally and is refreshed.
 *
 * When the clone path is already inside a git repository, `git fetch --all` is
 * run there and any fetch failure is propagated as-is. Otherwise a fresh
 * `git clone --bare` is made. Keeping the two cases apart means a failed fetch
 * (network, expired token) is no longer replaced by a misleading
 * "destination path already exists" error from a follow-up clone attempt.
 *
 * @param repoUrl URL to clone from when no local clone exists yet.
 * @param slug    Name used to derive the local clone directory.
 * @returns Path of the bare clone.
 */
export async function ensureBareClone(
  repoUrl: string,
  slug: string,
): Promise<string> {
  const clonePath = bareClonePath(slug);
  await mkdir(reposDir(), { recursive: true });

  // Probe for an existing repository. This fails both when the directory is
  // missing and when it exists but is not a git repository.
  let cloneExists: boolean;
  try {
    await exec("git", ["rev-parse", "--git-dir"], { cwd: clonePath });
    cloneExists = true;
  } catch {
    cloneExists = false;
  }

  if (cloneExists) {
    // Repo already cloned — refresh it; errors surface to the caller.
    await exec("git", ["fetch", "--all"], { cwd: clonePath });
  } else {
    // Clone bare
    await exec("git", ["clone", "--bare", repoUrl, clonePath]);
  }

  return clonePath;
}
|
||||
|
||||
/**
 * Adds a git worktree on a newly created branch.
 *
 * The parent directory of `worktreePath` is created first. `git worktree add`
 * is then run from the bare clone; `base` is appended as the start point when
 * it is provided and non-empty.
 */
export async function createWorktree(
  bareClone: string,
  worktreePath: string,
  branch: string,
  base?: string,
): Promise<void> {
  const parentDir = path.dirname(worktreePath);
  await mkdir(parentDir, { recursive: true });

  const gitArgs = base
    ? ["worktree", "add", worktreePath, "-b", branch, base]
    : ["worktree", "add", worktreePath, "-b", branch];
  await exec("git", gitArgs, { cwd: bareClone });
}
|
||||
|
||||
/**
 * Force-removes a git worktree, run from the bare clone.
 * Cleanup is best-effort: any failure of the git command is ignored.
 */
export async function removeWorktree(
  bareClone: string,
  worktreePath: string,
): Promise<void> {
  const gitArgs = ["worktree", "remove", "--force", worktreePath];
  try {
    await exec("git", gitArgs, { cwd: bareClone });
  } catch {
    // Deliberately ignored — removal is best-effort.
  }
}
|
||||
|
||||
/**
 * Returns the trimmed output of `git diff --stat HEAD` for the directory,
 * or an empty string when the git command fails.
 */
export async function getDiffStats(workDir: string): Promise<string> {
  const gitArgs = ["diff", "--stat", "HEAD"];
  try {
    const result = await exec("git", gitArgs, { cwd: workDir });
    return result.stdout.trim();
  } catch {
    return "";
  }
}
|
||||
|
||||
/**
 * Reports whether the working tree has any uncommitted change.
 *
 * `git status --porcelain` is used rather than `git diff HEAD` because the
 * latter ignores untracked files: an agent that only creates new files would
 * otherwise be reported as having changed nothing. Modified, staged, deleted
 * and untracked (non-ignored) paths all count as changes.
 *
 * @param workDir Directory inside the git working tree to inspect.
 * @returns `true` when git lists at least one changed path; `false` when the
 *          tree is clean or the git command fails.
 */
export async function hasDiff(workDir: string): Promise<boolean> {
  try {
    const { stdout } = await exec(
      "git",
      ["status", "--porcelain"],
      { cwd: workDir },
    );
    return stdout.trim().length > 0;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Stages everything in the working tree (`git add -A`) and commits it with the
 * given message. `--allow-empty` is passed, so a commit is created even when
 * nothing changed.
 */
export async function commitAll(
  workDir: string,
  message: string,
): Promise<void> {
  const stageArgs = ["add", "-A"];
  const commitArgs = ["commit", "-m", message, "--allow-empty"];

  await exec("git", stageArgs, { cwd: workDir });
  await exec("git", commitArgs, { cwd: workDir });
}
|
||||
|
||||
/** Pushes `branch` to the `origin` remote using `--force-with-lease`. */
export async function pushBranch(
  workDir: string,
  branch: string,
): Promise<void> {
  const pushArgs = ["push", "origin", branch, "--force-with-lease"];
  await exec("git", pushArgs, { cwd: workDir });
}
|
||||
|
||||
/**
 * Opens a pull request through the GitHub CLI.
 *
 * `gh pr create` has no `--json` option; on success it prints the URL of the
 * new pull request. The URL is therefore read from the command output and the
 * PR number is taken from its `/pull/<n>` path segment.
 *
 * @param opts.repo  Repository passed to `--repo`.
 * @param opts.head  Head branch of the pull request.
 * @param opts.title Pull request title.
 * @param opts.body  Pull request description.
 * @param opts.token Token exposed to `gh` as GH_TOKEN.
 * @returns Number and URL of the created pull request.
 * @throws Error when `gh` fails or its output contains no pull request URL.
 */
export async function createPullRequest(opts: {
  repo: string;
  head: string;
  title: string;
  body: string;
  token: string;
}): Promise<{ number: number; url: string }> {
  const { stdout } = await exec(
    "gh",
    [
      "pr",
      "create",
      "--repo", opts.repo,
      "--head", opts.head,
      "--title", opts.title,
      "--body", opts.body,
    ],
    {
      env: { ...process.env, GH_TOKEN: opts.token },
    },
  );

  const match = stdout.match(/https?:\/\/\S+\/pull\/(\d+)/);
  if (!match) {
    throw new Error(`Could not find a pull request URL in gh output: ${stdout.trim()}`);
  }

  return { number: parseInt(match[1], 10), url: match[0] };
}
|
||||
135
apps/harness/src/lib/model-providers.ts
Normal file
135
apps/harness/src/lib/model-providers.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { getRawCredentialsByProvider } from "./credentials";
|
||||
|
||||
/** A model as reported by a provider's model-listing endpoint. */
export interface ModelInfo {
  /** Model identifier. */
  id: string;
  /** Display name. */
  name: string;
  /** Provider the model was listed by. */
  provider: string;
  /** Context window size, when the provider reports one. */
  contextWindow?: number;
}
|
||||
|
||||
/**
 * Queries the Anthropic, OpenAI, OpenRouter and Google listings concurrently
 * and concatenates the results in that order. A provider whose lookup rejects
 * contributes nothing.
 */
export async function fetchAllModels(): Promise<ModelInfo[]> {
  const lookups = [
    fetchAnthropicModels(),
    fetchOpenAIModels(),
    fetchOpenRouterModels(),
    fetchGoogleModels(),
  ];
  const outcomes = await Promise.allSettled(lookups);

  const models: ModelInfo[] = [];
  for (const outcome of outcomes) {
    if (outcome.status === "fulfilled") {
      models.push(...outcome.value);
    }
  }
  return models;
}
|
||||
|
||||
// Lists models from the Anthropic API. Stored Anthropic credentials are tried
// in order; the first one that yields a successful response determines the
// result. A non-OK response or a thrown error moves on to the next credential.
// Returns an empty list when there are no credentials or none succeed.
async function fetchAnthropicModels(): Promise<ModelInfo[]> {
  const anthropicCreds = getRawCredentialsByProvider("anthropic");
  if (anthropicCreds.length === 0) return [];

  for (const { token } of anthropicCreds) {
    try {
      const response = await fetch("https://api.anthropic.com/v1/models", {
        headers: {
          "x-api-key": token,
          "anthropic-version": "2023-06-01",
        },
      });

      if (response.ok) {
        const payload = await response.json();
        const listed: { id: string; display_name?: string }[] = payload.data || [];
        return listed.map((model) => ({
          id: model.id,
          name: model.display_name || model.id,
          provider: "anthropic",
        }));
      }
    } catch {
      // Try the next credential.
    }
  }

  return [];
}
|
||||
|
||||
// Lists chat/reasoning models from the OpenAI API (or a compatible endpoint
// given by the credential's baseUrl). Stored OpenAI credentials are tried in
// order; the first successful response determines the result, while a non-OK
// response or a thrown error moves on to the next credential.
//
// Only ids starting with "gpt-", "chatgpt-" or an o-series prefix ("o"
// followed by a digit, e.g. o1 / o3 / o4-mini) are kept. Matching on a bare
// "o" prefix would also admit unrelated ids such as "omni-moderation-*".
async function fetchOpenAIModels(): Promise<ModelInfo[]> {
  const creds = getRawCredentialsByProvider("openai");
  if (creds.length === 0) return [];

  const isChatModel = (id: string): boolean =>
    id.startsWith("gpt-") || id.startsWith("chatgpt-") || /^o\d/.test(id);

  for (const cred of creds) {
    try {
      // Drop trailing slashes so a baseUrl like "https://host/" does not
      // produce "https://host//v1/models".
      const baseUrl = (cred.baseUrl || "https://api.openai.com").replace(/\/+$/, "");
      const res = await fetch(`${baseUrl}/v1/models`, {
        headers: { Authorization: `Bearer ${cred.token}` },
      });

      if (!res.ok) continue;

      const data = await res.json();
      return (data.data || [])
        .filter((m: { id: string }) => isChatModel(m.id))
        .map((m: { id: string }) => ({
          id: m.id,
          name: m.id,
          provider: "openai",
        }));
    } catch {
      continue;
    }
  }

  return [];
}
|
||||
|
||||
// Lists models from the OpenRouter API. Stored OpenRouter credentials are
// tried in order; the first successful response determines the result, while a
// non-OK response or a thrown error moves on to the next credential.
// Returns an empty list when there are no credentials or none succeed.
async function fetchOpenRouterModels(): Promise<ModelInfo[]> {
  const openRouterCreds = getRawCredentialsByProvider("openrouter");
  if (openRouterCreds.length === 0) return [];

  for (const { token } of openRouterCreds) {
    try {
      const response = await fetch("https://openrouter.ai/api/v1/models", {
        headers: { Authorization: `Bearer ${token}` },
      });

      if (response.ok) {
        const payload = await response.json();
        const listed: { id: string; name?: string; context_length?: number }[] = payload.data || [];
        return listed.map((model) => ({
          id: model.id,
          name: model.name || model.id,
          provider: "openrouter",
          contextWindow: model.context_length,
        }));
      }
    } catch {
      // Try the next credential.
    }
  }

  return [];
}
|
||||
|
||||
// Lists Gemini models from the Google Generative Language API. Stored Google
// credentials are tried in order; the first successful response determines the
// result, while a non-OK response or a thrown error moves on to the next
// credential.
//
// The API key is sent in the `x-goog-api-key` header instead of the `key`
// query parameter: this keeps the secret out of the request URL (and therefore
// out of URL-based logs) and avoids having to escape it.
async function fetchGoogleModels(): Promise<ModelInfo[]> {
  const creds = getRawCredentialsByProvider("google");
  if (creds.length === 0) return [];

  for (const cred of creds) {
    try {
      const res = await fetch(
        "https://generativelanguage.googleapis.com/v1beta/models",
        { headers: { "x-goog-api-key": cred.token } },
      );

      if (!res.ok) continue;

      const data = await res.json();
      return (data.models || [])
        .filter((m: { name: string }) => m.name.includes("gemini"))
        .map((m: { name: string; displayName?: string; inputTokenLimit?: number }) => ({
          // Listed names carry a "models/" prefix; strip it to get the id.
          id: m.name.replace("models/", ""),
          name: m.displayName || m.name,
          provider: "google",
          contextWindow: m.inputTokenLimit,
        }));
    } catch {
      continue;
    }
  }

  return [];
}
|
||||
154
apps/harness/src/lib/model-store.ts
Normal file
154
apps/harness/src/lib/model-store.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
// Curated model list and usage tracking
|
||||
|
||||
/** A model in the curated list, with optional pricing. */
export interface CuratedModel {
  /** Model identifier; also the key in the curated store. */
  id: string;
  /** Display name. */
  name: string;
  /** Provider of the model. */
  provider: string;
  /** Whether the model is currently enabled. */
  enabled: boolean;
  /** Context window size. */
  contextWindow?: number;
  /** USD per 1k input tokens. */
  costPer1kInput?: number;
  /** USD per 1k output tokens. */
  costPer1kOutput?: number;
}
|
||||
|
||||
/** One recorded model invocation. */
export interface ModelUsageEntry {
  /** Model that was used. */
  modelId: string;
  /** Provider of that model. */
  provider: string;
  /** Id of the task the usage belongs to. */
  taskId: string;
  /** Slug of that task. */
  taskSlug: string;
  /** Iteration number within the task. */
  iteration: number;
  /** Input tokens consumed. */
  inputTokens: number;
  /** Output tokens produced. */
  outputTokens: number;
  /** Duration in milliseconds. */
  durationMs: number;
  /** Time of the entry (epoch milliseconds in the seed data). */
  timestamp: number;
}
|
||||
|
||||
/** Aggregated usage for one provider/model pair. */
export interface ModelUsageSummary {
  /** Model the totals refer to. */
  modelId: string;
  /** Provider of that model. */
  provider: string;
  /** Sum of input tokens. */
  totalInputTokens: number;
  /** Sum of output tokens. */
  totalOutputTokens: number;
  /** Sum of computed costs. */
  totalCost: number;
  /** Number of usage entries aggregated. */
  totalRequests: number;
  /** Sum of durations in milliseconds. */
  totalDurationMs: number;
}
|
||||
|
||||
// In-memory stores
|
||||
const curatedModels: Map<string, CuratedModel> = new Map();
|
||||
const usageLog: ModelUsageEntry[] = [];
|
||||
|
||||
// ─── CURATED MODELS ─────────────────────────────────────────
|
||||
|
||||
/** Returns a new array holding every curated model. */
export function getCuratedModels(): CuratedModel[] {
  return [...curatedModels.values()];
}
|
||||
|
||||
/** Returns the curated models whose `enabled` flag is truthy. */
export function getEnabledModels(): CuratedModel[] {
  const enabledOnly: CuratedModel[] = [];
  for (const model of curatedModels.values()) {
    if (model.enabled) enabledOnly.push(model);
  }
  return enabledOnly;
}
|
||||
|
||||
/** Stores `model` under its id, replacing any previous entry, and returns it. */
export function upsertCuratedModel(model: CuratedModel): CuratedModel {
  const { id } = model;
  curatedModels.set(id, model);
  return model;
}
|
||||
|
||||
/** Removes the curated model with the given id; the result is `true` only if an entry existed. */
export function removeCuratedModel(id: string): boolean {
  const removed = curatedModels.delete(id);
  return removed;
}
|
||||
|
||||
/**
 * Flips the `enabled` flag of a curated model. The stored object is modified
 * in place and returned; `undefined` is returned when the id is unknown.
 */
export function toggleModelEnabled(id: string): CuratedModel | undefined {
  const target = curatedModels.get(id);
  if (target) {
    target.enabled = !target.enabled;
    curatedModels.set(id, target);
    return target;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Overwrites both per-1k-token prices of a curated model. The stored object is
 * modified in place and returned; `undefined` is returned when the id is unknown.
 */
export function updateModelCost(id: string, costPer1kInput: number, costPer1kOutput: number): CuratedModel | undefined {
  const target = curatedModels.get(id);
  if (target) {
    target.costPer1kInput = costPer1kInput;
    target.costPer1kOutput = costPer1kOutput;
    curatedModels.set(id, target);
    return target;
  }
  return undefined;
}
|
||||
|
||||
// ─── USAGE TRACKING ─────────────────────────────────────────
|
||||
|
||||
/** Appends one usage entry to the end of the in-memory usage log. */
export function recordUsage(entry: ModelUsageEntry): void {
  usageLog[usageLog.length] = entry;
}
|
||||
|
||||
/** Returns a shallow copy of the usage log, so callers cannot reorder or truncate the stored array. */
export function getUsageLog(): ModelUsageEntry[] {
  return usageLog.slice();
}
|
||||
|
||||
/**
 * Aggregates the usage log per provider/model pair.
 *
 * Cost is computed per entry from the curated model's per-1k prices (looked up
 * by model id); a missing model or a missing/zero price contributes 0. The
 * summaries are returned sorted by total cost, highest first.
 */
export function getUsageSummary(): ModelUsageSummary[] {
  const totals = new Map<string, ModelUsageSummary>();

  for (const entry of usageLog) {
    const pricing = curatedModels.get(entry.modelId);

    let inputCost = 0;
    if (pricing?.costPer1kInput) {
      inputCost = (entry.inputTokens / 1000) * pricing.costPer1kInput;
    }
    let outputCost = 0;
    if (pricing?.costPer1kOutput) {
      outputCost = (entry.outputTokens / 1000) * pricing.costPer1kOutput;
    }
    const entryCost = inputCost + outputCost;

    const bucketKey = `${entry.provider}:${entry.modelId}`;
    const bucket = totals.get(bucketKey);

    if (!bucket) {
      // First entry for this pair starts the bucket.
      totals.set(bucketKey, {
        modelId: entry.modelId,
        provider: entry.provider,
        totalInputTokens: entry.inputTokens,
        totalOutputTokens: entry.outputTokens,
        totalCost: entryCost,
        totalRequests: 1,
        totalDurationMs: entry.durationMs,
      });
      continue;
    }

    bucket.totalInputTokens += entry.inputTokens;
    bucket.totalOutputTokens += entry.outputTokens;
    bucket.totalCost += entryCost;
    bucket.totalRequests += 1;
    bucket.totalDurationMs += entry.durationMs;
  }

  const summaries = Array.from(totals.values());
  summaries.sort((a, b) => b.totalCost - a.totalCost);
  return summaries;
}
|
||||
|
||||
// ─── SEED DATA ──────────────────────────────────────────────
|
||||
// Pre-populate with well-known models and pricing
|
||||
|
||||
// Models (with context window and per-1k-token prices) used to pre-populate the
// curated list. The `enabled` flag is supplied when the entries are stored.
const SEED_MODELS: Omit<CuratedModel, "enabled">[] = [
  // Anthropic
  {
    id: "claude-opus-4-20250514",
    name: "Claude Opus 4",
    provider: "anthropic",
    contextWindow: 200000,
    costPer1kInput: 0.015,
    costPer1kOutput: 0.075,
  },
  {
    id: "claude-sonnet-4-20250514",
    name: "Claude Sonnet 4",
    provider: "anthropic",
    contextWindow: 200000,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015,
  },
  {
    id: "claude-haiku-4-20250514",
    name: "Claude Haiku 4",
    provider: "anthropic",
    contextWindow: 200000,
    costPer1kInput: 0.0008,
    costPer1kOutput: 0.004,
  },
  // OpenAI
  {
    id: "gpt-4o",
    name: "GPT-4o",
    provider: "openai",
    contextWindow: 128000,
    costPer1kInput: 0.0025,
    costPer1kOutput: 0.01,
  },
  {
    id: "gpt-4o-mini",
    name: "GPT-4o Mini",
    provider: "openai",
    contextWindow: 128000,
    costPer1kInput: 0.00015,
    costPer1kOutput: 0.0006,
  },
  {
    id: "o3",
    name: "o3",
    provider: "openai",
    contextWindow: 200000,
    costPer1kInput: 0.01,
    costPer1kOutput: 0.04,
  },
  {
    id: "o4-mini",
    name: "o4 Mini",
    provider: "openai",
    contextWindow: 200000,
    costPer1kInput: 0.0011,
    costPer1kOutput: 0.0044,
  },
  // Google
  {
    id: "gemini-2.5-pro",
    name: "Gemini 2.5 Pro",
    provider: "google",
    contextWindow: 1048576,
    costPer1kInput: 0.00125,
    costPer1kOutput: 0.01,
  },
  {
    id: "gemini-2.5-flash",
    name: "Gemini 2.5 Flash",
    provider: "google",
    contextWindow: 1048576,
    costPer1kInput: 0.00015,
    costPer1kOutput: 0.0006,
  },
];
|
||||
|
||||
// Sample usage entries used to pre-populate the usage log. Timestamps are
// assigned when the entries are stored.
const SEED_USAGE: Omit<ModelUsageEntry, "timestamp">[] = [
  // task-002 on Claude Sonnet 4
  {
    modelId: "claude-sonnet-4-20250514", provider: "anthropic",
    taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 1,
    inputTokens: 48200, outputTokens: 12400, durationMs: 34000,
  },
  {
    modelId: "claude-sonnet-4-20250514", provider: "anthropic",
    taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 2,
    inputTokens: 52100, outputTokens: 15800, durationMs: 41000,
  },
  {
    modelId: "claude-sonnet-4-20250514", provider: "anthropic",
    taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 3,
    inputTokens: 61300, outputTokens: 18200, durationMs: 45000,
  },
  {
    modelId: "claude-sonnet-4-20250514", provider: "anthropic",
    taskId: "task-002", taskSlug: "haiku-moderation-tier2", iteration: 4,
    inputTokens: 55000, outputTokens: 14600, durationMs: 38000,
  },
  // task-001 on Claude Opus 4
  {
    modelId: "claude-opus-4-20250514", provider: "anthropic",
    taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1,
    inputTokens: 85400, outputTokens: 28900, durationMs: 92000,
  },
  {
    modelId: "claude-opus-4-20250514", provider: "anthropic",
    taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 2,
    inputTokens: 91200, outputTokens: 31400, durationMs: 98000,
  },
  {
    modelId: "claude-opus-4-20250514", provider: "anthropic",
    taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 3,
    inputTokens: 78600, outputTokens: 22100, durationMs: 85000,
  },
  // task-001 on GPT-4o
  {
    modelId: "gpt-4o", provider: "openai",
    taskId: "task-001", taskSlug: "pubsub-pipeline-migration", iteration: 1,
    inputTokens: 42000, outputTokens: 9800, durationMs: 28000,
  },
];
|
||||
|
||||
/**
 * Pre-populates the curated model list and the usage log.
 *
 * Does nothing when the curated list already has entries. Every seed model is
 * stored as enabled. Seed usage entries receive timestamps spaced 30 minutes
 * apart, with the last entry 30 minutes before the current time.
 */
export function seedData() {
  if (curatedModels.size > 0) return; // already seeded

  SEED_MODELS.forEach((model) => {
    curatedModels.set(model.id, { ...model, enabled: true });
  });

  const now = Date.now();
  const total = SEED_USAGE.length;
  SEED_USAGE.forEach((usage, index) => {
    const stepsBack = total - index;
    usageLog.push({ ...usage, timestamp: now - stepsBack * 1000 * 60 * 30 });
  });
}
|
||||
|
||||
seedData();
|
||||
316
apps/harness/src/lib/orchestrator.ts
Normal file
316
apps/harness/src/lib/orchestrator.ts
Normal file
@@ -0,0 +1,316 @@
|
||||
import {
|
||||
getTask,
|
||||
updateTask,
|
||||
appendIteration,
|
||||
updateIteration,
|
||||
getFirstPendingTask,
|
||||
getRunningTasks,
|
||||
} from "./store";
|
||||
import { recordUsage } from "./model-store";
|
||||
import { getAgentConfig } from "./agents";
|
||||
import { getRawCredentialsByProvider } from "./credentials";
|
||||
import {
|
||||
ensureBareClone,
|
||||
createWorktree,
|
||||
removeWorktree,
|
||||
iterationDir,
|
||||
buildAuthenticatedCloneUrl,
|
||||
commitAll,
|
||||
pushBranch,
|
||||
createPullRequest,
|
||||
} from "./git-ops";
|
||||
import { executeAgent } from "./executor";
|
||||
import { buildPrompt } from "./prompt-builder";
|
||||
import { evaluate } from "./evaluator";
|
||||
import { Task, Iteration } from "./types";
|
||||
|
||||
/** Delay between checks for a pending task, in milliseconds. */
const POLL_INTERVAL_MS = 2000;

/** Handle of the polling interval; null while no interval is scheduled. */
let pollTimer: ReturnType<typeof setInterval> | null = null;

/** True between `startOrchestrator()` and `stopOrchestrator()`. */
let running = false;

/** Id of the task currently being processed; null when idle. */
let currentTaskId: string | null = null;

/** Abort controller for the task currently being processed; null when idle. */
let currentAbort: AbortController | null = null;
|
||||
|
||||
/**
 * Reports the orchestrator's on/off flag.
 *
 * @returns The current value of the module-level `running` flag.
 */
export function isRunning(): boolean {
  const active = running;
  return active;
}
|
||||
|
||||
/**
 * Returns the id of the task the orchestrator is processing right now.
 *
 * @returns The task id, or `null` when no task is in flight.
 */
export function currentRunningTaskId(): string | null {
  const activeId = currentTaskId;
  return activeId;
}
|
||||
|
||||
/**
 * Starts the polling loop that picks up pending tasks.
 *
 * Calling it while the orchestrator is already running is a no-op. On start,
 * tasks still marked as running are failed via `recoverCrashedTasks()`, then
 * a poll is issued immediately and again every `POLL_INTERVAL_MS`.
 */
export function startOrchestrator(): void {
  if (running) return;
  running = true;

  // Mark any crashed running tasks as failed on startup
  recoverCrashedTasks();

  // `poll()` is async; attach a rejection handler so an unexpected error
  // surfaces in the log instead of as an unhandled promise rejection.
  const tick = (): void => {
    if (currentTaskId) return; // already processing a task
    poll().catch((err: unknown) => {
      console.error("[orchestrator] Unexpected error while polling:", err);
    });
  };

  pollTimer = setInterval(tick, POLL_INTERVAL_MS);

  // Immediate first poll
  tick();
}
|
||||
|
||||
/**
 * Stops the polling loop.
 *
 * Clears the `running` flag and cancels the interval timer if one exists.
 * This function does not touch `currentAbort`, so a task that is already
 * executing is not cancelled here.
 */
export function stopOrchestrator(): void {
  running = false;

  if (!pollTimer) return;

  clearInterval(pollTimer);
  pollTimer = null;
}
|
||||
|
||||
/**
 * Requests cancellation of the task that is currently executing.
 *
 * @param taskId - Id of the task to cancel.
 * @returns `true` when `taskId` is the in-flight task (its abort signal is
 *   triggered), `false` when a different task, or none, is in flight.
 */
export function cancelTask(taskId: string): boolean {
  const isCurrent = currentTaskId === taskId;
  if (isCurrent) {
    currentAbort?.abort();
  }
  return isCurrent;
}
|
||||
|
||||
/**
 * Fails every task the store still reports as running.
 *
 * Called on orchestrator start: a task that is "running" at that point has
 * no live agent behind it. Iterations in the "running" state are marked
 * failed with an explanatory diagnosis; other iterations are kept as-is.
 */
function recoverCrashedTasks(): void {
  getRunningTasks().forEach((stale) => {
    // Mark running iterations as failed
    const iterations = stale.iterations.map((entry) => {
      if (entry.status !== "running") return entry;
      return {
        ...entry,
        status: "failed" as const,
        diagnosis: "Interrupted — server restarted",
        completedAt: Date.now(),
      };
    });

    updateTask(stale.id, {
      status: "failed",
      iterations,
      completedAt: Date.now(),
    });
  });
}
|
||||
|
||||
/**
 * Picks up the first pending task, if any, and runs it to completion.
 *
 * Returns immediately when the orchestrator is stopped or a task is already
 * in flight. While a task runs, `currentTaskId` and `currentAbort` identify
 * it; both are reset afterwards whether the task succeeded or threw. An
 * error escaping `runTask` is logged and the task is marked failed.
 */
async function poll(): Promise<void> {
  if (currentTaskId || !running) return;

  const next = getFirstPendingTask();
  if (!next) return;

  // Claim the slot synchronously so an overlapping poll sees it as busy.
  currentTaskId = next.id;
  currentAbort = new AbortController();

  try {
    await runTask(next);
  } catch (err) {
    console.error(`[orchestrator] Task ${next.id} failed with error:`, err);
    updateTask(next.id, { status: "failed", completedAt: Date.now() });
  } finally {
    currentTaskId = null;
    currentAbort = null;
  }
}
|
||||
|
||||
/**
 * Runs one task end to end: resolve the agent and git credentials, clone the
 * repository, iterate until an iteration passes all evals or
 * `task.maxIterations` is exhausted, then push the branch and open a PR.
 *
 * Status transitions written to the store:
 * - "failed" when the agent config or GitHub token is missing, the clone
 *   fails, the task is cancelled, an iteration ends fatally, or no iteration
 *   converges;
 * - "completed" when an iteration converges. If pushing or PR creation
 *   fails, the task is still marked "completed" but without `pr`.
 *
 * Worktrees of all recorded iterations are removed on every exit path once
 * the bare clone exists.
 *
 * @param task - Snapshot of the task to run; fresh state is re-read from the
 *   store where it matters.
 */
async function runTask(task: Task): Promise<void> {
  const agentConfig = getAgentConfig(task.spec.agentId);
  if (!agentConfig) {
    console.error(`[orchestrator] Task ${task.id}: no agent config for "${task.spec.agentId}"`);
    markTaskFailed(task.id);
    return;
  }

  // Determine git credentials and repo URL
  const gitCreds = getRawCredentialsByProvider("github");
  const gitToken = gitCreds[0]?.token;
  if (!gitToken) {
    console.error(`[orchestrator] Task ${task.id}: no GitHub credential available`);
    markTaskFailed(task.id);
    return;
  }

  const repoUrl = buildAuthenticatedCloneUrl(task.project, "github", gitToken);

  updateTask(task.id, {
    status: "running",
    startedAt: Date.now(),
  });

  // Ensure bare clone
  let bareClone: string;
  try {
    bareClone = await ensureBareClone(repoUrl, task.slug);
  } catch (err) {
    console.error(`[orchestrator] Failed to clone repo for task ${task.id}:`, err);
    markTaskFailed(task.id);
    return;
  }

  const branchName = `harness/${task.slug}`;

  try {
    let converged = false;

    for (let n = 1; n <= task.maxIterations; n++) {
      if (currentAbort?.signal.aborted) {
        markTaskFailed(task.id);
        return;
      }

      const result = await runIteration(task, n, bareClone, branchName);
      if (!result) {
        // Iteration was cancelled or errored fatally. Make sure the task
        // does not stay in "running" if the iteration did not settle it.
        if (getTask(task.id)?.status === "running") {
          markTaskFailed(task.id);
        }
        return;
      }

      if (result.allPassed) {
        converged = true;
        break;
      }
    }

    if (!converged) {
      markTaskFailed(task.id);
      return;
    }

    // Push and create PR
    try {
      const latest = getTask(task.id);
      if (!latest) {
        throw new Error(`Task ${task.id} no longer exists in the store`);
      }
      const lastIterN = latest.iteration;
      const workDir = iterationDir(task.id, lastIterN);

      // NOTE(review): runIteration checks iterations after the first out on
      // `${branchName}-iter-${n}`, while `branchName` is pushed here. Confirm
      // that pushBranch publishes the work tree's HEAD under `branchName`;
      // otherwise the PR may miss the passing iteration's commit.
      await commitAll(workDir, `harness: ${task.goal}`);
      await pushBranch(workDir, branchName);

      const pr = await createPullRequest({
        repo: task.project,
        head: branchName,
        title: `[harness] ${task.goal}`,
        body: `Automated by harness orchestrator.\n\nTask: ${task.slug}\nIterations: ${lastIterN}`,
        token: gitToken,
      });

      updateTask(task.id, {
        status: "completed",
        completedAt: Date.now(),
        pr: { number: pr.number, title: `[harness] ${task.goal}`, status: "open" },
      });
    } catch (err) {
      console.error(`[orchestrator] Failed to create PR for task ${task.id}:`, err);
      updateTask(task.id, {
        status: "completed",
        completedAt: Date.now(),
      });
    }
  } finally {
    // Runs on every exit path above, including cancellation and errors.
    await cleanupWorktrees(task.id, bareClone);
  }
}

/** Marks a task as failed and stamps its completion time. */
function markTaskFailed(taskId: string): void {
  updateTask(taskId, { status: "failed", completedAt: Date.now() });
}

/**
 * Removes the worktree of every iteration recorded for the task.
 * Best-effort: a failed removal is logged and does not change task status.
 */
async function cleanupWorktrees(taskId: string, bareClone: string): Promise<void> {
  const iterations = getTask(taskId)?.iterations ?? [];
  for (const iter of iterations) {
    try {
      await removeWorktree(bareClone, iterationDir(taskId, iter.n));
    } catch (err) {
      console.error(`[orchestrator] Failed to remove worktree for task ${taskId} iteration ${iter.n}:`, err);
    }
  }
}
|
||||
|
||||
/**
 * Executes a single iteration of a task: create a worktree, build the
 * prompt, run the agent, evaluate the result and record it in the store.
 *
 * @param task - The task being run.
 * @param n - 1-based iteration number.
 * @param bareClone - Bare clone path as returned by `ensureBareClone`.
 * @param branchName - Base branch name; iteration 1 uses it as-is, later
 *   iterations use `${branchName}-iter-${n}`.
 * @returns `{ allPassed }` with the evaluation verdict, or `null` when the
 *   iteration ended fatally (worktree creation failed or the run was
 *   cancelled). In both `null` cases the task is marked failed here.
 * @throws Re-throws any unexpected error from prompt building, agent
 *   execution or evaluation, after marking the iteration as failed.
 */
async function runIteration(
  task: Task,
  n: number,
  bareClone: string,
  branchName: string,
): Promise<{ allPassed: boolean } | null> {
  const iteration: Iteration = {
    n,
    status: "running",
    diagnosis: null,
    startedAt: Date.now(),
  };
  appendIteration(task.id, iteration);

  const workDir = iterationDir(task.id, n);

  try {
    // Create worktree: iteration 1 uses the base branch name, every later
    // iteration gets its own `-iter-<n>` branch. All start from "HEAD".
    const branchForWorktree = n === 1 ? branchName : `${branchName}-iter-${n}`;
    await createWorktree(bareClone, workDir, branchForWorktree, "HEAD");
  } catch (err) {
    console.error(`[orchestrator] Failed to create worktree for iteration ${n}:`, err);
    updateIteration(task.id, n, {
      status: "failed",
      diagnosis: `Failed to create worktree: ${err}`,
      completedAt: Date.now(),
    });
    // The caller stops on `null` without touching the task, so settle the
    // task here; otherwise it would stay in "running" indefinitely.
    updateTask(task.id, { status: "failed", completedAt: Date.now() });
    return null;
  }

  try {
    // Build prompt with prior iterations
    const currentTask = getTask(task.id);
    if (!currentTask) {
      throw new Error(`Task ${task.id} no longer exists in the store`);
    }
    const priorIterations = currentTask.iterations.filter((i) => i.n < n);
    const prompt = await buildPrompt({
      task: currentTask,
      iterationNumber: n,
      priorIterations,
    });

    // Execute agent
    const execResult = await executeAgent({
      agentId: task.spec.agentId,
      prompt,
      workDir,
      signal: currentAbort?.signal,
    });

    if (execResult.killed && currentAbort?.signal.aborted) {
      updateIteration(task.id, n, {
        status: "failed",
        diagnosis: "Cancelled by user",
        completedAt: Date.now(),
      });
      updateTask(task.id, { status: "failed", completedAt: Date.now() });
      return null;
    }

    // Evaluate
    const evalResult = await evaluate({
      task: currentTask,
      iterationNumber: n,
      agentOutput: execResult.stdout,
      exitCode: execResult.exitCode,
      workDir,
    });

    // Record usage
    const agentConfig = getAgentConfig(task.spec.agentId);
    if (agentConfig) {
      recordUsage({
        modelId: agentConfig.modelId,
        provider: agentConfig.provider,
        taskId: task.id,
        taskSlug: task.slug,
        iteration: n,
        inputTokens: execResult.inputTokens,
        outputTokens: execResult.outputTokens,
        durationMs: execResult.durationMs,
        timestamp: Date.now(),
      });
    }

    // Update iteration
    updateIteration(task.id, n, {
      status: evalResult.allPassed ? "passed" : "failed",
      diagnosis: evalResult.diagnosis,
      agentOutput: execResult.stdout.slice(-8000), // keep last 8k chars
      evals: evalResult.evals,
      diffStats: evalResult.diffStats,
      completedAt: Date.now(),
    });

    // Update task-level evals
    updateTask(task.id, { evals: evalResult.evals });

    return { allPassed: evalResult.allPassed };
  } catch (err) {
    // Do not leave the iteration in "running" when a step throws; the
    // error itself is still propagated to the caller.
    const reason = err instanceof Error ? err.message : String(err);
    updateIteration(task.id, n, {
      status: "failed",
      diagnosis: `Iteration aborted by unexpected error: ${reason}`,
      completedAt: Date.now(),
    });
    throw err;
  }
}
|
||||
94
apps/harness/src/lib/prompt-builder.ts
Normal file
94
apps/harness/src/lib/prompt-builder.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { readFile, readdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { Task, Iteration } from "./types";
|
||||
|
||||
/**
 * Directory that knowledge references are resolved against. Read from the
 * HARNESS_KNOWLEDGE_DIR environment variable; an empty string disables
 * knowledge loading.
 */
const KNOWLEDGE_DIR = process.env.HARNESS_KNOWLEDGE_DIR || "";

/** Maximum number of agent-output characters embedded in a prompt. */
const MAX_AGENT_OUTPUT_LENGTH = 4000;

/** Only this many of the most recent prior iterations are summarised. */
const MAX_PRIOR_ITERATIONS = 3;
|
||||
|
||||
/**
 * Assembles the markdown prompt handed to the agent for one iteration.
 *
 * Sections, in order and separated by `---` rules: task goal, success
 * criteria, constraints, reference material, a summary of up to
 * `MAX_PRIOR_ITERATIONS` recent iterations, and closing instructions.
 * Sections with no content are omitted. Only the most recent prior
 * iteration includes agent output, limited to the last
 * `MAX_AGENT_OUTPUT_LENGTH` characters.
 *
 * @param opts.task - Task whose goal and spec drive the prompt.
 * @param opts.iterationNumber - 1-based number of the upcoming iteration.
 * @param opts.priorIterations - Earlier iterations, oldest first.
 * @returns The complete prompt text.
 */
export async function buildPrompt(opts: {
  task: Task;
  iterationNumber: number;
  priorIterations: Iteration[];
}): Promise<string> {
  const { task, iterationNumber, priorIterations } = opts;
  const sections: string[] = [];

  // Task goal
  sections.push(`# Task\n\n${task.goal}`);

  // Success criteria
  if (task.spec.criteria.length > 0) {
    const criteriaLines = task.spec.criteria
      .map((c) => `- **${c.label}**: ${c.target}`)
      .join("\n");
    sections.push(`# Success Criteria\n\n${criteriaLines}`);
  }

  // Constraints
  if (task.spec.constraints.length > 0) {
    const constraintLines = task.spec.constraints
      .map((c) => `- ${c}`)
      .join("\n");
    sections.push(`# Constraints\n\n${constraintLines}`);
  }

  // Knowledge references
  const knowledgeContent = await loadKnowledge(task.spec.knowledgeRefs);
  if (knowledgeContent) {
    sections.push(`# Reference Material\n\n${knowledgeContent}`);
  }

  // Prior iterations
  if (priorIterations.length > 0) {
    const recentIterations = priorIterations.slice(-MAX_PRIOR_ITERATIONS);
    const lastIndex = recentIterations.length - 1;
    const priorLines = recentIterations.map((iter, index) => {
      const parts = [`## Iteration ${iter.n} — ${iter.status}`];
      if (iter.diagnosis) {
        parts.push(`**Diagnosis:** ${iter.diagnosis}`);
      }
      if (iter.evals) {
        const evalSummary = Object.entries(iter.evals)
          .map(([key, ev]) => `- ${key}: ${ev.pass ? "PASS" : "FAIL"} (${ev.value} ${ev.unit}, target: ${ev.target})`)
          .join("\n");
        parts.push(`**Evals:**\n${evalSummary}`);
      }
      // Include truncated agent output only for the most recent iteration
      if (index === lastIndex && iter.agentOutput) {
        // The tail of the output is kept, so the marker goes in front of
        // it: it is the beginning of the output that was dropped.
        const truncated = iter.agentOutput.length > MAX_AGENT_OUTPUT_LENGTH
          ? "... (truncated)\n" + iter.agentOutput.slice(-MAX_AGENT_OUTPUT_LENGTH)
          : iter.agentOutput;
        parts.push(`**Agent Output (truncated):**\n\`\`\`\n${truncated}\n\`\`\``);
      }
      return parts.join("\n");
    });
    sections.push(`# Prior Iterations\n\n${priorLines.join("\n\n")}`);
  }

  // Instructions
  sections.push(
    `# Instructions\n\n` +
    `This is iteration ${iterationNumber} of ${task.maxIterations}.\n` +
    `Work in the current directory. Make all necessary changes to satisfy the success criteria.\n` +
    `If prior iterations failed, analyze the diagnosis and try a different approach.`,
  );

  return sections.join("\n\n---\n\n");
}
|
||||
|
||||
/**
 * Loads the referenced knowledge files and joins them into one markdown
 * string, each file under a `## <ref>` heading.
 *
 * Each ref is a file path relative to `KNOWLEDGE_DIR` (no glob expansion is
 * performed). Refs that resolve outside `KNOWLEDGE_DIR`, such as `../`
 * traversal or absolute paths, are rejected, because refs come from the task
 * spec and must not be able to pull arbitrary files into the prompt. Files
 * that cannot be read are skipped.
 *
 * NOTE(review): containment is checked lexically; a symlink inside
 * `KNOWLEDGE_DIR` can still point elsewhere.
 *
 * @param refs - Knowledge file references from the task spec.
 * @returns The concatenated content, or `""` when `KNOWLEDGE_DIR` is unset,
 *   `refs` is empty, or nothing could be loaded.
 */
async function loadKnowledge(refs: string[]): Promise<string> {
  if (!KNOWLEDGE_DIR || refs.length === 0) return "";

  const root = path.resolve(KNOWLEDGE_DIR);
  const parts: string[] = [];

  for (const ref of refs) {
    const filePath = path.resolve(root, ref);

    // A ref is inside the root when the relative path from the root is
    // non-empty, does not climb upwards, and is not absolute (a different
    // drive on Windows yields an absolute "relative" path).
    const rel = path.relative(root, filePath);
    const escapesRoot =
      rel === "" || rel === ".." || rel.startsWith(".." + path.sep) || path.isAbsolute(rel);
    if (escapesRoot) {
      console.warn(`[prompt-builder] Ignoring knowledge ref outside knowledge dir: ${ref}`);
      continue;
    }

    try {
      const content = await readFile(filePath, "utf-8");
      parts.push(`## ${ref}\n\n${content}`);
    } catch {
      // Skip missing knowledge files
    }
  }

  return parts.join("\n\n");
}
|
||||
100
apps/harness/src/lib/repo-search.ts
Normal file
100
apps/harness/src/lib/repo-search.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { getCredentialsByProvider } from "./credentials";
|
||||
|
||||
/** One repository hit returned by `searchRepos`, normalised across hosts. */
export interface RepoResult {
  /** Hosting service the hit came from. */
  provider: "github" | "gitlab";
  /** Owner-qualified name (GitHub `full_name` / GitLab `path_with_namespace`). */
  fullName: string;
  /** Browser URL of the repository (GitHub `html_url` / GitLab `web_url`). */
  url: string;
  /** Repository description; empty string when the host reports none. */
  description: string;
  /** Default branch; "main" when the host reports none. */
  defaultBranch: string;
  /** Whether the repository is private. */
  private: boolean;
}
|
||||
|
||||
/**
 * Searches GitHub and GitLab concurrently for repositories matching `query`.
 *
 * @param query - Search text; queries shorter than two characters return no
 *   results without contacting either host.
 * @returns GitHub hits followed by GitLab hits. A host whose search rejects
 *   contributes nothing instead of failing the whole call.
 */
export async function searchRepos(query: string): Promise<RepoResult[]> {
  if (!query || query.length < 2) return [];

  const settled = await Promise.allSettled([
    searchGitHub(query),
    searchGitLab(query),
  ]);

  const merged: RepoResult[] = [];
  for (const outcome of settled) {
    if (outcome.status === "fulfilled") {
      merged.push(...outcome.value);
    }
  }
  return merged;
}
|
||||
|
||||
/**
 * Searches GitHub repositories once per stored GitHub credential.
 *
 * The same query is issued for every credential, so repositories visible to
 * more than one token would otherwise be returned several times. Hits are
 * de-duplicated by `full_name`, keeping the first occurrence.
 *
 * @param query - Search text, URL-encoded into the request.
 * @returns Matching repositories, at most 10 per credential. Credentials
 *   whose request fails or returns a non-OK status are skipped.
 */
async function searchGitHub(query: string): Promise<RepoResult[]> {
  const creds = getCredentialsByProvider("github");
  if (creds.length === 0) return [];

  const searchUrl =
    `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&per_page=10&sort=updated`;
  const byFullName = new Map<string, RepoResult>();

  for (const cred of creds) {
    try {
      const res = await fetch(searchUrl, {
        headers: {
          Authorization: `Bearer ${cred.token}`,
          Accept: "application/vnd.github+json",
          "X-GitHub-Api-Version": "2022-11-28",
        },
      });

      if (!res.ok) continue;

      const data = await res.json();
      // Treat a missing or malformed `items` field as "no results".
      const items = Array.isArray(data?.items) ? data.items : [];
      for (const repo of items) {
        if (byFullName.has(repo.full_name)) continue;
        byFullName.set(repo.full_name, {
          provider: "github",
          fullName: repo.full_name,
          url: repo.html_url,
          description: repo.description || "",
          defaultBranch: repo.default_branch || "main",
          private: repo.private,
        });
      }
    } catch {
      // skip failed credential
    }
  }

  return Array.from(byFullName.values());
}
|
||||
|
||||
/**
 * Searches GitLab projects once per stored GitLab credential.
 *
 * Each credential may target its own instance through `baseUrl` (default
 * `https://gitlab.com`); trailing slashes on `baseUrl` are ignored. Only
 * projects the token's user is a member of are requested. Hits are
 * de-duplicated by web URL, so the same project reached through several
 * tokens appears once while equally named projects on different instances
 * are all kept.
 *
 * @param query - Search text, URL-encoded into the request.
 * @returns Matching projects, at most 10 per credential. Credentials whose
 *   request fails, returns a non-OK status, or yields a non-array payload
 *   are skipped.
 */
async function searchGitLab(query: string): Promise<RepoResult[]> {
  const creds = getCredentialsByProvider("gitlab");
  if (creds.length === 0) return [];

  const byUrl = new Map<string, RepoResult>();

  for (const cred of creds) {
    // Avoid "//api/v4" when the configured base URL ends with a slash.
    const baseUrl = (cred.baseUrl || "https://gitlab.com").replace(/\/+$/, "");
    try {
      const res = await fetch(
        `${baseUrl}/api/v4/projects?search=${encodeURIComponent(query)}&per_page=10&order_by=updated_at&membership=true`,
        {
          headers: {
            "PRIVATE-TOKEN": cred.token,
          },
        },
      );

      if (!res.ok) continue;

      const data = await res.json();
      if (!Array.isArray(data)) continue;

      for (const project of data) {
        if (byUrl.has(project.web_url)) continue;
        byUrl.set(project.web_url, {
          provider: "gitlab",
          fullName: project.path_with_namespace,
          url: project.web_url,
          description: project.description || "",
          defaultBranch: project.default_branch || "main",
          private: project.visibility === "private",
        });
      }
    } catch {
      // skip failed credential
    }
  }

  return Array.from(byUrl.values());
}
|
||||
61
apps/harness/src/lib/store.ts
Normal file
61
apps/harness/src/lib/store.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import { Task } from "./types";
|
||||
|
||||
// In-memory task store, keyed by task id. Contents are lost when the process
// exits. Will be replaced with persistent storage (CloudNativePG) once the
// orchestrator loop is wired up.
const tasks = new Map<string, Task>();
|
||||
|
||||
/**
 * Lists every stored task.
 *
 * @returns A new array of the stored task objects, in insertion order.
 */
export function getAllTasks(): Task[] {
  return [...tasks.values()];
}
|
||||
|
||||
/**
 * Looks up a task by id.
 *
 * @param id - Task id.
 * @returns The stored task object, or `undefined` when the id is unknown.
 */
export function getTask(id: string): Task | undefined {
  const found = tasks.get(id);
  return found;
}
|
||||
|
||||
/**
 * Stores a task under its own `id`, replacing any task already stored there.
 *
 * @param task - Task to store.
 * @returns The same task object that was passed in.
 */
export function createTask(task: Task): Task {
  const { id } = task;
  tasks.set(id, task);
  return task;
}
|
||||
|
||||
/**
 * Applies a shallow patch to a stored task.
 *
 * The stored object is replaced by a new object combining the old fields
 * with `updates`; the previous object is left unmodified.
 *
 * @param id - Id of the task to patch.
 * @param updates - Fields to overwrite.
 * @returns The new task object, or `undefined` when the id is unknown.
 */
export function updateTask(id: string, updates: Partial<Task>): Task | undefined {
  const current = tasks.get(id);
  if (!current) {
    return undefined;
  }

  const merged = { ...current, ...updates };
  tasks.set(id, merged);
  return merged;
}
|
||||
|
||||
/**
 * Removes a task from the store.
 *
 * @param id - Id of the task to remove.
 * @returns `true` when a task was removed, `false` when the id was unknown.
 */
export function deleteTask(id: string): boolean {
  const removed = tasks.delete(id);
  return removed;
}
|
||||
|
||||
/**
 * Appends an iteration to a task and records its number as the task's
 * current iteration.
 *
 * Unlike `updateTask`, this mutates the stored task object in place (the
 * `iterations` array itself is replaced by a new array).
 *
 * @param id - Id of the task to extend.
 * @param iteration - Iteration to append.
 * @returns The mutated task object, or `undefined` when the id is unknown.
 */
export function appendIteration(id: string, iteration: import("./types").Iteration): Task | undefined {
  const target = tasks.get(id);
  if (!target) {
    return undefined;
  }

  target.iterations = target.iterations.concat([iteration]);
  target.iteration = iteration.n;
  tasks.set(id, target);
  return target;
}
|
||||
|
||||
/**
 * Applies a shallow patch to the iteration(s) of a task whose `n` matches.
 *
 * The stored task object is mutated in place: its `iterations` array is
 * replaced by a new array in which matching entries are new, patched
 * objects. When no iteration matches, the list content is unchanged.
 *
 * @param id - Id of the owning task.
 * @param iterationN - Iteration number to patch.
 * @param updates - Fields to overwrite on the matching iteration.
 * @returns The mutated task object, or `undefined` when the id is unknown.
 */
export function updateIteration(
  id: string,
  iterationN: number,
  updates: Partial<import("./types").Iteration>,
): Task | undefined {
  const target = tasks.get(id);
  if (!target) {
    return undefined;
  }

  target.iterations = target.iterations.map((entry) => {
    if (entry.n !== iterationN) return entry;
    return { ...entry, ...updates };
  });
  tasks.set(id, target);
  return target;
}
|
||||
|
||||
/**
 * Finds the next task waiting to be run.
 *
 * @returns The first task, in store insertion order, whose status is
 *   "pending"; `undefined` when there is none.
 */
export function getFirstPendingTask(): Task | undefined {
  for (const candidate of tasks.values()) {
    if (candidate.status === "pending") {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Lists the tasks currently marked as running.
 *
 * @returns A new array of tasks whose status is "running", in store
 *   insertion order.
 */
export function getRunningTasks(): Task[] {
  const active: Task[] = [];
  for (const candidate of tasks.values()) {
    if (candidate.status === "running") {
      active.push(candidate);
    }
  }
  return active;
}
|
||||
68
apps/harness/src/lib/types.ts
Normal file
68
apps/harness/src/lib/types.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
/** User-supplied definition of a task. */
export interface TaskSpec {
  /** Short identifier of the task. */
  slug: string;
  /** Natural-language description of what the agent should achieve. */
  goal: string;
  /** Repository the task targets. */
  project: string;
  /** Id of the agent configuration to run the task with. */
  agentId: string;
  /** Upper bound on the number of iterations to attempt. */
  maxIterations: number;
  /** Success criteria, each a label and its target. */
  criteria: Array<{ label: string; target: string }>;
  /** Free-text constraints. */
  constraints: string[];
  /** References to knowledge files. */
  knowledgeRefs: string[];
}
|
||||
|
||||
/** Result of a single evaluation check. */
export interface Eval {
  /** Human-readable name of the check. */
  label: string;
  /** Observed value. */
  value: string | number;
  /** Unit that `value` is expressed in. */
  unit: string;
  /** Whether the check passed. */
  pass: boolean;
  /** Target the value was compared against. */
  target: string;
}
|
||||
|
||||
/** One attempt at a task. */
export interface Iteration {
  /** Iteration number within the task. */
  n: number;
  /** Lifecycle state of the iteration. */
  status: "pending" | "running" | "passed" | "failed";
  /** Explanation of the outcome; `null` when none has been recorded. */
  diagnosis: string | null;
  /** Captured agent output, if any. */
  agentOutput?: string;
  /** Evaluation results keyed by check name. */
  evals?: Record<string, Eval>;
  /** Diff statistics, if any. */
  diffStats?: string;
  /** Start time as a numeric timestamp. */
  startedAt?: number;
  /** Completion time as a numeric timestamp. */
  completedAt?: number;
}
|
||||
|
||||
/** Outcome of one agent process run. */
export interface ExecutionResult {
  /** Exit code of the agent process. */
  exitCode: number;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** Duration of the run in milliseconds. */
  durationMs: number;
  /** Number of input tokens attributed to the run. */
  inputTokens: number;
  /** Number of output tokens attributed to the run. */
  outputTokens: number;
  /** Whether the process was killed. */
  killed: boolean;
}
|
||||
|
||||
/** A task together with its execution state. */
export interface Task {
  /** Unique id of the task. */
  id: string;
  /** Short identifier of the task. */
  slug: string;
  /** Natural-language description of what the agent should achieve. */
  goal: string;
  /** Lifecycle state of the task. */
  status: "pending" | "running" | "completed" | "failed";
  /** Current iteration number. */
  iteration: number;
  /** Upper bound on the number of iterations to attempt. */
  maxIterations: number;
  /** Start time as a numeric timestamp; `null` when not set. */
  startedAt: number | null;
  /** Completion time as a numeric timestamp. */
  completedAt?: number;
  /** Repository the task targets. */
  project: string;
  /** Task-level evaluation results keyed by check name. */
  evals: Record<string, Eval>;
  /** Iterations recorded for the task. */
  iterations: Iteration[];
  /** Pull request associated with the task, if any. */
  pr?: {
    number: number;
    title: string;
    status: string;
  };
  /** The definition the task was created from. */
  spec: TaskSpec;
}
|
||||
|
||||
/**
 * Metadata describing a knowledge document.
 *
 * NOTE(review): no producer or consumer of this type is visible in this
 * part of the codebase; the field notes below follow the field names only.
 * Confirm against the code that builds these objects.
 */
export interface KnowledgeDoc {
  /** Path of the document. */
  path: string;
  /** Title of the document. */
  title: string;
  /** Verification state; presumably a small fixed set of values (TODO confirm). */
  verificationStatus: string;
  /** Last update, as a string; the format is not established here. */
  lastUpdated: string;
  /** Project the document belongs to. */
  project: string;
  /** Preview text of the document. */
  preview: string;
}
|
||||
Reference in New Issue
Block a user