diff --git a/packages/runtime/src/cloud-defaults.ts b/packages/runtime/src/cloud-defaults.ts
index 710303fa..ce1bee88 100644
--- a/packages/runtime/src/cloud-defaults.ts
+++ b/packages/runtime/src/cloud-defaults.ts
@@ -11,12 +11,14 @@ import {
   resolveStringMapLenient,
   type PersonaSpec
 } from '@agentworkforce/persona-kit';
+import { createDefaultLlm } from './cloud-llm.js';
 import { SandboxNotAvailableError } from './errors.js';
 import type {
   FilesContext,
   HarnessRunArgs,
   HarnessRunResult,
   HarnessUsage,
+  LlmContext,
   SandboxContext,
   WorkforceAgentContext,
   WorkforceCtx,
@@ -57,6 +59,7 @@ export interface CloudRuntimeDefaults {
   sandbox: SandboxContext;
   files: FilesContext;
   workflow?: WorkflowContext;
+  llm?: LlmContext;
   harnessRunner: (args: HarnessRunArgs) => Promise<HarnessRunResult>;
 }
 
@@ -73,10 +76,18 @@ export function createCloudRuntimeDefaults(options: CloudDefaultOptions): CloudR
     workspaceRoot: root,
     env
   });
+  // ctx.llm from sandbox credentials — without this, no cloud persona ever
+  // gets a working ctx.llm (buildCtx falls back to a throwing stub).
+  const llm = createDefaultLlm({
+    persona: options.persona,
+    env,
+    log: options.log
+  });
   return {
     sandbox,
     files,
     ...(workflow ? { workflow } : {}),
+    ...(llm ? { llm } : {}),
     harnessRunner: createProcessHarnessRunner({
       ...options,
       workspaceRoot: root,
diff --git a/packages/runtime/src/cloud-llm.test.ts b/packages/runtime/src/cloud-llm.test.ts
new file mode 100644
index 00000000..e4348297
--- /dev/null
+++ b/packages/runtime/src/cloud-llm.test.ts
@@ -0,0 +1,210 @@
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import type { PersonaSpec } from '@agentworkforce/persona-kit';
+import { createDefaultLlm } from './cloud-llm.js';
+
+const basePersona: PersonaSpec = {
+  id: 'demo',
+  intent: 'documentation',
+  tags: ['documentation'],
+  description: 'test persona',
+  skills: [],
+  harness: 'claude',
+  model: 'anthropic/claude-sonnet-4-6',
+  systemPrompt: 'be helpful',
+  harnessSettings: { reasoning: 'medium', timeoutSeconds: 300 },
+  cloud: true
+};
+
+const noopLog = () => {};
+
+interface CapturedRequest {
+  url: string;
+  headers: Record<string, string>;
+  body: Record<string, unknown>;
+}
+
+function stubFetch(
+  t: { after(fn: () => void): void },
+  response: { status?: number; payload?: unknown; rawBody?: string }
+): CapturedRequest[] {
+  const captured: CapturedRequest[] = [];
+  const original = globalThis.fetch;
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
+    captured.push({
+      url: String(input),
+      headers: Object.fromEntries(
+        Object.entries((init?.headers ?? {}) as Record<string, string>).map(([key, value]) => [
+          key.toLowerCase(),
+          value
+        ])
+      ),
+      body: JSON.parse(String(init?.body ?? '{}')) as Record<string, unknown>
+    });
+    const status = response.status ?? 200;
+    const body = response.rawBody ?? JSON.stringify(response.payload ?? {});
+    return new Response(body, {
+      status,
+      headers: { 'content-type': 'application/json' }
+    });
+  }) as typeof fetch;
+  t.after(() => {
+    globalThis.fetch = original;
+  });
+  return captured;
+}
+
+test('returns undefined when the sandbox has no LLM credentials', () => {
+  const llm = createDefaultLlm({ persona: basePersona, env: {}, log: noopLog });
+  assert.equal(llm, undefined);
+});
+
+test('ANTHROPIC_API_KEY produces an x-api-key Messages API client', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { content: [{ type: 'text', text: 'hello from claude' }] }
+  });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  const result = await llm.complete('hi', { maxTokens: 64 });
+  assert.equal(result, 'hello from claude');
+  assert.equal(requests.length, 1);
+  const request = requests[0]!;
+  assert.equal(request.url, 'https://api.anthropic.com/v1/messages');
+  assert.equal(request.headers['x-api-key'], 'sk-ant-test');
+  assert.equal(request.headers['anthropic-version'], '2023-06-01');
+  assert.equal(request.headers['authorization'], undefined);
+  assert.equal(request.headers['anthropic-beta'], undefined); // beta header is OAuth-leg-only
+  assert.equal(request.body.model, 'claude-sonnet-4-6'); // anthropic/ prefix stripped
+  assert.equal(request.body.max_tokens, 64);
+  assert.deepEqual(request.body.messages, [{ role: 'user', content: 'hi' }]);
+});
+
+test('CLAUDE_CODE_OAUTH_TOKEN authenticates via Authorization: Bearer only', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { content: [{ type: 'text', text: 'ok' }] }
+  });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { CLAUDE_CODE_OAUTH_TOKEN: 'oat-token' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await llm.complete('hi');
+  const request = requests[0]!;
+  assert.equal(request.headers['authorization'], 'Bearer oat-token');
+  assert.equal(request.headers['x-api-key'], undefined);
+  // Setup-tokens are rejected by /v1/messages without the OAuth beta header.
+  assert.equal(request.headers['anthropic-beta'], 'oauth-2025-04-20');
+});
+
+test('codex-only persona models fall back to the default chat model', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { choices: [{ message: { content: 'ok' } }] }
+  });
+  const llm = createDefaultLlm({
+    persona: { ...basePersona, harness: 'codex', model: 'openai-codex/gpt-5.5-codex' },
+    env: { OPENAI_API_KEY: 'sk-openai-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await llm.complete('hi');
+  // gpt-*-codex is a Codex CLI model, not served by /v1/chat/completions.
+  assert.equal(requests[0]!.body.model, 'gpt-5.5');
+});
+
+test('OPENAI_API_KEY routes gpt-family personas to chat completions', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { choices: [{ message: { content: 'hello from gpt' } }] }
+  });
+  const llm = createDefaultLlm({
+    persona: { ...basePersona, harness: 'codex', model: 'openai/gpt-5.4' },
+    env: { OPENAI_API_KEY: 'sk-openai-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  const result = await llm.complete('hi', { maxTokens: 32 });
+  assert.equal(result, 'hello from gpt');
+  const request = requests[0]!;
+  assert.equal(request.url, 'https://api.openai.com/v1/chat/completions');
+  assert.equal(request.headers['authorization'], 'Bearer sk-openai-test');
+  assert.equal(request.body.model, 'gpt-5.4'); // openai/ prefix stripped
+  assert.equal(request.body.max_completion_tokens, 32);
+});
+
+test('persona model family wins when multiple credentials exist', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { choices: [{ message: { content: 'gpt answer' } }] }
+  });
+  const llm = createDefaultLlm({
+    persona: { ...basePersona, harness: 'codex', model: 'gpt-5.1' },
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test', OPENAI_API_KEY: 'sk-openai-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await llm.complete('hi');
+  assert.equal(requests[0]!.url, 'https://api.openai.com/v1/chat/completions');
+});
+
+test('anthropic credential is the default when the persona model names no family', async (t) => {
+  const requests = stubFetch(t, {
+    payload: { content: [{ type: 'text', text: 'ok' }] }
+  });
+  const llm = createDefaultLlm({
+    persona: { ...basePersona, model: undefined },
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test', OPENAI_API_KEY: 'sk-openai-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await llm.complete('hi');
+  const request = requests[0]!;
+  assert.equal(request.url, 'https://api.anthropic.com/v1/messages');
+  assert.equal(request.body.model, 'claude-opus-4-8');
+});
+
+test('non-2xx responses throw with status and detail', async (t) => {
+  stubFetch(t, { status: 401, rawBody: '{"error":{"message":"bad key"}}' });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await assert.rejects(llm.complete('hi'), /401/);
+});
+
+test('empty text content throws instead of returning an empty string', async (t) => {
+  stubFetch(t, { payload: { content: [], stop_reason: 'max_tokens' } });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await assert.rejects(llm.complete('hi'), /no text content/);
+});
+
+test('a null JSON payload throws instead of crashing on property access', async (t) => {
+  stubFetch(t, { rawBody: 'null' });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await assert.rejects(llm.complete('hi'), /no text content/);
+});
+
+test('a 2xx response with a non-JSON body throws a ctx.llm error', async (t) => {
+  stubFetch(t, { rawBody: 'not json' });
+  const llm = createDefaultLlm({
+    persona: basePersona,
+    env: { ANTHROPIC_API_KEY: 'sk-ant-test' },
+    log: noopLog
+  });
+  assert.ok(llm);
+  await assert.rejects(llm.complete('hi'), /failed to read JSON response/);
+});
diff --git a/packages/runtime/src/cloud-llm.ts b/packages/runtime/src/cloud-llm.ts
new file mode 100644
index 00000000..3006a332
--- /dev/null
+++ b/packages/runtime/src/cloud-llm.ts
@@ -0,0 +1,314 @@
+import type { PersonaSpec } from '@agentworkforce/persona-kit';
+import type { LlmContext, WorkforceCtx } from './types.js';
+
+/**
+ * Env-derived `ctx.llm` for cloud-deployed personas.
+ *
+ * Until this existed, NOTHING constructed an LlmContext for deployed
+ * personas — `buildCtx` only receives `llm` when a caller passes one, and
+ * no caller ever did, so every cloud persona's `ctx.llm.complete()` threw
+ * the UNAVAILABLE_LLM stub error regardless of `persona.useSubscription`
+ * (which only gates harness-binary credential linking at deploy time).
+ * The gap stayed invisible for months because the personas that call
+ * `ctx.llm` (linear-chat-lead, granola, hn-monitor) failed earlier in
+ * their handlers; the linear CWD fix advanced execution onto this cliff.
+ *
+ * Credential sources, in order:
+ *   1. ANTHROPIC_API_KEY — Anthropic Messages API, `x-api-key`.
+ *   2. CLAUDE_CODE_OAUTH_TOKEN — `claude setup-token` OAuth bearer
+ *      (cloud#1629 injects it for `oauth_token` provider credentials).
+ *      Sent as `Authorization: Bearer`, never via `x-api-key`.
+ *   3. OPENAI_API_KEY — OpenAI chat completions, bearer.
+ *
+ * When the persona's `model` names a provider family (a `claude-*` model
+ * or an `anthropic/`-prefixed ref vs a `gpt-*` / `openai/`-prefixed one),
+ * a credential for that family is preferred; otherwise the first
+ * available source above wins. Returns undefined when no credential is
+ * present so `buildCtx` keeps the existing throwing stub (its message
+ * names the fix).
+ */
+
+/** Fallback models, used when the persona's model is absent or names another provider family. */
+const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-8';
+const DEFAULT_OPENAI_MODEL = 'gpt-5.5';
+/** Token cap sent when the caller does not pass `maxTokens`. */
+const DEFAULT_MAX_TOKENS = 16_000;
+/** Abort deadline, in milliseconds, applied to each request. */
+const COMPLETE_TIMEOUT_MS = 120_000;
+const ANTHROPIC_BASE_URL = 'https://api.anthropic.com';
+const OPENAI_BASE_URL = 'https://api.openai.com';
+
+/** Inputs for `createDefaultLlm`. */
+export interface CloudLlmOptions {
+  persona: PersonaSpec;
+  env: NodeJS.ProcessEnv;
+  log: WorkforceCtx['log'];
+}
+
+type LlmProviderFamily = 'anthropic' | 'openai';
+
+/** A usable credential: target provider, auth headers to send, and the env var it came from. */
+interface LlmCredential {
+  family: LlmProviderFamily;
+  headers: Record<string, string>;
+  source: string;
+}
+
+/**
+ * Build `ctx.llm` from the credentials present in `options.env`.
+ *
+ * @returns a client for the selected provider, or `undefined` when no
+ *   supported credential is set.
+ */
+export function createDefaultLlm(options: CloudLlmOptions): LlmContext | undefined {
+  const credential = selectCredential(options.env, personaModelFamily(options.persona));
+  if (!credential) return undefined;
+
+  const model = resolveModel(options.persona, credential.family);
+  options.log('info', 'ctx.llm configured from sandbox credentials', {
+    provider: credential.family,
+    source: credential.source,
+    model
+  });
+
+  if (credential.family === 'anthropic') {
+    return anthropicLlm(credential, model, options.log);
+  }
+  return openaiLlm(credential, model, options.log);
+}
+
+/**
+ * Pick the credential to use: the one for `preferred` when available,
+ * otherwise the first candidate (Anthropic before OpenAI).
+ */
+function selectCredential(
+  env: NodeJS.ProcessEnv,
+  preferred: LlmProviderFamily | null
+): LlmCredential | null {
+  const candidates: LlmCredential[] = [];
+  const anthropicApiKey = nonEmpty(env.ANTHROPIC_API_KEY);
+  const claudeOauth = nonEmpty(env.CLAUDE_CODE_OAUTH_TOKEN);
+  const openaiApiKey = nonEmpty(env.OPENAI_API_KEY);
+
+  // Exactly one auth header per request: an OAuth bearer must go on
+  // `Authorization`, an API key on `x-api-key`; sending both is rejected.
+  if (anthropicApiKey) {
+    candidates.push({
+      family: 'anthropic',
+      headers: { 'x-api-key': anthropicApiKey },
+      source: 'ANTHROPIC_API_KEY'
+    });
+  } else if (claudeOauth) {
+    candidates.push({
+      family: 'anthropic',
+      headers: {
+        authorization: `Bearer ${claudeOauth}`,
+        // Claude Code setup-tokens are accepted by the Messages API only
+        // with the OAuth beta header; a bare Bearer is rejected.
+        'anthropic-beta': 'oauth-2025-04-20'
+      },
+      source: 'CLAUDE_CODE_OAUTH_TOKEN'
+    });
+  }
+  if (openaiApiKey) {
+    candidates.push({
+      family: 'openai',
+      headers: { authorization: `Bearer ${openaiApiKey}` },
+      source: 'OPENAI_API_KEY'
+    });
+  }
+
+  if (candidates.length === 0) return null;
+  if (preferred) {
+    const match = candidates.find((candidate) => candidate.family === preferred);
+    if (match) return match;
+  }
+  return candidates[0] ?? null;
+}
+
+/** Infer the provider family from the persona's model name; `null` when it names neither. */
+function personaModelFamily(persona: PersonaSpec): LlmProviderFamily | null {
+  const model = nonEmpty(persona.model);
+  if (!model) return null;
+  const normalized = model.toLowerCase();
+  if (normalized.startsWith('anthropic/') || normalized.includes('claude')) return 'anthropic';
+  if (
+    normalized.startsWith('openai/') ||
+    normalized.startsWith('openai-codex/') ||
+    normalized.includes('gpt-') ||
+    normalized.includes('codex')
+  ) {
+    return 'openai';
+  }
+  return null;
+}
+
+/**
+ * Resolve the model id to send: the persona's model (provider prefix
+ * stripped) when it belongs to `family`, otherwise that family's default.
+ */
+function resolveModel(persona: PersonaSpec, family: LlmProviderFamily): string {
+  const personaFamily = personaModelFamily(persona);
+  const personaModel = nonEmpty(persona.model);
+  if (personaModel && personaFamily === family) {
+    // Strip provider prefixes like `anthropic/` / `openai/` / `openai-codex/`.
+    const slash = personaModel.indexOf('/');
+    const stripped = slash >= 0 ? personaModel.slice(slash + 1) : personaModel;
+    // Codex CLI models (gpt-*-codex) are not served by /v1/chat/completions —
+    // they steer family selection above, but the completion call falls back
+    // to the default chat model.
+    if (family === 'openai' && stripped.toLowerCase().includes('codex')) {
+      return DEFAULT_OPENAI_MODEL;
+    }
+    return stripped;
+  }
+  return family === 'anthropic' ? DEFAULT_ANTHROPIC_MODEL : DEFAULT_OPENAI_MODEL;
+}
+
+/** `LlmContext` backed by the Anthropic Messages API (`POST /v1/messages`). */
+function anthropicLlm(
+  credential: LlmCredential,
+  model: string,
+  log: WorkforceCtx['log']
+): LlmContext {
+  return {
+    async complete(prompt, opts) {
+      const body = {
+        model,
+        max_tokens: opts?.maxTokens ?? DEFAULT_MAX_TOKENS,
+        messages: [{ role: 'user', content: prompt }]
+      };
+      const payload = await postJson(
+        `${ANTHROPIC_BASE_URL}/v1/messages`,
+        {
+          ...credential.headers,
+          'anthropic-version': '2023-06-01',
+          'content-type': 'application/json'
+        },
+        body,
+        log
+      );
+      // The payload is untrusted JSON (it may even be `null`), so narrow it
+      // before reading fields instead of asserting a shape.
+      const response: Record<string, unknown> = isRecord(payload) ? payload : {};
+      const content: unknown[] = Array.isArray(response.content) ? response.content : [];
+      const text = content
+        .filter(
+          (block): block is { type: 'text'; text: string } =>
+            isRecord(block) && block.type === 'text' && typeof block.text === 'string'
+        )
+        .map((block) => block.text)
+        .join('');
+      if (!text) {
+        throw new Error(
+          `ctx.llm: Anthropic response contained no text content (stop_reason=${String(
+            response.stop_reason ?? 'unknown'
+          )})`
+        );
+      }
+      return text;
+    }
+  };
+}
+
+/** `LlmContext` backed by OpenAI chat completions (`POST /v1/chat/completions`). */
+function openaiLlm(
+  credential: LlmCredential,
+  model: string,
+  log: WorkforceCtx['log']
+): LlmContext {
+  return {
+    async complete(prompt, opts) {
+      const body = {
+        model,
+        max_completion_tokens: opts?.maxTokens ?? DEFAULT_MAX_TOKENS,
+        messages: [{ role: 'user', content: prompt }]
+      };
+      const payload = await postJson(
+        `${OPENAI_BASE_URL}/v1/chat/completions`,
+        {
+          ...credential.headers,
+          'content-type': 'application/json'
+        },
+        body,
+        log
+      );
+      // Narrow before reading: a `null` JSON payload must not throw a TypeError.
+      const choices = isRecord(payload) ? payload.choices : undefined;
+      const first = Array.isArray(choices) ? choices[0] : undefined;
+      const text =
+        isRecord(first) && isRecord(first.message) && typeof first.message.content === 'string'
+          ? first.message.content
+          : '';
+      if (!text) {
+        throw new Error('ctx.llm: OpenAI response contained no message content');
+      }
+      return text;
+    }
+  };
+}
+
+/**
+ * POST `body` as JSON and return the parsed JSON response.
+ *
+ * @throws Error prefixed `ctx.llm:` when the request fails before a
+ *   response, the status is not 2xx, or the body cannot be read as JSON.
+ */
+async function postJson(
+  url: string,
+  headers: Record<string, string>,
+  body: unknown,
+  log: WorkforceCtx['log']
+): Promise<unknown> {
+  let response: Response;
+  try {
+    response = await fetch(url, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(COMPLETE_TIMEOUT_MS)
+    });
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    log('warn', 'ctx.llm request failed before a response', { url, error: message });
+    throw new Error(`ctx.llm: request to ${url} failed: ${message}`);
+  }
+  if (!response.ok) {
+    const detail = truncate(await response.text().catch(() => ''), 500);
+    log('warn', 'ctx.llm request returned an error status', {
+      url,
+      status: response.status,
+      detail
+    });
+    throw new Error(`ctx.llm: ${url} returned ${response.status}: ${detail}`);
+  }
+  try {
+    return (await response.json()) as unknown;
+  } catch (error) {
+    // A 2xx with a non-JSON body, or the request aborting while the body is
+    // read, would otherwise surface as a bare error with no url or log entry.
+    const message = error instanceof Error ? error.message : String(error);
+    log('warn', 'ctx.llm response body could not be read as JSON', {
+      url,
+      status: response.status,
+      error: message
+    });
+    throw new Error(`ctx.llm: failed to read JSON response from ${url}: ${message}`);
+  }
+}
+
+/** Trim `value`; return `null` when it is missing or blank. */
+function nonEmpty(value: string | undefined): string | null {
+  const trimmed = value?.trim();
+  return trimmed && trimmed.length > 0 ? trimmed : null;
+}
+
+/** Type guard for non-null objects (arrays included). */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null;
+}
+
+/** Cap `value` at `max` characters, appending `…` when it was cut. */
+function truncate(value: string, max: number): string {
+  return value.length > max ? `${value.slice(0, max)}…` : value;
+}
diff --git a/packages/runtime/src/runner.ts b/packages/runtime/src/runner.ts
index 6aa630b4..811c111c 100644
--- a/packages/runtime/src/runner.ts
+++ b/packages/runtime/src/runner.ts
@@ -114,7 +114,9 @@ export async function startRunner(options: StartRunnerOptions): Promise {
     sandbox: options.subsystems?.sandbox ?? cloudDefaults.sandbox,
     files: options.subsystems?.files ?? cloudDefaults.files,
     harnessRunner: options.harnessRunner ?? cloudDefaults.harnessRunner,
-    ...(options.subsystems?.llm ? { llm: options.subsystems.llm } : {}),
+    ...(options.subsystems?.llm ?? cloudDefaults.llm
+      ? { llm: options.subsystems?.llm ?? cloudDefaults.llm }
+      : {}),
     ...(options.subsystems?.memory ? { memory: options.subsystems.memory } : {}),
     ...(options.subsystems?.workflow ?? cloudDefaults.workflow
       ? { workflow: options.subsystems?.workflow ?? cloudDefaults.workflow }