diff --git a/packages/runtime/CHANGELOG.md b/packages/runtime/CHANGELOG.md index 1a1fcca6..79ed4a5e 100644 --- a/packages/runtime/CHANGELOG.md +++ b/packages/runtime/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **Codex backend LlmContext credential leg for ChatGPT subscription OAuth tokens** (#198) + ## [3.0.44] - 2026-06-04 ### Added @@ -86,4 +90,3 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Deploy v1 CLI runtime credentials and customer example (#109) - diff --git a/packages/runtime/src/cloud-llm.test.ts b/packages/runtime/src/cloud-llm.test.ts index e4348297..f64ebe55 100644 --- a/packages/runtime/src/cloud-llm.test.ts +++ b/packages/runtime/src/cloud-llm.test.ts @@ -116,6 +116,91 @@ test('codex-only persona models fall back to the default chat model', async (t) assert.equal(requests[0]!.body.model, 'gpt-5.5'); }); +test('CODEX_OAUTH_TOKEN routes codex personas to the ChatGPT codex backend', async (t) => { + const requests = stubFetch(t, { + rawBody: [ + 'event: response.created', + 'data: {"type":"response.created","response":{"id":"resp-1"}}', + '', + 'event: response.output_text.delta', + 'data: {"type":"response.output_text.delta","delta":"hello "}', + '', + 'event: response.output_text.delta', + 'data: {"type":"response.output_text.delta","delta":"from codex"}', + '', + 'event: response.completed', + 'data: {"type":"response.completed","response":{"id":"resp-1"}}', + '', + '' + ].join('\n') + }); + const llm = createDefaultLlm({ + persona: { ...basePersona, harness: 'codex', model: 'openai-codex/gpt-5.5-codex' }, + env: { CODEX_OAUTH_TOKEN: 'chatgpt-access', CODEX_ACCOUNT_ID: 'acct-123' }, + log: noopLog + }); + assert.ok(llm); + const result = await llm.complete('hi', { maxTokens: 48 }); + assert.equal(result, 'hello from codex'); + const request = requests[0]!; + assert.equal(request.url, 
'https://chatgpt.com/backend-api/codex/responses'); + assert.equal(request.headers['authorization'], 'Bearer chatgpt-access'); + assert.equal(request.headers['chatgpt-account-id'], 'acct-123'); + assert.equal(request.headers.originator, 'codex_cli_rs'); + assert.ok(request.headers['session-id']); + assert.ok(request.headers['thread-id']); + assert.equal(request.headers.accept, 'text/event-stream'); + assert.equal(request.headers['x-api-key'], undefined); + assert.equal(request.body.model, 'gpt-5.5-codex'); + assert.equal(request.body.stream, true); + assert.equal(request.body.max_output_tokens, 48); + assert.deepEqual(request.body.include, ['reasoning.encrypted_content']); + assert.deepEqual(request.body.input, [ + { + type: 'message', + role: 'user', + content: [{ type: 'input_text', text: 'hi' }] + } + ]); +}); + +test('CODEX_OAUTH_CREDENTIAL accepts refreshed auth blob shape with account_id', async (t) => { + const requests = stubFetch(t, { + rawBody: [ + 'event: response.output_item.done', + 'data: {"type":"response.output_item.done","item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"blob ok"}]}}', + '', + 'event: response.completed', + 'data: {"type":"response.completed","response":{"id":"resp-1"}}', + '', + '' + ].join('\n') + }); + const llm = createDefaultLlm({ + persona: { ...basePersona, harness: 'codex', model: 'openai/gpt-5.5' }, + env: { + CODEX_OAUTH_CREDENTIAL: JSON.stringify({ + tokens: { + access_token: 'fresh-access', + refresh_token: 'refresh', + account_id: 'acct-blob' + }, + last_refresh: '2026-06-04T20:00:00.000Z', + base_url: 'https://example.test/backend-api/codex' + }) + }, + log: noopLog + }); + assert.ok(llm); + const result = await llm.complete('hi'); + assert.equal(result, 'blob ok'); + const request = requests[0]!; + assert.equal(request.url, 'https://example.test/backend-api/codex/responses'); + assert.equal(request.headers['authorization'], 'Bearer fresh-access'); + 
assert.equal(request.headers['chatgpt-account-id'], 'acct-blob'); + assert.equal(request.body.model, 'gpt-5.5-codex'); // platform slug mapped to backend codex slug +}); + test('OPENAI_API_KEY routes gpt-family personas to chat completions', async (t) => { const requests = stubFetch(t, { payload: { choices: [{ message: { content: 'hello from gpt' } }] } @@ -135,6 +220,55 @@ test('OPENAI_API_KEY routes gpt-family personas to chat completions', async (t) assert.equal(request.body.max_completion_tokens, 32); }); +test('OPENAI_API_KEY remains preferred over codex backend for plain gpt personas', async (t) => { + const requests = stubFetch(t, { + payload: { choices: [{ message: { content: 'platform answer' } }] } + }); + const llm = createDefaultLlm({ + persona: { ...basePersona, harness: 'codex', model: 'openai/gpt-5.4' }, + env: { + OPENAI_API_KEY: 'sk-openai-test', + CODEX_OAUTH_TOKEN: 'chatgpt-access', + CODEX_ACCOUNT_ID: 'acct-123' + }, + log: noopLog + }); + assert.ok(llm); + const result = await llm.complete('hi'); + assert.equal(result, 'platform answer'); + assert.equal(requests[0]!.url, 'https://api.openai.com/v1/chat/completions'); +}); + +test('codex backend is the OpenAI fallback when the persona model names no family', async (t) => { + const requests = stubFetch(t, { + rawBody: [ + 'event: response.output_text.delta', + 'data: {"type":"response.output_text.delta","delta":"subscription answer"}', + '', + 'event: response.completed', + 'data: {"type":"response.completed","response":{"id":"resp-1"}}', + '', + '' + ].join('\n') + }); + const llm = createDefaultLlm({ + persona: { ...basePersona, model: undefined }, + env: { + OPENAI_API_KEY: 'sk-openai-test', + CODEX_OAUTH_TOKEN: 'chatgpt-access', + CODEX_ACCOUNT_ID: 'acct-123' + }, + log: noopLog + }); + assert.ok(llm); + const result = await llm.complete('hi'); + assert.equal(result, 'subscription answer'); + assert.equal(requests[0]!.url, 'https://chatgpt.com/backend-api/codex/responses'); + 
assert.equal(requests[0]!.headers['authorization'], 'Bearer chatgpt-access'); + assert.equal(requests[0]!.headers['chatgpt-account-id'], 'acct-123'); + assert.equal(requests[0]!.body.model, 'gpt-5.5-codex'); +}); + test('persona model family wins when multiple credentials exist', async (t) => { const requests = stubFetch(t, { payload: { choices: [{ message: { content: 'gpt answer' } }] } @@ -149,6 +283,24 @@ test('persona model family wins when multiple credentials exist', async (t) => { assert.equal(requests[0]!.url, 'https://api.openai.com/v1/chat/completions'); }); +test('codex backend stream must reach response.completed', async (t) => { + stubFetch(t, { + rawBody: [ + 'event: response.output_text.delta', + 'data: {"type":"response.output_text.delta","delta":"partial"}', + '', + '' + ].join('\n') + }); + const llm = createDefaultLlm({ + persona: { ...basePersona, harness: 'codex', model: 'openai-codex/gpt-5.5-codex' }, + env: { CODEX_OAUTH_TOKEN: 'chatgpt-access', CODEX_ACCOUNT_ID: 'acct-123' }, + log: noopLog + }); + assert.ok(llm); + await assert.rejects(llm.complete('hi'), /response\.completed/); +}); + test('anthropic credential is the default when the persona model names no family', async (t) => { const requests = stubFetch(t, { payload: { content: [{ type: 'text', text: 'ok' }] } diff --git a/packages/runtime/src/cloud-llm.ts b/packages/runtime/src/cloud-llm.ts index 3006a332..4562ab96 100644 --- a/packages/runtime/src/cloud-llm.ts +++ b/packages/runtime/src/cloud-llm.ts @@ -18,7 +18,11 @@ import type { LlmContext, WorkforceCtx } from './types.js'; * 2. CLAUDE_CODE_OAUTH_TOKEN — `claude setup-token` OAuth bearer * (cloud#1629 injects it for `oauth_token` provider credentials). * Sent as `Authorization: Bearer`, never via `x-api-key`. - * 3. OPENAI_API_KEY — OpenAI chat completions, bearer. + * 3. 
CODEX_OAUTH_CREDENTIAL — structured ChatGPT/Codex OAuth blob + * for the codex backend, shaped like the Codex CLI auth blob: + * `{tokens:{access_token,account_id}}`. + * 4. CODEX_OAUTH_TOKEN + CODEX_ACCOUNT_ID — split env equivalent of #3. + * 5. OPENAI_API_KEY — OpenAI chat completions, bearer. * * When the persona's `model` names a provider family (a `claude-*` model * or an `anthropic/`-prefixed ref vs a `gpt-*` / `openai/`-prefixed one), @@ -30,10 +34,16 @@ import type { LlmContext, WorkforceCtx } from './types.js'; const DEFAULT_ANTHROPIC_MODEL = 'claude-opus-4-8'; const DEFAULT_OPENAI_MODEL = 'gpt-5.5'; +const DEFAULT_CODEX_BACKEND_MODEL = 'gpt-5.5-codex'; const DEFAULT_MAX_TOKENS = 16_000; const COMPLETE_TIMEOUT_MS = 120_000; const ANTHROPIC_BASE_URL = 'https://api.anthropic.com'; const OPENAI_BASE_URL = 'https://api.openai.com'; +// Private ChatGPT/Codex backend protocol, not a published OpenAI Platform API. +// Keep this leg pinned to the Codex CLI's observed request/stream contract and +// expect maintenance if chatgpt.com/backend-api/codex changes. 
+const CODEX_BACKEND_BASE_URL = 'https://chatgpt.com/backend-api/codex'; +const CODEX_BACKEND_ORIGINATOR = 'codex_cli_rs'; export interface CloudLlmOptions { persona: PersonaSpec; @@ -41,14 +51,30 @@ export interface CloudLlmOptions { log: WorkforceCtx['log']; } -type LlmProviderFamily = 'anthropic' | 'openai'; +type PersonaModelFamily = 'anthropic' | 'openai' | 'codex'; +type LlmProviderFamily = 'anthropic' | 'openai' | 'codex-backend'; interface LlmCredential { family: LlmProviderFamily; headers: Record; source: string; + accessToken?: string; + accountId?: string; + baseUrl?: string; } +const CODEX_BACKEND_MODEL_BY_PERSONA_MODEL: Record = { + codex: DEFAULT_CODEX_BACKEND_MODEL, + 'codex-latest': DEFAULT_CODEX_BACKEND_MODEL, + 'codex-tuned': DEFAULT_CODEX_BACKEND_MODEL, + 'gpt-5.5': DEFAULT_CODEX_BACKEND_MODEL, + 'gpt-5.5-codex': 'gpt-5.5-codex', + 'gpt-5.4': 'gpt-5.4-codex', + 'gpt-5.4-codex': 'gpt-5.4-codex', + 'gpt-5.1': 'gpt-5.1-codex', + 'gpt-5.1-codex': 'gpt-5.1-codex' +}; + export function createDefaultLlm(options: CloudLlmOptions): LlmContext | undefined { const credential = selectCredential(options.env, personaModelFamily(options.persona)); if (!credential) return undefined; @@ -63,17 +89,21 @@ export function createDefaultLlm(options: CloudLlmOptions): LlmContext | undefin if (credential.family === 'anthropic') { return anthropicLlm(credential, model, options.log); } + if (credential.family === 'codex-backend') { + return codexBackendLlm(credential, model, options.log); + } return openaiLlm(credential, model, options.log); } function selectCredential( env: NodeJS.ProcessEnv, - preferred: LlmProviderFamily | null + preferred: PersonaModelFamily | null ): LlmCredential | null { const candidates: LlmCredential[] = []; const anthropicApiKey = nonEmpty(env.ANTHROPIC_API_KEY); const claudeOauth = nonEmpty(env.CLAUDE_CODE_OAUTH_TOKEN); const openaiApiKey = nonEmpty(env.OPENAI_API_KEY); + const codexOauth = codexOauthCredential(env); // Exactly one auth header 
/**
 * Chooses the credential that should serve a persona whose model names a
 * provider family.
 *
 * - `anthropic` personas only ever use an Anthropic credential.
 * - `codex` personas prefer the ChatGPT/Codex backend and fall back to the
 *   OpenAI platform credential.
 * - `openai` personas prefer the OpenAI platform credential and fall back to
 *   the ChatGPT/Codex backend.
 *
 * @param candidates Credentials discovered in the environment.
 * @param preferred Family named by the persona's model.
 * @returns The first candidate of the most preferred family that is present,
 *   or `undefined` when no acceptable family has a credential.
 */
function preferredCredential(
  candidates: LlmCredential[],
  preferred: PersonaModelFamily
): LlmCredential | undefined {
  // Provider families to try, most preferred first.
  let familyOrder: LlmProviderFamily[];
  switch (preferred) {
    case 'anthropic':
      familyOrder = ['anthropic'];
      break;
    case 'codex':
      familyOrder = ['codex-backend', 'openai'];
      break;
    default:
      familyOrder = ['openai', 'codex-backend'];
      break;
  }

  for (const family of familyOrder) {
    const hit = candidates.find((entry) => entry.family === family);
    if (hit) return hit;
  }
  return undefined;
}

/**
 * Classifies the persona's `model` ref into a provider family.
 *
 * Matching is case-insensitive and checked in this order: Anthropic
 * (`anthropic/` prefix or any mention of `claude`), Codex (`openai-codex/`
 * prefix or any mention of `codex`), then OpenAI (`openai/` prefix or any
 * mention of `gpt-`). Codex is tested before OpenAI so that a ref such as
 * `openai/gpt-5.5-codex` is treated as a Codex model.
 *
 * @returns The detected family, or `null` when the model is missing, blank,
 *   or names no known family.
 */
function personaModelFamily(persona: PersonaSpec): PersonaModelFamily | null {
  const model = nonEmpty(persona.model);
  if (!model) return null;

  const lowered = model.toLowerCase();
  const hasPrefix = (prefix: string): boolean => lowered.startsWith(prefix);
  const mentions = (needle: string): boolean => lowered.includes(needle);

  if (hasPrefix('anthropic/') || mentions('claude')) return 'anthropic';
  if (hasPrefix('openai-codex/') || mentions('codex')) return 'codex';
  if (hasPrefix('openai/') || mentions('gpt-')) return 'openai';
  return null;
}
/**
 * Reports whether a persona's model ref may be forwarded on a credential of
 * the given provider family.
 *
 * Anthropic credentials only serve Anthropic personas. Both OpenAI-side
 * credentials (`openai` and `codex-backend`) accept either OpenAI-side
 * persona family, because each can stand in for the other as a fallback.
 *
 * @returns `false` when the persona names no family.
 */
function credentialMatchesPersonaFamily(
  credentialFamily: LlmProviderFamily,
  personaFamily: PersonaModelFamily | null
): boolean {
  if (!personaFamily) return false;
  if (credentialFamily === 'anthropic') return personaFamily === 'anthropic';
  return personaFamily === 'openai' || personaFamily === 'codex';
}

/**
 * Maps a persona model slug (provider prefix already stripped) onto a slug
 * the ChatGPT/Codex backend serves.
 *
 * The ChatGPT/Codex backend serves Codex-tuned model slugs, not the platform
 * `/v1` model ids. The lookup table intentionally mirrors only the slugs
 * observed in the Codex CLI/probes; treat additions there as protocol
 * maintenance, not an OpenAI platform model rollout.
 *
 * @returns The mapped slug, or the default Codex backend model for any slug
 *   that is not in the table.
 */
function resolveCodexBackendModel(model: string): string {
  const normalized = model.toLowerCase();
  // Own-property check: the table is a plain object, so a persona slug such
  // as `constructor` or `__proto__` would otherwise resolve to an inherited
  // non-string value and bypass the `??` default.
  if (!Object.prototype.hasOwnProperty.call(CODEX_BACKEND_MODEL_BY_PERSONA_MODEL, normalized)) {
    return DEFAULT_CODEX_BACKEND_MODEL;
  }
  return CODEX_BACKEND_MODEL_BY_PERSONA_MODEL[normalized] ?? DEFAULT_CODEX_BACKEND_MODEL;
}

/**
 * Builds an `LlmContext` that talks to the ChatGPT/Codex backend with an
 * OAuth bearer token.
 *
 * Each `complete` call is a fresh single-turn request: it generates new
 * session and thread ids, sends the prompt as one user message, and resolves
 * with the concatenated output text of the streamed response.
 *
 * @throws Error when the credential lacks an access token or account id.
 */
function codexBackendLlm(
  credential: LlmCredential,
  model: string,
  log: WorkforceCtx['log']
): LlmContext {
  const accessToken = credential.accessToken;
  const accountId = credential.accountId;
  if (!accessToken || !accountId) {
    throw new Error('ctx.llm: Codex backend OAuth credential is missing access token or account id');
  }

  // Trailing slashes are stripped so a configured base URL with or without
  // one yields the same endpoint.
  const endpoint = `${(credential.baseUrl ?? CODEX_BACKEND_BASE_URL).replace(/\/+$/, '')}/responses`;

  return {
    async complete(prompt, opts) {
      const sessionId = randomId();
      const threadId = randomId();
      const body = {
        model,
        instructions: '',
        input: [
          {
            type: 'message',
            role: 'user',
            content: [{ type: 'input_text', text: prompt }]
          }
        ],
        tools: [],
        tool_choice: 'auto',
        parallel_tool_calls: false,
        reasoning: null,
        store: false,
        stream: true,
        include: ['reasoning.encrypted_content'],
        max_output_tokens: opts?.maxTokens ?? DEFAULT_MAX_TOKENS
      };

      return postCodexBackendStream(
        endpoint,
        {
          authorization: `Bearer ${accessToken}`,
          'chatgpt-account-id': accountId,
          originator: CODEX_BACKEND_ORIGINATOR,
          'session-id': sessionId,
          'thread-id': threadId,
          accept: 'text/event-stream',
          'content-type': 'application/json'
        },
        body,
        log
      );
    }
  };
}

/**
 * POSTs a request to the Codex backend, reads the whole event stream, and
 * returns the assistant's output text.
 *
 * @throws Error when the request cannot be sent, the status is not 2xx, the
 *   body cannot be read, the stream ends before `response.completed`, the
 *   stream reports a failed/incomplete response, or no output text arrived.
 */
async function postCodexBackendStream(
  url: string,
  headers: Record<string, string>,
  body: unknown,
  log: WorkforceCtx['log']
): Promise<string> {
  let response: Response;
  try {
    response = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(COMPLETE_TIMEOUT_MS)
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    log('warn', 'ctx.llm Codex backend request failed before a response', { url, error: message });
    throw new Error(`ctx.llm: request to ${url} failed: ${message}`);
  }
  if (!response.ok) {
    const detail = truncate(await response.text().catch(() => ''), 500);
    log('warn', 'ctx.llm Codex backend request returned an error status', {
      url,
      status: response.status,
      detail
    });
    throw new Error(`ctx.llm: ${url} returned ${response.status}: ${detail}`);
  }

  // The timeout signal also covers the body read, so a slow stream can still
  // reject here; report it the same way as a failed request.
  let raw: string;
  try {
    raw = await response.text();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    log('warn', 'ctx.llm Codex backend stream could not be read', { url, error: message });
    throw new Error(`ctx.llm: reading the response from ${url} failed: ${message}`);
  }

  const { text, completed } = parseCodexBackendSse(raw);
  if (!completed) {
    throw new Error('ctx.llm: Codex backend stream closed before response.completed');
  }
  if (!text) {
    throw new Error('ctx.llm: Codex backend response contained no output text');
  }
  return text;
}

/**
 * Extracts the assistant text and completion flag from a buffered
 * server-sent-event body.
 *
 * Events are separated by blank lines; only `data:` lines are read, and
 * events whose payload is empty, `[DONE]`, or not a JSON object are skipped.
 *
 * Text can arrive twice in one stream: incrementally as
 * `response.output_text.delta` fragments and again, in full, inside
 * `response.output_item.done`. The two are collected separately and the
 * finished items win when present, so the answer is never duplicated.
 *
 * @returns `text` (possibly empty) and whether `response.completed` was seen.
 * @throws Error when the stream carries `response.failed` or
 *   `response.incomplete`.
 */
function parseCodexBackendSse(raw: string): { text: string; completed: boolean } {
  let deltaText = '';
  let itemText = '';
  let completed = false;

  for (const chunk of raw.split(/\r?\n\r?\n/)) {
    const data = chunk
      .split(/\r?\n/)
      .filter((line) => line.startsWith('data:'))
      .map((line) => line.slice('data:'.length).trimStart())
      .join('\n');
    if (!data || data === '[DONE]') continue;

    let event: unknown;
    try {
      event = JSON.parse(data) as unknown;
    } catch {
      // Best effort: a malformed event is ignored rather than failing the call.
      continue;
    }
    if (!isRecord(event)) continue;

    switch (event.type) {
      case 'response.output_text.delta':
        if (typeof event.delta === 'string') deltaText += event.delta;
        break;
      case 'response.output_item.done':
        itemText += outputTextFromItem(event.item);
        break;
      case 'response.completed':
        completed = true;
        break;
      case 'response.failed':
      case 'response.incomplete':
        throw new Error(
          `ctx.llm: Codex backend stream returned ${String(event.type)}${codexFailureDetail(event)}`
        );
      default:
        break;
    }
  }

  return { text: itemText || deltaText, completed };
}

/**
 * Formats the optional `response.error.message` of a failure event as a
 * `: message` suffix; returns an empty string when the event carries none.
 */
function codexFailureDetail(event: Record<string, unknown>): string {
  const response = isRecord(event.response) ? event.response : null;
  const error = response && isRecord(response.error) ? response.error : null;
  const message = error ? nonEmptyString(error.message) : null;
  return message ? `: ${truncate(message, 500)}` : '';
}

/**
 * Concatenates every `output_text` part of a finished output item.
 *
 * @returns An empty string when the item is not an object, has no `content`
 *   array, or holds no `output_text` parts.
 */
function outputTextFromItem(item: unknown): string {
  if (!isRecord(item) || !Array.isArray(item.content)) return '';
  return item.content
    .filter(
      (content): content is { type: 'output_text'; text: string } =>
        isRecord(content) && content.type === 'output_text' && typeof content.text === 'string'
    )
    .map((content) => content.text)
    .join('');
}

/**
 * Reads the ChatGPT/Codex OAuth credential from the environment.
 *
 * `CODEX_OAUTH_CREDENTIAL` (a JSON auth blob) is tried first. When it is
 * absent or does not yield both an access token and an account id, the split
 * form `CODEX_OAUTH_TOKEN` + `CODEX_ACCOUNT_ID` (or `CHATGPT_ACCOUNT_ID`) is
 * used. `CODEX_BACKEND_BASE_URL` overrides any base URL found in the blob.
 *
 * @returns The credential with the env variable it came from, or `null` when
 *   neither form is complete.
 */
function codexOauthCredential(env: NodeJS.ProcessEnv):
  | { accessToken: string; accountId: string; source: string; baseUrl?: string }
  | null {
  const structured = nonEmpty(env.CODEX_OAUTH_CREDENTIAL);
  if (structured) {
    // The cloud resolver should refresh this auth blob before env injection
    // with refreshHarnessCliCredentialIfStale; runtime consumes the current
    // access_token/account_id and does not persist refreshed credentials.
    const parsed = parseCodexOauthCredential(structured);
    if (parsed) {
      const baseUrl = nonEmpty(env.CODEX_BACKEND_BASE_URL) ?? parsed.baseUrl;
      return {
        ...parsed,
        source: 'CODEX_OAUTH_CREDENTIAL',
        ...(baseUrl ? { baseUrl } : {})
      };
    }
  }

  const accessToken = nonEmpty(env.CODEX_OAUTH_TOKEN);
  const accountId = nonEmpty(env.CODEX_ACCOUNT_ID) ?? nonEmpty(env.CHATGPT_ACCOUNT_ID);
  if (!accessToken || !accountId) return null;
  const baseUrl = nonEmpty(env.CODEX_BACKEND_BASE_URL);
  return {
    accessToken,
    accountId,
    source: 'CODEX_OAUTH_TOKEN',
    ...(baseUrl ? { baseUrl } : {})
  };
}

/**
 * Parses a JSON auth blob into the fields the Codex backend leg needs.
 *
 * Token fields are read from `tokens` when that is an object, otherwise from
 * the top level. Both snake_case and camelCase keys are accepted, and the
 * account id and base URL may also sit at the top level.
 *
 * @returns `null` when the input is not a JSON object or lacks an access
 *   token or account id.
 */
function parseCodexOauthCredential(
  raw: string
): { accessToken: string; accountId: string; baseUrl?: string } | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw) as unknown;
  } catch {
    return null;
  }
  if (!isRecord(parsed)) return null;
  const tokens = isRecord(parsed.tokens) ? parsed.tokens : parsed;
  const accessToken =
    nonEmptyString(tokens.access_token) ??
    nonEmptyString(tokens.accessToken) ??
    nonEmptyString(parsed.CODEX_OAUTH_TOKEN);
  const accountId =
    nonEmptyString(tokens.account_id) ??
    nonEmptyString(tokens.accountId) ??
    nonEmptyString(parsed.account_id) ??
    nonEmptyString(parsed.accountId);
  if (!accessToken || !accountId) return null;
  const baseUrl = nonEmptyString(parsed.base_url) ?? nonEmptyString(parsed.baseUrl);
  return {
    accessToken,
    accountId,
    ...(baseUrl ? { baseUrl } : {})
  };
}

/** Trims a string; returns `null` for `undefined`, empty, or whitespace-only input. */
function nonEmpty(value: string | undefined): string | null {
  const trimmed = value?.trim();
  return trimmed && trimmed.length > 0 ? trimmed : null;
}

/** Like {@link nonEmpty}, but also returns `null` for any non-string value. */
function nonEmptyString(value: unknown): string | null {
  return typeof value === 'string' ? nonEmpty(value) : null;
}

/** Narrows a value to a non-null object (arrays included) for keyed access. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/** Cuts a string to at most `max` UTF-16 code units, appending `…` when shortened. */
function truncate(value: string, max: number): string {
  return value.length > max ? `${value.slice(0, max)}…` : value;
}

/**
 * Returns a fresh identifier for the session/thread headers: a UUID when the
 * runtime exposes `crypto.randomUUID`, otherwise a timestamp plus random hex.
 */
function randomId(): string {
  return (
    globalThis.crypto?.randomUUID?.() ?? `wf-${Date.now()}-${Math.random().toString(16).slice(2)}`
  );
}