From f13f5fb230463c15e83d0c7fedc7e5149d3cd689 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 14:46:37 -0700 Subject: [PATCH 1/6] Add debug logging to makeRequest to diagnose e2e CLI hangs The Vercel prod e2e tests intermittently hang on CLI inspect commands. The CLI gets through startup but the fetch() to the workflow-server API never returns. Adding before/after logging around the fetch call to determine if the hang is in the request itself or the retry agent. --- packages/world-vercel/src/utils.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/packages/world-vercel/src/utils.ts b/packages/world-vercel/src/utils.ts index ebcb5811b7..742dfedd78 100644 --- a/packages/world-vercel/src/utils.ts +++ b/packages/world-vercel/src/utils.ts @@ -280,11 +280,22 @@ export async function makeRequest({ body, headers, }); + + if (process.env.DEBUG === '1') { + console.error(`[Debug] ${method} ${url} - sending request...`); + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- undici v7 dispatcher types don't match @types/node's RequestInit const response = await fetch(request, { dispatcher: getDispatcher(), } as any); + if (process.env.DEBUG === '1') { + console.error( + `[Debug] ${method} ${url} - ${response.status} ${response.statusText}` + ); + } + span?.setAttributes({ ...HttpResponseStatusCode(response.status), }); From ffda24995dc72abc078be695ead6335d948c930e Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 14:46:54 -0700 Subject: [PATCH 2/6] Add changeset --- .changeset/debug-make-request-logging.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/debug-make-request-logging.md diff --git a/.changeset/debug-make-request-logging.md b/.changeset/debug-make-request-logging.md new file mode 100644 index 0000000000..4b19fbb24f --- /dev/null +++ b/.changeset/debug-make-request-logging.md @@ -0,0 +1,5 @@ +--- +"@workflow/world-vercel": patch +--- + +Add debug logging around fetch calls in `makeRequest` to diagnose intermittent API hangs From d772cb68e75c6e6f46ea8626d186a1662d21cd89 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 15:19:33 -0700 Subject: [PATCH 3/6] Add debug logging to fetchRunKey to confirm it is the hang point --- packages/world-vercel/src/encryption.ts | 28 +++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/packages/world-vercel/src/encryption.ts b/packages/world-vercel/src/encryption.ts index fb3b1be68d..a0b2d476c7 100644 --- a/packages/world-vercel/src/encryption.ts +++ b/packages/world-vercel/src/encryption.ts @@ -114,17 +114,23 @@ export async function fetchRunKey( params.set('teamId', options.teamId); } // 429/5xx retries are handled by the shared RetryAgent from getDispatcher() - const response = await fetch( - `https://api.vercel.com/v1/workflow/run-key/${deploymentId}?${params}`, - { - method: 'GET', - headers: { - authorization: `Bearer ${token}`, - }, - // @ts-expect-error -- undici dispatcher is accepted by Node.js fetch but not in @types/node's RequestInit - dispatcher: getDispatcher(), - } - ); + const url = `https://api.vercel.com/v1/workflow/run-key/${deploymentId}?${params}`; + if (process.env.DEBUG === '1') { + console.error(`[Debug] GET ${url} - sending request...`); + } + const response = await fetch(url, { + method: 'GET', + headers: { + authorization: `Bearer ${token}`, + }, + // @ts-expect-error -- undici dispatcher is accepted by Node.js fetch but not in @types/node's RequestInit + dispatcher: getDispatcher(), + }); + if (process.env.DEBUG === '1') { + console.error( + `[Debug] GET ${url} - ${response.status} ${response.statusText}` + ); + } if (!response.ok) { let body: string; From 5d6c372f889e00869f137bfcbc375ad350687149 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 15:22:27 -0700 Subject: [PATCH 4/6] Temporarily enable fail-fast on Vercel prod e2e to speed up debug iteration --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 03fd2313c9..056b79bb0b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -213,7 +213,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 30 strategy: - fail-fast: false + fail-fast: true matrix: app: - name: "example" From 36c386cd65fe5dc2a612d928e9d3cc5003692e73 Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 15:35:39 -0700 Subject: [PATCH 5/6] Skip OIDC call in fetchRunKey when token already available getVercelOidcToken() was called unconditionally before checking options.token or VERCEL_TOKEN. In the CLI/e2e context where a token is always provided, this is a wasted call that can hang in CI. Reorder the fallback chain: options.token -> VERCEL_TOKEN -> OIDC. --- packages/world-vercel/src/encryption.ts | 9 ++++++--- packages/world-vercel/src/resolve-latest-deployment.ts | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/world-vercel/src/encryption.ts b/packages/world-vercel/src/encryption.ts index a0b2d476c7..7680a55968 100644 --- a/packages/world-vercel/src/encryption.ts +++ b/packages/world-vercel/src/encryption.ts @@ -100,9 +100,12 @@ export async function fetchRunKey( } ): Promise { // Authenticate via provided token (CLI/config), OIDC token (runtime), - // or VERCEL_TOKEN env var (external tooling) - const oidcToken = await getVercelOidcToken().catch(() => null); - const token = options?.token ?? oidcToken ?? process.env.VERCEL_TOKEN; + // or VERCEL_TOKEN env var (external tooling). + // Skip the OIDC network call when a token is already available. + const token = + options?.token ?? + process.env.VERCEL_TOKEN ?? + (await getVercelOidcToken().catch(() => null)); if (!token) { throw new Error( 'Cannot fetch run key: no OIDC token or VERCEL_TOKEN available' diff --git a/packages/world-vercel/src/resolve-latest-deployment.ts b/packages/world-vercel/src/resolve-latest-deployment.ts index 23f8456584..ffb56d0dc2 100644 --- a/packages/world-vercel/src/resolve-latest-deployment.ts +++ b/packages/world-vercel/src/resolve-latest-deployment.ts @@ -36,9 +36,12 @@ export function createResolveLatestDeploymentId( } // Authenticate via provided token (CLI/config), OIDC token (runtime), - // or VERCEL_TOKEN env var (external tooling) - const oidcToken = await getVercelOidcToken().catch(() => null); - const token = config?.token ?? oidcToken ?? process.env.VERCEL_TOKEN; + // or VERCEL_TOKEN env var (external tooling). + // Skip the OIDC network call when a token is already available. + const token = + config?.token ?? + process.env.VERCEL_TOKEN ?? + (await getVercelOidcToken().catch(() => null)); if (!token) { throw new Error( 'Cannot resolve latest deployment: no OIDC token or VERCEL_TOKEN available' From 09144cc2fe877d0204b0625c163320ffd643000c Mon Sep 17 00:00:00 2001 From: Nathan Rajlich Date: Thu, 12 Mar 2026 22:47:16 -0700 Subject: [PATCH 6/6] Add debug logging to undici RetryAgent retry callback Replaces the default opaque retry behavior with a custom retry function that logs status codes and retry attempts when DEBUG=1. This makes 429 rate limiting and 5xx errors visible in CI logs instead of being silently retried. --- packages/world-vercel/src/http-client.ts | 92 ++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/packages/world-vercel/src/http-client.ts b/packages/world-vercel/src/http-client.ts index a6c1d7f63b..12607c8a6b 100644 --- a/packages/world-vercel/src/http-client.ts +++ b/packages/world-vercel/src/http-client.ts @@ -1,5 +1,7 @@ import { Agent, RetryAgent } from 'undici'; +const debug = process.env.DEBUG === '1'; + let _dispatcher: RetryAgent | undefined; /** @@ -17,6 +19,24 @@ let _dispatcher: RetryAgent | undefined; */ export function getDispatcher(): RetryAgent { if (!_dispatcher) { + const maxRetries = 5; + const minTimeout = 500; + const maxTimeout = 30_000; + const timeoutFactor = 2; + const statusCodes = [500, 502, 503, 504, 429]; + const errorCodes = [ + 'ECONNRESET', + 'ECONNREFUSED', + 'ENOTFOUND', + 'ENETDOWN', + 'ENETUNREACH', + 'EHOSTDOWN', + 'EHOSTUNREACH', + 'EPIPE', + 'UND_ERR_SOCKET', + ]; + const methods = ['GET', 'HEAD', 'OPTIONS', 'PUT', 'DELETE', 'TRACE']; + _dispatcher = new RetryAgent( new Agent({ connections: 8, @@ -24,12 +44,74 @@ export function getDispatcher(): RetryAgent { pipelining: 1, }), { - // Observe Retry-After header if received retryAfter: true, - // By default, we observe re-try headers, and also separately - // re-try on these status codes: 429 / 500 / 502 / 503 / 504. - // TODO: We might want to let 429s pass through, so that we can do - // runtime retry-after handling through the queue. + retry(err, { state, opts }, cb) { + const { statusCode, code, headers } = err as Error & { + statusCode?: number; + code?: string; + headers?: Record; + }; + + if (debug) { + console.error( + `[Debug] RetryAgent: ${opts.method} ${opts.origin}${opts.path} → ${statusCode ?? code ?? 'unknown error'} (attempt ${state.counter}/${maxRetries})` + ); + } + + // Not a retryable error code + if ( + code && + code !== 'UND_ERR_REQ_RETRY' && + !errorCodes.includes(code) + ) { + cb(err); + return; + } + + // Not a retryable method + if (!methods.includes(opts.method)) { + cb(err); + return; + } + + // Not a retryable status code + if (statusCode != null && !statusCodes.includes(statusCode)) { + cb(err); + return; + } + + // Max retries exceeded + if (state.counter > maxRetries) { + cb(err); + return; + } + + // Calculate retry delay (observe Retry-After header) + let retryAfter = 0; + if (headers?.['retry-after']) { + const parsed = Number(headers['retry-after']); + retryAfter = Number.isNaN(parsed) + ? Math.max( + 0, + new Date(headers['retry-after']).getTime() - Date.now() + ) + : parsed * 1000; + } + + const delay = + retryAfter > 0 + ? Math.min(retryAfter, maxTimeout) + : Math.min( + minTimeout * timeoutFactor ** (state.counter - 1), + maxTimeout + ); + + if (debug) { + console.error(`[Debug] RetryAgent: retrying in ${delay}ms`); + } + + setTimeout(() => cb(null), delay); + }, } ); }