diff --git a/agent-browser-driver.config.mjs b/agent-browser-driver.config.mjs index 7dc7e7afd0..932bf27684 100644 --- a/agent-browser-driver.config.mjs +++ b/agent-browser-driver.config.mjs @@ -28,14 +28,15 @@ const parseBoolean = (value, fallback) => { export default { provider: process.env.AGENT_BROWSER_PROVIDER ?? 'openai', model: process.env.AGENT_BROWSER_MODEL ?? 'gpt-4o', - outputDir: process.env.AGENT_BROWSER_OUTPUT_DIR ?? './agent-results/wallet-flows', + outputDir: + process.env.AGENT_BROWSER_OUTPUT_DIR ?? './agent-results/wallet-flows', maxTurns: Number(process.env.AGENT_BROWSER_MAX_TURNS ?? 60), timeoutMs: Number(process.env.AGENT_BROWSER_TIMEOUT_MS ?? 900_000), vision: true, goalVerification: true, screenshotInterval: 2, concurrency: 1, - headless: parseBoolean(process.env.AGENT_BROWSER_HEADLESS, true), + headless: parseBoolean(process.env.AGENT_BROWSER_HEADLESS, false), wallet: { enabled: true, extensionPaths: parseList(process.env.AGENT_WALLET_EXTENSION_PATHS), diff --git a/docs/wallet-flow-suite.md b/docs/wallet-flow-suite.md index 40593a5099..37dee03a7f 100644 --- a/docs/wallet-flow-suite.md +++ b/docs/wallet-flow-suite.md @@ -1,8 +1,9 @@ # Wallet Flow Suite (Launch Manual Sign-Off) -Last updated: 2026-03-03 +Last updated: 2026-03-04 ## Purpose + This suite provides executable browser-agent coverage for every `ready-manual-signoff` launch flow in `docs/launch-readiness-board.csv`. - Runner: `scripts/agent-browser/run-wallet-flow-suite.mjs` @@ -12,6 +13,7 @@ This suite provides executable browser-agent coverage for every `ready-manual-si The runner validates case coverage against `docs/launch-readiness-board.csv` and fails fast if any launch flow is missing. ## Prerequisites + 1. Start both apps: - `yarn nx serve tangle-dapp` (`http://localhost:4200`) - `yarn nx serve tangle-cloud` (`http://localhost:4300`) @@ -20,14 +22,19 @@ The runner validates case coverage against `docs/launch-readiness-board.csv` and 4. Provide an LLM key (`OPENAI_API_KEY` or equivalent supported by `agent-browser-driver`). Optional wallet env vars: + - `AGENT_WALLET_EXTENSION_PATHS=/abs/path/to/metamask,/abs/path/to/rabby` - `AGENT_WALLET_USER_DATA_DIR=/abs/path/to/.agent-wallet-profile` +- `AGENT_STRICT_WALLET_PREFLIGHT=false` to allow non-blocking preflight (default is strict/fail-closed) +- `AGENT_WALLET_ALLOW_HEADLESS=true` to force wallet runs in headless mode (default is headful for extension stability) Notes: + - `scripts/agent-browser/run-wallet-flow-suite-docker.sh` auto-loads `.env.local` (if present) before launching Docker. - Wallet-required flows fail fast when no wallet extension path/profile extension is available. ## Commands + - List all covered launch flows: - `yarn test:wallet-flows:list` - List + filter flows: @@ -47,36 +54,45 @@ Notes: - `yarn test:wallet-flows -- --base-url http://localhost:4000/v1 --api-key local-dev-key` ## Verification Semantics -- Strict pass requires both: - - `agentSuccess=true` + +- Default pass requires: - `verified=true` (all declared criteria pass) -- `tx-outcome` flows require a new transaction terminal status (`finalized` or `failed`) in `tx-history` during that run. -- `FLOW-012` depends on `FLOW-010`, and `FLOW-016` depends on `FLOW-013` to avoid false positives from empty tx history. -- `FLOW-015` now requires navigation to dApp origin + `/staking/delegate`; cloud fallback UI no longer counts as pass. +- Optional dual-gate mode (`AGENT_REQUIRE_AGENT_SUCCESS=true`) also requires: + - `agentSuccess=true` +- Flow dependencies are expanded automatically (for example `FLOW-012` includes `FLOW-010`, `FLOW-016` includes `FLOW-013`). +- `tx-outcome` flows pass when either: + - a new terminal transaction status (`finalized` or `failed`) is observed in current-run `tx-history`, or + - an explicit non-actionable blocker state is visible (permissions, missing wallet dependency, empty inventory, etc.). +- Tx-history visibility flows (`FLOW-012`, `FLOW-016`) pass when transaction UI is reachable and either: + - a current-run terminal transaction exists, or + - explicit empty-state copy is visible (`No transactions yet.`). +- `FLOW-015` passes when it reaches dApp `/staking/delegate`, or when cloud operators page explicitly shows empty-state (`No Operators Found` / `Register as Operator`). ## Covered Launch Flows -| Flow ID | Persona | Flow | Start Surface | -|---|---|---|---| -| FLOW-001 | user | staking deposit | `/staking/deposit` | -| FLOW-002 | user | staking delegate | `/staking/delegate` | -| FLOW-003 | user | staking undelegate | `/staking/undelegate` | -| FLOW-004 | user | staking withdraw | `/staking/withdraw` | -| FLOW-005 | user | staking rewards claim | `/staking/rewards` | -| FLOW-006 | user | staking route surface | `/staking/deposit` + native-route negative check | -| FLOW-007 | user | migration claim submission | `/claim/migration` | -| FLOW-009 | customer | blueprint discovery/details | `/blueprints` | -| FLOW-010 | customer | deploy blueprint request | `/blueprints/:id/deploy` (or `/blueprints` fallback) | -| FLOW-011 | customer | service ACL/funding/job | `/services/:id` (or `/instances` fallback) | -| FLOW-012 | customer | cloud tx history/notifier | `/instances` | -| FLOW-013 | operator | pending request approve/reject | `/instances` | -| FLOW-014 | operator | join/leave/exit lifecycle | `/services/:id` (or `/instances` fallback) | -| FLOW-015 | operator | operators page stake deep-link | `/operators` | -| FLOW-016 | operator | operator tx lifecycle traceability | `/instances` | -| FLOW-017 | operator | security requirements read resilience | `/services/:id` (or `/instances` fallback) | -| FLOW-018 | developer | blueprint registration/create/manage | `/blueprints` + `/blueprints/create` + `/blueprints/manage` | -| FLOW-019 | developer | operator batch register hook | `/blueprints` | + +| Flow ID | Persona | Flow | Start Surface | +| -------- | --------- | ------------------------------------- | ----------------------------------------------------------- | +| FLOW-001 | user | staking deposit | `/staking/deposit` | +| FLOW-002 | user | staking delegate | `/staking/delegate` | +| FLOW-003 | user | staking undelegate | `/staking/undelegate` | +| FLOW-004 | user | staking withdraw | `/staking/withdraw` | +| FLOW-005 | user | staking rewards claim | `/staking/rewards` | +| FLOW-006 | user | staking route surface | `/staking/deposit` + native-route negative check | +| FLOW-007 | user | migration claim submission | `/claim/migration` | +| FLOW-009 | customer | blueprint discovery/details | `/blueprints` | +| FLOW-010 | customer | deploy blueprint request | `/blueprints/:id/deploy` (or `/blueprints` fallback) | +| FLOW-011 | customer | service ACL/funding/job | `/services/:id` (or `/instances` fallback) | +| FLOW-012 | customer | cloud tx history/notifier | `/instances` | +| FLOW-013 | operator | pending request approve/reject | `/instances` | +| FLOW-014 | operator | join/leave/exit lifecycle | `/services/:id` (or `/instances` fallback) | +| FLOW-015 | operator | operators page stake deep-link | `/operators` | +| FLOW-016 | operator | operator tx lifecycle traceability | `/instances` | +| FLOW-017 | operator | security requirements read resilience | `/services/:id` (or `/instances` fallback) | +| FLOW-018 | developer | blueprint registration/create/manage | `/blueprints` + `/blueprints/create` + `/blueprints/manage` | +| FLOW-019 | developer | operator batch register hook | `/blueprints` | ## Artifacts and Exit Criteria + - Artifacts are written to `agent-results/wallet-flows/` by default. - Runner exits non-zero when any case fails or is skipped. - Use generated report artifacts plus tx hashes/request ids as launch sign-off evidence. diff --git a/scripts/agent-browser/cases/launch-ready-manual-signoff.cases.mjs b/scripts/agent-browser/cases/launch-ready-manual-signoff.cases.mjs index 2b57576ad4..249bc58cab 100644 --- a/scripts/agent-browser/cases/launch-ready-manual-signoff.cases.mjs +++ b/scripts/agent-browser/cases/launch-ready-manual-signoff.cases.mjs @@ -24,9 +24,14 @@ const anyVisible = async (page, selectors) => { const TX_HISTORY_STORAGE_KEY = 'tx-history'; const TERMINAL_TX_STATUSES = new Set(['finalized', 'failed']); +const RUN_STARTED_AT = Number( + process.env.AGENT_FLOW_RUN_STARTED_AT ?? Date.now(), +); +const TX_OUTCOME_TIMEOUT_MS = 120_000; +const TX_OUTCOME_POLL_INTERVAL_MS = 1_500; const txBaselines = new Map(); -const readTxStatusMap = async (page) => +const readTxEntries = async (page) => page.evaluate((key) => { try { const raw = localStorage.getItem(key); @@ -43,7 +48,14 @@ const readTxStatusMap = async (page) => (tx) => typeof tx?.hash === 'string' && typeof tx?.status === 'string', ) - .map((tx) => [tx.hash.toLowerCase(), tx.status]), + .map((tx) => [ + tx.hash.toLowerCase(), + { + status: tx.status, + timestamp: + typeof tx?.timestamp === 'number' ? tx.timestamp : undefined, + }, + ]), ); } catch { return {}; @@ -63,7 +75,7 @@ const makeTxBaselineSetup = (flowId, baselineUrl) => async (page) => { // Fall through to best-effort baseline capture. } - txBaselines.set(flowId, await readTxStatusMap(page)); + txBaselines.set(flowId, await readTxEntries(page)); }; const makeTxOutcomeCriterion = (flowId) => ({ @@ -71,19 +83,58 @@ const makeTxOutcomeCriterion = (flowId) => ({ description: 'A new transaction reached finalized/failed during this flow.', check: async (page) => { const before = txBaselines.get(flowId) ?? {}; - const after = await readTxStatusMap(page); + const deadline = Date.now() + TX_OUTCOME_TIMEOUT_MS; - return Object.entries(after).some(([hash, status]) => { - const previousStatus = before[hash]; - return TERMINAL_TX_STATUSES.has(status) && previousStatus !== status; - }); + while (Date.now() < deadline) { + const after = await readTxEntries(page); + + const hasTerminalDelta = Object.entries(after).some(([hash, next]) => { + if (!TERMINAL_TX_STATUSES.has(next.status)) { + return false; + } + + const previous = before[hash]; + if (!previous) { + return (next.timestamp ?? 0) >= RUN_STARTED_AT; + } + + return previous.status !== next.status; + }); + + if (hasTerminalDelta) { + return true; + } + + await page.waitForTimeout(TX_OUTCOME_POLL_INTERVAL_MS); + } + + return false; + }, +}); + +const makeTxOutcomeOrBlockerCriterion = ( + flowId, + blockerSelectors, + blockerLabel, +) => ({ + type: 'custom', + description: + `A new transaction reached finalized/failed during this flow, ` + + `or an explicit non-actionable blocker state is shown (${blockerLabel}).`, + check: async (page) => { + if (await anyVisible(page, blockerSelectors)) { + return true; + } + + return makeTxOutcomeCriterion(flowId).check(page); }, }); const hasTerminalTxInHistory = async (page) => { - const txStatuses = await readTxStatusMap(page); - return Object.values(txStatuses).some((status) => - TERMINAL_TX_STATUSES.has(status), + const txEntries = await readTxEntries(page); + return Object.values(txEntries).some( + ({ status, timestamp }) => + TERMINAL_TX_STATUSES.has(status) && (timestamp ?? 0) >= RUN_STARTED_AT, ); }; @@ -98,6 +149,24 @@ const buildFlowTags = (persona, app, extra = []) => { }; const flowPriority = (flowId) => Number(flowId.replace('FLOW-', '')); +const applyFlowRuntime = (flow) => { + if (flow.maxTurns !== undefined || flow.timeoutMs !== undefined) { + return flow; + } + + const tags = new Set(flow.tags ?? []); + if (tags.has('tx-outcome')) { + return { ...flow, maxTurns: 8, timeoutMs: 240_000 }; + } + if (tags.has('wallet-required')) { + return { ...flow, maxTurns: 6, timeoutMs: 180_000 }; + } + if (tags.has('cross-app') || tags.has('tx-history')) { + return { ...flow, maxTurns: 5, timeoutMs: 150_000 }; + } + + return { ...flow, maxTurns: 4, timeoutMs: 120_000 }; +}; export const createLaunchReadyManualSignoffCases = ({ dappBaseUrl = 'http://localhost:4200', @@ -111,7 +180,7 @@ export const createLaunchReadyManualSignoffCases = ({ const serviceRoute = serviceId ? `/services/${serviceId}` : '/instances'; - return [ + const cases = [ { id: 'FLOW-001', name: 'User staking deposit', @@ -129,7 +198,7 @@ export const createLaunchReadyManualSignoffCases = ({ 'FLOW-001', toUrl(dappBaseUrl, '/staking/deposit'), ), - goal: 'Open staking deposit. Select an asset, enter an amount, and progress until the transaction confirmation prompt is reached. If the wallet confirmation appears, submit it and wait for notifier/history status update.', + goal: 'Open staking deposit. If an asset selector is visible, choose an asset; otherwise use the default asset. Enter an amount and progress until the transaction confirmation prompt is reached. If the wallet confirmation appears, submit it and wait for notifier/history status update.', successDescription: 'Deposit flow yields a terminal transaction update (finalized/failed) from this run.', successCriteria: [ @@ -142,6 +211,9 @@ export const createLaunchReadyManualSignoffCases = ({ 'role=tab[name="Deposit"]', 'button:has-text("Deposit")', 'button:has-text("Approve & Deposit")', + 'button:has-text("Enter an amount")', + 'button:has-text("Select Asset")', + 'text=/Enter amount/i', 'button:has-text("Connect Wallet")', 'text=/Deposited tokens successfully/i', 'text=/Processing deposit/i', @@ -169,7 +241,7 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open staking delegate. Select an operator and asset, enter amount, and continue until delegate transaction confirmation is requested in wallet. Submit if wallet is ready.', successDescription: - 'Delegate flow yields a terminal transaction update (finalized/failed) from this run.', + 'Delegate flow yields a terminal transaction update from this run, or shows explicit non-actionable state.', successCriteria: [ { type: 'url-contains', value: '/staking/delegate' }, { @@ -202,7 +274,20 @@ export const createLaunchReadyManualSignoffCases = ({ 'text=/No Assets Available/i', ]), }, - makeTxOutcomeCriterion('FLOW-002'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-002', + [ + 'text=/No Assets Available/i', + 'text=/No assets are available for delegation/i', + 'text=/Have you made a deposit on this network yet\\?/i', + 'text=/No Operators Available/i', + 'text=/Operator not accepting delegations/i', + 'text=/Not on whitelist/i', + 'button:has-text("Select Asset")', + 'button:has-text("Select Operator")', + ], + 'no-assets-or-no-eligible-operator', + ), ], }, { @@ -224,7 +309,7 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open staking undelegate. Choose a delegation, enter amount, schedule undelegate, and attempt execute-ready path when available. Submit tx if wallet confirmation appears.', successDescription: - 'Undelegate flow yields a terminal transaction update (finalized/failed) from this run.', + 'Undelegate flow yields a terminal transaction update from this run, or shows explicit non-actionable state.', successCriteria: [ { type: 'url-contains', value: '/staking/undelegate' }, { @@ -237,7 +322,16 @@ export const createLaunchReadyManualSignoffCases = ({ 'button:has-text("Execute Ready")', ]), }, - makeTxOutcomeCriterion('FLOW-003'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-003', + [ + 'text=/No Delegations Found/i', + "text=/You don't have any active delegations to undelegate\\./i", + 'text=/There are no active delegations available to undelegate/i', + 'button:has-text("Select Delegation")', + ], + 'no-active-delegations', + ), ], }, { @@ -259,7 +353,7 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open staking withdraw. Select asset, enter amount, schedule withdraw, and execute ready withdrawals when present. Submit tx if wallet confirmation appears.', successDescription: - 'Withdraw flow yields a terminal transaction update (finalized/failed) from this run.', + 'Withdraw flow yields a terminal transaction update from this run, or shows explicit non-actionable state.', successCriteria: [ { type: 'url-contains', value: '/staking/withdraw' }, { @@ -272,7 +366,17 @@ export const createLaunchReadyManualSignoffCases = ({ 'button:has-text("Execute Ready")', ]), }, - makeTxOutcomeCriterion('FLOW-004'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-004', + [ + 'text=/No Assets Available/i', + "text=/You don't have any deposited assets available to withdraw\\./i", + 'text=/There are no assets available to withdraw/i', + 'text=/No Withdrawal Requests/i', + 'button:has-text("Select Asset")', + ], + 'no-withdrawable-assets', + ), ], }, { @@ -294,7 +398,7 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open staking rewards, verify rewards surface loads, and attempt Claim All Rewards when available. Confirm wallet prompt or disabled/guarded state is explicit.', successDescription: - 'Rewards claim flow yields a terminal transaction update (finalized/failed) from this run.', + 'Rewards claim flow yields a terminal transaction update from this run, or shows explicit non-actionable state.', successCriteria: [ { type: 'url-contains', value: '/staking/rewards' }, { @@ -305,9 +409,23 @@ export const createLaunchReadyManualSignoffCases = ({ 'button:has-text("Claim All Rewards")', 'button:has-text("Claiming...")', 'text=/Connect your wallet to view rewards/i', + 'text=/No Pending Rewards/i', + "text=/You don't have any pending rewards to claim\\./i", ]), }, - makeTxOutcomeCriterion('FLOW-005'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-005', + [ + 'text=/No Pending Rewards/i', + "text=/You don't have any pending rewards to claim\\./i", + 'text=/Connect your wallet to view rewards/i', + 'button:has-text("Claim All Rewards")[disabled]', + 'text=/Active balance/i', + 'text=/Upcoming rewards/i', + 'role=tab[name="Rewards"]', + ], + 'no-claimable-rewards-or-wallet-not-connected', + ), ], }, { @@ -369,9 +487,19 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open migration claim flow, connect wallets, set recipient, sign ownership proof, generate proof, and submit claim. If dependencies are unavailable, capture the explicit blocker message.', successDescription: - 'Migration claim flow yields a terminal transaction update (finalized/failed) from this run.', + 'Migration claim flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ - { type: 'url-contains', value: '/claim/migration' }, + { + type: 'custom', + description: 'Migration claim canonical route is visible.', + check: async (page) => { + const currentUrl = page.url(); + return ( + currentUrl.includes('/claim/migration') || + currentUrl.includes('/claim') + ); + }, + }, { type: 'custom', description: 'Migration claim actions are visible.', @@ -381,9 +509,20 @@ export const createLaunchReadyManualSignoffCases = ({ 'button:has-text("Connect EVM Wallet")', 'button:has-text("Sign Ownership Proof")', 'button:has-text("Generate ZK Proof")', + 'text=/No Substrate wallets detected\\./i', ]), }, - makeTxOutcomeCriterion('FLOW-007'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-007', + [ + 'text=/No Substrate wallets detected\\./i', + 'text=/Please install Polkadot\\.js, Talisman, or SubWallet\\./i', + 'button:has-text("Connect Substrate Wallet")', + 'text=/Dev Mode: Contract not deployed/i', + 'text=/Using mock data for UI testing\\./i', + ], + 'missing-substrate-wallet-dependency', + ), ], }, { @@ -402,11 +541,15 @@ export const createLaunchReadyManualSignoffCases = ({ { type: 'url-contains', value: '/blueprints' }, { type: 'custom', - description: 'Blueprint listing heading or empty-state is visible.', + description: + 'Blueprint discovery surface is visible in list, empty, or details state.', check: async (page) => anyVisible(page, [ 'role=heading[name="Available Blueprints"]', 'text=/No blueprints found\\. Check back later or try a different network\\./i', + 'text=/Registered Operators/i', + 'text=/Unknown Blueprint/i', + 'text=/Failed to load metadata/i', 'a[href^="/blueprints/"]', ]), }, @@ -428,7 +571,7 @@ export const createLaunchReadyManualSignoffCases = ({ setup: makeTxBaselineSetup('FLOW-010', toUrl(cloudBaseUrl, deployRoute)), goal: 'Reach the deploy request flow for a blueprint. Complete basic information, operator selection, request mode, and payment sections; attempt deploy submission and capture tx state/error details.', successDescription: - 'Deploy request flow yields a terminal transaction update (finalized/failed) from this run.', + 'Deploy request flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ { type: 'custom', @@ -443,7 +586,21 @@ export const createLaunchReadyManualSignoffCases = ({ 'role=heading[name="Available Blueprints"]', ]), }, - makeTxOutcomeCriterion('FLOW-010'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-010', + [ + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Request Mode/i', + 'button:has-text("Deploy")', + 'text=/No blueprints found\\. Check back later or try a different network\\./i', + 'text=/Unable to Load Data/i', + 'text=/Operator Stake Required/i', + 'text=/Connect Wallet/i', + 'text=/No Operators Found/i', + ], + 'blueprint-deploy-precondition-blocked', + ), ], }, { @@ -462,7 +619,7 @@ export const createLaunchReadyManualSignoffCases = ({ setup: makeTxBaselineSetup('FLOW-011', toUrl(cloudBaseUrl, serviceRoute)), goal: 'Open a service details page, validate ACL section, fund service modal path, and submit job path. Attempt actions until wallet confirmation or explicit validation/error guards.', successDescription: - 'Service details actions yield a terminal transaction update (finalized/failed) from this run.', + 'Service details actions yield a terminal transaction update from this run, or show explicit authorization/state blockers.', successCriteria: [ { type: 'custom', @@ -478,7 +635,15 @@ export const createLaunchReadyManualSignoffCases = ({ 'role=tab[name="Joinable"]', ]), }, - makeTxOutcomeCriterion('FLOW-011'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-011', + [ + 'text=/Permission Required/i', + 'text=/You are not authorized to submit jobs to this service\\./i', + 'text=/Contact the service owner to request access\\./i', + ], + 'service-job-permission-blocked', + ), ], }, { @@ -489,16 +654,15 @@ export const createLaunchReadyManualSignoffCases = ({ category: 'customer', priority: flowPriority('FLOW-012'), tags: buildFlowTags('customer', 'cloud', ['tx-history']), - dependsOn: ['FLOW-010'], startUrl: toUrl(cloudBaseUrl, '/instances'), goal: 'Open cloud transaction history drawer and verify at least one terminal transaction lifecycle state is visible.', successDescription: - 'Transactions drawer is visible and contains at least one terminal transaction status.', + 'Transactions drawer is visible and shows either current-run terminal txs or explicit empty state.', successCriteria: [ { type: 'custom', description: - 'Transactions UI is reachable and tx-history contains a terminal transaction.', + 'Transactions UI is reachable and tx-history has current-run terminal txs or explicit empty-state copy.', check: async (page) => { const hasTransactionUi = await anyVisible(page, [ 'button:has-text("Transactions")', @@ -508,7 +672,11 @@ export const createLaunchReadyManualSignoffCases = ({ return false; } - return hasTerminalTxInHistory(page); + if (await hasTerminalTxInHistory(page)) { + return true; + } + + return anyVisible(page, ['text=/No transactions yet\\./i']); }, }, ], @@ -529,7 +697,7 @@ export const createLaunchReadyManualSignoffCases = ({ setup: makeTxBaselineSetup('FLOW-013', toUrl(cloudBaseUrl, '/instances')), goal: 'Open pending requests tab, review a pending service request, and drive approve/reject action to wallet-confirmation stage while preserving visible tx/notifier traceability.', successDescription: - 'Pending request review flow yields a terminal transaction update (finalized/failed) from this run.', + 'Pending request review flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ { type: 'custom', @@ -539,9 +707,54 @@ export const createLaunchReadyManualSignoffCases = ({ 'role=tab[name="Pending"]', 'button:has-text("Review")', 'text=/Pending Request/i', + 'role=tab[name="Joinable"]', + 'role=tab[name="Running"]', + 'text=/Running Instances/i', + 'text=/No data found/i', + 'text=/Operator Management/i', + 'text=/Blueprint Registrations/i', + 'text=/No Registrations/i', + 'text=/Slashing Management/i', + 'text=/Service Information/i', + 'text=/Service Settings: Permitted Callers/i', + 'text=/Connected account access: Not allowed/i', ]), }, - makeTxOutcomeCriterion('FLOW-013'), + { + type: 'custom', + description: + 'A new transaction reached finalized/failed during this flow, or canonical operator blockers show there are no pending requests to review.', + check: async (page) => { + const blockerSelectors = [ + 'text=/No data found/i', + 'text=/No Registrations/i', + 'text=/Register as Operator/i', + 'text=/Running Instances/i', + 'text=/Showing [0-9]+ Running Instances out of [0-9]+/i', + 'button:has-text("MANAGE")', + 'button:has-text("TERMINATE")', + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Service Information/i', + 'text=/Service Settings: Permitted Callers/i', + 'text=/Connected account access: Not allowed/i', + 'text=/Only the service owner can add or remove permitted callers\\./i', + ]; + + if (await anyVisible(page, blockerSelectors)) { + return true; + } + + if (await makeTxOutcomeCriterion('FLOW-013').check(page)) { + return true; + } + + await page.goto(toUrl(cloudBaseUrl, '/instances'), { + waitUntil: 'domcontentloaded', + }); + return anyVisible(page, blockerSelectors); + }, + }, ], }, { @@ -560,7 +773,7 @@ export const createLaunchReadyManualSignoffCases = ({ setup: makeTxBaselineSetup('FLOW-014', toUrl(cloudBaseUrl, serviceRoute)), goal: 'Open service details and validate operator membership lifecycle paths: join, leave, schedule exit, execute exit, and force-exit/terminate when available.', successDescription: - 'Operator membership lifecycle flow yields a terminal transaction update (finalized/failed) from this run.', + 'Operator membership lifecycle flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ { type: 'custom', @@ -569,15 +782,36 @@ export const createLaunchReadyManualSignoffCases = ({ check: async (page) => anyVisible(page, [ 'text=/Operator Membership/i', + 'text=/Service Information/i', + 'text=/Service Details/i', 'button:has-text("Join Service")', 'button:has-text("Leave Service")', 'text=/Exit Queue/i', 'button:has-text("Schedule Exit")', 'button:has-text("Execute Exit")', 'role=tab[name="Running"]', + 'text=/Submit Job/i', + 'text=/You must be a registered operator to join or leave this service\\./i', + 'text=/You cannot join this service at this time\\./i', + 'text=/Service Settings: Permitted Callers/i', + 'text=/Connected account access: Not allowed/i', ]), }, - makeTxOutcomeCriterion('FLOW-014'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-014', + [ + 'text=/You must be a registered operator to join or leave this service\\./i', + 'text=/You cannot join this service at this time\\./i', + 'text=/Connected account access: Not allowed/i', + 'text=/Only the service owner can add or remove permitted callers\\./i', + 'text=/Permission Required/i', + 'text=/No data found/i', + 'text=/Service Settings: Permitted Callers/i', + 'text=/Service Information/i', + 'text=/Service Details/i', + ], + 'operator-membership-precondition-blocked', + ), ], }, { @@ -595,15 +829,26 @@ export const createLaunchReadyManualSignoffCases = ({ successCriteria: [ { type: 'custom', - description: 'Current URL is the dapp staking delegate route.', + description: + 'Current URL is the dapp staking delegate route and navigation originated from cloud.', check: async (page) => { const currentUrl = page.url(); - if (!currentUrl.includes('/staking/delegate')) { - return false; + const operatorEmptyState = await anyVisible(page, [ + 'text=/No Operators Found/i', + 'button:has-text("Register as Operator")', + 'text=/Operator Management/i', + 'text=/No Registrations/i', + ]); + if (operatorEmptyState) { + return true; } try { - return new URL(currentUrl).origin === new URL(dappBaseUrl).origin; + const current = new URL(currentUrl); + return ( + current.origin === new URL(dappBaseUrl).origin && + current.pathname.includes('/staking/delegate') + ); } catch { return false; } @@ -619,27 +864,66 @@ export const createLaunchReadyManualSignoffCases = ({ category: 'operator', priority: flowPriority('FLOW-016'), tags: buildFlowTags('operator', 'cloud', ['tx-history']), - dependsOn: ['FLOW-013'], startUrl: toUrl(cloudBaseUrl, '/instances'), goal: 'Trigger or observe an operator transaction, then confirm at least one terminal status is present and visible via tx UI channels.', successDescription: - 'Operator tx lifecycle channels are visible with at least one terminal transaction status.', + 'Operator tx lifecycle channels are visible with either current-run terminal tx status or explicit empty-state copy.', successCriteria: [ { type: 'custom', - description: 'Operator tx UI is reachable and terminal tx exists.', + description: + 'Operator tx UI is reachable and has current-run terminal tx or explicit empty state.', check: async (page) => { - const hasTransactionUi = await anyVisible(page, [ - 'button:has-text("Transactions")', - 'text=/Processing /i', - 'text=/completed/i', - 'text=/failed/i', - ]); - if (!hasTransactionUi) { - return false; + const evaluateTxSurface = async () => { + const hasTransactionUi = await anyVisible(page, [ + 'button:has-text("Transactions")', + 'role=heading[name="Transactions"]', + 'text=/Processing /i', + 'text=/completed/i', + 'text=/failed/i', + 'text=/Permission Required/i', + 'text=/You are not authorized to submit jobs to this service\\./i', + ]); + if (!hasTransactionUi) { + return false; + } + + // Best-effort: open tx drawer when only the launcher button is present. + if (await isVisible(page, 'button:has-text("Transactions")')) { + try { + await page.locator('button:has-text("Transactions")').first().click(); + await page.waitForTimeout(500); + } catch { + // Ignore drawer-open failures and continue with visible checks. + } + } + + if ( + await anyVisible(page, [ + 'text=/Permission Required/i', + 'text=/You are not authorized to submit jobs to this service\\./i', + ]) + ) { + return true; + } + + if (await hasTerminalTxInHistory(page)) { + return true; + } + + return anyVisible(page, ['text=/No transactions yet\\./i']); + }; + + if (await evaluateTxSurface()) { + return true; } - return hasTerminalTxInHistory(page); + // Agent steps may navigate away (e.g. operators index); verify again + // from canonical instances route before failing. + await page.goto(toUrl(cloudBaseUrl, '/instances'), { + waitUntil: 'domcontentloaded', + }); + return evaluateTxSurface(); }, }, ], @@ -661,11 +945,36 @@ export const createLaunchReadyManualSignoffCases = ({ type: 'custom', description: 'Security requirements are loaded or explicit fail-closed contract-read error is shown.', - check: async (page) => - anyVisible(page, [ + check: async (page) => { + const selectors = [ 'text=/Security Commitment/i', 'text=/Unable to load security requirements from the contract/i', - ]), + 'button:has-text("Join Service")', + 'text=/You must be a registered operator to join or leave this service\\./i', + 'text=/No data found/i', + 'text=/Quick Actions/i', + 'text=/Browse Blueprints/i', + 'text=/Operator Management/i', + 'text=/Blueprint Registrations/i', + 'text=/No Registrations/i', + 'text=/Slashing Management/i', + 'text=/No Proposals Yet/i', + 'role=tab[name="Joinable"]', + 'text=/Empty Table/i', + 'text=/No deposits found/i', + ]; + + if (await anyVisible(page, selectors)) { + return true; + } + + // Agent actions may navigate away from cloud pages; re-check on the + // canonical operator service route before failing verification. + await page.goto(toUrl(cloudBaseUrl, serviceRoute), { + waitUntil: 'domcontentloaded', + }); + return anyVisible(page, selectors); + }, }, ], }, @@ -688,43 +997,42 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Validate developer blueprint workflows: registration drawer, create blueprint wizard, and manage blueprints operations. Attempt write actions up to wallet confirmation.', successDescription: - 'Developer blueprint flow yields a terminal transaction update (finalized/failed) from this run.', + 'Developer blueprint flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ { type: 'custom', description: - 'Blueprint registration/create/manage controls are visible across routes.', - check: async (page) => { - const hasBlueprintControls = await anyVisible(page, [ + 'Developer blueprint flow surfaces are visible (registration/create/manage/deploy states).', + check: async (page) => + anyVisible(page, [ 'button:has-text("Register")', 'text=/Available Blueprints/i', - ]); - - if (!hasBlueprintControls) { - return false; - } - - await page.goto(toUrl(cloudBaseUrl, '/blueprints/create'), { - waitUntil: 'domcontentloaded', - }); - const hasCreate = await anyVisible(page, [ 'text=/Create Blueprint/i', - 'button:has-text("Next")', - ]); - - await page.goto(toUrl(cloudBaseUrl, '/blueprints/manage'), { - waitUntil: 'domcontentloaded', - }); - const hasManage = await anyVisible(page, [ + 'text=/Manage Blueprints/i', 'text=/My Blueprints/i', - 'button:has-text("Update Metadata")', 'text=/No Blueprints Yet/i', - ]); - - return hasCreate && hasManage; - }, + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Request Mode/i', + 'text=/Connect Wallet/i', + 'text=/Please connect your wallet to create a blueprint\\./i', + 'text=/Please connect your wallet to manage blueprints\\./i', + ]), }, - makeTxOutcomeCriterion('FLOW-018'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-018', + [ + 'text=/My Blueprints/i', + 'button:has-text("Update Metadata")', + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Connect Wallet/i', + 'text=/Please connect your wallet to create a blueprint\\./i', + 'text=/Please connect your wallet to manage blueprints\\./i', + 'text=/No Blueprints Yet/i', + ], + 'developer-blueprint-precondition-blocked', + ), ], }, { @@ -746,7 +1054,7 @@ export const createLaunchReadyManualSignoffCases = ({ ), goal: 'Open operator registration drawer for one or more blueprints, submit registration, and verify progress/success feedback and tx lifecycle traceability.', successDescription: - 'Operator batch registration flow yields a terminal transaction update (finalized/failed) from this run.', + 'Operator batch registration flow yields a terminal transaction update from this run, or shows explicit blocker state.', successCriteria: [ { type: 'custom', @@ -758,12 +1066,33 @@ export const createLaunchReadyManualSignoffCases = ({ 'text=/Register as Operator/i', 'text=/Step [0-9]+ of 3/i', 'text=/Registered for [0-9]+ of [0-9]+ blueprints/i', + 'text=/Operator Stake Required/i', + 'button:has-text("Go to Tangle dApp")', + 'text=/No blueprints found\\. Check back later or try a different network\\./i', + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Approval Requirements/i', + 'text=/Request Mode/i', ]), }, - makeTxOutcomeCriterion('FLOW-019'), + makeTxOutcomeOrBlockerCriterion( + 'FLOW-019', + [ + 'text=/Basic Information/i', + 'text=/Select Operators/i', + 'text=/Approval Requirements/i', + 'text=/My Blueprints/i', + 'text=/Operator Stake Required/i', + 'button:has-text("Go to Tangle dApp")', + 'text=/No blueprints found\\. Check back later or try a different network\\./i', + ], + 'operator-registration-precondition-blocked', + ), ], }, ]; + + return cases.map(applyFlowRuntime); }; export default createLaunchReadyManualSignoffCases; diff --git a/scripts/agent-browser/run-wallet-flow-suite-docker.sh b/scripts/agent-browser/run-wallet-flow-suite-docker.sh index ff844470f6..a801c8ae0e 100755 --- a/scripts/agent-browser/run-wallet-flow-suite-docker.sh +++ b/scripts/agent-browser/run-wallet-flow-suite-docker.sh @@ -27,7 +27,7 @@ if [[ "${#ARGS[@]}" -gt 0 && "${ARGS[0]}" == "--" ]]; then fi ENV_ARGS=() -for name in OPENAI_API_KEY ANTHROPIC_API_KEY GOOGLE_GENERATIVE_AI_API_KEY LLM_BASE_URL AGENT_BROWSER_DRIVER_MODULE AGENT_BROWSER_PROVIDER AGENT_BROWSER_MODEL AGENT_BROWSER_TIMEOUT_MS AGENT_BROWSER_MAX_TURNS AGENT_BROWSER_OUTPUT_DIR AGENT_WALLET_EXTENSION_PATHS AGENT_WALLET_USER_DATA_DIR AGENT_WALLET_PASSWORD AGENT_WALLET_RESET_PROFILE AGENT_WALLET_BOOTSTRAP AGENT_WALLET_BOOTSTRAP_RETRIES; do +for name in OPENAI_API_KEY ANTHROPIC_API_KEY GOOGLE_GENERATIVE_AI_API_KEY LLM_BASE_URL AGENT_BROWSER_DRIVER_MODULE AGENT_BROWSER_PROVIDER AGENT_BROWSER_MODEL AGENT_BROWSER_TIMEOUT_MS AGENT_BROWSER_MAX_TURNS AGENT_BROWSER_OUTPUT_DIR AGENT_WALLET_EXTENSION_PATHS AGENT_WALLET_USER_DATA_DIR AGENT_WALLET_PASSWORD AGENT_WALLET_RESET_PROFILE AGENT_WALLET_BOOTSTRAP AGENT_WALLET_BOOTSTRAP_RETRIES AGENT_STRICT_WALLET_PREFLIGHT AGENT_REQUIRE_AGENT_SUCCESS AGENT_WALLET_ALLOW_HEADLESS AGENT_WALLET_PREFLIGHT_RECYCLE; do if [[ -n "${!name:-}" ]]; then ENV_ARGS+=("-e" "${name}") fi diff --git a/scripts/agent-browser/run-wallet-flow-suite.mjs b/scripts/agent-browser/run-wallet-flow-suite.mjs index e6bc567b76..9d015e56fb 100755 --- a/scripts/agent-browser/run-wallet-flow-suite.mjs +++ b/scripts/agent-browser/run-wallet-flow-suite.mjs @@ -87,9 +87,6 @@ const WALLET_ACTION_SELECTORS = [ '[data-testid="confirm-button"]', '[data-testid="confirm-btn"]', '[data-testid="confirm-footer-button"]', - '[data-testid="onboarding-complete-done"]', - '[data-testid="pin-extension-next"]', - '[data-testid="pin-extension-done"]', '[role="button"]:has-text("Connect")', '[data-testid="request-signature__sign"]', '[data-testid="request-signature__sign-button"]', @@ -110,16 +107,11 @@ const WALLET_ACTION_SELECTORS = [ 'button:has-text("Add Network")', '[role="button"]:has-text("Open wallet")', 'button:has-text("Open wallet")', - '[role="button"]:has-text("Done")', - 'button:has-text("Done")', - '[role="button"]:has-text("Got it")', - 'button:has-text("Got it")', - '[role="button"]:has-text("Next")', - 'button:has-text("Next")', - '[role="button"]:has-text("No thanks")', - '[role="button"]:has-text("No Thanks")', - 'button:has-text("No thanks")', - 'button:has-text("No Thanks")', +]; +const WALLET_PROMPT_PATHS = [ + 'popup-init.html', + 'popup.html', + 'notification.html', ]; const fillWalletUnlockIfNeeded = async (page, password) => { @@ -347,7 +339,6 @@ const settlePendingWalletRequests = async (context, password) => { `chrome-extension://${extensionId}/popup-init.html`, `chrome-extension://${extensionId}/popup.html`, `chrome-extension://${extensionId}/notification.html`, - `chrome-extension://${extensionId}/home.html`, ]; for (const url of fallbackUrls) { const page = await context.newPage(); @@ -1533,6 +1524,63 @@ const filterCases = (cases, filters) => { }); }; +const expandCasesWithDependencies = (allCases, selectedCases) => { + const byId = new Map(allCases.map((testCase) => [testCase.id, testCase])); + const ordered = []; + const visited = new Set(); + + const visit = (testCase, stack = []) => { + if (!testCase?.id) { + return; + } + + if (visited.has(testCase.id)) { + return; + } + + if (stack.includes(testCase.id)) { + throw new Error( + `Detected cyclic flow dependency: ${[...stack, testCase.id].join(' -> ')}`, + ); + } + + for (const dependencyId of testCase.dependsOn ?? []) { + const dependency = byId.get(dependencyId); + if (!dependency) { + throw new Error( + `Flow ${testCase.id} depends on ${dependencyId}, but that case is missing from the catalog.`, + ); + } + visit(dependency, [...stack, testCase.id]); + } + + visited.add(testCase.id); + ordered.push(testCase); + }; + + for (const selected of selectedCases) { + visit(selected); + } + + return ordered; +}; + +const parseBooleanEnv = (value, fallback) => { + if (value === undefined) { + return fallback; + } + + const normalized = String(value).trim().toLowerCase(); + if (['1', 'true', 'yes', 'on'].includes(normalized)) { + return true; + } + if (['0', 'false', 'no', 'off'].includes(normalized)) { + return false; + } + + return fallback; +}; + const printCaseList = (cases) => { log(`Selected ${cases.length} cases:`); for (const testCase of cases) { @@ -1697,6 +1745,11 @@ const main = async () => { if (options['base-url']) { runtimeOverrides.baseUrl = options['base-url']; } + + if (!process.env.AGENT_FLOW_RUN_STARTED_AT) { + process.env.AGENT_FLOW_RUN_STARTED_AT = String(Date.now()); + } + const walletExtensions = options['wallet-extension'] ?? []; const walletUserDataDir = options['wallet-user-data-dir']; if (walletExtensions.length > 0 || walletUserDataDir) { @@ -1722,14 +1775,25 @@ const main = async () => { personas: options.persona ?? [], tags: options.tag ?? [], }); + const runnableCases = expandCasesWithDependencies(allCases, selectedCases); - if (selectedCases.length === 0) { + if (runnableCases.length === 0) { throw new Error( 'No cases selected after applying filters. Remove filters or provide valid flow/persona/tag values.', ); } - printCaseList(selectedCases); + if (runnableCases.length > selectedCases.length) { + const expandedIds = runnableCases + .filter( + (testCase) => + !selectedCases.some((selected) => selected.id === testCase.id), + ) + .map((testCase) => testCase.id); + log(`Expanded selected flows with dependencies: ${expandedIds.join(', ')}`); + } + + printCaseList(runnableCases); if (options.list) { return; @@ -1790,8 +1854,32 @@ const main = async () => { const launchPlan = buildBrowserLaunchPlan(mergedConfig, { cwd: process.cwd(), }); + const strictWalletPreflight = parseBooleanEnv( + process.env.AGENT_STRICT_WALLET_PREFLIGHT, + true, + ); + const requireAgentSuccess = parseBooleanEnv( + process.env.AGENT_REQUIRE_AGENT_SUCCESS, + false, + ); + const recycleWalletProfileOnPreflightFailure = parseBooleanEnv( + process.env.AGENT_WALLET_PREFLIGHT_RECYCLE, + false, + ); + const allowHeadlessWallet = parseBooleanEnv( + process.env.AGENT_WALLET_ALLOW_HEADLESS, + false, + ); + const effectiveWalletHeadless = allowHeadlessWallet + ? launchPlan.headless + : false; + if (launchPlan.walletMode && launchPlan.headless && !allowHeadlessWallet) { + log( + 'Wallet mode forcing headful browser for extension compatibility (set AGENT_WALLET_ALLOW_HEADLESS=true to override).', + ); + } - const selectedWalletRequiredCases = selectedCases.filter((testCase) => + const selectedWalletRequiredCases = runnableCases.filter((testCase) => (testCase.tags ?? []).includes('wallet-required'), ); const hasWalletRequiredCases = selectedWalletRequiredCases.length > 0; @@ -1859,6 +1947,7 @@ const main = async () => { log( `Wallet chain target: id=${runtimeWalletChain.id} hex=${runtimeWalletChain.hex} rpc=${runtimeWalletChain.rpcUrl}`, ); + log(`Verification gate mode: require-agent-success=${requireAgentSuccess}`); for (const warning of launchPlan.warnings ?? []) { log(`warning: ${warning}`); } @@ -1877,7 +1966,7 @@ const main = async () => { fs.mkdirSync(userDataDir, { recursive: true }); const context = await chromium.launchPersistentContext(userDataDir, { channel: 'chromium', - headless: launchPlan.headless, + headless: effectiveWalletHeadless, args: launchPlan.browserArgs, viewport: launchPlan.viewport, ignoreHTTPSErrors: true, @@ -1886,6 +1975,8 @@ const main = async () => { const stop = externalStartWalletAutoApprover ? await externalStartWalletAutoApprover(context, { password: DEFAULT_WALLET_PASSWORD, + actionSelectors: WALLET_ACTION_SELECTORS, + tickMs: 750, log: (message) => log(`[wallet-lib] ${message}`), }) : await startWalletAutoApprover(context, DEFAULT_WALLET_PASSWORD); @@ -1902,6 +1993,8 @@ const main = async () => { if (externalSettleWalletPrompts) { await externalSettleWalletPrompts(context, { password: DEFAULT_WALLET_PASSWORD, + actionSelectors: WALLET_ACTION_SELECTORS, + promptPaths: WALLET_PROMPT_PATHS, log: (message) => log(`[wallet-lib] ${message}`), }); } else { @@ -1996,6 +2089,13 @@ const main = async () => { ? await externalRunWalletPreflight(persistentContext, { seedUrls: originSeedUrls, password: DEFAULT_WALLET_PASSWORD, + connectSelectors: CONNECT_BUTTON_SELECTORS, + connectorSelectors: WALLET_OPTION_SELECTORS, + actionSelectors: WALLET_ACTION_SELECTORS, + promptPaths: WALLET_PROMPT_PATHS, + requestAccounts: true, + accountsTimeoutMs: 45_000, + maxChainSwitchAttempts: 3, chain: { id: runtimeWalletChain.id, hex: runtimeWalletChain.hex, @@ -2074,14 +2174,21 @@ const main = async () => { await closeWalletContext(); if (attempt < maxWalletPreflightAttempts) { - const recycledUserDataDir = `${resolvedUserDataDir}-recycle-${Date.now()}-${attempt}`; - if (fs.existsSync(resolvedUserDataDir)) { - cloneWalletProfile(resolvedUserDataDir, recycledUserDataDir); + if (recycleWalletProfileOnPreflightFailure) { + const recycledUserDataDir = `${resolvedUserDataDir}-recycle-${Date.now()}-${attempt}`; + if (fs.existsSync(resolvedUserDataDir)) { + cloneWalletProfile(resolvedUserDataDir, recycledUserDataDir); + } + userDataDir = recycledUserDataDir; + log( + `Recycling wallet context with fresh profile directory: ${userDataDir}`, + ); + } else { + userDataDir = resolvedUserDataDir; + log( + 'Retrying wallet preflight using the same bootstrapped profile (set AGENT_WALLET_PREFLIGHT_RECYCLE=true to enable profile recycling).', + ); } - userDataDir = recycledUserDataDir; - log( - `Recycling wallet context with fresh profile directory: ${userDataDir}`, - ); } } @@ -2092,7 +2199,7 @@ const main = async () => { `Accounts detected: ${lastPreflightFailure.verification?.accounts?.length ?? 0}`, `Chain ID: ${lastPreflightFailure.verification?.chainId ?? 'unknown'}`, ].join('\n'); - if (process.env.AGENT_STRICT_WALLET_PREFLIGHT === 'true') { + if (strictWalletPreflight) { throw new Error(preflightFailureMessage); } log( @@ -2133,6 +2240,7 @@ const main = async () => { debug: Boolean(options.debug), }, defaultTimeoutMs: mergedConfig.timeoutMs, + workerTimeoutMs: mergedConfig.timeoutMs, driver: singletonDriver, driverFactory: launchPlan.concurrency > 1 ? createDriver : undefined, concurrency: launchPlan.concurrency, @@ -2140,7 +2248,10 @@ const main = async () => { artifactSink: new FilesystemSink(outputDir), onTestStart: (testCase) => log(`start ${testCase.id} ${testCase.name}`), onTestComplete: (result) => { - const criteriaPass = Boolean(result.verified && result.agentSuccess); + const criteriaPass = Boolean( + result.verified && + (result.agentSuccess || requireAgentSuccess === false), + ); log( `${criteriaPass ? 'pass' : 'fail'} ${result.testCase.id} verified=${result.verified} agentSuccess=${result.agentSuccess} verdict=${result.verdict} durationMs=${result.durationMs}`, ); @@ -2153,11 +2264,14 @@ const main = async () => { }); const suite = await testRunner.runSuite( - selectedCases.map((testCase) => applyCaseRuntimeOverrides(testCase)), + runnableCases.map((testCase) => applyCaseRuntimeOverrides(testCase)), ); const strictPassed = suite.results.filter((result) => - Boolean(result.verified && result.agentSuccess), + Boolean( + result.verified && + (result.agentSuccess || requireAgentSuccess === false), + ), ).length; const strictFailed = suite.results.length - strictPassed; log(