From f7decd769d4e25116bd8cb0a3527b231a4d038bb Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 6 Mar 2026 11:37:12 -0800 Subject: [PATCH 1/4] status / summary --- .../benchmark-job/{status.ts => summary.ts} | 93 ++++++++++--------- src/commands/benchmark-job/watch.ts | 93 ++++++++++--------- src/utils/commands.ts | 11 ++- 3 files changed, 103 insertions(+), 94 deletions(-) rename src/commands/benchmark-job/{status.ts => summary.ts} (85%) diff --git a/src/commands/benchmark-job/status.ts b/src/commands/benchmark-job/summary.ts similarity index 85% rename from src/commands/benchmark-job/status.ts rename to src/commands/benchmark-job/summary.ts index c46e2357..3ab04aa9 100644 --- a/src/commands/benchmark-job/status.ts +++ b/src/commands/benchmark-job/summary.ts @@ -1,5 +1,5 @@ /** - * Status benchmark job command + * Summary benchmark job command */ import chalk from "chalk"; @@ -11,8 +11,9 @@ import { } from "../../services/benchmarkJobService.js"; import { output, outputError } from "../../utils/output.js"; -interface StatusOptions { +interface SummaryOptions { output?: string; + extended?: boolean; } // Job states that indicate completion @@ -285,7 +286,7 @@ function calculateCompletedStats( } // Print results table for completed jobs -function printResultsTable(job: BenchmarkJob): void { +function printResultsTable(job: BenchmarkJob, extended: boolean = false): void { const outcomes = job.benchmark_outcomes || []; if (outcomes.length === 0) { @@ -363,55 +364,59 @@ function printResultsTable(job: BenchmarkJob): void { chalk.dim(totalColStr), ); - // Print individual scenario results underneath (indented) - for (const scenario of scenarioOutcomes) { - const scenarioName = - scenario.scenario_name || scenario.scenario_definition_id || "unknown"; - const state = scenario.state || "unknown"; - const score = scenario.score; - - let statusIcon: string; - let statusColor: typeof chalk.green; - - if (state.toUpperCase() === "COMPLETED") { - if (score === 1.0) { - statusIcon = chalk.green("\u2713"); // checkmark - statusColor = chalk.green; + // Print individual scenario results underneath (indented) when extended + if (extended) { + for (const scenario of scenarioOutcomes) { + const scenarioName = + scenario.scenario_name || + scenario.scenario_definition_id || + "unknown"; + const state = scenario.state || "unknown"; + const score = scenario.score; + + let statusIcon: string; + let statusColor: typeof chalk.green; + + if (state.toUpperCase() === "COMPLETED") { + if (score === 1.0) { + statusIcon = chalk.green("\u2713"); // checkmark + statusColor = chalk.green; + } else { + statusIcon = chalk.yellow("\u2717"); // X + statusColor = chalk.yellow; + } } else { - statusIcon = chalk.yellow("\u2717"); // X - statusColor = chalk.yellow; + statusIcon = chalk.red("!"); + statusColor = chalk.red; } - } else { - statusIcon = chalk.red("!"); - statusColor = chalk.red; - } - const scenarioNameTrunc = - scenarioName.length > 50 - ? scenarioName.slice(0, 47) + "..." - : scenarioName; - - const scoreStr = - score !== undefined && score !== null - ? `score=${score.toFixed(1)}` - : state; - - console.log( - chalk.dim(" ") + - statusIcon + - " " + - chalk.dim(scenarioNameTrunc.padEnd(52)) + - statusColor(scoreStr), - ); + const scenarioNameTrunc = + scenarioName.length > 50 + ? scenarioName.slice(0, 47) + "..." + : scenarioName; + + const scoreStr = + score !== undefined && score !== null + ? `score=${score.toFixed(1)}` + : state; + + console.log( + chalk.dim(" ") + + statusIcon + + " " + + chalk.dim(scenarioNameTrunc.padEnd(52)) + + statusColor(scoreStr), + ); + } } } console.log(); } -export async function statusBenchmarkJob( +export async function summaryBenchmarkJob( id: string, - options: StatusOptions = {}, + options: SummaryOptions = {}, ) { try { const job = await getBenchmarkJob(id); @@ -420,11 +425,11 @@ export async function statusBenchmarkJob( if (options.output && options.output !== "text") { output(job, { format: options.output, defaultFormat: "json" }); } else if (isComplete) { - printResultsTable(job); + printResultsTable(job, options.extended); } else { await printStatus(job); } } catch (error) { - outputError("Failed to get benchmark job status", error); + outputError("Failed to get benchmark job summary", error); } } diff --git a/src/commands/benchmark-job/watch.ts b/src/commands/benchmark-job/watch.ts index da93da0d..07485e9c 100644 --- a/src/commands/benchmark-job/watch.ts +++ b/src/commands/benchmark-job/watch.ts @@ -71,10 +71,22 @@ function formatPercent(count: number, total: number): string { return ((count / total) * 100).toFixed(1) + "%"; } +// Format duration in human-readable format +function formatDuration(ms: number): string { + const seconds = Math.floor(ms / 1000); + const minutes = Math.floor(seconds / 60); + const remainingSeconds = seconds % 60; + if (minutes > 0) { + return `${minutes}m ${remainingSeconds}s`; + } + return `${seconds}s`; +} + // In-progress scenario info for display interface InProgressScenario { name: string; state: string; + startTimeMs?: number; } // Progress stats for a benchmark run @@ -141,17 +153,20 @@ function calculateRunProgress( inProgressScenarios.push({ name: scenario.name || scenario.scenario_id || "unknown", state: scenarioState, + startTimeMs: scenario.start_time_ms, }); } else if (scenarioState === "running") { running++; inProgressScenarios.push({ name: scenario.name || scenario.scenario_id || "unknown", state: scenarioState, + startTimeMs: scenario.start_time_ms, }); } else if (scenarioState && scenarioState !== "pending") { inProgressScenarios.push({ name: scenario.name || scenario.scenario_id || "unknown", state: scenarioState, + startTimeMs: scenario.start_time_ms, }); } } @@ -311,15 +326,29 @@ function formatScenarioLines( ): string[] { const limited = scenarios.slice(0, maxScenarios); const remaining = scenarios.length - limited.length; + const now = Date.now(); const lines: string[] = []; for (const scenario of limited) { let name = scenario.name; - if (name.length > 45) { - name = name.slice(0, 42) + "..."; + if (name.length > 35) { + name = name.slice(0, 32) + "..."; } const stateStr = formatScenarioState(scenario.state, tick); - lines.push(chalk.dim(" └ ") + chalk.dim(name.padEnd(46)) + stateStr); + + // Calculate elapsed time if start time is available + let elapsedStr = ""; + if (scenario.startTimeMs) { + const elapsedMs = now - scenario.startTimeMs; + elapsedStr = chalk.dim(` (${formatDuration(elapsedMs)})`); + } + + lines.push( + chalk.dim(" └ ") + + chalk.dim(name.padEnd(36)) + + stateStr + + elapsedStr, + ); } if (remaining > 0) { @@ -457,47 +486,6 @@ function printResultsTable(job: BenchmarkJob): void { failedErrorColored + chalk.dim(totalColStr), ); - - for (const scenario of scenarioOutcomes) { - const scenarioName = - scenario.scenario_name || scenario.scenario_definition_id || "unknown"; - const state = scenario.state || "unknown"; - const score = scenario.score; - - let statusIcon: string; - let statusColor: typeof chalk.green; - - if (state.toUpperCase() === "COMPLETED") { - if (score === 1.0) { - statusIcon = chalk.green("\u2713"); - statusColor = chalk.green; - } else { - statusIcon = chalk.yellow("\u2717"); - statusColor = chalk.yellow; - } - } else { - statusIcon = chalk.red("!"); - statusColor = chalk.red; - } - - const scenarioNameTrunc = - scenarioName.length > 50 - ? scenarioName.slice(0, 47) + "..." - : scenarioName; - - const scoreStr = - score !== undefined && score !== null - ? `score=${score.toFixed(1)}` - : state; - - console.log( - chalk.dim(" ") + - statusIcon + - " " + - chalk.dim(scenarioNameTrunc.padEnd(52)) + - statusColor(scoreStr), - ); - } } console.log(); @@ -604,8 +592,23 @@ export async function watchBenchmarkJob(id: string) { cleanup(); } - // Show final results + // Calculate total elapsed time + const totalElapsed = Date.now() - startTime; + const totalElapsedStr = formatDuration(totalElapsed); + + // Show completion message + console.log(chalk.green.bold("Benchmark job completed!")); + console.log(chalk.dim(`Total time: ${totalElapsedStr}`)); + + // Show final results (summary only) printResultsTable(job); + + // Show hint for full results + console.log( + chalk.dim( + `To see full results, run: rli benchmark-job summary -e ${job.id}`, + ), + ); } catch (error) { outputError("Failed to watch benchmark job", error); } diff --git a/src/utils/commands.ts b/src/utils/commands.ts index a935bc52..46f96ca0 100644 --- a/src/utils/commands.ts +++ b/src/utils/commands.ts @@ -1054,16 +1054,17 @@ export function createProgram(): Command { }); benchmarkJob - .command("status ") - .description("Get benchmark job status and results") + .command("summary ") + .description("Get benchmark job summary and results") + .option("-e, --extended", "Show individual scenario results") .option( "-o, --output [format]", "Output format: text|json|yaml (default: text)", ) .action(async (id, options) => { - const { statusBenchmarkJob } = - await import("../commands/benchmark-job/status.js"); - await statusBenchmarkJob(id, options); + const { summaryBenchmarkJob } = + await import("../commands/benchmark-job/summary.js"); + await summaryBenchmarkJob(id, options); }); benchmarkJob From 1d5aa4ad9535da62c182ea57584037bec361dfd7 Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 6 Mar 2026 11:37:29 -0800 Subject: [PATCH 2/4] cp --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 27d152ac..0ef45ee4 100644 --- a/README.md +++ b/README.md @@ -185,7 +185,7 @@ rli mcp install # Install Runloop MCP server configurat ```bash rli benchmark-job run # Run a benchmark job with one or more ... -rli benchmark-job status # Get benchmark job status and results +rli benchmark-job summary # Get benchmark job summary and results rli benchmark-job watch # Watch benchmark job progress in real-... ``` From 3d190510712e389c9a92468582daac56e4fa8f7f Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 6 Mar 2026 11:44:54 -0800 Subject: [PATCH 3/4] cp --- src/commands/benchmark-job/watch.ts | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/commands/benchmark-job/watch.ts b/src/commands/benchmark-job/watch.ts index 07485e9c..bdcefb0d 100644 --- a/src/commands/benchmark-job/watch.ts +++ b/src/commands/benchmark-job/watch.ts @@ -56,11 +56,13 @@ function exitFullScreen(): void { process.stdout.write(ANSI.exitAltScreen); } -// Render content at top of screen +// Render content at top of screen, truncating to fit terminal height function renderScreen(lines: string[]): void { process.stdout.write(ANSI.moveTo(1, 1)); process.stdout.write(ANSI.clearScreen); - for (const line of lines) { + const maxLines = (process.stdout.rows || 24) - 1; // Leave 1 line buffer + const truncatedLines = lines.slice(0, maxLines); + for (const line of truncatedLines) { console.log(line); } } @@ -535,6 +537,13 @@ export async function watchBenchmarkJob(id: string) { const SPINNER_INTERVAL_MS = 100; const UPDATES_PER_POLL = Math.floor(POLL_INTERVAL_MS / SPINNER_INTERVAL_MS); + // Handle terminal resize - clear screen to prevent artifacts + let needsFullRedraw = false; + const handleResize = () => { + needsFullRedraw = true; + }; + process.stdout.on("resize", handleResize); + try { let tick = 0; let progressList = await fetchAllRunsProgress(job); @@ -560,6 +569,7 @@ export async function watchBenchmarkJob(id: string) { chalk.bold.cyan(`Benchmark Job: ${jobName}`) + chalk.dim(` (${elapsedStr})`), ); + screenLines.push(chalk.dim(`ID: ${job.id}`)); screenLines.push(chalk.dim(`State: ${job.state}`)); screenLines.push(""); @@ -573,6 +583,12 @@ export async function watchBenchmarkJob(id: string) { screenLines.push(""); screenLines.push(chalk.dim("Press Ctrl+C to exit")); + // Force full clear on resize to prevent artifacts + if (needsFullRedraw) { + process.stdout.write(ANSI.clearScreen); + needsFullRedraw = false; + } + // Render the screen renderScreen(screenLines); @@ -589,6 +605,7 @@ export async function watchBenchmarkJob(id: string) { await sleep(SPINNER_INTERVAL_MS); } } finally { + process.stdout.off("resize", handleResize); cleanup(); } From ec9e3bd204dfcf2e5d348b42d893fbc55d662738 Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 6 Mar 2026 11:58:09 -0800 Subject: [PATCH 4/4] feedback --- src/commands/benchmark-job/watch.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/commands/benchmark-job/watch.ts b/src/commands/benchmark-job/watch.ts index bdcefb0d..8be161a3 100644 --- a/src/commands/benchmark-job/watch.ts +++ b/src/commands/benchmark-job/watch.ts @@ -77,7 +77,12 @@ function formatPercent(count: number, total: number): string { function formatDuration(ms: number): string { const seconds = Math.floor(ms / 1000); const minutes = Math.floor(seconds / 60); + const hours = Math.floor(minutes / 60); + const remainingMinutes = minutes % 60; const remainingSeconds = seconds % 60; + if (hours > 0) { + return `${hours}h ${remainingMinutes}m ${remainingSeconds}s`; + } if (minutes > 0) { return `${minutes}m ${remainingSeconds}s`; }