From 8ab83f397707c916c201f81c66fd009f1364bbb9 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 13:33:50 -0700 Subject: [PATCH 01/15] Add diagnostics views for processes and traces - Add server-side process and trace diagnostics aggregation - Expose diagnostics RPCs and settings UI - Reduce tracing noise for diagnostics RPCs --- .../diagnostics/ProcessDiagnostics.test.ts | 105 ++ .../src/diagnostics/ProcessDiagnostics.ts | 374 +++++++ .../src/diagnostics/TraceDiagnostics.test.ts | 158 +++ .../src/diagnostics/TraceDiagnostics.ts | 387 +++++++ .../src/observability/RpcInstrumentation.ts | 53 +- apps/server/src/vcs/GitVcsDriverCore.ts | 24 +- apps/server/src/ws.ts | 27 +- .../settings/DiagnosticsSettings.tsx | 963 ++++++++++++++++++ .../settings/SettingsPanels.logic.test.ts | 43 +- .../settings/SettingsPanels.logic.ts | 48 + .../components/settings/SettingsPanels.tsx | 91 +- apps/web/src/lib/processDiagnosticsState.ts | 63 ++ apps/web/src/lib/traceDiagnosticsState.ts | 63 ++ apps/web/src/localApi.ts | 12 + apps/web/src/routeTree.gen.ts | 21 + apps/web/src/routes/settings.diagnostics.tsx | 7 + apps/web/src/rpc/wsRpcClient.ts | 17 + packages/contracts/src/ipc.ts | 7 + packages/contracts/src/rpc.ts | 25 + packages/contracts/src/server.ts | 129 +++ 20 files changed, 2515 insertions(+), 102 deletions(-) create mode 100644 apps/server/src/diagnostics/ProcessDiagnostics.test.ts create mode 100644 apps/server/src/diagnostics/ProcessDiagnostics.ts create mode 100644 apps/server/src/diagnostics/TraceDiagnostics.test.ts create mode 100644 apps/server/src/diagnostics/TraceDiagnostics.ts create mode 100644 apps/web/src/components/settings/DiagnosticsSettings.tsx create mode 100644 apps/web/src/lib/processDiagnosticsState.ts create mode 100644 apps/web/src/lib/traceDiagnosticsState.ts create mode 100644 apps/web/src/routes/settings.diagnostics.tsx diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts 
b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts new file mode 100644 index 00000000000..92829b343b6 --- /dev/null +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -0,0 +1,105 @@ +import { describe, expect, it } from "vitest"; + +import { aggregateProcessDiagnostics, parsePosixProcessRows } from "./ProcessDiagnostics.ts"; + +describe("ProcessDiagnostics", () => { + it("parses POSIX ps rows with full commands", () => { + const rows = parsePosixProcessRows( + [ + " 10 1 10 Ss 0.0 1024 01:02.03 /usr/bin/node server.js", + " 11 10 10 S+ 12.5 20480 00:04 codex app-server --config /tmp/one two", + ].join("\n"), + ); + + expect(rows).toEqual([ + { + pid: 10, + ppid: 1, + pgid: 10, + status: "Ss", + cpuPercent: 0, + rssBytes: 1024 * 1024, + elapsed: "01:02.03", + command: "/usr/bin/node server.js", + }, + { + pid: 11, + ppid: 10, + pgid: 10, + status: "S+", + cpuPercent: 12.5, + rssBytes: 20480 * 1024, + elapsed: "00:04", + command: "codex app-server --config /tmp/one two", + }, + ]); + }); + + it("aggregates only descendants of the server process", () => { + const diagnostics = aggregateProcessDiagnostics({ + serverPid: 100, + readAt: new Date("2026-05-05T10:00:00.000Z"), + rows: [ + { + pid: 100, + ppid: 1, + pgid: 100, + status: "S", + cpuPercent: 0, + rssBytes: 1_000, + elapsed: "01:00", + command: "t3 server", + }, + { + pid: 101, + ppid: 100, + pgid: 100, + status: "S", + cpuPercent: 1.5, + rssBytes: 2_000, + elapsed: "00:20", + command: "codex app-server", + }, + { + pid: 102, + ppid: 101, + pgid: 100, + status: "R", + cpuPercent: 3.25, + rssBytes: 4_000, + elapsed: "00:05", + command: "git status", + }, + { + pid: 200, + ppid: 1, + pgid: 200, + status: "S", + cpuPercent: 99, + rssBytes: 8_000, + elapsed: "00:01", + command: "unrelated", + }, + { + pid: 201, + ppid: 100, + pgid: 100, + status: "R", + cpuPercent: 9, + rssBytes: 9_000, + elapsed: "00:00", + command: "ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=", + }, + ], + }); + + 
expect(diagnostics.serverPid).toBe(100); + expect(diagnostics.readAt).toBe("2026-05-05T10:00:00.000Z"); + expect(diagnostics.processCount).toBe(2); + expect(diagnostics.totalRssBytes).toBe(6_000); + expect(diagnostics.totalCpuPercent).toBe(4.75); + expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); + expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); + expect(diagnostics.processes[0]?.childPids).toEqual([102]); + }); +}); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts new file mode 100644 index 00000000000..cd00982d14d --- /dev/null +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -0,0 +1,374 @@ +import type { + ServerProcessDiagnosticsEntry, + ServerProcessDiagnosticsResult, + ServerProcessSignal, + ServerSignalProcessResult, +} from "@t3tools/contracts"; +import { Effect, Schema } from "effect"; + +import { runProcess } from "../processRunner.ts"; + +interface ProcessRow { + readonly pid: number; + readonly ppid: number; + readonly pgid: number | null; + readonly status: string; + readonly cpuPercent: number; + readonly rssBytes: number; + readonly elapsed: string; + readonly command: string; +} + +const PROCESS_QUERY_TIMEOUT_MS = 1_000; +const POSIX_PROCESS_QUERY_COMMAND = "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="; + +class ProcessDiagnosticsError extends Schema.TaggedErrorClass()( + "ProcessDiagnosticsError", + { + message: Schema.String, + cause: Schema.optional(Schema.Defect), + }, +) {} + +function toProcessDiagnosticsError(message: string, cause?: unknown): ProcessDiagnosticsError { + return new ProcessDiagnosticsError({ + message, + ...(cause === undefined ? {} : { cause }), + }); +} + +function parsePositiveInt(value: string): number | null { + const parsed = Number.parseInt(value, 10); + return Number.isInteger(parsed) && parsed > 0 ? 
parsed : null; +} + +function parseNonNegativeInt(value: string): number | null { + const parsed = Number.parseInt(value, 10); + return Number.isInteger(parsed) && parsed >= 0 ? parsed : null; +} + +function parseNumber(value: string): number | null { + const parsed = Number.parseFloat(value); + return Number.isFinite(parsed) ? parsed : null; +} + +export function parsePosixProcessRows(output: string): ReadonlyArray { + const rows: ProcessRow[] = []; + const rowPattern = + /^\s*(\d+)\s+(\d+)\s+(-?\d+)\s+(\S+)\s+([+-]?(?:\d+\.?\d*|\.\d+))\s+(\d+)\s+(\S+)\s+(.+)$/; + + for (const line of output.split(/\r?\n/)) { + if (line.trim().length === 0) continue; + + const match = rowPattern.exec(line); + if (!match) continue; + + const pidText = match[1]; + const ppidText = match[2]; + const pgidText = match[3]; + const status = match[4]; + const cpuText = match[5]; + const rssText = match[6]; + const elapsed = match[7]; + const command = match[8]; + if ( + pidText === undefined || + ppidText === undefined || + pgidText === undefined || + status === undefined || + cpuText === undefined || + rssText === undefined || + elapsed === undefined || + command === undefined + ) { + continue; + } + + const pid = parsePositiveInt(pidText); + const ppid = parseNonNegativeInt(ppidText); + const pgid = Number.parseInt(pgidText, 10); + const cpuPercent = parseNumber(cpuText); + const rssKiB = parseNonNegativeInt(rssText); + if ( + pid === null || + ppid === null || + !Number.isInteger(pgid) || + cpuPercent === null || + rssKiB === null || + !status || + !elapsed || + !command + ) { + continue; + } + + rows.push({ + pid, + ppid, + pgid, + status, + cpuPercent, + rssBytes: rssKiB * 1024, + elapsed, + command, + }); + } + + return rows; +} + +function normalizeWindowsProcessRow(value: unknown): ProcessRow | null { + if (typeof value !== "object" || value === null) return null; + const record = value as Record; + const pid = typeof record.ProcessId === "number" ? 
record.ProcessId : null; + const ppid = typeof record.ParentProcessId === "number" ? record.ParentProcessId : null; + const commandLine = + typeof record.CommandLine === "string" && record.CommandLine.trim().length > 0 + ? record.CommandLine + : typeof record.Name === "string" + ? record.Name + : null; + const workingSet = + typeof record.WorkingSetSize === "number" && Number.isFinite(record.WorkingSetSize) + ? Math.max(0, Math.round(record.WorkingSetSize)) + : 0; + const cpuPercent = + typeof record.PercentProcessorTime === "number" && Number.isFinite(record.PercentProcessorTime) + ? Math.max(0, record.PercentProcessorTime) + : 0; + + if (!pid || pid <= 0 || ppid === null || ppid < 0 || !commandLine) return null; + return { + pid, + ppid, + pgid: null, + status: typeof record.Status === "string" && record.Status.length > 0 ? record.Status : "Live", + cpuPercent, + rssBytes: workingSet, + elapsed: "", + command: commandLine, + }; +} + +function parseWindowsProcessRows(output: string): ReadonlyArray { + if (output.trim().length === 0) return []; + try { + const parsed = JSON.parse(output) as unknown; + const records = Array.isArray(parsed) ? parsed : [parsed]; + return records.flatMap((record) => { + const row = normalizeWindowsProcessRow(record); + return row ? [row] : []; + }); + } catch { + return []; + } +} + +function buildDescendantEntries( + rows: ReadonlyArray, + serverPid: number, +): ReadonlyArray { + const childrenByParent = new Map(); + for (const row of rows) { + const children = childrenByParent.get(row.ppid) ?? []; + children.push(row); + childrenByParent.set(row.ppid, children); + } + + const entries: ServerProcessDiagnosticsEntry[] = []; + const visited = new Set(); + const stack = [...(childrenByParent.get(serverPid) ?? 
[])] + .toSorted((left, right) => left.pid - right.pid) + .map((row) => ({ row, depth: 0 })); + + while (stack.length > 0) { + const item = stack.shift(); + if (!item || visited.has(item.row.pid)) continue; + visited.add(item.row.pid); + + const children = [...(childrenByParent.get(item.row.pid) ?? [])].toSorted( + (left, right) => left.pid - right.pid, + ); + entries.push({ + pid: item.row.pid, + ppid: item.row.ppid, + pgid: item.row.pgid, + status: item.row.status, + cpuPercent: item.row.cpuPercent, + rssBytes: item.row.rssBytes, + elapsed: item.row.elapsed || "n/a", + command: item.row.command, + depth: item.depth, + childPids: children.map((child) => child.pid), + }); + + stack.unshift(...children.map((row) => ({ row, depth: item.depth + 1 }))); // children are pre-sorted; unshift keeps ascending pid order + } + + return entries; +} + +function isDiagnosticsQueryProcess(row: ProcessRow, serverPid: number): boolean { + if (row.ppid !== serverPid) return false; + + const command = row.command.trim(); + return ( + /(?:^|[/\\])ps\s+-axo\s+pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=/.test(command) || + (/\bpowershell(?:\.exe)?\b/i.test(command) && + /\bGet-CimInstance\s+Win32_Process\b/i.test(command)) + ); +} + +function makeResult(input: { + readonly serverPid: number; + readonly rows: ReadonlyArray; + readonly readAt?: Date; + readonly error?: string; +}): ServerProcessDiagnosticsResult { + const readAt = input.readAt ?? new Date(); + const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); + const processes = buildDescendantEntries(rows, input.serverPid); + const totalRssBytes = processes.reduce((total, process) => total + process.rssBytes, 0); + const totalCpuPercent = processes.reduce((total, process) => total + process.cpuPercent, 0); + + return { + serverPid: input.serverPid, + readAt: readAt.toISOString(), + processCount: processes.length, + totalRssBytes, + totalCpuPercent, + processes, + ...(input.error ? 
{ error: { message: input.error } } : {}), + }; +} + +function readPosixProcessRows(): Effect.Effect, ProcessDiagnosticsError> { + return Effect.tryPromise({ + try: async () => { + const result = await runProcess("ps", ["-axo", POSIX_PROCESS_QUERY_COMMAND], { + timeoutMs: PROCESS_QUERY_TIMEOUT_MS, + allowNonZeroExit: true, + maxBufferBytes: 2 * 1024 * 1024, + outputMode: "truncate", + }); + if (result.code !== 0) { + throw toProcessDiagnosticsError(result.stderr.trim() || "ps failed."); + } + return parsePosixProcessRows(result.stdout); + }, + catch: (cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? cause + : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), + }); +} + +function readWindowsProcessRows(): Effect.Effect< + ReadonlyArray, + ProcessDiagnosticsError +> { + const command = [ + "$processes = Get-CimInstance Win32_Process | ForEach-Object {", + '$perf = Get-CimInstance Win32_PerfFormattedData_PerfProc_Process -Filter "IDProcess = $($_.ProcessId)" -ErrorAction SilentlyContinue;', + "[pscustomobject]@{ ProcessId = $_.ProcessId; ParentProcessId = $_.ParentProcessId; Name = $_.Name; CommandLine = $_.CommandLine; Status = $_.Status; WorkingSetSize = $_.WorkingSetSize; PercentProcessorTime = if ($perf) { $perf.PercentProcessorTime } else { 0 } }", + "};", + "$processes | ConvertTo-Json -Compress -Depth 3", + ].join(" "); + + return Effect.tryPromise({ + try: async () => { + const result = await runProcess( + "powershell.exe", + ["-NoProfile", "-NonInteractive", "-Command", command], + { + timeoutMs: PROCESS_QUERY_TIMEOUT_MS, + allowNonZeroExit: true, + maxBufferBytes: 2 * 1024 * 1024, + outputMode: "truncate", + }, + ); + if (result.code !== 0) { + throw toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."); + } + return parseWindowsProcessRows(result.stdout); + }, + catch: (cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? 
cause + : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), + }); +} + +function readProcessRows(): Effect.Effect, ProcessDiagnosticsError> { + return process.platform === "win32" ? readWindowsProcessRows() : readPosixProcessRows(); +} + +export function aggregateProcessDiagnostics(input: { + readonly serverPid: number; + readonly rows: ReadonlyArray; + readonly readAt?: Date; +}): ServerProcessDiagnosticsResult { + return makeResult(input); +} + +export function readProcessDiagnostics(): Effect.Effect { + const serverPid = process.pid; + return readProcessRows().pipe( + Effect.map((rows) => makeResult({ serverPid, rows })), + Effect.catch((error: ProcessDiagnosticsError) => + Effect.succeed(makeResult({ serverPid, rows: [], error: error.message })), + ), + ); +} + +function assertDescendantPid(pid: number): Effect.Effect { + if (pid === process.pid) { + return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); + } + + return readProcessRows().pipe( + Effect.flatMap((rows) => { + const descendant = buildDescendantEntries(rows, process.pid).some( + (entry) => entry.pid === pid, + ); + return descendant + ? 
Effect.void + : Effect.fail( + toProcessDiagnosticsError(`Process ${pid} is not a live descendant of the T3 server.`), + ); + }), + ); +} + +export function signalProcess(input: { + readonly pid: number; + readonly signal: ServerProcessSignal; +}): Effect.Effect { + return assertDescendantPid(input.pid).pipe( + Effect.flatMap(() => + Effect.try({ + try: () => { + process.kill(input.pid, input.signal); + return { + pid: input.pid, + signal: input.signal, + signaled: true, + }; + }, + catch: (cause) => + toProcessDiagnosticsError( + `Failed to signal process ${input.pid} with ${input.signal}.`, + cause, + ), + }), + ), + Effect.catch((error: ProcessDiagnosticsError) => + Effect.succeed({ + pid: input.pid, + signal: input.signal, + signaled: false, + message: error.message, + }), + ), + ); +} diff --git a/apps/server/src/diagnostics/TraceDiagnostics.test.ts b/apps/server/src/diagnostics/TraceDiagnostics.test.ts new file mode 100644 index 00000000000..2a5f6ed1434 --- /dev/null +++ b/apps/server/src/diagnostics/TraceDiagnostics.test.ts @@ -0,0 +1,158 @@ +import { assert, describe, it } from "@effect/vitest"; + +import { aggregateTraceDiagnostics } from "./TraceDiagnostics.ts"; + +function ns(ms: number): string { + return String(BigInt(ms) * 1_000_000n); +} + +function record(input: { + readonly name: string; + readonly traceId: string; + readonly spanId: string; + readonly startMs: number; + readonly durationMs: number; + readonly exit?: { readonly _tag: "Success" | "Failure" | "Interrupted"; readonly cause?: string }; + readonly events?: ReadonlyArray; +}) { + return JSON.stringify({ + type: "effect-span", + name: input.name, + traceId: input.traceId, + spanId: input.spanId, + sampled: true, + kind: "internal", + startTimeUnixNano: ns(input.startMs), + endTimeUnixNano: ns(input.startMs + input.durationMs), + durationMs: input.durationMs, + attributes: {}, + events: input.events ?? [], + links: [], + exit: input.exit ?? 
{ _tag: "Success" }, + }); +} + +describe("TraceDiagnostics", () => { + it("aggregates failures, slow spans, log levels, and parse errors", () => { + const diagnostics = aggregateTraceDiagnostics({ + traceFilePath: "/tmp/server.trace.ndjson", + readAt: new Date("2026-05-05T10:00:00.000Z"), + slowSpanThresholdMs: 1_000, + files: [ + { + path: "/tmp/server.trace.ndjson.1", + text: [ + record({ + name: "server.getConfig", + traceId: "trace-a", + spanId: "span-a", + startMs: 1_000, + durationMs: 50, + }), + "not-json", + ].join("\n"), + }, + { + path: "/tmp/server.trace.ndjson", + text: [ + record({ + name: "orchestration.dispatch", + traceId: "trace-b", + spanId: "span-b", + startMs: 2_000, + durationMs: 1_500, + exit: { _tag: "Failure", cause: "Provider crashed" }, + events: [ + { + name: "provider failed", + timeUnixNano: ns(3_400), + attributes: { "effect.logLevel": "Error" }, + }, + ], + }), + record({ + name: "orchestration.dispatch", + traceId: "trace-c", + spanId: "span-c", + startMs: 4_000, + durationMs: 250, + exit: { _tag: "Failure", cause: "Provider crashed" }, + }), + record({ + name: "git.status", + traceId: "trace-d", + spanId: "span-d", + startMs: 5_000, + durationMs: 25, + exit: { _tag: "Interrupted", cause: "Interrupted" }, + events: [ + { + name: "status delayed", + timeUnixNano: ns(5_010), + attributes: { "effect.logLevel": "Warning" }, + }, + ], + }), + ].join("\n"), + }, + ], + }); + + assert.equal(diagnostics.recordCount, 4); + assert.equal(diagnostics.parseErrorCount, 1); + assert.equal(diagnostics.failureCount, 2); + assert.equal(diagnostics.interruptionCount, 1); + assert.equal(diagnostics.slowSpanCount, 1); + assert.equal(diagnostics.logLevelCounts.Error, 1); + assert.equal(diagnostics.logLevelCounts.Warning, 1); + assert.equal(diagnostics.commonFailures[0]?.name, "orchestration.dispatch"); + assert.equal(diagnostics.commonFailures[0]?.count, 2); + assert.equal(diagnostics.latestFailures[0]?.traceId, "trace-c"); + 
assert.equal(diagnostics.slowestSpans[0]?.traceId, "trace-b"); + assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, "status delayed"); + assert.equal(diagnostics.topSpansByCount[0]?.name, "orchestration.dispatch"); + }); + + it("returns a not-found diagnostic when no files are available", () => { + const diagnostics = aggregateTraceDiagnostics({ + traceFilePath: "/tmp/missing.trace.ndjson", + readAt: new Date("2026-05-05T10:00:00.000Z"), + files: [], + }); + + assert.equal(diagnostics.recordCount, 0); + assert.equal(diagnostics.error?.kind, "trace-file-not-found"); + }); + + it("preserves full failure causes and log messages", () => { + const longCause = `VcsProcessSpawnError: ${"missing executable ".repeat(80)}`.trim(); + const longMessage = `provider warning: ${"retrying command ".repeat(80)}`.trim(); + const diagnostics = aggregateTraceDiagnostics({ + traceFilePath: "/tmp/server.trace.ndjson", + files: [ + { + path: "/tmp/server.trace.ndjson", + text: record({ + name: "VcsProcess.run", + traceId: "trace-long", + spanId: "span-long", + startMs: 1_000, + durationMs: 25, + exit: { _tag: "Failure", cause: longCause }, + events: [ + { + name: longMessage, + timeUnixNano: ns(1_010), + attributes: { "effect.logLevel": "Warning" }, + }, + ], + }), + }, + ], + }); + + assert.equal(diagnostics.latestFailures[0]?.cause, longCause); + assert.equal(diagnostics.commonFailures[0]?.cause, longCause); + assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, longMessage); + }); +}); diff --git a/apps/server/src/diagnostics/TraceDiagnostics.ts b/apps/server/src/diagnostics/TraceDiagnostics.ts new file mode 100644 index 00000000000..e352805b61a --- /dev/null +++ b/apps/server/src/diagnostics/TraceDiagnostics.ts @@ -0,0 +1,387 @@ +import fs from "node:fs/promises"; + +import type { + ServerTraceDiagnosticsFailureSummary, + ServerTraceDiagnosticsLogEvent, + ServerTraceDiagnosticsRecentFailure, + ServerTraceDiagnosticsResult, + 
ServerTraceDiagnosticsSpanOccurrence, + ServerTraceDiagnosticsSpanSummary, +} from "@t3tools/contracts"; +import { Effect } from "effect"; + +interface TraceRecordLike { + readonly name?: unknown; + readonly traceId?: unknown; + readonly spanId?: unknown; + readonly startTimeUnixNano?: unknown; + readonly endTimeUnixNano?: unknown; + readonly durationMs?: unknown; + readonly exit?: unknown; + readonly events?: unknown; +} + +interface TraceEventLike { + readonly name?: unknown; + readonly timeUnixNano?: unknown; + readonly attributes?: unknown; +} + +interface TraceDiagnosticsOptions { + readonly traceFilePath: string; + readonly maxFiles: number; + readonly slowSpanThresholdMs?: number; + readonly readAt?: Date; +} + +interface TraceDiagnosticsInput { + readonly traceFilePath: string; + readonly files: ReadonlyArray<{ readonly path: string; readonly text: string }>; + readonly slowSpanThresholdMs?: number; + readonly readAt?: Date; +} + +const DEFAULT_SLOW_SPAN_THRESHOLD_MS = 1_000; +const TOP_LIMIT = 10; +const RECENT_LIMIT = 20; +function toRotatedTracePaths(traceFilePath: string, maxFiles: number): ReadonlyArray { + const backupCount = Math.max(0, Math.floor(maxFiles)); + const backups = Array.from( + { length: backupCount }, + (_, index) => `${traceFilePath}.${backupCount - index}`, + ); + return [...backups, traceFilePath]; +} + +function isRecordObject(value: unknown): value is TraceRecordLike { + return typeof value === "object" && value !== null; +} + +function toStringValue(value: unknown): string | null { + return typeof value === "string" && value.trim().length > 0 ? value : null; +} + +function toNumberValue(value: unknown): number | null { + return typeof value === "number" && Number.isFinite(value) ? 
value : null; +} + +function unixNanoToIso(value: unknown): string | null { + const text = toStringValue(value); + if (!text) return null; + + try { + const millis = Number(BigInt(text) / 1_000_000n); + if (!Number.isFinite(millis)) return null; + return new Date(millis).toISOString(); + } catch { + return null; + } +} + +function readExitTag(exit: unknown): string | null { + if (!isRecordObject(exit) || !("_tag" in exit)) return null; + return toStringValue(exit._tag); +} + +function readExitCause(exit: unknown): string { + if (!isRecordObject(exit) || !("cause" in exit)) return "Failure"; + return toStringValue(exit.cause)?.trim() ?? "Failure"; +} + +function isTraceEvent(value: unknown): value is TraceEventLike { + return typeof value === "object" && value !== null; +} + +function readEventAttributes(event: TraceEventLike): Readonly> { + return typeof event.attributes === "object" && event.attributes !== null + ? (event.attributes as Readonly>) + : {}; +} + +function makeEmptyDiagnostics(input: { + readonly traceFilePath: string; + readonly scannedFilePaths: ReadonlyArray; + readonly readAt: Date; + readonly slowSpanThresholdMs: number; + readonly error?: ServerTraceDiagnosticsResult["error"]; +}): ServerTraceDiagnosticsResult { + return { + traceFilePath: input.traceFilePath, + scannedFilePaths: [...input.scannedFilePaths], + readAt: input.readAt.toISOString(), + recordCount: 0, + parseErrorCount: 0, + firstSpanAt: null, + lastSpanAt: null, + failureCount: 0, + interruptionCount: 0, + slowSpanThresholdMs: input.slowSpanThresholdMs, + slowSpanCount: 0, + logLevelCounts: {}, + topSpansByCount: [], + slowestSpans: [], + commonFailures: [], + latestFailures: [], + latestWarningAndErrorLogs: [], + ...(input.error ? { error: input.error } : {}), + }; +} + +export function aggregateTraceDiagnostics( + input: TraceDiagnosticsInput, +): ServerTraceDiagnosticsResult { + const readAt = input.readAt ?? new Date(); + const slowSpanThresholdMs = input.slowSpanThresholdMs ?? 
DEFAULT_SLOW_SPAN_THRESHOLD_MS; + const scannedFilePaths = input.files.map((file) => file.path); + if (input.files.length === 0) { + return makeEmptyDiagnostics({ + traceFilePath: input.traceFilePath, + scannedFilePaths, + readAt, + slowSpanThresholdMs, + error: { + kind: "trace-file-not-found", + message: "No local trace files were found.", + }, + }); + } + + let parseErrorCount = 0; + let recordCount = 0; + let failureCount = 0; + let interruptionCount = 0; + let slowSpanCount = 0; + let firstSpanAt: string | null = null; + let lastSpanAt: string | null = null; + + const spansByName = new Map< + string, + { count: number; failureCount: number; totalDurationMs: number; maxDurationMs: number } + >(); + const failuresByKey = new Map(); + const latestFailures: ServerTraceDiagnosticsRecentFailure[] = []; + const slowestSpans: ServerTraceDiagnosticsSpanOccurrence[] = []; + const latestWarningAndErrorLogs: ServerTraceDiagnosticsLogEvent[] = []; + const logLevelCounts: Record = {}; + + for (const file of input.files) { + const lines = file.text.split(/\r?\n/); + for (const line of lines) { + if (line.trim().length === 0) continue; + + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch { + parseErrorCount += 1; + continue; + } + + if (!isRecordObject(parsed)) { + parseErrorCount += 1; + continue; + } + + const name = toStringValue(parsed.name); + const traceId = toStringValue(parsed.traceId); + const spanId = toStringValue(parsed.spanId); + const durationMs = toNumberValue(parsed.durationMs); + const endedAt = unixNanoToIso(parsed.endTimeUnixNano); + const startedAt = unixNanoToIso(parsed.startTimeUnixNano); + + if (!name || !traceId || !spanId || durationMs === null || !endedAt) { + parseErrorCount += 1; + continue; + } + + recordCount += 1; + firstSpanAt = + startedAt && (firstSpanAt === null || startedAt.localeCompare(firstSpanAt) < 0) + ? startedAt + : firstSpanAt; + lastSpanAt = endedAt.localeCompare(lastSpanAt ?? "") > 0 ? 
endedAt : lastSpanAt; + + const exitTag = readExitTag(parsed.exit); + const isFailure = exitTag === "Failure"; + const isInterrupted = exitTag === "Interrupted"; + if (isFailure) failureCount += 1; + if (isInterrupted) interruptionCount += 1; + + const spanSummary = spansByName.get(name) ?? { + count: 0, + failureCount: 0, + totalDurationMs: 0, + maxDurationMs: 0, + }; + spanSummary.count += 1; + spanSummary.totalDurationMs += durationMs; + spanSummary.maxDurationMs = Math.max(spanSummary.maxDurationMs, durationMs); + if (isFailure) spanSummary.failureCount += 1; + spansByName.set(name, spanSummary); + + const spanItem = { name, durationMs, endedAt, traceId, spanId }; + if (durationMs >= slowSpanThresholdMs) { + slowSpanCount += 1; + } + slowestSpans.push(spanItem); + + if (isFailure) { + const cause = readExitCause(parsed.exit); + latestFailures.push({ ...spanItem, cause }); + + const failureKey = `${name}\0${cause}`; + const existing = failuresByKey.get(failureKey); + failuresByKey.set(failureKey, { + name, + cause, + count: (existing?.count ?? 0) + 1, + lastSeenAt: + !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 + ? endedAt + : existing.lastSeenAt, + traceId: + !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 + ? traceId + : existing.traceId, + spanId: + !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 ? spanId : existing.spanId, + }); + } + + if (Array.isArray(parsed.events)) { + for (const rawEvent of parsed.events) { + if (!isTraceEvent(rawEvent)) continue; + const attributes = readEventAttributes(rawEvent); + const level = toStringValue(attributes["effect.logLevel"]); + if (!level) continue; + + logLevelCounts[level] = (logLevelCounts[level] ?? 0) + 1; + const normalizedLevel = level.toLowerCase(); + if ( + normalizedLevel !== "warning" && + normalizedLevel !== "warn" && + normalizedLevel !== "error" && + normalizedLevel !== "fatal" + ) { + continue; + } + + const seenAt = unixNanoToIso(rawEvent.timeUnixNano) ?? 
endedAt; + const message = toStringValue(rawEvent.name)?.trim() ?? "Log event"; + latestWarningAndErrorLogs.push({ + spanName: name, + level, + message, + seenAt, + traceId, + spanId, + }); + } + } + } + } + + const topSpansByCount: ServerTraceDiagnosticsSpanSummary[] = [...spansByName.entries()] + .map(([name, span]) => ({ + name, + count: span.count, + failureCount: span.failureCount, + totalDurationMs: span.totalDurationMs, + averageDurationMs: span.count > 0 ? span.totalDurationMs / span.count : 0, + maxDurationMs: span.maxDurationMs, + })) + .toSorted((left, right) => right.count - left.count || right.maxDurationMs - left.maxDurationMs) + .slice(0, TOP_LIMIT); + + return { + traceFilePath: input.traceFilePath, + scannedFilePaths, + readAt: readAt.toISOString(), + recordCount, + parseErrorCount, + firstSpanAt, + lastSpanAt, + failureCount, + interruptionCount, + slowSpanThresholdMs, + slowSpanCount, + logLevelCounts, + topSpansByCount, + slowestSpans: slowestSpans + .toSorted((left, right) => right.durationMs - left.durationMs) + .slice(0, TOP_LIMIT), + commonFailures: [...failuresByKey.values()] + .toSorted( + (left, right) => + right.count - left.count || right.lastSeenAt.localeCompare(left.lastSeenAt), + ) + .slice(0, TOP_LIMIT), + latestFailures: latestFailures + .toSorted((left, right) => right.endedAt.localeCompare(left.endedAt)) + .slice(0, RECENT_LIMIT), + latestWarningAndErrorLogs: latestWarningAndErrorLogs + .toSorted((left, right) => right.seenAt.localeCompare(left.seenAt)) + .slice(0, RECENT_LIMIT), + }; +} + +export function readTraceDiagnostics( + options: TraceDiagnosticsOptions, +): Effect.Effect { + const readAt = options.readAt ?? new Date(); + const slowSpanThresholdMs = options.slowSpanThresholdMs ?? 
DEFAULT_SLOW_SPAN_THRESHOLD_MS; + const paths = toRotatedTracePaths(options.traceFilePath, options.maxFiles); + + return Effect.promise(async () => { + const files: Array<{ path: string; text: string }> = []; + let readFailure: string | null = null; + + for (const tracePath of paths) { + try { + const text = await fs.readFile(tracePath, "utf8"); + files.push({ path: tracePath, text }); + } catch (error) { + const code = + typeof error === "object" && error !== null && "code" in error + ? String(error.code) + : null; + if (code !== "ENOENT") { + readFailure = error instanceof Error ? error.message : String(error); + } + } + } + + if (readFailure) { + return makeEmptyDiagnostics({ + traceFilePath: options.traceFilePath, + scannedFilePaths: paths, + readAt, + slowSpanThresholdMs, + error: { + kind: "trace-file-read-failed", + message: readFailure.trim(), + }, + }); + } + + if (files.length === 0) { + return makeEmptyDiagnostics({ + traceFilePath: options.traceFilePath, + scannedFilePaths: paths, + readAt, + slowSpanThresholdMs, + error: { + kind: "trace-file-not-found", + message: "No local trace files were found.", + }, + }); + } + + return aggregateTraceDiagnostics({ + traceFilePath: options.traceFilePath, + files, + readAt, + slowSpanThresholdMs, + }); + }); +} diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index a3ac29aa02d..fe201231cbe 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -3,6 +3,21 @@ import { Duration, Effect, Exit, Metric, Stream } from "effect"; import { outcomeFromExit } from "./Attributes.ts"; import { metricAttributes, rpcRequestDuration, rpcRequestsTotal, withMetrics } from "./Metrics.ts"; +const RPC_SPAN_PREFIX = "ws.rpc"; +const DEFAULT_RPC_SPAN_ATTRIBUTES = { + "rpc.transport": "websocket", + "rpc.system": "effect-rpc", +} as const; +const RPC_METHODS_WITH_TRACING_DISABLED = new Set([ + 
"server.getTraceDiagnostics", + "server.getProcessDiagnostics", + "server.signalProcess", +]); + +function shouldTraceRpc(method: string): boolean { + return !RPC_METHODS_WITH_TRACING_DISABLED.has(method); +} + const annotateRpcSpan = ( method: string, traceAttributes?: Readonly>, @@ -38,8 +53,8 @@ export const observeRpcEffect = ( method: string, effect: Effect.Effect, traceAttributes?: Readonly>, -): Effect.Effect => - Effect.gen(function* () { +): Effect.Effect => { + const instrumented = Effect.gen(function* () { yield* annotateRpcSpan(method, traceAttributes); return yield* effect.pipe( @@ -53,6 +68,18 @@ export const observeRpcEffect = ( ); }); + return shouldTraceRpc(method) + ? instrumented.pipe( + Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: { + ...DEFAULT_RPC_SPAN_ATTRIBUTES, + ...traceAttributes, + }, + }), + ) + : instrumented.pipe(Effect.withTracerEnabled(false)); +}; + export const observeRpcStream = ( method: string, stream: Stream.Stream, @@ -63,7 +90,16 @@ export const observeRpcStream = ( yield* annotateRpcSpan(method, traceAttributes); const startedAt = Date.now(); return stream.pipe(Stream.onExit((exit) => recordRpcStreamMetrics(method, startedAt, exit))); - }), + }).pipe( + shouldTraceRpc(method) + ? Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: { + ...DEFAULT_RPC_SPAN_ATTRIBUTES, + ...traceAttributes, + }, + }) + : Effect.withTracerEnabled(false), + ), ); export const observeRpcStreamEffect = ( @@ -85,5 +121,14 @@ export const observeRpcStreamEffect = recordRpcStreamMetrics(method, startedAt, streamExit)), ); - }), + }).pipe( + shouldTraceRpc(method) + ? 
Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: { + ...DEFAULT_RPC_SPAN_ATTRIBUTES, + ...traceAttributes, + }, + }) + : Effect.withTracerEnabled(false), + ), ); diff --git a/apps/server/src/vcs/GitVcsDriverCore.ts b/apps/server/src/vcs/GitVcsDriverCore.ts index e01a78a21f1..6569cec2d27 100644 --- a/apps/server/src/vcs/GitVcsDriverCore.ts +++ b/apps/server/src/vcs/GitVcsDriverCore.ts @@ -528,7 +528,7 @@ const createTrace2Monitor = Effect.fn("createTrace2Monitor")(function* ( }; }); -const collectOutput = Effect.fn("collectOutput")(function* ( +const collectOutput = Effect.fnUntraced(function* ( input: Pick, stream: Stream.Stream, maxOutputBytes: number, @@ -541,7 +541,7 @@ const collectOutput = Effect.fn("collectOutput")(function* ( let lineBuffer = ""; let truncated = false; - const emitCompleteLines = Effect.fn("emitCompleteLines")(function* (flush: boolean) { + const emitCompleteLines = Effect.fnUntraced(function* (flush: boolean) { let newlineIndex = lineBuffer.indexOf("\n"); while (newlineIndex >= 0) { const line = lineBuffer.slice(0, newlineIndex).replace(/\r$/, ""); @@ -561,7 +561,7 @@ const collectOutput = Effect.fn("collectOutput")(function* ( } }); - const processChunk = Effect.fn("processChunk")(function* (chunk: Uint8Array) { + const processChunk = Effect.fnUntraced(function* (chunk: Uint8Array) { if (truncateOutputAtMaxBytes && truncated) { return; } @@ -602,20 +602,14 @@ const collectOutput = Effect.fn("collectOutput")(function* ( }; }); -export const makeGitVcsDriverCore = Effect.fn("makeGitVcsDriverCore")(function* (options?: { - executeOverride?: GitVcsDriver.GitVcsDriverShape["execute"]; -}) { +export const makeGitVcsDriverCore = Effect.fn("makeGitVcsDriverCore")(function* () { const fileSystem = yield* FileSystem.FileSystem; const path = yield* Path.Path; + const commandSpawner = yield* ChildProcessSpawner.ChildProcessSpawner; const { worktreesDir } = yield* ServerConfig; - let executeRaw: GitVcsDriver.GitVcsDriverShape["execute"]; 
- - if (options?.executeOverride) { - executeRaw = options.executeOverride; - } else { - const commandSpawner = yield* ChildProcessSpawner.ChildProcessSpawner; - executeRaw = Effect.fnUntraced(function* (input) { + const executeRaw: GitVcsDriver.GitVcsDriverShape["execute"] = Effect.fnUntraced( + function* (input) { const commandInput = { ...input, args: [...input.args], @@ -712,8 +706,8 @@ export const makeGitVcsDriverCore = Effect.fn("makeGitVcsDriverCore")(function* }), ), ); - }); - } + }, + ); const execute: GitVcsDriver.GitVcsDriverShape["execute"] = (input) => executeRaw(input).pipe( diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index f32953abfbe..d0dfcf6d6fd 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -55,6 +55,8 @@ import { ProjectSetupScriptRunner } from "./project/Services/ProjectSetupScriptR import { RepositoryIdentityResolver } from "./project/Services/RepositoryIdentityResolver.ts"; import { ServerEnvironment } from "./environment/Services/ServerEnvironment.ts"; import { ServerAuth } from "./auth/Services/ServerAuth.ts"; +import { readProcessDiagnostics, signalProcess } from "./diagnostics/ProcessDiagnostics.ts"; +import { readTraceDiagnostics } from "./diagnostics/TraceDiagnostics.ts"; import * as SourceControlDiscoveryLayer from "./sourceControl/SourceControlDiscovery.ts"; import { SourceControlRepositoryService } from "./sourceControl/SourceControlRepositoryService.ts"; import * as AzureDevOpsCli from "./sourceControl/AzureDevOpsCli.ts"; @@ -837,6 +839,25 @@ const makeWsRpcLayer = (currentSessionId: AuthSessionId) => "rpc.aggregate": "server", }, ), + [WS_METHODS.serverGetTraceDiagnostics]: (_input) => + observeRpcEffect( + WS_METHODS.serverGetTraceDiagnostics, + readTraceDiagnostics({ + traceFilePath: config.serverTracePath, + maxFiles: config.traceMaxFiles, + }), + { + "rpc.aggregate": "server", + }, + ), + [WS_METHODS.serverGetProcessDiagnostics]: (_input) => + 
observeRpcEffect(WS_METHODS.serverGetProcessDiagnostics, readProcessDiagnostics(), { + "rpc.aggregate": "server", + }), + [WS_METHODS.serverSignalProcess]: (input) => + observeRpcEffect(WS_METHODS.serverSignalProcess, signalProcess(input), { + "rpc.aggregate": "server", + }), [WS_METHODS.sourceControlLookupRepository]: (input) => observeRpcEffect( WS_METHODS.sourceControlLookupRepository, @@ -1162,11 +1183,7 @@ export const websocketRpcRouteLayer = Layer.unwrap( const sessions = yield* SessionCredentialService; const session = yield* serverAuth.authenticateWebSocketUpgrade(request); const rpcWebSocketHttpEffect = yield* RpcServer.toHttpEffectWebsocket(WsRpcGroup, { - spanPrefix: "ws.rpc", - spanAttributes: { - "rpc.transport": "websocket", - "rpc.system": "effect-rpc", - }, + disableTracing: true, }).pipe( Effect.provide( makeWsRpcLayer(session.sessionId).pipe( diff --git a/apps/web/src/components/settings/DiagnosticsSettings.tsx b/apps/web/src/components/settings/DiagnosticsSettings.tsx new file mode 100644 index 00000000000..ba03e19bf01 --- /dev/null +++ b/apps/web/src/components/settings/DiagnosticsSettings.tsx @@ -0,0 +1,963 @@ +import { + AlertTriangleIcon, + ChevronDownIcon, + ChevronRightIcon, + CopyIcon, + FolderOpenIcon, + InfoIcon, + RefreshCwIcon, +} from "lucide-react"; +import { useCallback, useMemo, useState, type ReactNode } from "react"; +import type { ServerProcessDiagnosticsEntry, ServerProcessSignal } from "@t3tools/contracts"; + +import { ensureLocalApi } from "../../localApi"; +import { cn } from "../../lib/utils"; +import { resolveAndPersistPreferredEditor } from "../../editorPreferences"; +import { formatRelativeTime } from "../../timestampFormat"; +import { useServerAvailableEditors, useServerObservability } from "../../rpc/serverState"; +import { useProcessDiagnostics } from "../../lib/processDiagnosticsState"; +import { useTraceDiagnostics } from "../../lib/traceDiagnosticsState"; +import { Button } from "../ui/button"; +import { 
ScrollArea } from "../ui/scroll-area"; +import { Tooltip, TooltipPopup, TooltipTrigger } from "../ui/tooltip"; +import { toastManager } from "../ui/toast"; +import { SettingsPageContainer, SettingsSection, useRelativeTimeTick } from "./settingsLayout"; + +const NUMBER_FORMAT = new Intl.NumberFormat(); + +function formatCount(value: number): string { + return NUMBER_FORMAT.format(value); +} + +function formatDuration(value: number): string { + if (value < 1_000) return `${Math.round(value)} ms`; + return `${(value / 1_000).toFixed(value >= 10_000 ? 1 : 2)} s`; +} + +function formatBytes(value: number): string { + if (value < 1024) return `${value} B`; + const units = ["KB", "MB", "GB"] as const; + let unitIndex = -1; + let next = value; + do { + next /= 1024; + unitIndex += 1; + } while (next >= 1024 && unitIndex < units.length - 1); + return `${next.toFixed(next >= 10 ? 1 : 2)} ${units[unitIndex]}`; +} + +function formatRelative(value: string | null): string { + if (!value) return "No trace records"; + const relative = formatRelativeTime(value); + return relative.suffix ? `${relative.value} ${relative.suffix}` : relative.value; +} + +function formatRelativeNoWrap(value: string | null): string { + return formatRelative(value).replaceAll(" ", "\u00a0"); +} + +function shortenTraceId(traceId: string): string { + if (traceId.length <= 32) return traceId; + return `${traceId.slice(0, 18)}...${traceId.slice(-10)}`; +} + +function isStaleProcessSignalMessage(message: string | undefined): boolean { + return message?.includes("not a live descendant") ?? false; +} + +function StatBlock({ + label, + value, + tooltip, + tone = "default", +}: { + label: string; + value: string; + tooltip?: ReactNode; + tone?: "default" | "warning" | "danger"; +}) { + return ( +
+
+ {label} + {tooltip ? ( + + + + + } + /> + + {tooltip} + + + ) : null} +
+
+ {value} +
+
+ ); +} + +function StatsGrid({ children }: { children: ReactNode }) { + return ( +
+ + + + + {children} +
+ ); +} + +function EmptyRows({ label }: { label: string }) { + return
{label}
; +} + +function ExpandableText({ + text, + className, + collapsedClassName = "line-clamp-3", + expandLabel = "Show full error", +}: { + text: string; + className?: string; + collapsedClassName?: string; + expandLabel?: string; +}) { + const [expanded, setExpanded] = useState(false); + const canExpand = text.length > 180 || text.includes("\n"); + + return ( +
+
+ {text} +
+ {canExpand ? ( + + ) : null} +
+ ); +} + +function DiagnosticsTable({ + headers, + children, + minTableWidth = "min-w-[640px]", + columnWidths, +}: { + headers: ReadonlyArray; + children: ReactNode; + minTableWidth?: string; + columnWidths?: ReadonlyArray; +}) { + return ( + + + {columnWidths ? ( + + {headers.map((header, index) => ( + + ))} + + ) : null} + + + {headers.map((header, index) => ( + + ))} + + + {children} +
+ {header.replaceAll(" ", "\u00a0")} +
+
+ ); +} + +function TraceIdCell({ traceId }: { traceId: string }) { + const [copied, setCopied] = useState(false); + const copyTraceId = useCallback(() => { + void navigator.clipboard + ?.writeText(traceId) + .then(() => { + setCopied(true); + window.setTimeout(() => setCopied(false), 1_200); + }) + .catch(() => undefined); + }, [traceId]); + + return ( +
+ + + {shortenTraceId(traceId)} + + } + /> + + {traceId} + + + + + + + } + /> + {copied ? "Copied" : "Copy full trace ID"} + +
+ ); +} + +function formatProcessName(command: string): string { + const firstToken = command.trim().split(/\s+/)[0]; + if (!firstToken) return command; + const normalized = firstToken.replace(/^['"]|['"]$/g, ""); + const segments = normalized.split(/[\\/]/).filter(Boolean); + return segments.at(-1) ?? normalized; +} + +function formatProcessType(process: ServerProcessDiagnosticsEntry): string { + if (process.depth > 0) return "Subprocess"; + if (/\b(codex|claude|opencode|cursor)\b/i.test(process.command)) return "Agent"; + return "Process"; +} + +function ProcessNameCell({ + process, + isExpanded, + onToggle, +}: { + process: ServerProcessDiagnosticsEntry; + isExpanded: boolean; + onToggle: (pid: number) => void; +}) { + const name = formatProcessName(process.command); + const hasChildren = process.childPids.length > 0; + const ChevronIcon = isExpanded ? ChevronDownIcon : ChevronRightIcon; + + return ( +
+ {hasChildren ? ( + + ) : ( +
+ ); +} + +function ProcessSignalActions({ + process, + isSignaling, + onSignal, +}: { + process: ServerProcessDiagnosticsEntry; + isSignaling: boolean; + onSignal: (pid: number, signal: ServerProcessSignal) => void; +}) { + return ( +
+ + onSignal(process.pid, "SIGINT")} + > + INT + + } + /> + Send SIGINT + + + onSignal(process.pid, "SIGKILL")} + > + KILL + + } + /> + Send SIGKILL + +
+ ); +} + +function ProcessDiagnosticsTable({ + processes, + signalingPid, + onSignal, + emptyLabel, +}: { + processes: ReadonlyArray; + signalingPid: number | null; + onSignal: (pid: number, signal: ServerProcessSignal) => void; + emptyLabel?: string; +}) { + const [collapsedPids, setCollapsedPids] = useState>(() => new Set()); + const visibleProcesses = useMemo(() => { + const visible: ServerProcessDiagnosticsEntry[] = []; + let hiddenChildDepth: number | null = null; + + for (const process of processes) { + if (hiddenChildDepth !== null) { + if (process.depth > hiddenChildDepth) continue; + hiddenChildDepth = null; + } + + visible.push(process); + if (collapsedPids.has(process.pid)) { + hiddenChildDepth = process.depth; + } + } + + return visible; + }, [collapsedPids, processes]); + + const toggleProcess = useCallback((pid: number) => { + setCollapsedPids((previous) => { + const next = new Set(previous); + if (next.has(pid)) { + next.delete(pid); + } else { + next.add(pid); + } + return next; + }); + }, []); + + return ( + + + + + + + + + + + + + + + + + + + + + + + + {visibleProcesses.length === 0 ? ( + + + + ) : null} + {visibleProcesses.map((process) => ( + + + + + + + + + + ))} + +
NameCPUMemoryCommandPIDTypeKill
+ {emptyLabel ?? "No live descendant processes found."} +
+ + + {process.cpuPercent.toFixed(1)}% + + {formatBytes(process.rssBytes)} + + + {process.command}} + /> + + {process.command} + + + + {process.pid} + + {formatProcessType(process)} + + +
+
+ ); +} + +function DiagnosticsLastChecked({ checkedAt }: { checkedAt: string | null }) { + useRelativeTimeTick(); + const relative = checkedAt ? formatRelativeTime(checkedAt) : null; + + if (!relative) { + return Checking; + } + + return ( + + {relative.suffix ? ( + <> + Checked {relative.value} {relative.suffix} + + ) : ( + <>Checked {relative.value} + )} + + ); +} + +function DiagnosticsRefreshButton({ + isPending, + label, + onClick, +}: { + isPending: boolean; + label: string; + onClick: () => void; +}) { + return ( + + + + + } + /> + {label} + + ); +} + +export function DiagnosticsSettingsPanel() { + const observability = useServerObservability(); + const availableEditors = useServerAvailableEditors(); + const { data, error, isPending, refresh } = useTraceDiagnostics(); + const { + data: processData, + error: processError, + isPending: isProcessPending, + refresh: refreshProcesses, + } = useProcessDiagnostics(); + const [isOpeningLogsDirectory, setIsOpeningLogsDirectory] = useState(false); + const [openLogsDirectoryError, setOpenLogsDirectoryError] = useState(null); + const [signalingPid, setSignalingPid] = useState(null); + + const openLogsDirectory = useCallback(() => { + const logsDirectoryPath = observability?.logsDirectoryPath ?? null; + if (!logsDirectoryPath) return; + + const editor = resolveAndPersistPreferredEditor(availableEditors ?? []); + if (!editor) { + setOpenLogsDirectoryError("No available editors found."); + return; + } + + setIsOpeningLogsDirectory(true); + setOpenLogsDirectoryError(null); + void ensureLocalApi() + .shell.openInEditor(logsDirectoryPath, editor) + .catch((error: unknown) => { + setOpenLogsDirectoryError( + error instanceof Error ? 
error.message : "Unable to open logs folder.", + ); + }) + .finally(() => { + setIsOpeningLogsDirectory(false); + }); + }, [availableEditors, observability?.logsDirectoryPath]); + + const isInitialLoading = isPending && data === null; + const isProcessInitialLoading = isProcessPending && processData === null; + const signalProcess = useCallback( + (pid: number, signal: ServerProcessSignal) => { + if ( + signal === "SIGKILL" && + !window.confirm(`Send SIGKILL to process ${pid}? This cannot be handled by the process.`) + ) { + return; + } + + setSignalingPid(pid); + void ensureLocalApi() + .server.signalProcess({ pid, signal }) + .then((result) => { + if (!result.signaled) { + refreshProcesses(); + if (isStaleProcessSignalMessage(result.message)) { + toastManager.add({ + type: "info", + title: "Process already exited", + description: + "The process is not a child of the T3 Server. It might already have exited.", + }); + return; + } + + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: result.message ?? `Failed to send ${signal}.`, + }); + return; + } + refreshProcesses(); + }) + .catch((error: unknown) => { + toastManager.add({ + type: "error", + title: `Could not send ${signal}`, + description: error instanceof Error ? error.message : `Failed to send ${signal}.`, + }); + }) + .finally(() => { + setSignalingPid(null); + }); + }, + [refreshProcesses], + ); + + return ( + + + + + + } + > + + + + + + + {processData?.error || processError ? ( +
+ {processData?.error ? ( +
+ + {processData.error.message} +
+ ) : null} + {processError ? ( +
+ + {processError} +
+ ) : null} +
+ ) : null} + +
+ + + + + + + + } + /> + Open logs folder + + + + } + > + + + 0 ? "danger" : "default"} + /> + 0 ? "warning" : "default"} + /> + 0 ? "warning" : "default"} + /> + + {openLogsDirectoryError || data?.error || error ? ( +
+ {openLogsDirectoryError ? ( +
+ + {openLogsDirectoryError} +
+ ) : null} + {data?.error ? ( +
+ + {data.error.message} +
+ ) : null} + {error ? ( +
+ + {error} +
+ ) : null} +
+ ) : null} +
+ + + {data && data.latestFailures.length > 0 ? ( + + {data.latestFailures.map((failure) => ( + + + {failure.name} + + + + + + {formatDuration(failure.durationMs)} + + + {formatRelativeNoWrap(failure.endedAt)} + + + ))} + + ) : ( + + )} + + + + {data && data.commonFailures.length > 0 ? ( + + {data.commonFailures.map((failure) => ( + + + {failure.name} + + + {formatCount(failure.count)} + + + + + + {formatRelativeNoWrap(failure.lastSeenAt)} + + + ))} + + ) : ( + + )} + + + + {data && data.slowestSpans.length > 0 ? ( + + {data.slowestSpans.map((span) => ( + + + {span.name} + + + {formatDuration(span.durationMs)} + + + {formatRelativeNoWrap(span.endedAt)} + + + + + + ))} + + ) : ( + + )} + + + + {data && data.latestWarningAndErrorLogs.length > 0 ? ( + + + + + + + + + + + + + + + + + + + + {data.latestWarningAndErrorLogs.map((event) => ( + + + + + + + + ))} + +
TimeLevelSpanMessageTrace
+ {formatRelativeNoWrap(event.seenAt)} + + + {event.level} + + +
{event.spanName}
+
+ + + +
+
+ ) : ( + + )} +
+ + + {data && data.topSpansByCount.length > 0 ? ( + + {data.topSpansByCount.map((span) => ( + + + {span.name} + + + {formatCount(span.count)} + + + {formatCount(span.failureCount)} + + + {formatDuration(span.averageDurationMs)} + + + {formatDuration(span.maxDurationMs)} + + + ))} + + ) : ( + + )} + +
+ ); +} diff --git a/apps/web/src/components/settings/SettingsPanels.logic.test.ts b/apps/web/src/components/settings/SettingsPanels.logic.test.ts index 77206d2484f..7a7c68a6b9c 100644 --- a/apps/web/src/components/settings/SettingsPanels.logic.test.ts +++ b/apps/web/src/components/settings/SettingsPanels.logic.test.ts @@ -5,7 +5,48 @@ import { type ProviderInstanceConfig, } from "@t3tools/contracts"; import { describe, expect, it } from "vitest"; -import { buildProviderInstanceUpdatePatch } from "./SettingsPanels.logic"; +import { + buildProviderInstanceUpdatePatch, + formatDiagnosticsDescription, +} from "./SettingsPanels.logic"; + +describe("formatDiagnosticsDescription", () => { + it("collapses trace and metric URLs that share the same OTEL base path", () => { + expect( + formatDiagnosticsDescription({ + localTracingEnabled: true, + otlpTracesEnabled: true, + otlpTracesUrl: "http://localhost:4318/v1/traces", + otlpMetricsEnabled: true, + otlpMetricsUrl: "http://localhost:4318/v1/metrics", + }), + ).toBe("Local trace file. Exporting OTEL to http://localhost:4318/v1/{traces,metrics}."); + }); + + it("keeps separate trace and metric URLs when their base paths differ", () => { + expect( + formatDiagnosticsDescription({ + localTracingEnabled: true, + otlpTracesEnabled: true, + otlpTracesUrl: "http://localhost:4318/v1/traces", + otlpMetricsEnabled: true, + otlpMetricsUrl: "http://localhost:9000/v1/metrics", + }), + ).toBe( + "Local trace file. 
Exporting OTEL traces to http://localhost:4318/v1/traces and metrics to http://localhost:9000/v1/metrics.", + ); + }); + + it("omits OTEL text when no exporter is enabled", () => { + expect( + formatDiagnosticsDescription({ + localTracingEnabled: true, + otlpTracesEnabled: false, + otlpMetricsEnabled: false, + }), + ).toBe("Local trace file."); + }); +}); describe("buildProviderInstanceUpdatePatch", () => { it("promotes an edited default provider into providerInstances and resets the legacy provider", () => { diff --git a/apps/web/src/components/settings/SettingsPanels.logic.ts b/apps/web/src/components/settings/SettingsPanels.logic.ts index 57a63b7f2ae..99d7052965a 100644 --- a/apps/web/src/components/settings/SettingsPanels.logic.ts +++ b/apps/web/src/components/settings/SettingsPanels.logic.ts @@ -7,6 +7,54 @@ import type { } from "@t3tools/contracts"; import { DEFAULT_UNIFIED_SETTINGS } from "@t3tools/contracts/settings"; +function collapseOtelSignalsUrl(input: { + readonly tracesUrl: string; + readonly metricsUrl: string; +}): string | null { + const tracesSuffix = "/traces"; + const metricsSuffix = "/metrics"; + if (!input.tracesUrl.endsWith(tracesSuffix) || !input.metricsUrl.endsWith(metricsSuffix)) { + return null; + } + + const tracesBase = input.tracesUrl.slice(0, -tracesSuffix.length); + const metricsBase = input.metricsUrl.slice(0, -metricsSuffix.length); + if (tracesBase !== metricsBase) { + return null; + } + + return `${tracesBase}/{traces,metrics}`; +} + +export function formatDiagnosticsDescription(input: { + readonly localTracingEnabled: boolean; + readonly otlpTracesEnabled: boolean; + readonly otlpTracesUrl?: string | undefined; + readonly otlpMetricsEnabled: boolean; + readonly otlpMetricsUrl?: string | undefined; +}): string { + const mode = input.localTracingEnabled ? "Local trace file" : "Terminal logs only"; + const tracesUrl = input.otlpTracesEnabled ? input.otlpTracesUrl : undefined; + const metricsUrl = input.otlpMetricsEnabled ? 
input.otlpMetricsUrl : undefined; + + if (tracesUrl && metricsUrl) { + const collapsedUrl = collapseOtelSignalsUrl({ tracesUrl, metricsUrl }); + return collapsedUrl + ? `${mode}. Exporting OTEL to ${collapsedUrl}.` + : `${mode}. Exporting OTEL traces to ${tracesUrl} and metrics to ${metricsUrl}.`; + } + + if (tracesUrl) { + return `${mode}. Exporting OTEL traces to ${tracesUrl}.`; + } + + if (metricsUrl) { + return `${mode}. Exporting OTEL metrics to ${metricsUrl}.`; + } + + return `${mode}.`; +} + export function buildProviderInstanceUpdatePatch(input: { readonly settings: Pick; readonly instanceId: ProviderInstanceId; diff --git a/apps/web/src/components/settings/SettingsPanels.tsx b/apps/web/src/components/settings/SettingsPanels.tsx index d8eca942c60..94327452680 100644 --- a/apps/web/src/components/settings/SettingsPanels.tsx +++ b/apps/web/src/components/settings/SettingsPanels.tsx @@ -1,5 +1,6 @@ import { ArchiveIcon, ArchiveX, LoaderIcon, PlusIcon, RefreshCwIcon } from "lucide-react"; import { useQueryClient } from "@tanstack/react-query"; +import { Link } from "@tanstack/react-router"; import { useCallback, useMemo, useRef, useState } from "react"; import { defaultInstanceIdForDriver, @@ -24,7 +25,6 @@ import { } from "../../components/desktopUpdate.logic"; import { ProviderModelPicker } from "../chat/ProviderModelPicker"; import { TraitsPicker } from "../chat/TraitsPicker"; -import { resolveAndPersistPreferredEditor } from "../../editorPreferences"; import { isElectron } from "../../env"; import { useTheme } from "../../hooks/useTheme"; import { useSettings, useUpdateSettings } from "../../hooks/useSettings"; @@ -66,7 +66,10 @@ import { } from "../ProviderUpdateLaunchNotification.logic"; import { ProviderInstanceCard } from "./ProviderInstanceCard"; import { DRIVER_OPTIONS, getDriverOption } from "./providerDriverMeta"; -import { buildProviderInstanceUpdatePatch } from "./SettingsPanels.logic"; +import { + buildProviderInstanceUpdatePatch, + 
formatDiagnosticsDescription, +} from "./SettingsPanels.logic"; import { SettingResetButton, SettingsPageContainer, @@ -76,7 +79,6 @@ import { } from "./settingsLayout"; import { ProjectFavicon } from "../ProjectFavicon"; import { - useServerAvailableEditors, useServerObservability, useServerProviders, } from "../../rpc/serverState"; @@ -442,27 +444,15 @@ export function GeneralSettingsPanel() { const { theme, setTheme } = useTheme(); const settings = useSettings(); const { updateSettings } = useUpdateSettings(); - const [openingPathByTarget, setOpeningPathByTarget] = useState({ - logsDirectory: false, - }); - const [openPathErrorByTarget, setOpenPathErrorByTarget] = useState< - Partial> - >({}); - const availableEditors = useServerAvailableEditors(); const observability = useServerObservability(); const serverProviders = useServerProviders(); - const logsDirectoryPath = observability?.logsDirectoryPath ?? null; - const diagnosticsDescription = (() => { - const exports: string[] = []; - if (observability?.otlpTracesEnabled && observability.otlpTracesUrl) { - exports.push(`traces to ${observability.otlpTracesUrl}`); - } - if (observability?.otlpMetricsEnabled && observability.otlpMetricsUrl) { - exports.push(`metrics to ${observability.otlpMetricsUrl}`); - } - const mode = observability?.localTracingEnabled ? "Local trace file" : "Terminal logs only"; - return exports.length > 0 ? `${mode}. OTLP exporting ${exports.join(" and ")}.` : `${mode}.`; - })(); + const diagnosticsDescription = formatDiagnosticsDescription({ + localTracingEnabled: observability?.localTracingEnabled ?? false, + otlpTracesEnabled: observability?.otlpTracesEnabled ?? false, + otlpTracesUrl: observability?.otlpTracesUrl, + otlpMetricsEnabled: observability?.otlpMetricsEnabled ?? 
false, + otlpMetricsUrl: observability?.otlpMetricsUrl, + }); const textGenerationModelSelection = resolveAppModelSelectionState(settings, serverProviders); const textGenInstanceId = textGenerationModelSelection.instanceId; @@ -487,44 +477,6 @@ export function GeneralSettingsPanel() { DEFAULT_UNIFIED_SETTINGS.textGenerationModelSelection ?? null, ); - const openInPreferredEditor = useCallback( - (target: "logsDirectory", path: string | null, failureMessage: string) => { - if (!path) return; - setOpenPathErrorByTarget((existing) => ({ ...existing, [target]: null })); - setOpeningPathByTarget((existing) => ({ ...existing, [target]: true })); - - const editor = resolveAndPersistPreferredEditor(availableEditors ?? []); - if (!editor) { - setOpenPathErrorByTarget((existing) => ({ - ...existing, - [target]: "No available editors found.", - })); - setOpeningPathByTarget((existing) => ({ ...existing, [target]: false })); - return; - } - - void ensureLocalApi() - .shell.openInEditor(path, editor) - .catch((error) => { - setOpenPathErrorByTarget((existing) => ({ - ...existing, - [target]: error instanceof Error ? error.message : failureMessage, - })); - }) - .finally(() => { - setOpeningPathByTarget((existing) => ({ ...existing, [target]: false })); - }); - }, - [availableEditors], - ); - - const openLogsDirectory = useCallback(() => { - openInPreferredEditor("logsDirectory", logsDirectoryPath, "Unable to open logs folder."); - }, [logsDirectoryPath, openInPreferredEditor]); - - const openDiagnosticsError = openPathErrorByTarget.logsDirectory ?? null; - const isOpeningLogsDirectory = openingPathByTarget.logsDirectory; - return ( @@ -914,24 +866,9 @@ export function GeneralSettingsPanel() { - - {logsDirectoryPath ?? "Resolving logs directory..."} - - {openDiagnosticsError ? 
( - {openDiagnosticsError} - ) : null} - - } control={ - } /> diff --git a/apps/web/src/lib/processDiagnosticsState.ts b/apps/web/src/lib/processDiagnosticsState.ts new file mode 100644 index 00000000000..17d13dcf336 --- /dev/null +++ b/apps/web/src/lib/processDiagnosticsState.ts @@ -0,0 +1,63 @@ +import { useAtomValue } from "@effect/atom-react"; +import type { ServerProcessDiagnosticsResult } from "@t3tools/contracts"; +import { Cause, Effect, Option } from "effect"; +import { AsyncResult, Atom } from "effect/unstable/reactivity"; +import { useCallback } from "react"; + +import { ensureLocalApi } from "../localApi"; +import { appAtomRegistry } from "../rpc/atomRegistry"; + +const PROCESS_DIAGNOSTICS_STALE_TIME_MS = 2_000; +const PROCESS_DIAGNOSTICS_IDLE_TTL_MS = 5 * 60_000; + +const processDiagnosticsAtom = Atom.make( + Effect.promise(() => ensureLocalApi().server.getProcessDiagnostics()), +).pipe( + Atom.swr({ + staleTime: PROCESS_DIAGNOSTICS_STALE_TIME_MS, + revalidateOnMount: true, + }), + Atom.setIdleTTL(PROCESS_DIAGNOSTICS_IDLE_TTL_MS), + Atom.withLabel("process-diagnostics"), +); + +export interface ProcessDiagnosticsState { + readonly data: ServerProcessDiagnosticsResult | null; + readonly error: string | null; + readonly isPending: boolean; + readonly refresh: () => void; +} + +function formatProcessDiagnosticsError(error: unknown): string { + return error instanceof Error ? 
error.message : "Failed to load process diagnostics."; +} + +function readProcessDiagnosticsError( + result: AsyncResult.AsyncResult, +): string | null { + if (result._tag !== "Failure") { + return null; + } + + const squashed = Cause.squash(result.cause); + return formatProcessDiagnosticsError(squashed); +} + +export function refreshProcessDiagnostics(): void { + appAtomRegistry.refresh(processDiagnosticsAtom); +} + +export function useProcessDiagnostics(): ProcessDiagnosticsState { + const result = useAtomValue(processDiagnosticsAtom); + const data = Option.getOrNull(AsyncResult.value(result)); + const refresh = useCallback(() => { + refreshProcessDiagnostics(); + }, []); + + return { + data, + error: readProcessDiagnosticsError(result), + isPending: result.waiting, + refresh, + }; +} diff --git a/apps/web/src/lib/traceDiagnosticsState.ts b/apps/web/src/lib/traceDiagnosticsState.ts new file mode 100644 index 00000000000..bfe1b3a83fa --- /dev/null +++ b/apps/web/src/lib/traceDiagnosticsState.ts @@ -0,0 +1,63 @@ +import { useAtomValue } from "@effect/atom-react"; +import type { ServerTraceDiagnosticsResult } from "@t3tools/contracts"; +import { Cause, Effect, Option } from "effect"; +import { AsyncResult, Atom } from "effect/unstable/reactivity"; +import { useCallback } from "react"; + +import { ensureLocalApi } from "../localApi"; +import { appAtomRegistry } from "../rpc/atomRegistry"; + +const TRACE_DIAGNOSTICS_STALE_TIME_MS = 5_000; +const TRACE_DIAGNOSTICS_IDLE_TTL_MS = 5 * 60_000; + +const traceDiagnosticsAtom = Atom.make( + Effect.promise(() => ensureLocalApi().server.getTraceDiagnostics()), +).pipe( + Atom.swr({ + staleTime: TRACE_DIAGNOSTICS_STALE_TIME_MS, + revalidateOnMount: true, + }), + Atom.setIdleTTL(TRACE_DIAGNOSTICS_IDLE_TTL_MS), + Atom.withLabel("trace-diagnostics"), +); + +export interface TraceDiagnosticsState { + readonly data: ServerTraceDiagnosticsResult | null; + readonly error: string | null; + readonly isPending: boolean; + readonly 
refresh: () => void; +} + +function formatTraceDiagnosticsError(error: unknown): string { + return error instanceof Error ? error.message : "Failed to load trace diagnostics."; +} + +function readTraceDiagnosticsError( + result: AsyncResult.AsyncResult, +): string | null { + if (result._tag !== "Failure") { + return null; + } + + const squashed = Cause.squash(result.cause); + return formatTraceDiagnosticsError(squashed); +} + +export function refreshTraceDiagnostics(): void { + appAtomRegistry.refresh(traceDiagnosticsAtom); +} + +export function useTraceDiagnostics(): TraceDiagnosticsState { + const result = useAtomValue(traceDiagnosticsAtom); + const data = Option.getOrNull(AsyncResult.value(result)); + const refresh = useCallback(() => { + refreshTraceDiagnostics(); + }, []); + + return { + data, + error: readTraceDiagnosticsError(result), + isPending: result.waiting, + refresh, + }; +} diff --git a/apps/web/src/localApi.ts b/apps/web/src/localApi.ts index a4ba190516b..cbb3427b004 100644 --- a/apps/web/src/localApi.ts +++ b/apps/web/src/localApi.ts @@ -147,6 +147,18 @@ function createBrowserLocalApi(rpcClient?: WsRpcClient): LocalApi { rpcClient ? rpcClient.server.discoverSourceControl() : Promise.reject(unavailableLocalBackendError()), + getTraceDiagnostics: () => + rpcClient + ? rpcClient.server.getTraceDiagnostics() + : Promise.reject(unavailableLocalBackendError()), + getProcessDiagnostics: () => + rpcClient + ? rpcClient.server.getProcessDiagnostics() + : Promise.reject(unavailableLocalBackendError()), + signalProcess: (input) => + rpcClient + ? 
rpcClient.server.signalProcess(input) + : Promise.reject(unavailableLocalBackendError()), }, }; } diff --git a/apps/web/src/routeTree.gen.ts b/apps/web/src/routeTree.gen.ts index 85a2f9ef8fa..3a9140e278c 100644 --- a/apps/web/src/routeTree.gen.ts +++ b/apps/web/src/routeTree.gen.ts @@ -17,6 +17,7 @@ import { Route as SettingsSourceControlRouteImport } from './routes/settings.sou import { Route as SettingsProvidersRouteImport } from './routes/settings.providers' import { Route as SettingsKeybindingsRouteImport } from './routes/settings.keybindings' import { Route as SettingsGeneralRouteImport } from './routes/settings.general' +import { Route as SettingsDiagnosticsRouteImport } from './routes/settings.diagnostics' import { Route as SettingsConnectionsRouteImport } from './routes/settings.connections' import { Route as SettingsArchivedRouteImport } from './routes/settings.archived' import { Route as ChatDraftDraftIdRouteImport } from './routes/_chat.draft.$draftId' @@ -61,6 +62,11 @@ const SettingsGeneralRoute = SettingsGeneralRouteImport.update({ path: '/general', getParentRoute: () => SettingsRoute, } as any) +const SettingsDiagnosticsRoute = SettingsDiagnosticsRouteImport.update({ + id: '/diagnostics', + path: '/diagnostics', + getParentRoute: () => SettingsRoute, +} as any) const SettingsConnectionsRoute = SettingsConnectionsRouteImport.update({ id: '/connections', path: '/connections', @@ -89,6 +95,7 @@ export interface FileRoutesByFullPath { '/settings': typeof SettingsRouteWithChildren '/settings/archived': typeof SettingsArchivedRoute '/settings/connections': typeof SettingsConnectionsRoute + '/settings/diagnostics': typeof SettingsDiagnosticsRoute '/settings/general': typeof SettingsGeneralRoute '/settings/keybindings': typeof SettingsKeybindingsRoute '/settings/providers': typeof SettingsProvidersRoute @@ -101,6 +108,7 @@ export interface FileRoutesByTo { '/settings': typeof SettingsRouteWithChildren '/settings/archived': typeof SettingsArchivedRoute 
'/settings/connections': typeof SettingsConnectionsRoute + '/settings/diagnostics': typeof SettingsDiagnosticsRoute '/settings/general': typeof SettingsGeneralRoute '/settings/keybindings': typeof SettingsKeybindingsRoute '/settings/providers': typeof SettingsProvidersRoute @@ -116,6 +124,7 @@ export interface FileRoutesById { '/settings': typeof SettingsRouteWithChildren '/settings/archived': typeof SettingsArchivedRoute '/settings/connections': typeof SettingsConnectionsRoute + '/settings/diagnostics': typeof SettingsDiagnosticsRoute '/settings/general': typeof SettingsGeneralRoute '/settings/keybindings': typeof SettingsKeybindingsRoute '/settings/providers': typeof SettingsProvidersRoute @@ -132,6 +141,7 @@ export interface FileRouteTypes { | '/settings' | '/settings/archived' | '/settings/connections' + | '/settings/diagnostics' | '/settings/general' | '/settings/keybindings' | '/settings/providers' @@ -144,6 +154,7 @@ export interface FileRouteTypes { | '/settings' | '/settings/archived' | '/settings/connections' + | '/settings/diagnostics' | '/settings/general' | '/settings/keybindings' | '/settings/providers' @@ -158,6 +169,7 @@ export interface FileRouteTypes { | '/settings' | '/settings/archived' | '/settings/connections' + | '/settings/diagnostics' | '/settings/general' | '/settings/keybindings' | '/settings/providers' @@ -231,6 +243,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof SettingsGeneralRouteImport parentRoute: typeof SettingsRoute } + '/settings/diagnostics': { + id: '/settings/diagnostics' + path: '/diagnostics' + fullPath: '/settings/diagnostics' + preLoaderRoute: typeof SettingsDiagnosticsRouteImport + parentRoute: typeof SettingsRoute + } '/settings/connections': { id: '/settings/connections' path: '/connections' @@ -279,6 +298,7 @@ const ChatRouteWithChildren = ChatRoute._addFileChildren(ChatRouteChildren) interface SettingsRouteChildren { SettingsArchivedRoute: typeof SettingsArchivedRoute SettingsConnectionsRoute: 
typeof SettingsConnectionsRoute + SettingsDiagnosticsRoute: typeof SettingsDiagnosticsRoute SettingsGeneralRoute: typeof SettingsGeneralRoute SettingsKeybindingsRoute: typeof SettingsKeybindingsRoute SettingsProvidersRoute: typeof SettingsProvidersRoute @@ -288,6 +308,7 @@ interface SettingsRouteChildren { const SettingsRouteChildren: SettingsRouteChildren = { SettingsArchivedRoute: SettingsArchivedRoute, SettingsConnectionsRoute: SettingsConnectionsRoute, + SettingsDiagnosticsRoute: SettingsDiagnosticsRoute, SettingsGeneralRoute: SettingsGeneralRoute, SettingsKeybindingsRoute: SettingsKeybindingsRoute, SettingsProvidersRoute: SettingsProvidersRoute, diff --git a/apps/web/src/routes/settings.diagnostics.tsx b/apps/web/src/routes/settings.diagnostics.tsx new file mode 100644 index 00000000000..e86e5c5cb14 --- /dev/null +++ b/apps/web/src/routes/settings.diagnostics.tsx @@ -0,0 +1,7 @@ +import { createFileRoute } from "@tanstack/react-router"; + +import { DiagnosticsSettingsPanel } from "../components/settings/DiagnosticsSettings"; + +export const Route = createFileRoute("/settings/diagnostics")({ + component: DiagnosticsSettingsPanel, +}); diff --git a/apps/web/src/rpc/wsRpcClient.ts b/apps/web/src/rpc/wsRpcClient.ts index a13f2173cd5..ca56b6143c0 100644 --- a/apps/web/src/rpc/wsRpcClient.ts +++ b/apps/web/src/rpc/wsRpcClient.ts @@ -129,6 +129,11 @@ export interface WsRpcClient { readonly discoverSourceControl: RpcUnaryNoArgMethod< typeof WS_METHODS.serverDiscoverSourceControl >; + readonly getTraceDiagnostics: RpcUnaryNoArgMethod; + readonly getProcessDiagnostics: RpcUnaryNoArgMethod< + typeof WS_METHODS.serverGetProcessDiagnostics + >; + readonly signalProcess: RpcUnaryMethod; readonly subscribeConfig: RpcStreamMethod; readonly subscribeLifecycle: RpcStreamMethod; readonly subscribeAuthAccess: RpcStreamMethod; @@ -247,6 +252,18 @@ export function createWsRpcClient(transport: WsTransport): WsRpcClient { transport.request((client) => 
client[WS_METHODS.serverUpdateSettings]({ patch })), discoverSourceControl: () => transport.request((client) => client[WS_METHODS.serverDiscoverSourceControl]({})), + getTraceDiagnostics: () => + transport.request((client) => + client[WS_METHODS.serverGetTraceDiagnostics]({}).pipe(Effect.withTracerEnabled(false)), + ), + getProcessDiagnostics: () => + transport.request((client) => + client[WS_METHODS.serverGetProcessDiagnostics]({}).pipe(Effect.withTracerEnabled(false)), + ), + signalProcess: (input) => + transport.request((client) => + client[WS_METHODS.serverSignalProcess](input).pipe(Effect.withTracerEnabled(false)), + ), subscribeConfig: (listener, options) => transport.subscribe((client) => client[WS_METHODS.subscribeServerConfig]({}), listener, { ...options, diff --git a/packages/contracts/src/ipc.ts b/packages/contracts/src/ipc.ts index f2c7a28e719..eca3bb4e66b 100644 --- a/packages/contracts/src/ipc.ts +++ b/packages/contracts/src/ipc.ts @@ -28,9 +28,13 @@ import type { import type { ProviderInstanceId } from "./providerInstance.ts"; import type { ServerConfig, + ServerProcessDiagnosticsResult, ServerProviderUpdateInput, ServerProviderUpdatedPayload, ServerRemoveKeybindingResult, + ServerSignalProcessInput, + ServerSignalProcessResult, + ServerTraceDiagnosticsResult, ServerUpsertKeybindingResult, } from "./server.ts"; import type { @@ -304,6 +308,9 @@ export interface LocalApi { getSettings: () => Promise; updateSettings: (patch: ServerSettingsPatch) => Promise; discoverSourceControl: () => Promise; + getTraceDiagnostics: () => Promise; + getProcessDiagnostics: () => Promise; + signalProcess: (input: ServerSignalProcessInput) => Promise; }; } diff --git a/packages/contracts/src/rpc.ts b/packages/contracts/src/rpc.ts index 33cddc98a4b..1c3ddc93739 100644 --- a/packages/contracts/src/rpc.ts +++ b/packages/contracts/src/rpc.ts @@ -77,6 +77,10 @@ import { ServerRemoveKeybindingInput, ServerRemoveKeybindingResult, ServerProviderUpdatedPayload, + 
ServerTraceDiagnosticsResult, + ServerProcessDiagnosticsResult, + ServerSignalProcessInput, + ServerSignalProcessResult, ServerUpsertKeybindingInput, ServerUpsertKeybindingResult, } from "./server.ts"; @@ -139,6 +143,9 @@ export const WS_METHODS = { serverGetSettings: "server.getSettings", serverUpdateSettings: "server.updateSettings", serverDiscoverSourceControl: "server.discoverSourceControl", + serverGetTraceDiagnostics: "server.getTraceDiagnostics", + serverGetProcessDiagnostics: "server.getProcessDiagnostics", + serverSignalProcess: "server.signalProcess", // Source control methods sourceControlLookupRepository: "sourceControl.lookupRepository", @@ -207,6 +214,21 @@ export const WsServerDiscoverSourceControlRpc = Rpc.make(WS_METHODS.serverDiscov success: SourceControlDiscoveryResult, }); +export const WsServerGetTraceDiagnosticsRpc = Rpc.make(WS_METHODS.serverGetTraceDiagnostics, { + payload: Schema.Struct({}), + success: ServerTraceDiagnosticsResult, +}); + +export const WsServerGetProcessDiagnosticsRpc = Rpc.make(WS_METHODS.serverGetProcessDiagnostics, { + payload: Schema.Struct({}), + success: ServerProcessDiagnosticsResult, +}); + +export const WsServerSignalProcessRpc = Rpc.make(WS_METHODS.serverSignalProcess, { + payload: ServerSignalProcessInput, + success: ServerSignalProcessResult, +}); + export const WsSourceControlLookupRepositoryRpc = Rpc.make( WS_METHODS.sourceControlLookupRepository, { @@ -439,6 +461,9 @@ export const WsRpcGroup = RpcGroup.make( WsServerGetSettingsRpc, WsServerUpdateSettingsRpc, WsServerDiscoverSourceControlRpc, + WsServerGetTraceDiagnosticsRpc, + WsServerGetProcessDiagnosticsRpc, + WsServerSignalProcessRpc, WsSourceControlLookupRepositoryRpc, WsSourceControlCloneRepositoryRpc, WsSourceControlPublishRepositoryRpc, diff --git a/packages/contracts/src/server.ts b/packages/contracts/src/server.ts index ec6840d8072..3549bca360c 100644 --- a/packages/contracts/src/server.ts +++ b/packages/contracts/src/server.ts @@ -4,6 +4,7 @@ import 
{ ServerAuthDescriptor } from "./auth.ts"; import { IsoDateTime, NonNegativeInt, + PositiveInt, ProjectId, ThreadId, TrimmedNonEmptyString, @@ -211,6 +212,134 @@ export const ServerObservability = Schema.Struct({ }); export type ServerObservability = typeof ServerObservability.Type; +export const ServerTraceDiagnosticsErrorKind = Schema.Literals([ + "trace-file-not-found", + "trace-file-read-failed", +]); +export type ServerTraceDiagnosticsErrorKind = typeof ServerTraceDiagnosticsErrorKind.Type; + +export const ServerTraceDiagnosticsSpanSummary = Schema.Struct({ + name: TrimmedNonEmptyString, + count: NonNegativeInt, + failureCount: NonNegativeInt, + totalDurationMs: Schema.Number, + averageDurationMs: Schema.Number, + maxDurationMs: Schema.Number, +}); +export type ServerTraceDiagnosticsSpanSummary = typeof ServerTraceDiagnosticsSpanSummary.Type; + +export const ServerTraceDiagnosticsFailureSummary = Schema.Struct({ + name: TrimmedNonEmptyString, + cause: TrimmedNonEmptyString, + count: NonNegativeInt, + lastSeenAt: IsoDateTime, + traceId: TrimmedNonEmptyString, + spanId: TrimmedNonEmptyString, +}); +export type ServerTraceDiagnosticsFailureSummary = typeof ServerTraceDiagnosticsFailureSummary.Type; + +export const ServerTraceDiagnosticsRecentFailure = Schema.Struct({ + name: TrimmedNonEmptyString, + cause: TrimmedNonEmptyString, + durationMs: Schema.Number, + endedAt: IsoDateTime, + traceId: TrimmedNonEmptyString, + spanId: TrimmedNonEmptyString, +}); +export type ServerTraceDiagnosticsRecentFailure = typeof ServerTraceDiagnosticsRecentFailure.Type; + +export const ServerTraceDiagnosticsSpanOccurrence = Schema.Struct({ + name: TrimmedNonEmptyString, + durationMs: Schema.Number, + endedAt: IsoDateTime, + traceId: TrimmedNonEmptyString, + spanId: TrimmedNonEmptyString, +}); +export type ServerTraceDiagnosticsSpanOccurrence = typeof ServerTraceDiagnosticsSpanOccurrence.Type; + +export const ServerTraceDiagnosticsLogEvent = Schema.Struct({ + spanName: 
TrimmedNonEmptyString, + level: TrimmedNonEmptyString, + message: TrimmedNonEmptyString, + seenAt: IsoDateTime, + traceId: TrimmedNonEmptyString, + spanId: TrimmedNonEmptyString, +}); +export type ServerTraceDiagnosticsLogEvent = typeof ServerTraceDiagnosticsLogEvent.Type; + +export const ServerTraceDiagnosticsResult = Schema.Struct({ + traceFilePath: TrimmedNonEmptyString, + scannedFilePaths: Schema.Array(TrimmedNonEmptyString), + readAt: IsoDateTime, + recordCount: NonNegativeInt, + parseErrorCount: NonNegativeInt, + firstSpanAt: Schema.NullOr(IsoDateTime), + lastSpanAt: Schema.NullOr(IsoDateTime), + failureCount: NonNegativeInt, + interruptionCount: NonNegativeInt, + slowSpanThresholdMs: NonNegativeInt, + slowSpanCount: NonNegativeInt, + logLevelCounts: Schema.Record(TrimmedNonEmptyString, NonNegativeInt), + topSpansByCount: Schema.Array(ServerTraceDiagnosticsSpanSummary), + slowestSpans: Schema.Array(ServerTraceDiagnosticsSpanOccurrence), + commonFailures: Schema.Array(ServerTraceDiagnosticsFailureSummary), + latestFailures: Schema.Array(ServerTraceDiagnosticsRecentFailure), + latestWarningAndErrorLogs: Schema.Array(ServerTraceDiagnosticsLogEvent), + error: Schema.optional( + Schema.Struct({ + kind: ServerTraceDiagnosticsErrorKind, + message: TrimmedNonEmptyString, + }), + ), +}); +export type ServerTraceDiagnosticsResult = typeof ServerTraceDiagnosticsResult.Type; + +export const ServerProcessSignal = Schema.Literals(["SIGINT", "SIGKILL"]); +export type ServerProcessSignal = typeof ServerProcessSignal.Type; + +export const ServerProcessDiagnosticsEntry = Schema.Struct({ + pid: PositiveInt, + ppid: NonNegativeInt, + pgid: Schema.NullOr(Schema.Int), + status: TrimmedNonEmptyString, + cpuPercent: Schema.Number, + rssBytes: NonNegativeInt, + elapsed: TrimmedNonEmptyString, + command: TrimmedNonEmptyString, + depth: NonNegativeInt, + childPids: Schema.Array(PositiveInt), +}); +export type ServerProcessDiagnosticsEntry = typeof ServerProcessDiagnosticsEntry.Type; + 
+export const ServerProcessDiagnosticsResult = Schema.Struct({ + serverPid: PositiveInt, + readAt: IsoDateTime, + processCount: NonNegativeInt, + totalRssBytes: NonNegativeInt, + totalCpuPercent: Schema.Number, + processes: Schema.Array(ServerProcessDiagnosticsEntry), + error: Schema.optional( + Schema.Struct({ + message: TrimmedNonEmptyString, + }), + ), +}); +export type ServerProcessDiagnosticsResult = typeof ServerProcessDiagnosticsResult.Type; + +export const ServerSignalProcessInput = Schema.Struct({ + pid: PositiveInt, + signal: ServerProcessSignal, +}); +export type ServerSignalProcessInput = typeof ServerSignalProcessInput.Type; + +export const ServerSignalProcessResult = Schema.Struct({ + pid: PositiveInt, + signal: ServerProcessSignal, + signaled: Schema.Boolean, + message: Schema.optional(TrimmedNonEmptyString), +}); +export type ServerSignalProcessResult = typeof ServerSignalProcessResult.Type; + export const ServerConfig = Schema.Struct({ environment: ExecutionEnvironmentDescriptor, auth: ServerAuthDescriptor, From f7f05caa79b38f039b4dcea1ca840ccd9cbb3422 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 14:49:28 -0700 Subject: [PATCH 02/15] Refactor diagnostics into injectable services - Move process and trace diagnostics behind Effect layers - Preserve partial trace reads and service-based process querying - Update server, web settings, and tests for the new diagnostics API --- .../diagnostics/ProcessDiagnostics.test.ts | 279 ++++++++++++------ .../src/diagnostics/ProcessDiagnostics.ts | 238 +++++++++------ .../src/diagnostics/TraceDiagnostics.test.ts | 271 ++++++++++------- .../src/diagnostics/TraceDiagnostics.ts | 159 ++++++---- apps/server/src/server.test.ts | 44 +++ apps/server/src/server.ts | 4 + apps/server/src/ws.ts | 26 +- .../settings/DiagnosticsSettings.tsx | 13 +- packages/contracts/src/server.ts | 1 + 9 files changed, 676 insertions(+), 359 deletions(-) diff --git 
a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts index 92829b343b6..4013b647e06 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -1,105 +1,198 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "@effect/vitest"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import { ChildProcessSpawner } from "effect/unstable/process"; -import { aggregateProcessDiagnostics, parsePosixProcessRows } from "./ProcessDiagnostics.ts"; +import * as ProcessDiagnostics from "./ProcessDiagnostics.ts"; +import * as VcsProcess from "../vcs/VcsProcess.ts"; -describe("ProcessDiagnostics", () => { - it("parses POSIX ps rows with full commands", () => { - const rows = parsePosixProcessRows( - [ - " 10 1 10 Ss 0.0 1024 01:02.03 /usr/bin/node server.js", - " 11 10 10 S+ 12.5 20480 00:04 codex app-server --config /tmp/one two", - ].join("\n"), - ); +const processOutput = (stdout: string): VcsProcess.VcsProcessOutput => ({ + exitCode: ChildProcessSpawner.ExitCode(0), + stdout, + stderr: "", + stdoutTruncated: false, + stderrTruncated: false, +}); - expect(rows).toEqual([ - { - pid: 10, - ppid: 1, - pgid: 10, - status: "Ss", - cpuPercent: 0, - rssBytes: 1024 * 1024, - elapsed: "01:02.03", - command: "/usr/bin/node server.js", - }, - { - pid: 11, - ppid: 10, - pgid: 10, - status: "S+", - cpuPercent: 12.5, - rssBytes: 20480 * 1024, - elapsed: "00:04", - command: "codex app-server --config /tmp/one two", - }, - ]); - }); +describe("ProcessDiagnostics", () => { + it.effect("parses POSIX ps rows with full commands", () => + Effect.sync(() => { + const rows = ProcessDiagnostics.parsePosixProcessRows( + [ + " 10 1 10 Ss 0.0 1024 01:02.03 /usr/bin/node server.js", + " 11 10 10 S+ 12.5 20480 00:04 codex app-server --config /tmp/one two", + ].join("\n"), + ); - it("aggregates only 
descendants of the server process", () => { - const diagnostics = aggregateProcessDiagnostics({ - serverPid: 100, - readAt: new Date("2026-05-05T10:00:00.000Z"), - rows: [ + expect(rows).toEqual([ { - pid: 100, + pid: 10, ppid: 1, - pgid: 100, - status: "S", + pgid: 10, + status: "Ss", cpuPercent: 0, - rssBytes: 1_000, - elapsed: "01:00", - command: "t3 server", - }, - { - pid: 101, - ppid: 100, - pgid: 100, - status: "S", - cpuPercent: 1.5, - rssBytes: 2_000, - elapsed: "00:20", - command: "codex app-server", - }, - { - pid: 102, - ppid: 101, - pgid: 100, - status: "R", - cpuPercent: 3.25, - rssBytes: 4_000, - elapsed: "00:05", - command: "git status", - }, - { - pid: 200, - ppid: 1, - pgid: 200, - status: "S", - cpuPercent: 99, - rssBytes: 8_000, - elapsed: "00:01", - command: "unrelated", + rssBytes: 1024 * 1024, + elapsed: "01:02.03", + command: "/usr/bin/node server.js", }, { - pid: 201, - ppid: 100, - pgid: 100, - status: "R", - cpuPercent: 9, - rssBytes: 9_000, - elapsed: "00:00", - command: "ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=", + pid: 11, + ppid: 10, + pgid: 10, + status: "S+", + cpuPercent: 12.5, + rssBytes: 20480 * 1024, + elapsed: "00:04", + command: "codex app-server --config /tmp/one two", }, - ], - }); + ]); + }), + ); + + it.effect("aggregates only descendants of the server process", () => + Effect.sync(() => { + const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ + serverPid: 100, + readAt: new Date("2026-05-05T10:00:00.000Z"), + rows: [ + { + pid: 100, + ppid: 1, + pgid: 100, + status: "S", + cpuPercent: 0, + rssBytes: 1_000, + elapsed: "01:00", + command: "t3 server", + }, + { + pid: 101, + ppid: 100, + pgid: 100, + status: "S", + cpuPercent: 1.5, + rssBytes: 2_000, + elapsed: "00:20", + command: "codex app-server", + }, + { + pid: 102, + ppid: 101, + pgid: 100, + status: "R", + cpuPercent: 3.25, + rssBytes: 4_000, + elapsed: "00:05", + command: "git status", + }, + { + pid: 200, + ppid: 1, + pgid: 200, + 
status: "S", + cpuPercent: 99, + rssBytes: 8_000, + elapsed: "00:01", + command: "unrelated", + }, + { + pid: 201, + ppid: 100, + pgid: 100, + status: "R", + cpuPercent: 9, + rssBytes: 9_000, + elapsed: "00:00", + command: "ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=", + }, + ], + }); + + expect(diagnostics.serverPid).toBe(100); + expect(diagnostics.readAt).toBe("2026-05-05T10:00:00.000Z"); + expect(diagnostics.processCount).toBe(2); + expect(diagnostics.totalRssBytes).toBe(6_000); + expect(diagnostics.totalCpuPercent).toBe(4.75); + expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); + expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); + expect(diagnostics.processes[0]?.childPids).toEqual([102]); + }), + ); + + it.effect("preserves ascending sibling order for nested descendants", () => + Effect.sync(() => { + const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ + serverPid: 100, + rows: [ + { + pid: 101, + ppid: 100, + pgid: 100, + status: "S", + cpuPercent: 0, + rssBytes: 100, + elapsed: "00:10", + command: "agent", + }, + { + pid: 103, + ppid: 101, + pgid: 100, + status: "S", + cpuPercent: 0, + rssBytes: 100, + elapsed: "00:10", + command: "child-b", + }, + { + pid: 102, + ppid: 101, + pgid: 100, + status: "S", + cpuPercent: 0, + rssBytes: 100, + elapsed: "00:10", + command: "child-a", + }, + ], + }); + + expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102, 103]); + }), + ); + + it.effect("queries processes through the VcsProcess service", () => + Effect.gen(function* () { + const run = vi.fn(); + run.mockReturnValueOnce( + Effect.succeed( + processOutput( + [ + ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, + ` 4242 ${process.pid} ${process.pid} S 1.5 2048 00:04 agent`, + ].join("\n"), + ), + ), + ); + const layer = ProcessDiagnostics.layer.pipe( + Layer.provide(Layer.mock(VcsProcess.VcsProcess)({ run })), + ); + + const 
diagnostics = yield* ProcessDiagnostics.readProcessDiagnostics().pipe( + Effect.provide(layer), + ); - expect(diagnostics.serverPid).toBe(100); - expect(diagnostics.readAt).toBe("2026-05-05T10:00:00.000Z"); - expect(diagnostics.processCount).toBe(2); - expect(diagnostics.totalRssBytes).toBe(6_000); - expect(diagnostics.totalCpuPercent).toBe(4.75); - expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); - expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); - expect(diagnostics.processes[0]?.childPids).toEqual([102]); - }); + expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); + expect(run).toHaveBeenCalledWith({ + operation: "ProcessDiagnostics.readPosixProcessRows", + command: "ps", + args: ["-axo", "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="], + cwd: process.cwd(), + timeoutMs: 1_000, + allowNonZeroExit: true, + maxOutputBytes: 2 * 1024 * 1024, + truncateOutputAtMaxBytes: true, + }); + }), + ); }); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index cd00982d14d..b55d7135899 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -4,9 +4,12 @@ import type { ServerProcessSignal, ServerSignalProcessResult, } from "@t3tools/contracts"; -import { Effect, Schema } from "effect"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as Layer from "effect/Layer"; +import * as Schema from "effect/Schema"; -import { runProcess } from "../processRunner.ts"; +import * as VcsProcess from "../vcs/VcsProcess.ts"; interface ProcessRow { readonly pid: number; @@ -21,6 +24,20 @@ interface ProcessRow { const PROCESS_QUERY_TIMEOUT_MS = 1_000; const POSIX_PROCESS_QUERY_COMMAND = "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="; +const PROCESS_QUERY_MAX_OUTPUT_BYTES = 2 * 1024 * 1024; + +export interface 
ProcessDiagnosticsShape { + readonly read: Effect.Effect; + readonly signal: (input: { + readonly pid: number; + readonly signal: ServerProcessSignal; + }) => Effect.Effect; +} + +export class ProcessDiagnostics extends Context.Service< + ProcessDiagnostics, + ProcessDiagnosticsShape +>()("t3/diagnostics/ProcessDiagnostics") {} class ProcessDiagnosticsError extends Schema.TaggedErrorClass()( "ProcessDiagnosticsError", @@ -202,7 +219,7 @@ function buildDescendantEntries( childPids: children.map((child) => child.pid), }); - stack.unshift(...children.map((row) => ({ row, depth: item.depth + 1 })).toReversed()); + stack.unshift(...children.map((row) => ({ row, depth: item.depth + 1 }))); } return entries; @@ -242,31 +259,37 @@ function makeResult(input: { }; } -function readPosixProcessRows(): Effect.Effect, ProcessDiagnosticsError> { - return Effect.tryPromise({ - try: async () => { - const result = await runProcess("ps", ["-axo", POSIX_PROCESS_QUERY_COMMAND], { - timeoutMs: PROCESS_QUERY_TIMEOUT_MS, - allowNonZeroExit: true, - maxBufferBytes: 2 * 1024 * 1024, - outputMode: "truncate", - }); - if (result.code !== 0) { - throw toProcessDiagnosticsError(result.stderr.trim() || "ps failed."); - } - return parsePosixProcessRows(result.stdout); - }, - catch: (cause) => - Schema.is(ProcessDiagnosticsError)(cause) - ? cause - : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), - }); +function readPosixProcessRows( + vcsProcess: VcsProcess.VcsProcessShape, +): Effect.Effect, ProcessDiagnosticsError> { + return vcsProcess + .run({ + operation: "ProcessDiagnostics.readPosixProcessRows", + command: "ps", + args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], + cwd: process.cwd(), + timeoutMs: PROCESS_QUERY_TIMEOUT_MS, + allowNonZeroExit: true, + maxOutputBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, + truncateOutputAtMaxBytes: true, + }) + .pipe( + Effect.flatMap((result) => + result.exitCode !== 0 + ? 
Effect.fail(toProcessDiagnosticsError(result.stderr.trim() || "ps failed.")) + : Effect.succeed(parsePosixProcessRows(result.stdout)), + ), + Effect.mapError((cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? cause + : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), + ), + ); } -function readWindowsProcessRows(): Effect.Effect< - ReadonlyArray, - ProcessDiagnosticsError -> { +function readWindowsProcessRows( + vcsProcess: VcsProcess.VcsProcessShape, +): Effect.Effect, ProcessDiagnosticsError> { const command = [ "$processes = Get-CimInstance Win32_Process | ForEach-Object {", '$perf = Get-CimInstance Win32_PerfFormattedData_PerfProc_Process -Filter "IDProcess = $($_.ProcessId)" -ErrorAction SilentlyContinue;', @@ -275,32 +298,39 @@ function readWindowsProcessRows(): Effect.Effect< "$processes | ConvertTo-Json -Compress -Depth 3", ].join(" "); - return Effect.tryPromise({ - try: async () => { - const result = await runProcess( - "powershell.exe", - ["-NoProfile", "-NonInteractive", "-Command", command], - { - timeoutMs: PROCESS_QUERY_TIMEOUT_MS, - allowNonZeroExit: true, - maxBufferBytes: 2 * 1024 * 1024, - outputMode: "truncate", - }, - ); - if (result.code !== 0) { - throw toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."); - } - return parseWindowsProcessRows(result.stdout); - }, - catch: (cause) => - Schema.is(ProcessDiagnosticsError)(cause) - ? cause - : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), - }); + return vcsProcess + .run({ + operation: "ProcessDiagnostics.readWindowsProcessRows", + command: "powershell.exe", + args: ["-NoProfile", "-NonInteractive", "-Command", command], + cwd: process.cwd(), + timeoutMs: PROCESS_QUERY_TIMEOUT_MS, + allowNonZeroExit: true, + maxOutputBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, + truncateOutputAtMaxBytes: true, + }) + .pipe( + Effect.flatMap((result) => + result.exitCode !== 0 + ? 
Effect.fail( + toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."), + ) + : Effect.succeed(parseWindowsProcessRows(result.stdout)), + ), + Effect.mapError((cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? cause + : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), + ), + ); } -function readProcessRows(): Effect.Effect, ProcessDiagnosticsError> { - return process.platform === "win32" ? readWindowsProcessRows() : readPosixProcessRows(); +function readProcessRows( + vcsProcess: VcsProcess.VcsProcessShape, +): Effect.Effect, ProcessDiagnosticsError> { + return process.platform === "win32" + ? readWindowsProcessRows(vcsProcess) + : readPosixProcessRows(vcsProcess); } export function aggregateProcessDiagnostics(input: { @@ -311,22 +341,15 @@ export function aggregateProcessDiagnostics(input: { return makeResult(input); } -export function readProcessDiagnostics(): Effect.Effect { - const serverPid = process.pid; - return readProcessRows().pipe( - Effect.map((rows) => makeResult({ serverPid, rows })), - Effect.catch((error: ProcessDiagnosticsError) => - Effect.succeed(makeResult({ serverPid, rows: [], error: error.message })), - ), - ); -} - -function assertDescendantPid(pid: number): Effect.Effect { +function assertDescendantPid( + vcsProcess: VcsProcess.VcsProcessShape, + pid: number, +): Effect.Effect { if (pid === process.pid) { return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); } - return readProcessRows().pipe( + return readProcessRows(vcsProcess).pipe( Effect.flatMap((rows) => { const descendant = buildDescendantEntries(rows, process.pid).some( (entry) => entry.pid === pid, @@ -340,35 +363,70 @@ function assertDescendantPid(pid: number): Effect.Effect { - return assertDescendantPid(input.pid).pipe( - Effect.flatMap(() => - Effect.try({ - try: () => { - process.kill(input.pid, input.signal); - return { - pid: input.pid, - signal: input.signal, - signaled: true, 
- }; - }, - catch: (cause) => - toProcessDiagnosticsError( - `Failed to signal process ${input.pid} with ${input.signal}.`, - cause, - ), - }), - ), +export const make = Effect.fn("makeProcessDiagnostics")(function* () { + const vcsProcess = yield* VcsProcess.VcsProcess; + + const read: ProcessDiagnosticsShape["read"] = readProcessRows(vcsProcess).pipe( + Effect.map((rows) => makeResult({ serverPid: process.pid, rows })), Effect.catch((error: ProcessDiagnosticsError) => - Effect.succeed({ - pid: input.pid, - signal: input.signal, - signaled: false, - message: error.message, - }), + Effect.succeed(makeResult({ serverPid: process.pid, rows: [], error: error.message })), ), ); + + const signal: ProcessDiagnosticsShape["signal"] = Effect.fn("ProcessDiagnostics.signal")( + function* (input) { + return yield* assertDescendantPid(vcsProcess, input.pid).pipe( + Effect.flatMap(() => + Effect.try({ + try: () => { + process.kill(input.pid, input.signal); + return { + pid: input.pid, + signal: input.signal, + signaled: true, + }; + }, + catch: (cause) => + toProcessDiagnosticsError( + `Failed to signal process ${input.pid} with ${input.signal}.`, + cause, + ), + }), + ), + Effect.catch((error: ProcessDiagnosticsError) => + Effect.succeed({ + pid: input.pid, + signal: input.signal, + signaled: false, + message: error.message, + }), + ), + ); + }, + ); + + return ProcessDiagnostics.of({ read, signal }); +}); + +export const layer = Layer.effect(ProcessDiagnostics, make()); + +export function readProcessDiagnostics(): Effect.Effect< + ServerProcessDiagnosticsResult, + never, + ProcessDiagnostics +> { + return Effect.gen(function* () { + const diagnostics = yield* ProcessDiagnostics; + return yield* diagnostics.read; + }); +} + +export function signalProcess(input: { + readonly pid: number; + readonly signal: ServerProcessSignal; +}): Effect.Effect { + return Effect.gen(function* () { + const diagnostics = yield* ProcessDiagnostics; + return yield* diagnostics.signal(input); + }); 
} diff --git a/apps/server/src/diagnostics/TraceDiagnostics.test.ts b/apps/server/src/diagnostics/TraceDiagnostics.test.ts index 2a5f6ed1434..d8861181c06 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.test.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.test.ts @@ -1,6 +1,10 @@ import { assert, describe, it } from "@effect/vitest"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; +import * as Layer from "effect/Layer"; +import * as PlatformError from "effect/PlatformError"; -import { aggregateTraceDiagnostics } from "./TraceDiagnostics.ts"; +import * as TraceDiagnostics from "./TraceDiagnostics.ts"; function ns(ms: number): string { return String(BigInt(ms) * 1_000_000n); @@ -33,126 +37,171 @@ function record(input: { } describe("TraceDiagnostics", () => { - it("aggregates failures, slow spans, log levels, and parse errors", () => { - const diagnostics = aggregateTraceDiagnostics({ - traceFilePath: "/tmp/server.trace.ndjson", - readAt: new Date("2026-05-05T10:00:00.000Z"), - slowSpanThresholdMs: 1_000, - files: [ - { - path: "/tmp/server.trace.ndjson.1", - text: [ - record({ - name: "server.getConfig", - traceId: "trace-a", - spanId: "span-a", + it.effect("aggregates failures, slow spans, log levels, and parse errors", () => + Effect.sync(() => { + const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ + traceFilePath: "/tmp/server.trace.ndjson", + readAt: new Date("2026-05-05T10:00:00.000Z"), + slowSpanThresholdMs: 1_000, + files: [ + { + path: "/tmp/server.trace.ndjson.1", + text: [ + record({ + name: "server.getConfig", + traceId: "trace-a", + spanId: "span-a", + startMs: 1_000, + durationMs: 50, + }), + "not-json", + ].join("\n"), + }, + { + path: "/tmp/server.trace.ndjson", + text: [ + record({ + name: "orchestration.dispatch", + traceId: "trace-b", + spanId: "span-b", + startMs: 2_000, + durationMs: 1_500, + exit: { _tag: "Failure", cause: "Provider crashed" }, + events: [ + { + name: 
"provider failed", + timeUnixNano: ns(3_400), + attributes: { "effect.logLevel": "Error" }, + }, + ], + }), + record({ + name: "orchestration.dispatch", + traceId: "trace-c", + spanId: "span-c", + startMs: 4_000, + durationMs: 250, + exit: { _tag: "Failure", cause: "Provider crashed" }, + }), + record({ + name: "git.status", + traceId: "trace-d", + spanId: "span-d", + startMs: 5_000, + durationMs: 25, + exit: { _tag: "Interrupted", cause: "Interrupted" }, + events: [ + { + name: "status delayed", + timeUnixNano: ns(5_010), + attributes: { "effect.logLevel": "Warning" }, + }, + ], + }), + ].join("\n"), + }, + ], + }); + + assert.equal(diagnostics.recordCount, 4); + assert.equal(diagnostics.parseErrorCount, 1); + assert.equal(diagnostics.failureCount, 2); + assert.equal(diagnostics.interruptionCount, 1); + assert.equal(diagnostics.slowSpanCount, 1); + assert.equal(diagnostics.logLevelCounts.Error, 1); + assert.equal(diagnostics.logLevelCounts.Warning, 1); + assert.equal(diagnostics.commonFailures[0]?.name, "orchestration.dispatch"); + assert.equal(diagnostics.commonFailures[0]?.count, 2); + assert.equal(diagnostics.latestFailures[0]?.traceId, "trace-c"); + assert.equal(diagnostics.slowestSpans[0]?.traceId, "trace-b"); + assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, "status delayed"); + assert.equal(diagnostics.topSpansByCount[0]?.name, "orchestration.dispatch"); + }), + ); + + it.effect("returns a not-found diagnostic when no files are available", () => + Effect.sync(() => { + const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ + traceFilePath: "/tmp/missing.trace.ndjson", + readAt: new Date("2026-05-05T10:00:00.000Z"), + files: [], + }); + + assert.equal(diagnostics.recordCount, 0); + assert.equal(diagnostics.error?.kind, "trace-file-not-found"); + }), + ); + + it.effect("preserves full failure causes and log messages", () => + Effect.sync(() => { + const longCause = `VcsProcessSpawnError: ${"missing executable ".repeat(80)}`.trim(); + 
const longMessage = `provider warning: ${"retrying command ".repeat(80)}`.trim(); + const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ + traceFilePath: "/tmp/server.trace.ndjson", + files: [ + { + path: "/tmp/server.trace.ndjson", + text: record({ + name: "VcsProcess.run", + traceId: "trace-long", + spanId: "span-long", startMs: 1_000, - durationMs: 50, - }), - "not-json", - ].join("\n"), - }, - { - path: "/tmp/server.trace.ndjson", - text: [ - record({ - name: "orchestration.dispatch", - traceId: "trace-b", - spanId: "span-b", - startMs: 2_000, - durationMs: 1_500, - exit: { _tag: "Failure", cause: "Provider crashed" }, - events: [ - { - name: "provider failed", - timeUnixNano: ns(3_400), - attributes: { "effect.logLevel": "Error" }, - }, - ], - }), - record({ - name: "orchestration.dispatch", - traceId: "trace-c", - spanId: "span-c", - startMs: 4_000, - durationMs: 250, - exit: { _tag: "Failure", cause: "Provider crashed" }, - }), - record({ - name: "git.status", - traceId: "trace-d", - spanId: "span-d", - startMs: 5_000, durationMs: 25, - exit: { _tag: "Interrupted", cause: "Interrupted" }, + exit: { _tag: "Failure", cause: longCause }, events: [ { - name: "status delayed", - timeUnixNano: ns(5_010), + name: longMessage, + timeUnixNano: ns(1_010), attributes: { "effect.logLevel": "Warning" }, }, ], }), - ].join("\n"), - }, - ], - }); + }, + ], + }); - assert.equal(diagnostics.recordCount, 4); - assert.equal(diagnostics.parseErrorCount, 1); - assert.equal(diagnostics.failureCount, 2); - assert.equal(diagnostics.interruptionCount, 1); - assert.equal(diagnostics.slowSpanCount, 1); - assert.equal(diagnostics.logLevelCounts.Error, 1); - assert.equal(diagnostics.logLevelCounts.Warning, 1); - assert.equal(diagnostics.commonFailures[0]?.name, "orchestration.dispatch"); - assert.equal(diagnostics.commonFailures[0]?.count, 2); - assert.equal(diagnostics.latestFailures[0]?.traceId, "trace-c"); - assert.equal(diagnostics.slowestSpans[0]?.traceId, "trace-b"); - 
assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, "status delayed"); - assert.equal(diagnostics.topSpansByCount[0]?.name, "orchestration.dispatch"); - }); + assert.equal(diagnostics.latestFailures[0]?.cause, longCause); + assert.equal(diagnostics.commonFailures[0]?.cause, longCause); + assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, longMessage); + }), + ); - it("returns a not-found diagnostic when no files are available", () => { - const diagnostics = aggregateTraceDiagnostics({ - traceFilePath: "/tmp/missing.trace.ndjson", - readAt: new Date("2026-05-05T10:00:00.000Z"), - files: [], - }); + it.effect("keeps loaded trace data when one rotated trace file fails to read", () => + Effect.gen(function* () { + const traceFilePath = "/tmp/server.trace.ndjson"; + const fileSystemLayer = FileSystem.layerNoop({ + readFileString: (path) => + path === `${traceFilePath}.1` + ? Effect.fail( + PlatformError.systemError({ + _tag: "PermissionDenied", + module: "FileSystem", + method: "readFileString", + description: "permission denied", + pathOrDescriptor: path, + }), + ) + : Effect.succeed( + record({ + name: "server.getConfig", + traceId: "trace-a", + spanId: "span-a", + startMs: 1_000, + durationMs: 50, + }), + ), + }); - assert.equal(diagnostics.recordCount, 0); - assert.equal(diagnostics.error?.kind, "trace-file-not-found"); - }); - - it("preserves full failure causes and log messages", () => { - const longCause = `VcsProcessSpawnError: ${"missing executable ".repeat(80)}`.trim(); - const longMessage = `provider warning: ${"retrying command ".repeat(80)}`.trim(); - const diagnostics = aggregateTraceDiagnostics({ - traceFilePath: "/tmp/server.trace.ndjson", - files: [ - { - path: "/tmp/server.trace.ndjson", - text: record({ - name: "VcsProcess.run", - traceId: "trace-long", - spanId: "span-long", - startMs: 1_000, - durationMs: 25, - exit: { _tag: "Failure", cause: longCause }, - events: [ - { - name: longMessage, - timeUnixNano: ns(1_010), - 
attributes: { "effect.logLevel": "Warning" }, - }, - ], - }), - }, - ], - }); + const diagnostics = yield* TraceDiagnostics.readTraceDiagnostics({ + traceFilePath, + maxFiles: 1, + readAt: new Date("2026-05-05T10:00:00.000Z"), + }).pipe(Effect.provide(TraceDiagnostics.layer.pipe(Layer.provide(fileSystemLayer)))); - assert.equal(diagnostics.latestFailures[0]?.cause, longCause); - assert.equal(diagnostics.commonFailures[0]?.cause, longCause); - assert.equal(diagnostics.latestWarningAndErrorLogs[0]?.message, longMessage); - }); + assert.equal(diagnostics.recordCount, 1); + assert.equal(diagnostics.partialFailure, true); + assert.equal(diagnostics.error?.kind, "trace-file-read-failed"); + assert.deepStrictEqual(diagnostics.scannedFilePaths, [`${traceFilePath}.1`, traceFilePath]); + }), + ); }); diff --git a/apps/server/src/diagnostics/TraceDiagnostics.ts b/apps/server/src/diagnostics/TraceDiagnostics.ts index e352805b61a..01710bd2e5c 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.ts @@ -1,5 +1,3 @@ -import fs from "node:fs/promises"; - import type { ServerTraceDiagnosticsFailureSummary, ServerTraceDiagnosticsLogEvent, @@ -8,7 +6,11 @@ import type { ServerTraceDiagnosticsSpanOccurrence, ServerTraceDiagnosticsSpanSummary, } from "@t3tools/contracts"; -import { Effect } from "effect"; +import * as Context from "effect/Context"; +import * as Effect from "effect/Effect"; +import * as FileSystem from "effect/FileSystem"; +import * as Layer from "effect/Layer"; +import * as PlatformError from "effect/PlatformError"; interface TraceRecordLike { readonly name?: unknown; @@ -27,18 +29,29 @@ interface TraceEventLike { readonly attributes?: unknown; } -interface TraceDiagnosticsOptions { +export interface TraceDiagnosticsOptions { readonly traceFilePath: string; readonly maxFiles: number; readonly slowSpanThresholdMs?: number; readonly readAt?: Date; } +export interface TraceDiagnosticsShape { + readonly read: 
(options: TraceDiagnosticsOptions) => Effect.Effect; +} + +export class TraceDiagnostics extends Context.Service()( + "t3/diagnostics/TraceDiagnostics", +) {} + interface TraceDiagnosticsInput { readonly traceFilePath: string; readonly files: ReadonlyArray<{ readonly path: string; readonly text: string }>; + readonly scannedFilePaths?: ReadonlyArray; readonly slowSpanThresholdMs?: number; readonly readAt?: Date; + readonly error?: ServerTraceDiagnosticsResult["error"]; + readonly partialFailure?: boolean; } const DEFAULT_SLOW_SPAN_THRESHOLD_MS = 1_000; @@ -104,6 +117,7 @@ function makeEmptyDiagnostics(input: { readonly readAt: Date; readonly slowSpanThresholdMs: number; readonly error?: ServerTraceDiagnosticsResult["error"]; + readonly partialFailure?: boolean; }): ServerTraceDiagnosticsResult { return { traceFilePath: input.traceFilePath, @@ -123,26 +137,36 @@ function makeEmptyDiagnostics(input: { commonFailures: [], latestFailures: [], latestWarningAndErrorLogs: [], + ...(input.partialFailure ? { partialFailure: true } : {}), ...(input.error ? { error: input.error } : {}), }; } +function isNotFoundError(error: PlatformError.PlatformError): boolean { + return error.reason._tag === "NotFound"; +} + +function platformErrorMessage(error: PlatformError.PlatformError): string { + return error.message || String(error); +} + export function aggregateTraceDiagnostics( input: TraceDiagnosticsInput, ): ServerTraceDiagnosticsResult { const readAt = input.readAt ?? new Date(); const slowSpanThresholdMs = input.slowSpanThresholdMs ?? DEFAULT_SLOW_SPAN_THRESHOLD_MS; - const scannedFilePaths = input.files.map((file) => file.path); + const scannedFilePaths = input.scannedFilePaths ?? input.files.map((file) => file.path); if (input.files.length === 0) { return makeEmptyDiagnostics({ traceFilePath: input.traceFilePath, scannedFilePaths, readAt, slowSpanThresholdMs, - error: { + error: input.error ?? 
{ kind: "trace-file-not-found", message: "No local trace files were found.", }, + ...(input.partialFailure ? { partialFailure: true } : {}), }); } @@ -322,66 +346,93 @@ export function aggregateTraceDiagnostics( latestWarningAndErrorLogs: latestWarningAndErrorLogs .toSorted((left, right) => right.seenAt.localeCompare(left.seenAt)) .slice(0, RECENT_LIMIT), + ...(input.partialFailure ? { partialFailure: true } : {}), + ...(input.error ? { error: input.error } : {}), }; } -export function readTraceDiagnostics( - options: TraceDiagnosticsOptions, -): Effect.Effect { - const readAt = options.readAt ?? new Date(); - const slowSpanThresholdMs = options.slowSpanThresholdMs ?? DEFAULT_SLOW_SPAN_THRESHOLD_MS; - const paths = toRotatedTracePaths(options.traceFilePath, options.maxFiles); - - return Effect.promise(async () => { - const files: Array<{ path: string; text: string }> = []; - let readFailure: string | null = null; +type TraceFileReadResult = + | { readonly _tag: "Loaded"; readonly path: string; readonly text: string } + | { readonly _tag: "Missing"; readonly path: string } + | { readonly _tag: "Failed"; readonly path: string; readonly message: string }; + +function readTraceFile( + fileSystem: FileSystem.FileSystem, + path: string, +): Effect.Effect { + return fileSystem.readFileString(path).pipe( + Effect.map((text) => ({ _tag: "Loaded" as const, path, text })), + Effect.catch((error: PlatformError.PlatformError) => + Effect.succeed( + isNotFoundError(error) + ? { _tag: "Missing" as const, path } + : { _tag: "Failed" as const, path, message: platformErrorMessage(error) }, + ), + ), + ); +} - for (const tracePath of paths) { - try { - const text = await fs.readFile(tracePath, "utf8"); - files.push({ path: tracePath, text }); - } catch (error) { - const code = - typeof error === "object" && error !== null && "code" in error - ? String(error.code) - : null; - if (code !== "ENOENT") { - readFailure = error instanceof Error ? 
error.message : String(error); - } +export const make = Effect.fn("makeTraceDiagnostics")(function* () { + const fileSystem = yield* FileSystem.FileSystem; + + const read: TraceDiagnosticsShape["read"] = Effect.fn("TraceDiagnostics.read")( + function* (options) { + const readAt = options.readAt ?? new Date(); + const slowSpanThresholdMs = options.slowSpanThresholdMs ?? DEFAULT_SLOW_SPAN_THRESHOLD_MS; + const paths = toRotatedTracePaths(options.traceFilePath, options.maxFiles); + const results = yield* Effect.all( + paths.map((path) => readTraceFile(fileSystem, path)), + { + concurrency: 1, + }, + ); + const files = results.flatMap((result) => + result._tag === "Loaded" ? [{ path: result.path, text: result.text }] : [], + ); + const readFailure = results.find((result) => result._tag === "Failed"); + const readFailureError = readFailure + ? ({ + kind: "trace-file-read-failed", + message: readFailure.message.trim() || `Failed to read ${readFailure.path}.`, + } satisfies ServerTraceDiagnosticsResult["error"]) + : undefined; + + if (files.length === 0) { + return makeEmptyDiagnostics({ + traceFilePath: options.traceFilePath, + scannedFilePaths: paths, + readAt, + slowSpanThresholdMs, + error: + readFailureError ?? + ({ + kind: "trace-file-not-found", + message: "No local trace files were found.", + } satisfies ServerTraceDiagnosticsResult["error"]), + }); } - } - if (readFailure) { - return makeEmptyDiagnostics({ + return aggregateTraceDiagnostics({ traceFilePath: options.traceFilePath, + files, scannedFilePaths: paths, readAt, slowSpanThresholdMs, - error: { - kind: "trace-file-read-failed", - message: readFailure.trim(), - }, + ...(readFailureError ? 
{ partialFailure: true, error: readFailureError } : {}), }); - } + }, + ); - if (files.length === 0) { - return makeEmptyDiagnostics({ - traceFilePath: options.traceFilePath, - scannedFilePaths: paths, - readAt, - slowSpanThresholdMs, - error: { - kind: "trace-file-not-found", - message: "No local trace files were found.", - }, - }); - } + return TraceDiagnostics.of({ read }); +}); - return aggregateTraceDiagnostics({ - traceFilePath: options.traceFilePath, - files, - readAt, - slowSpanThresholdMs, - }); +export const layer = Layer.effect(TraceDiagnostics, make()); + +export function readTraceDiagnostics( + options: TraceDiagnosticsOptions, +): Effect.Effect { + return Effect.gen(function* () { + const diagnostics = yield* TraceDiagnostics; + return yield* diagnostics.read(options); }); } diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index 655eec78314..e49a5a7c7c7 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -114,6 +114,8 @@ import * as GitWorkflowService from "./git/GitWorkflowService.ts"; import * as SourceControlRepositoryService from "./sourceControl/SourceControlRepositoryService.ts"; import { ServerSecretStoreLive } from "./auth/Layers/ServerSecretStore.ts"; import { ServerAuthLive } from "./auth/Layers/ServerAuth.ts"; +import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; +import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; const defaultProjectId = ProjectId.make("project-default"); const defaultThreadId = ThreadId.make("thread-default"); @@ -543,6 +545,48 @@ const buildAppUnderTest = (options?: { ...options?.layers?.open, }), ), + Layer.provide( + Layer.mock(ProcessDiagnostics.ProcessDiagnostics)({ + read: Effect.succeed({ + serverPid: process.pid, + readAt: TEST_EPOCH.toString(), + processCount: 0, + totalRssBytes: 0, + totalCpuPercent: 0, + processes: [], + }), + signal: (input) => + Effect.succeed({ + pid: input.pid, + signal: input.signal, + 
signaled: true, + }), + }), + ), + Layer.provide( + Layer.mock(TraceDiagnostics.TraceDiagnostics)({ + read: () => + Effect.succeed({ + traceFilePath: "", + scannedFilePaths: [], + readAt: TEST_EPOCH.toString(), + recordCount: 0, + parseErrorCount: 0, + firstSpanAt: null, + lastSpanAt: null, + failureCount: 0, + interruptionCount: 0, + slowSpanThresholdMs: 1_000, + slowSpanCount: 0, + logLevelCounts: {}, + topSpansByCount: [], + slowestSpans: [], + commonFailures: [], + latestFailures: [], + latestWarningAndErrorLogs: [], + }), + }), + ), Layer.provide(gitManagerLayer), Layer.provide(gitVcsDriverLayer), Layer.provide(gitWorkflowLayer), diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 939b2c8abf8..980aa82268b 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -74,6 +74,8 @@ import { } from "./auth/http.ts"; import { ServerSecretStoreLive } from "./auth/Layers/ServerSecretStore.ts"; import { ServerAuthLive } from "./auth/Layers/ServerAuth.ts"; +import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; +import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; import { OrchestrationLayerLive } from "./orchestration/runtimeLayer.ts"; import { clearPersistedServerRuntimeState, @@ -276,6 +278,8 @@ const RuntimeCoreDependenciesLive = ReactorLayerLive.pipe( const RuntimeDependenciesLive = RuntimeCoreDependenciesLive.pipe( // Misc. 
+ Layer.provideMerge(ProcessDiagnostics.layer), + Layer.provideMerge(TraceDiagnostics.layer), Layer.provideMerge(AnalyticsServiceLayerLive), Layer.provideMerge(OpenLive), Layer.provideMerge(ServerLifecycleEventsLive), diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index d0dfcf6d6fd..28b05ec63d5 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -55,8 +55,8 @@ import { ProjectSetupScriptRunner } from "./project/Services/ProjectSetupScriptR import { RepositoryIdentityResolver } from "./project/Services/RepositoryIdentityResolver.ts"; import { ServerEnvironment } from "./environment/Services/ServerEnvironment.ts"; import { ServerAuth } from "./auth/Services/ServerAuth.ts"; -import { readProcessDiagnostics, signalProcess } from "./diagnostics/ProcessDiagnostics.ts"; -import { readTraceDiagnostics } from "./diagnostics/TraceDiagnostics.ts"; +import * as ProcessDiagnostics from "./diagnostics/ProcessDiagnostics.ts"; +import * as TraceDiagnostics from "./diagnostics/TraceDiagnostics.ts"; import * as SourceControlDiscoveryLayer from "./sourceControl/SourceControlDiscovery.ts"; import { SourceControlRepositoryService } from "./sourceControl/SourceControlRepositoryService.ts"; import * as AzureDevOpsCli from "./sourceControl/AzureDevOpsCli.ts"; @@ -842,7 +842,7 @@ const makeWsRpcLayer = (currentSessionId: AuthSessionId) => [WS_METHODS.serverGetTraceDiagnostics]: (_input) => observeRpcEffect( WS_METHODS.serverGetTraceDiagnostics, - readTraceDiagnostics({ + TraceDiagnostics.readTraceDiagnostics({ traceFilePath: config.serverTracePath, maxFiles: config.traceMaxFiles, }), @@ -851,13 +851,21 @@ const makeWsRpcLayer = (currentSessionId: AuthSessionId) => }, ), [WS_METHODS.serverGetProcessDiagnostics]: (_input) => - observeRpcEffect(WS_METHODS.serverGetProcessDiagnostics, readProcessDiagnostics(), { - "rpc.aggregate": "server", - }), + observeRpcEffect( + WS_METHODS.serverGetProcessDiagnostics, + ProcessDiagnostics.readProcessDiagnostics(), + { + 
"rpc.aggregate": "server", + }, + ), [WS_METHODS.serverSignalProcess]: (input) => - observeRpcEffect(WS_METHODS.serverSignalProcess, signalProcess(input), { - "rpc.aggregate": "server", - }), + observeRpcEffect( + WS_METHODS.serverSignalProcess, + ProcessDiagnostics.signalProcess(input), + { + "rpc.aggregate": "server", + }, + ), [WS_METHODS.sourceControlLookupRepository]: (input) => observeRpcEffect( WS_METHODS.sourceControlLookupRepository, diff --git a/apps/web/src/components/settings/DiagnosticsSettings.tsx b/apps/web/src/components/settings/DiagnosticsSettings.tsx index ba03e19bf01..7e56952098b 100644 --- a/apps/web/src/components/settings/DiagnosticsSettings.tsx +++ b/apps/web/src/components/settings/DiagnosticsSettings.tsx @@ -764,9 +764,18 @@ export function DiagnosticsSettingsPanel() { ) : null} {data?.error ? ( -
+
- {data.error.message} + + {data.partialFailure + ? `Some trace files could not be read, so diagnostics may be incomplete. ${data.error.message}` + : data.error.message} +
) : null} {error ? ( diff --git a/packages/contracts/src/server.ts b/packages/contracts/src/server.ts index 3549bca360c..3a6e4f9055b 100644 --- a/packages/contracts/src/server.ts +++ b/packages/contracts/src/server.ts @@ -285,6 +285,7 @@ export const ServerTraceDiagnosticsResult = Schema.Struct({ commonFailures: Schema.Array(ServerTraceDiagnosticsFailureSummary), latestFailures: Schema.Array(ServerTraceDiagnosticsRecentFailure), latestWarningAndErrorLogs: Schema.Array(ServerTraceDiagnosticsLogEvent), + partialFailure: Schema.optional(Schema.Boolean), error: Schema.optional( Schema.Struct({ kind: ServerTraceDiagnosticsErrorKind, From 4b2472c559e76c012df66db72d8e36c0ecefba27 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 15:42:16 -0700 Subject: [PATCH 03/15] Refactor process stream collection to shared helper - Add reusable Uint8Array stream text collector with truncation support - Switch process diagnostics and VcsProcess to ChildProcessSpawner - Cover the new stream helper with tests --- .../diagnostics/ProcessDiagnostics.test.ts | 84 ++++++---- .../src/diagnostics/ProcessDiagnostics.ts | 157 +++++++++++------- 2 files changed, 149 insertions(+), 92 deletions(-) diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts index 4013b647e06..d3a9286f731 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -1,18 +1,33 @@ -import { describe, expect, it, vi } from "@effect/vitest"; +import { describe, expect, it } from "@effect/vitest"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; +import * as Sink from "effect/Sink"; +import * as Stream from "effect/Stream"; import { ChildProcessSpawner } from "effect/unstable/process"; import * as ProcessDiagnostics from "./ProcessDiagnostics.ts"; -import * as VcsProcess from "../vcs/VcsProcess.ts"; -const processOutput = (stdout: 
string): VcsProcess.VcsProcessOutput => ({ - exitCode: ChildProcessSpawner.ExitCode(0), - stdout, - stderr: "", - stdoutTruncated: false, - stderrTruncated: false, -}); +const encoder = new TextEncoder(); + +function mockHandle(result: { + readonly stdout?: string; + readonly stderr?: string; + readonly code?: number; +}) { + return ChildProcessSpawner.makeHandle({ + pid: ChildProcessSpawner.ProcessId(1), + exitCode: Effect.succeed(ChildProcessSpawner.ExitCode(result.code ?? 0)), + isRunning: Effect.succeed(false), + kill: () => Effect.void, + unref: Effect.succeed(Effect.void), + stdin: Sink.drain, + stdout: Stream.make(encoder.encode(result.stdout ?? "")), + stderr: Stream.make(encoder.encode(result.stderr ?? "")), + all: Stream.empty, + getInputFd: () => Sink.drain, + getOutputFd: () => Stream.empty, + }); +} describe("ProcessDiagnostics", () => { it.effect("parses POSIX ps rows with full commands", () => @@ -161,38 +176,41 @@ describe("ProcessDiagnostics", () => { }), ); - it.effect("queries processes through the VcsProcess service", () => + it.effect("queries processes through the ChildProcessSpawner service", () => Effect.gen(function* () { - const run = vi.fn(); - run.mockReturnValueOnce( - Effect.succeed( - processOutput( - [ - ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, - ` 4242 ${process.pid} ${process.pid} S 1.5 2048 00:04 agent`, - ].join("\n"), - ), - ), - ); - const layer = ProcessDiagnostics.layer.pipe( - Layer.provide(Layer.mock(VcsProcess.VcsProcess)({ run })), + const commands: Array<{ readonly command: string; readonly args: ReadonlyArray }> = + []; + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make((command) => { + const childProcess = command as unknown as { + readonly command: string; + readonly args: ReadonlyArray; + }; + commands.push({ command: childProcess.command, args: childProcess.args }); + return Effect.succeed( + mockHandle({ + stdout: [ + ` 
${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, + ` 4242 ${process.pid} ${process.pid} S 1.5 2048 00:04 agent`, + ].join("\n"), + }), + ); + }), ); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); const diagnostics = yield* ProcessDiagnostics.readProcessDiagnostics().pipe( Effect.provide(layer), ); expect(diagnostics.processes.map((process) => process.pid)).toEqual([4242]); - expect(run).toHaveBeenCalledWith({ - operation: "ProcessDiagnostics.readPosixProcessRows", - command: "ps", - args: ["-axo", "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="], - cwd: process.cwd(), - timeoutMs: 1_000, - allowNonZeroExit: true, - maxOutputBytes: 2 * 1024 * 1024, - truncateOutputAtMaxBytes: true, - }); + expect(commands).toEqual([ + { + command: "ps", + args: ["-axo", "pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command="], + }, + ]); }), ); }); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index b55d7135899..cfe4c3447e3 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -5,11 +5,14 @@ import type { ServerSignalProcessResult, } from "@t3tools/contracts"; import * as Context from "effect/Context"; +import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as Schema from "effect/Schema"; +import { ChildProcess, ChildProcessSpawner } from "effect/unstable/process"; -import * as VcsProcess from "../vcs/VcsProcess.ts"; +import { collectUint8StreamText } from "../stream/collectUint8StreamText.ts"; interface ProcessRow { readonly pid: number; @@ -259,36 +262,84 @@ function makeResult(input: { }; } +interface ProcessOutput { + readonly exitCode: number; + readonly stdout: string; + readonly stderr: string; +} + +function runProcess( + spawner: 
ChildProcessSpawner.ChildProcessSpawner["Service"], + input: { + readonly command: string; + readonly args: ReadonlyArray; + readonly errorMessage: string; + }, +): Effect.Effect { + return Effect.gen(function* () { + const child = yield* spawner.spawn( + ChildProcess.make(input.command, input.args, { + cwd: process.cwd(), + shell: process.platform === "win32", + }), + ); + const [stdout, stderr, exitCode] = yield* Effect.all( + [ + collectUint8StreamText({ + stream: child.stdout, + maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, + truncatedMarker: "\n\n[truncated]", + }), + collectUint8StreamText({ + stream: child.stderr, + maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, + truncatedMarker: "\n\n[truncated]", + }), + child.exitCode.pipe(Effect.map(Number)), + ], + { concurrency: "unbounded" }, + ); + + return { + exitCode, + stdout: stdout.text, + stderr: stderr.text, + } satisfies ProcessOutput; + }).pipe( + Effect.scoped, + Effect.timeoutOption(Duration.millis(PROCESS_QUERY_TIMEOUT_MS)), + Effect.flatMap((result) => + Option.match(result, { + onNone: () => Effect.fail(toProcessDiagnosticsError(`${input.errorMessage} timed out.`)), + onSome: Effect.succeed, + }), + ), + Effect.mapError((cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? cause + : toProcessDiagnosticsError(input.errorMessage, cause), + ), + ); +} + function readPosixProcessRows( - vcsProcess: VcsProcess.VcsProcessShape, + spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], ): Effect.Effect, ProcessDiagnosticsError> { - return vcsProcess - .run({ - operation: "ProcessDiagnostics.readPosixProcessRows", - command: "ps", - args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], - cwd: process.cwd(), - timeoutMs: PROCESS_QUERY_TIMEOUT_MS, - allowNonZeroExit: true, - maxOutputBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncateOutputAtMaxBytes: true, - }) - .pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? 
Effect.fail(toProcessDiagnosticsError(result.stderr.trim() || "ps failed.")) - : Effect.succeed(parsePosixProcessRows(result.stdout)), - ), - Effect.mapError((cause) => - Schema.is(ProcessDiagnosticsError)(cause) - ? cause - : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), - ), - ); + return runProcess(spawner, { + command: "ps", + args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], + errorMessage: "Failed to query process diagnostics.", + }).pipe( + Effect.flatMap((result) => + result.exitCode !== 0 + ? Effect.fail(toProcessDiagnosticsError(result.stderr.trim() || "ps failed.")) + : Effect.succeed(parsePosixProcessRows(result.stdout)), + ), + ); } function readWindowsProcessRows( - vcsProcess: VcsProcess.VcsProcessShape, + spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], ): Effect.Effect, ProcessDiagnosticsError> { const command = [ "$processes = Get-CimInstance Win32_Process | ForEach-Object {", @@ -298,39 +349,27 @@ function readWindowsProcessRows( "$processes | ConvertTo-Json -Compress -Depth 3", ].join(" "); - return vcsProcess - .run({ - operation: "ProcessDiagnostics.readWindowsProcessRows", - command: "powershell.exe", - args: ["-NoProfile", "-NonInteractive", "-Command", command], - cwd: process.cwd(), - timeoutMs: PROCESS_QUERY_TIMEOUT_MS, - allowNonZeroExit: true, - maxOutputBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, - truncateOutputAtMaxBytes: true, - }) - .pipe( - Effect.flatMap((result) => - result.exitCode !== 0 - ? Effect.fail( - toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."), - ) - : Effect.succeed(parseWindowsProcessRows(result.stdout)), - ), - Effect.mapError((cause) => - Schema.is(ProcessDiagnosticsError)(cause) - ? 
cause - : toProcessDiagnosticsError("Failed to query process diagnostics.", cause), - ), - ); + return runProcess(spawner, { + command: "powershell.exe", + args: ["-NoProfile", "-NonInteractive", "-Command", command], + errorMessage: "Failed to query process diagnostics.", + }).pipe( + Effect.flatMap((result) => + result.exitCode !== 0 + ? Effect.fail( + toProcessDiagnosticsError(result.stderr.trim() || "PowerShell process query failed."), + ) + : Effect.succeed(parseWindowsProcessRows(result.stdout)), + ), + ); } function readProcessRows( - vcsProcess: VcsProcess.VcsProcessShape, + spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], ): Effect.Effect, ProcessDiagnosticsError> { return process.platform === "win32" - ? readWindowsProcessRows(vcsProcess) - : readPosixProcessRows(vcsProcess); + ? readWindowsProcessRows(spawner) + : readPosixProcessRows(spawner); } export function aggregateProcessDiagnostics(input: { @@ -342,14 +381,14 @@ export function aggregateProcessDiagnostics(input: { } function assertDescendantPid( - vcsProcess: VcsProcess.VcsProcessShape, + spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], pid: number, ): Effect.Effect { if (pid === process.pid) { return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); } - return readProcessRows(vcsProcess).pipe( + return readProcessRows(spawner).pipe( Effect.flatMap((rows) => { const descendant = buildDescendantEntries(rows, process.pid).some( (entry) => entry.pid === pid, @@ -364,9 +403,9 @@ function assertDescendantPid( } export const make = Effect.fn("makeProcessDiagnostics")(function* () { - const vcsProcess = yield* VcsProcess.VcsProcess; + const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - const read: ProcessDiagnosticsShape["read"] = readProcessRows(vcsProcess).pipe( + const read: ProcessDiagnosticsShape["read"] = readProcessRows(spawner).pipe( Effect.map((rows) => makeResult({ serverPid: process.pid, rows })), Effect.catch((error: 
ProcessDiagnosticsError) => Effect.succeed(makeResult({ serverPid: process.pid, rows: [], error: error.message })), @@ -375,7 +414,7 @@ export const make = Effect.fn("makeProcessDiagnostics")(function* () { const signal: ProcessDiagnosticsShape["signal"] = Effect.fn("ProcessDiagnostics.signal")( function* (input) { - return yield* assertDescendantPid(vcsProcess, input.pid).pipe( + return yield* assertDescendantPid(spawner, input.pid).pipe( Effect.flatMap(() => Effect.try({ try: () => { From 7859576f067205e43812a57ddf0bc4ddea0f9e8d Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 15:50:03 -0700 Subject: [PATCH 04/15] observe the mutation --- apps/server/src/observability/RpcInstrumentation.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index fe201231cbe..3ff0df1afdf 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -11,7 +11,6 @@ const DEFAULT_RPC_SPAN_ATTRIBUTES = { const RPC_METHODS_WITH_TRACING_DISABLED = new Set([ "server.getTraceDiagnostics", "server.getProcessDiagnostics", - "server.signalProcess", ]); function shouldTraceRpc(method: string): boolean { From 766070b8b1221c03e6d23f3fc671475f7b7e8bb0 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 16:06:10 -0700 Subject: [PATCH 05/15] Address diagnostics review feedback Co-authored-by: codex --- .../diagnostics/ProcessDiagnostics.test.ts | 31 +++++++++++++++++++ .../src/diagnostics/ProcessDiagnostics.ts | 3 +- .../src/diagnostics/TraceDiagnostics.test.ts | 29 +++++++++++++++++ .../src/diagnostics/TraceDiagnostics.ts | 24 +++++++++++--- 4 files changed, 82 insertions(+), 5 deletions(-) diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts index d3a9286f731..dfd114255f9 100644 --- 
a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -213,4 +213,35 @@ describe("ProcessDiagnostics", () => { ]); }), ); + + it.effect("does not allow signaling the diagnostics query process", () => + Effect.gen(function* () { + const spawnerLayer = Layer.succeed( + ChildProcessSpawner.ChildProcessSpawner, + ChildProcessSpawner.make(() => + Effect.succeed( + mockHandle({ + stdout: [ + ` ${process.pid} 1 ${process.pid} Ss 0.0 1024 01:02.03 t3 server`, + ` 4242 ${process.pid} ${process.pid} R 1.5 2048 00:00 ps -axo pid=,ppid=,pgid=,stat=,pcpu=,rss=,etime=,command=`, + ].join("\n"), + }), + ), + ), + ); + const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); + + const result = yield* ProcessDiagnostics.signalProcess({ + pid: 4242, + signal: "SIGINT", + }).pipe(Effect.provide(layer)); + + expect(result).toEqual({ + pid: 4242, + signal: "SIGINT", + signaled: false, + message: "Process 4242 is not a live descendant of the T3 server.", + }); + }), + ); }); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index cfe4c3447e3..65f4f31df1a 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -390,7 +390,8 @@ function assertDescendantPid( return readProcessRows(spawner).pipe( Effect.flatMap((rows) => { - const descendant = buildDescendantEntries(rows, process.pid).some( + const filteredRows = rows.filter((row) => !isDiagnosticsQueryProcess(row, process.pid)); + const descendant = buildDescendantEntries(filteredRows, process.pid).some( (entry) => entry.pid === pid, ); return descendant diff --git a/apps/server/src/diagnostics/TraceDiagnostics.test.ts b/apps/server/src/diagnostics/TraceDiagnostics.test.ts index d8861181c06..f0e011ee805 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.test.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.test.ts @@ 
-204,4 +204,33 @@ describe("TraceDiagnostics", () => { assert.deepStrictEqual(diagnostics.scannedFilePaths, [`${traceFilePath}.1`, traceFilePath]); }), ); + + it.effect("keeps only the slowest span occurrences while aggregating large inputs", () => + Effect.sync(() => { + const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ + traceFilePath: "/tmp/server.trace.ndjson", + files: [ + { + path: "/tmp/server.trace.ndjson", + text: Array.from({ length: 25 }, (_, index) => + record({ + name: `span-${index}`, + traceId: `trace-${index}`, + spanId: `span-${index}`, + startMs: index * 1_000, + durationMs: index, + }), + ).join("\n"), + }, + ], + }); + + assert.equal(diagnostics.recordCount, 25); + assert.equal(diagnostics.slowestSpans.length, 10); + assert.deepStrictEqual( + diagnostics.slowestSpans.map((span) => span.durationMs), + [24, 23, 22, 21, 20, 19, 18, 17, 16, 15], + ); + }), + ); }); diff --git a/apps/server/src/diagnostics/TraceDiagnostics.ts b/apps/server/src/diagnostics/TraceDiagnostics.ts index 01710bd2e5c..6d92db0f203 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.ts @@ -150,6 +150,24 @@ function platformErrorMessage(error: PlatformError.PlatformError): string { return error.message || String(error); } +function insertBoundedSlowestSpan( + slowestSpans: ServerTraceDiagnosticsSpanOccurrence[], + span: ServerTraceDiagnosticsSpanOccurrence, +): void { + if ( + slowestSpans.length >= TOP_LIMIT && + span.durationMs <= slowestSpans[slowestSpans.length - 1]!.durationMs + ) { + return; + } + + slowestSpans.push(span); + slowestSpans.sort((left, right) => right.durationMs - left.durationMs); + if (slowestSpans.length > TOP_LIMIT) { + slowestSpans.length = TOP_LIMIT; + } +} + export function aggregateTraceDiagnostics( input: TraceDiagnosticsInput, ): ServerTraceDiagnosticsResult { @@ -247,7 +265,7 @@ export function aggregateTraceDiagnostics( if (durationMs >= slowSpanThresholdMs) { 
slowSpanCount += 1; } - slowestSpans.push(spanItem); + insertBoundedSlowestSpan(slowestSpans, spanItem); if (isFailure) { const cause = readExitCause(parsed.exit); @@ -331,9 +349,7 @@ export function aggregateTraceDiagnostics( slowSpanCount, logLevelCounts, topSpansByCount, - slowestSpans: slowestSpans - .toSorted((left, right) => right.durationMs - left.durationMs) - .slice(0, TOP_LIMIT), + slowestSpans, commonFailures: [...failuresByKey.values()] .toSorted( (left, right) => From 87e3f64470a07883510590206ff46e0785873e9a Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 16:38:58 -0700 Subject: [PATCH 06/15] Inline process diagnostics service in websocket handlers - Refactor process diagnostic helpers to use the service directly - Remove wrapper functions and wire ws RPCs to the injected service --- .../src/diagnostics/ProcessDiagnostics.ts | 105 +++++++----------- apps/server/src/ws.ts | 21 ++-- 2 files changed, 49 insertions(+), 77 deletions(-) diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index 65f4f31df1a..de59d92cb50 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -268,15 +268,13 @@ interface ProcessOutput { readonly stderr: string; } -function runProcess( - spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], - input: { +const runProcess = Effect.fn("runProcess")( + function* (input: { readonly command: string; readonly args: ReadonlyArray; readonly errorMessage: string; - }, -): Effect.Effect { - return Effect.gen(function* () { + }) { + const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; const child = yield* spawner.spawn( ChildProcess.make(input.command, input.args, { cwd: process.cwd(), @@ -295,7 +293,7 @@ function runProcess( maxBytes: PROCESS_QUERY_MAX_OUTPUT_BYTES, truncatedMarker: "\n\n[truncated]", }), - child.exitCode.pipe(Effect.map(Number)), + child.exitCode, ], { 
concurrency: "unbounded" }, ); @@ -305,27 +303,31 @@ function runProcess( stdout: stdout.text, stderr: stderr.text, } satisfies ProcessOutput; - }).pipe( - Effect.scoped, - Effect.timeoutOption(Duration.millis(PROCESS_QUERY_TIMEOUT_MS)), - Effect.flatMap((result) => - Option.match(result, { - onNone: () => Effect.fail(toProcessDiagnosticsError(`${input.errorMessage} timed out.`)), - onSome: Effect.succeed, - }), - ), - Effect.mapError((cause) => - Schema.is(ProcessDiagnosticsError)(cause) - ? cause - : toProcessDiagnosticsError(input.errorMessage, cause), + }, + (effect, input) => + effect.pipe( + Effect.scoped, + Effect.timeoutOption(Duration.millis(PROCESS_QUERY_TIMEOUT_MS)), + Effect.flatMap((result) => + Option.match(result, { + onNone: () => Effect.fail(toProcessDiagnosticsError(`${input.errorMessage} timed out.`)), + onSome: Effect.succeed, + }), + ), + Effect.mapError((cause) => + Schema.is(ProcessDiagnosticsError)(cause) + ? cause + : toProcessDiagnosticsError(input.errorMessage, cause), + ), ), - ); -} +); -function readPosixProcessRows( - spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], -): Effect.Effect, ProcessDiagnosticsError> { - return runProcess(spawner, { +function readPosixProcessRows(): Effect.Effect< + ReadonlyArray, + ProcessDiagnosticsError, + ChildProcessSpawner.ChildProcessSpawner +> { + return runProcess({ command: "ps", args: ["-axo", POSIX_PROCESS_QUERY_COMMAND], errorMessage: "Failed to query process diagnostics.", @@ -338,9 +340,11 @@ function readPosixProcessRows( ); } -function readWindowsProcessRows( - spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], -): Effect.Effect, ProcessDiagnosticsError> { +function readWindowsProcessRows(): Effect.Effect< + ReadonlyArray, + ProcessDiagnosticsError, + ChildProcessSpawner.ChildProcessSpawner +> { const command = [ "$processes = Get-CimInstance Win32_Process | ForEach-Object {", '$perf = Get-CimInstance Win32_PerfFormattedData_PerfProc_Process -Filter "IDProcess = 
$($_.ProcessId)" -ErrorAction SilentlyContinue;', @@ -349,7 +353,7 @@ function readWindowsProcessRows( "$processes | ConvertTo-Json -Compress -Depth 3", ].join(" "); - return runProcess(spawner, { + return runProcess({ command: "powershell.exe", args: ["-NoProfile", "-NonInteractive", "-Command", command], errorMessage: "Failed to query process diagnostics.", @@ -364,13 +368,8 @@ function readWindowsProcessRows( ); } -function readProcessRows( - spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], -): Effect.Effect, ProcessDiagnosticsError> { - return process.platform === "win32" - ? readWindowsProcessRows(spawner) - : readPosixProcessRows(spawner); -} +const readProcessRows = (platform = process.platform) => + platform === "win32" ? readWindowsProcessRows() : readPosixProcessRows(); export function aggregateProcessDiagnostics(input: { readonly serverPid: number; @@ -381,14 +380,13 @@ export function aggregateProcessDiagnostics(input: { } function assertDescendantPid( - spawner: ChildProcessSpawner.ChildProcessSpawner["Service"], pid: number, -): Effect.Effect { +): Effect.Effect { if (pid === process.pid) { return Effect.fail(toProcessDiagnosticsError("Refusing to signal the T3 server process.")); } - return readProcessRows(spawner).pipe( + return readProcessRows().pipe( Effect.flatMap((rows) => { const filteredRows = rows.filter((row) => !isDiagnosticsQueryProcess(row, process.pid)); const descendant = buildDescendantEntries(filteredRows, process.pid).some( @@ -406,7 +404,8 @@ function assertDescendantPid( export const make = Effect.fn("makeProcessDiagnostics")(function* () { const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - const read: ProcessDiagnosticsShape["read"] = readProcessRows(spawner).pipe( + const read: ProcessDiagnosticsShape["read"] = readProcessRows().pipe( + Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), Effect.map((rows) => makeResult({ serverPid: process.pid, rows })), Effect.catch((error: 
ProcessDiagnosticsError) => Effect.succeed(makeResult({ serverPid: process.pid, rows: [], error: error.message })), @@ -415,7 +414,8 @@ export const make = Effect.fn("makeProcessDiagnostics")(function* () { const signal: ProcessDiagnosticsShape["signal"] = Effect.fn("ProcessDiagnostics.signal")( function* (input) { - return yield* assertDescendantPid(spawner, input.pid).pipe( + return yield* assertDescendantPid(input.pid).pipe( + Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), Effect.flatMap(() => Effect.try({ try: () => { @@ -449,24 +449,3 @@ export const make = Effect.fn("makeProcessDiagnostics")(function* () { }); export const layer = Layer.effect(ProcessDiagnostics, make()); - -export function readProcessDiagnostics(): Effect.Effect< - ServerProcessDiagnosticsResult, - never, - ProcessDiagnostics -> { - return Effect.gen(function* () { - const diagnostics = yield* ProcessDiagnostics; - return yield* diagnostics.read; - }); -} - -export function signalProcess(input: { - readonly pid: number; - readonly signal: ServerProcessSignal; -}): Effect.Effect { - return Effect.gen(function* () { - const diagnostics = yield* ProcessDiagnostics; - return yield* diagnostics.signal(input); - }); -} diff --git a/apps/server/src/ws.ts b/apps/server/src/ws.ts index 28b05ec63d5..476140dd3ae 100644 --- a/apps/server/src/ws.ts +++ b/apps/server/src/ws.ts @@ -170,6 +170,7 @@ const makeWsRpcLayer = (currentSessionId: AuthSessionId) => const sourceControlRepositories = yield* SourceControlRepositoryService; const bootstrapCredentials = yield* BootstrapCredentialService; const sessions = yield* SessionCredentialService; + const processDiagnostics = yield* ProcessDiagnostics.ProcessDiagnostics; const serverCommandId = (tag: string) => CommandId.make(`server:${tag}:${crypto.randomUUID()}`); @@ -851,21 +852,13 @@ const makeWsRpcLayer = (currentSessionId: AuthSessionId) => }, ), [WS_METHODS.serverGetProcessDiagnostics]: (_input) => - observeRpcEffect( - 
WS_METHODS.serverGetProcessDiagnostics, - ProcessDiagnostics.readProcessDiagnostics(), - { - "rpc.aggregate": "server", - }, - ), + observeRpcEffect(WS_METHODS.serverGetProcessDiagnostics, processDiagnostics.read, { + "rpc.aggregate": "server", + }), [WS_METHODS.serverSignalProcess]: (input) => - observeRpcEffect( - WS_METHODS.serverSignalProcess, - ProcessDiagnostics.signalProcess(input), - { - "rpc.aggregate": "server", - }, - ), + observeRpcEffect(WS_METHODS.serverSignalProcess, processDiagnostics.signal(input), { + "rpc.aggregate": "server", + }), [WS_METHODS.sourceControlLookupRepository]: (input) => observeRpcEffect( WS_METHODS.sourceControlLookupRepository, From ee88b9a2171820e828d2d611c0c55ec7aacde374 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 16:49:29 -0700 Subject: [PATCH 07/15] Update process diagnostics test to use service interface - Switch tests to access `ProcessDiagnostics` via the service - Exercise the instance methods for read and signal --- .../server/src/diagnostics/ProcessDiagnostics.test.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts index dfd114255f9..5567ab0d758 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -200,7 +200,8 @@ describe("ProcessDiagnostics", () => { ); const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); - const diagnostics = yield* ProcessDiagnostics.readProcessDiagnostics().pipe( + const diagnostics = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( + Effect.flatMap((pd) => pd.read), Effect.provide(layer), ); @@ -231,10 +232,10 @@ describe("ProcessDiagnostics", () => { ); const layer = ProcessDiagnostics.layer.pipe(Layer.provide(spawnerLayer)); - const result = yield* ProcessDiagnostics.signalProcess({ - pid: 4242, - signal: "SIGINT", 
- }).pipe(Effect.provide(layer)); + const result = yield* Effect.service(ProcessDiagnostics.ProcessDiagnostics).pipe( + Effect.flatMap((pd) => pd.signal({ pid: 4242, signal: "SIGINT" })), + Effect.provide(layer), + ); expect(result).toEqual({ pid: 4242, From 9df6ecbc167f541f5773e7b160164884b5e6946c Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 17:03:03 -0700 Subject: [PATCH 08/15] Route diagnostics out of settings overview - Replace the inline diagnostics section with a diagnostics link - Update browser coverage for the router-backed diagnostics panel --- .../settings/SettingsPanels.browser.tsx | 73 +++++++++++++++++-- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/apps/web/src/components/settings/SettingsPanels.browser.tsx b/apps/web/src/components/settings/SettingsPanels.browser.tsx index 9249ff69778..68515f71977 100644 --- a/apps/web/src/components/settings/SettingsPanels.browser.tsx +++ b/apps/web/src/components/settings/SettingsPanels.browser.tsx @@ -20,15 +20,40 @@ import { DateTime, Option } from "effect"; import { page } from "vitest/browser"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { render } from "vitest-browser-react"; +import type { ReactNode } from "react"; +import { + RouterProvider, + createMemoryHistory, + createRootRoute, + createRoute, + createRouter, +} from "@tanstack/react-router"; import { __resetLocalApiForTests } from "../../localApi"; import { AppAtomRegistryProvider, resetAppAtomRegistryForTests } from "../../rpc/atomRegistry"; import { resetServerStateForTests, setServerConfigSnapshot } from "../../rpc/serverState"; import { useUiStateStore } from "../../uiStateStore"; import { ConnectionsSettings } from "./ConnectionsSettings"; +import { DiagnosticsSettingsPanel } from "./DiagnosticsSettings"; import { GeneralSettingsPanel, ProviderSettingsPanel } from "./SettingsPanels"; import { SourceControlSettingsPanel } from "./SourceControlSettings"; +function 
renderWithTestRouter(children: ReactNode) { + const rootRoute = createRootRoute({ + component: () => children, + }); + const indexRoute = createRoute({ + getParentRoute: () => rootRoute, + path: "/", + }); + const router = createRouter({ + routeTree: rootRoute.addChildren([indexRoute]), + history: createMemoryHistory({ initialEntries: ["/"] }), + }); + + return render(); +} + const authAccessHarness = vi.hoisted(() => { type Snapshot = AuthAccessSnapshot; let snapshot: Snapshot = { @@ -694,25 +719,24 @@ describe("GeneralSettingsPanel observability", () => { await expect.element(page.getByText("http://127.0.0.1:3773/").first()).toBeInTheDocument(); }); - it("shows diagnostics inside About with a single logs-folder action", async () => { + it("shows diagnostics inside About with a diagnostics link", async () => { setServerConfigSnapshot(createBaseServerConfig()); - mounted = await render( + mounted = await renderWithTestRouter( , ); await expect.element(page.getByText("About")).toBeInTheDocument(); - await expect.element(page.getByText("Diagnostics")).toBeInTheDocument(); - await expect.element(page.getByText("Open logs folder")).toBeInTheDocument(); await expect - .element(page.getByText("/repo/project/.t3/logs", { exact: true })) + .element(page.getByRole("heading", { name: "Diagnostics", exact: true })) .toBeInTheDocument(); + await expect.element(page.getByRole("link", { name: "View diagnostics" })).toBeInTheDocument(); await expect .element( page.getByText( - "Local trace file. OTLP exporting traces to http://localhost:4318/v1/traces.", + "Local trace file. 
Exporting OTEL traces to http://localhost:4318/v1/traces.", ), ) .toBeInTheDocument(); @@ -1020,20 +1044,53 @@ describe("GeneralSettingsPanel observability", () => { it("opens the logs folder in the preferred editor", async () => { const openInEditor = vi.fn().mockResolvedValue(undefined); window.nativeApi = { + persistence: { + getClientSettings: vi.fn().mockResolvedValue(null), + setClientSettings: vi.fn().mockResolvedValue(undefined), + }, shell: { openInEditor, }, + server: { + getProcessDiagnostics: vi.fn().mockResolvedValue({ + serverPid: 1234, + readAt: "2036-04-07T00:00:00.000Z", + processCount: 0, + totalRssBytes: 0, + totalCpuPercent: 0, + processes: [], + }), + getTraceDiagnostics: vi.fn().mockResolvedValue({ + traceFilePath: "/repo/project/.t3/traces.jsonl", + scannedFilePaths: ["/repo/project/.t3/traces.jsonl"], + readAt: "2036-04-07T00:00:00.000Z", + recordCount: 0, + parseErrorCount: 0, + firstSpanAt: null, + lastSpanAt: null, + failureCount: 0, + interruptionCount: 0, + slowSpanThresholdMs: 5_000, + slowSpanCount: 0, + logLevelCounts: {}, + topSpansByCount: [], + slowestSpans: [], + commonFailures: [], + latestFailures: [], + latestWarningAndErrorLogs: [], + }), + }, } as unknown as LocalApi; setServerConfigSnapshot(createBaseServerConfig()); mounted = await render( - + , ); - const openLogsButton = page.getByText("Open logs folder"); + const openLogsButton = page.getByLabelText("Open logs folder"); await openLogsButton.click(); expect(openInEditor).toHaveBeenCalledWith("/repo/project/.t3/logs", "cursor"); From 405aceae173d3e4ef7e799a025a4f7b9b83b0aa1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 6 May 2026 00:03:21 +0000 Subject: [PATCH 09/15] fix: use WS_METHODS constants and add serverSignalProcess to tracing disabled set - Replace hardcoded string literals with WS_METHODS constants from @t3tools/contracts to prevent drift if method names change. 
- Add serverSignalProcess to RPC_METHODS_WITH_TRACING_DISABLED so diagnostic signal actions don't pollute the trace diagnostics view. --- apps/server/src/observability/RpcInstrumentation.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index 3ff0df1afdf..106915bbbc3 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -1,3 +1,4 @@ +import { WS_METHODS } from "@t3tools/contracts"; import { Duration, Effect, Exit, Metric, Stream } from "effect"; import { outcomeFromExit } from "./Attributes.ts"; @@ -9,8 +10,9 @@ const DEFAULT_RPC_SPAN_ATTRIBUTES = { "rpc.system": "effect-rpc", } as const; const RPC_METHODS_WITH_TRACING_DISABLED = new Set([ - "server.getTraceDiagnostics", - "server.getProcessDiagnostics", + WS_METHODS.serverGetTraceDiagnostics, + WS_METHODS.serverGetProcessDiagnostics, + WS_METHODS.serverSignalProcess, ]); function shouldTraceRpc(method: string): boolean { From 7e101efa86bfaee25a25dd221080255769b26ffc Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 17:08:52 -0700 Subject: [PATCH 10/15] Add keybindings settings editor (#2533) Co-authored-by: codex --- apps/web/src/components/settings/SettingsPanels.tsx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/apps/web/src/components/settings/SettingsPanels.tsx b/apps/web/src/components/settings/SettingsPanels.tsx index 94327452680..ee75fba5d06 100644 --- a/apps/web/src/components/settings/SettingsPanels.tsx +++ b/apps/web/src/components/settings/SettingsPanels.tsx @@ -78,10 +78,7 @@ import { useRelativeTimeTick, } from "./settingsLayout"; import { ProjectFavicon } from "../ProjectFavicon"; -import { - useServerObservability, - useServerProviders, -} from "../../rpc/serverState"; +import { useServerObservability, useServerProviders } from "../../rpc/serverState"; const 
THEME_OPTIONS = [ { From f37e8aa7e6064ab801cff2e0980ae42accce7621 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 17:20:21 -0700 Subject: [PATCH 11/15] Type RPC tracing disable set as readonly Co-authored-by: codex --- apps/server/src/observability/RpcInstrumentation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index 106915bbbc3..34a8ebdbef4 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -9,7 +9,7 @@ const DEFAULT_RPC_SPAN_ATTRIBUTES = { "rpc.transport": "websocket", "rpc.system": "effect-rpc", } as const; -const RPC_METHODS_WITH_TRACING_DISABLED = new Set([ +const RPC_METHODS_WITH_TRACING_DISABLED: ReadonlySet = new Set([ WS_METHODS.serverGetTraceDiagnostics, WS_METHODS.serverGetProcessDiagnostics, WS_METHODS.serverSignalProcess, From bd1fa8cd510377ab2d8707f37830f9c2338266f4 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 6 May 2026 00:53:10 +0000 Subject: [PATCH 12/15] Fix redundant span annotations and premature stream span closure - Remove annotateRpcSpan helper that duplicated traceAttributes already set by Effect.withSpan at span creation time. Consolidate all span attributes (including rpc.method) into a single rpcSpanAttributes helper used at span creation. - Replace Effect.withSpan wrapping the setup effect inside Stream.unwrap with Stream.withSpan wrapping the entire stream in observeRpcStream and observeRpcStreamEffect. This ensures spans cover the full stream lifetime rather than closing immediately after setup. 
Applied via @cursor push command --- .../src/observability/RpcInstrumentation.ts | 89 +++++++++---------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index 34a8ebdbef4..65d0594fc4f 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -19,14 +19,14 @@ function shouldTraceRpc(method: string): boolean { return !RPC_METHODS_WITH_TRACING_DISABLED.has(method); } -const annotateRpcSpan = ( +const rpcSpanAttributes = ( method: string, traceAttributes?: Readonly>, -): Effect.Effect => - Effect.annotateCurrentSpan({ - "rpc.method": method, - ...traceAttributes, - }); +): Record => ({ + ...DEFAULT_RPC_SPAN_ATTRIBUTES, + "rpc.method": method, + ...traceAttributes, +}); const recordRpcStreamMetrics = ( method: string, @@ -55,27 +55,20 @@ export const observeRpcEffect = ( effect: Effect.Effect, traceAttributes?: Readonly>, ): Effect.Effect => { - const instrumented = Effect.gen(function* () { - yield* annotateRpcSpan(method, traceAttributes); - - return yield* effect.pipe( - withMetrics({ - counter: rpcRequestsTotal, - timer: rpcRequestDuration, - attributes: { - method, - }, - }), - ); - }); + const instrumented = effect.pipe( + withMetrics({ + counter: rpcRequestsTotal, + timer: rpcRequestDuration, + attributes: { + method, + }, + }), + ); return shouldTraceRpc(method) ? 
instrumented.pipe( Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { - attributes: { - ...DEFAULT_RPC_SPAN_ATTRIBUTES, - ...traceAttributes, - }, + attributes: rpcSpanAttributes(method, traceAttributes), }), ) : instrumented.pipe(Effect.withTracerEnabled(false)); @@ -85,32 +78,30 @@ export const observeRpcStream = ( method: string, stream: Stream.Stream, traceAttributes?: Readonly>, -): Stream.Stream => - Stream.unwrap( +): Stream.Stream => { + const instrumented = Stream.unwrap( Effect.gen(function* () { - yield* annotateRpcSpan(method, traceAttributes); const startedAt = Date.now(); return stream.pipe(Stream.onExit((exit) => recordRpcStreamMetrics(method, startedAt, exit))); - }).pipe( - shouldTraceRpc(method) - ? Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { - attributes: { - ...DEFAULT_RPC_SPAN_ATTRIBUTES, - ...traceAttributes, - }, - }) - : Effect.withTracerEnabled(false), - ), + }), ); + return shouldTraceRpc(method) + ? instrumented.pipe( + Stream.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: rpcSpanAttributes(method, traceAttributes), + }), + ) + : instrumented; +}; + export const observeRpcStreamEffect = ( method: string, effect: Effect.Effect, EffectError, EffectContext>, traceAttributes?: Readonly>, -): Stream.Stream => - Stream.unwrap( +): Stream.Stream => { + const instrumented = Stream.unwrap( Effect.gen(function* () { - yield* annotateRpcSpan(method, traceAttributes); const startedAt = Date.now(); const exit = yield* Effect.exit(effect); @@ -122,14 +113,14 @@ export const observeRpcStreamEffect = recordRpcStreamMetrics(method, startedAt, streamExit)), ); - }).pipe( - shouldTraceRpc(method) - ? Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { - attributes: { - ...DEFAULT_RPC_SPAN_ATTRIBUTES, - ...traceAttributes, - }, - }) - : Effect.withTracerEnabled(false), - ), + }), ); + + return shouldTraceRpc(method) + ? 
instrumented.pipe( + Stream.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: rpcSpanAttributes(method, traceAttributes), + }), + ) + : instrumented; +}; From 694f23412278bf5b5aee3b2c6af8df1fa817e21e Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 21:00:29 -0700 Subject: [PATCH 13/15] Use nanosecond timing for RPC metrics and tracing - Record RPC and metrics durations from `Clock.currentTimeNanos` - Add coverage for nanosecond timing and tracing enablement --- apps/server/src/observability/Metrics.test.ts | 46 +++++- apps/server/src/observability/Metrics.ts | 8 +- .../observability/RpcInstrumentation.test.ts | 148 +++++++++++++++++- .../src/observability/RpcInstrumentation.ts | 63 +++++--- 4 files changed, 234 insertions(+), 31 deletions(-) diff --git a/apps/server/src/observability/Metrics.test.ts b/apps/server/src/observability/Metrics.test.ts index b5eeedaaa43..57bfebaeceb 100644 --- a/apps/server/src/observability/Metrics.test.ts +++ b/apps/server/src/observability/Metrics.test.ts @@ -1,6 +1,7 @@ import { assert, describe, it } from "@effect/vitest"; import { ProviderDriverKind } from "@t3tools/contracts"; -import { Effect, Metric } from "effect"; +import { Duration, Effect, Fiber, Metric } from "effect"; +import { TestClock } from "effect/testing"; import { withMetrics } from "./Metrics.ts"; @@ -15,6 +16,18 @@ const hasMetricSnapshot = ( Object.entries(attributes).every(([key, value]) => snapshot.attributes?.[key] === value), ); +const findHistogramSnapshot = ( + snapshots: ReadonlyArray, + id: string, + attributes: Readonly>, +) => + snapshots.find( + (snapshot): snapshot is Extract => + snapshot.type === "Histogram" && + snapshot.id === id && + Object.entries(attributes).every(([key, value]) => snapshot.attributes?.[key] === value), + ); + describe("withMetrics", () => { it.effect("supports pipe-style usage", () => Effect.gen(function* () { @@ -110,4 +123,35 @@ describe("withMetrics", () => { ); }), ); + + it.effect("records timer 
durations from nanosecond clock readings", () => + Effect.gen(function* () { + const duration = Duration.nanos(1_500_000n); + const timer = Metric.timer("with_metrics_nanos_duration"); + + yield* Effect.gen(function* () { + const fiber = yield* Effect.sleep(duration).pipe( + withMetrics({ + timer, + attributes: { + operation: "nanos", + }, + }), + Effect.forkChild, + ); + + yield* Effect.yieldNow; + yield* TestClock.adjust(duration); + yield* Fiber.join(fiber); + }).pipe(Effect.provide(TestClock.layer())); + + const snapshots = yield* Metric.snapshot; + const snapshot = findHistogramSnapshot(snapshots, "with_metrics_nanos_duration", { + operation: "nanos", + }); + + assert.equal(snapshot?.state.count, 1); + assert.equal(snapshot?.state.sum, 1.5); + }), + ); }); diff --git a/apps/server/src/observability/Metrics.ts b/apps/server/src/observability/Metrics.ts index 3e527c7cb45..976bf7ccdb7 100644 --- a/apps/server/src/observability/Metrics.ts +++ b/apps/server/src/observability/Metrics.ts @@ -1,4 +1,4 @@ -import { Duration, Effect, Exit, Metric } from "effect"; +import { Clock, Duration, Effect, Exit, Metric } from "effect"; import { dual } from "effect/Function"; import { @@ -96,9 +96,11 @@ const withMetricsImpl = ( options: WithMetricsOptions, ): Effect.Effect => Effect.gen(function* () { - const startedAt = Date.now(); + const startedAt = yield* Clock.currentTimeNanos; const exit = yield* Effect.exit(effect); - const duration = Duration.millis(Math.max(0, Date.now() - startedAt)); + const endedAt = yield* Clock.currentTimeNanos; + const elapsedNanos = endedAt > startedAt ? endedAt - startedAt : 0n; + const duration = Duration.nanos(elapsedNanos); const baseAttributes = typeof options.attributes === "function" ? options.attributes() : (options.attributes ?? 
{}); diff --git a/apps/server/src/observability/RpcInstrumentation.test.ts b/apps/server/src/observability/RpcInstrumentation.test.ts index d29b05f3c2b..b0aa7c874f4 100644 --- a/apps/server/src/observability/RpcInstrumentation.test.ts +++ b/apps/server/src/observability/RpcInstrumentation.test.ts @@ -1,5 +1,7 @@ import { assert, describe, it } from "@effect/vitest"; -import { Effect, Exit, Metric, Stream } from "effect"; +import { WS_METHODS } from "@t3tools/contracts"; +import { Duration, Effect, Exit, Fiber, Metric, Stream, Tracer } from "effect"; +import { TestClock } from "effect/testing"; import { observeRpcEffect, @@ -18,6 +20,44 @@ const hasMetricSnapshot = ( Object.entries(attributes).every(([key, value]) => snapshot.attributes?.[key] === value), ); +const findHistogramSnapshot = ( + snapshots: ReadonlyArray, + id: string, + attributes: Readonly>, +) => + snapshots.find( + (snapshot): snapshot is Extract => + snapshot.type === "Histogram" && + snapshot.id === id && + Object.entries(attributes).every(([key, value]) => snapshot.attributes?.[key] === value), + ); + +const collectSpanNames = ( + effect: Effect.Effect, +): Effect.Effect, E, R> => + Effect.gen(function* () { + const spanNames: Array = []; + const tracer = Tracer.make({ + span: (options) => { + const span = new Tracer.NativeSpan(options); + const end = span.end.bind(span); + + span.end = (endTime, exit) => { + end(endTime, exit); + if (span.sampled) { + spanNames.push(span.name); + } + }; + + return span; + }, + }); + + yield* effect.pipe(Effect.withTracer(tracer)); + + return spanNames; + }); + describe("RpcInstrumentation", () => { it.effect("records success metrics for unary RPC handlers", () => Effect.gen(function* () { @@ -129,6 +169,37 @@ describe("RpcInstrumentation", () => { }), ); + it.effect("records direct stream durations from nanosecond clock readings", () => + Effect.gen(function* () { + const duration = Duration.nanos(1_500_000n); + const events = yield* Effect.gen(function* () { + 
const fiber = yield* Stream.runCollect( + observeRpcStream( + WS_METHODS.serverGetProcessDiagnostics, + Stream.fromEffect(Effect.sleep(duration).pipe(Effect.as("ok"))), + { + "rpc.aggregate": "test", + }, + ), + ).pipe(Effect.forkChild); + + yield* Effect.yieldNow; + yield* TestClock.adjust(duration); + return yield* Fiber.join(fiber); + }).pipe(Effect.provide(TestClock.layer())); + + assert.deepStrictEqual(Array.from(events), ["ok"]); + + const snapshots = yield* Metric.snapshot; + const snapshot = findHistogramSnapshot(snapshots, "t3_rpc_request_duration", { + method: WS_METHODS.serverGetProcessDiagnostics, + }); + + assert.equal(snapshot?.state.count, 1); + assert.equal(snapshot?.state.sum, 1.5); + }), + ); + it.effect("records failure outcomes when a stream RPC effect produces a failing stream", () => Effect.gen(function* () { const exit = yield* Stream.runCollect( @@ -158,4 +229,79 @@ describe("RpcInstrumentation", () => { ); }), ); + + it.effect("records spans for traced stream RPC handlers", () => + Effect.gen(function* () { + const spanNames = yield* collectSpanNames( + Stream.runCollect( + observeRpcStream( + "rpc.instrumentation.traced.stream", + Stream.fromEffect( + Effect.succeed("ok").pipe(Effect.withSpan("rpc.instrumentation.traced.stream.child")), + ), + { "rpc.aggregate": "test" }, + ), + ), + ); + + assert.equal(spanNames.includes("ws.rpc.rpc.instrumentation.traced.stream"), true); + assert.equal(spanNames.includes("rpc.instrumentation.traced.stream.child"), true); + }), + ); + + it.effect("does not create spans for disabled unary RPC handlers", () => + Effect.gen(function* () { + const spanNames = yield* collectSpanNames( + observeRpcEffect( + WS_METHODS.serverGetTraceDiagnostics, + Effect.succeed("ok").pipe(Effect.withSpan("rpc.instrumentation.disabled.unary.child")), + { "rpc.aggregate": "test" }, + ), + ); + + assert.deepStrictEqual(spanNames, []); + }), + ); + + it.effect("does not create spans for disabled direct stream RPC handlers", () => + 
Effect.gen(function* () { + const spanNames = yield* collectSpanNames( + Stream.runCollect( + observeRpcStream( + WS_METHODS.serverGetTraceDiagnostics, + Stream.fromEffect( + Effect.succeed("ok").pipe( + Effect.withSpan("rpc.instrumentation.disabled.stream.child"), + ), + ), + { "rpc.aggregate": "test" }, + ), + ), + ); + + assert.deepStrictEqual(spanNames, []); + }), + ); + + it.effect("does not create spans for disabled stream effect RPC handlers", () => + Effect.gen(function* () { + const spanNames = yield* collectSpanNames( + Stream.runCollect( + observeRpcStreamEffect( + WS_METHODS.serverGetTraceDiagnostics, + Effect.succeed( + Stream.fromEffect( + Effect.succeed("ok").pipe( + Effect.withSpan("rpc.instrumentation.disabled.stream.effect.consume"), + ), + ), + ).pipe(Effect.withSpan("rpc.instrumentation.disabled.stream.effect.create")), + { "rpc.aggregate": "test" }, + ), + ), + ); + + assert.deepStrictEqual(spanNames, []); + }), + ); }); diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index 65d0594fc4f..1f0635b87a4 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -1,5 +1,5 @@ import { WS_METHODS } from "@t3tools/contracts"; -import { Duration, Effect, Exit, Metric, Stream } from "effect"; +import { Clock, Duration, Effect, Exit, Metric, References, Stream } from "effect"; import { outcomeFromExit } from "./Attributes.ts"; import { metricAttributes, rpcRequestDuration, rpcRequestsTotal, withMetrics } from "./Metrics.ts"; @@ -28,15 +28,44 @@ const rpcSpanAttributes = ( ...traceAttributes, }); +const withRpcEffectTracing = ( + method: string, + effect: Effect.Effect, + traceAttributes?: Readonly>, +): Effect.Effect => + shouldTraceRpc(method) + ? 
effect.pipe( + Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: rpcSpanAttributes(method, traceAttributes), + }), + ) + : effect.pipe(Effect.withTracerEnabled(false)); + +const withRpcStreamTracing = ( + method: string, + stream: Stream.Stream, + traceAttributes?: Readonly>, +): Stream.Stream => + shouldTraceRpc(method) + ? stream.pipe( + Stream.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { + attributes: rpcSpanAttributes(method, traceAttributes), + }), + ) + : stream.pipe(Stream.provideService(References.TracerEnabled, false)); + const recordRpcStreamMetrics = ( method: string, - startedAt: number, + startedAt: bigint, exit: Exit.Exit, ): Effect.Effect => Effect.gen(function* () { + const endedAt = yield* Clock.currentTimeNanos; + const elapsedNanos = endedAt > startedAt ? endedAt - startedAt : 0n; + yield* Metric.update( Metric.withAttributes(rpcRequestDuration, metricAttributes({ method })), - Duration.millis(Math.max(0, Date.now() - startedAt)), + Duration.nanos(elapsedNanos), ); yield* Metric.update( Metric.withAttributes( @@ -65,13 +94,7 @@ export const observeRpcEffect = ( }), ); - return shouldTraceRpc(method) - ? instrumented.pipe( - Effect.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { - attributes: rpcSpanAttributes(method, traceAttributes), - }), - ) - : instrumented.pipe(Effect.withTracerEnabled(false)); + return withRpcEffectTracing(method, instrumented, traceAttributes); }; export const observeRpcStream = ( @@ -81,18 +104,12 @@ export const observeRpcStream = ( ): Stream.Stream => { const instrumented = Stream.unwrap( Effect.gen(function* () { - const startedAt = Date.now(); + const startedAt = yield* Clock.currentTimeNanos; return stream.pipe(Stream.onExit((exit) => recordRpcStreamMetrics(method, startedAt, exit))); }), ); - return shouldTraceRpc(method) - ? 
instrumented.pipe( - Stream.withSpan(`${RPC_SPAN_PREFIX}.${method}`, { - attributes: rpcSpanAttributes(method, traceAttributes), - }), - ) - : instrumented; + return withRpcStreamTracing(method, instrumented, traceAttributes); }; export const observeRpcStreamEffect = ( @@ -102,7 +119,7 @@ export const observeRpcStreamEffect = => { const instrumented = Stream.unwrap( Effect.gen(function* () { - const startedAt = Date.now(); + const startedAt = yield* Clock.currentTimeNanos; const exit = yield* Effect.exit(effect); if (Exit.isFailure(exit)) { @@ -116,11 +133,5 @@ export const observeRpcStreamEffect = Date: Tue, 5 May 2026 21:05:05 -0700 Subject: [PATCH 14/15] nit --- apps/server/src/observability/RpcInstrumentation.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/server/src/observability/RpcInstrumentation.ts b/apps/server/src/observability/RpcInstrumentation.ts index 1f0635b87a4..c03e1c2b8a8 100644 --- a/apps/server/src/observability/RpcInstrumentation.ts +++ b/apps/server/src/observability/RpcInstrumentation.ts @@ -39,7 +39,7 @@ const withRpcEffectTracing = ( attributes: rpcSpanAttributes(method, traceAttributes), }), ) - : effect.pipe(Effect.withTracerEnabled(false)); + : effect.pipe(Effect.provideService(References.TracerEnabled, false)); const withRpcStreamTracing = ( method: string, From fdb485d45965b2b2271753706503194ea0ecd6e6 Mon Sep 17 00:00:00 2001 From: Julius Marminge Date: Tue, 5 May 2026 21:19:13 -0700 Subject: [PATCH 15/15] Adopt DateTime and Option in diagnostics contracts - switch process and trace diagnostics to `DateTime.Utc` - represent optional diagnostics fields with `Option` - update UI and tests for the new shapes --- .../diagnostics/ProcessDiagnostics.test.ts | 10 +- .../src/diagnostics/ProcessDiagnostics.ts | 32 +++--- .../src/diagnostics/TraceDiagnostics.test.ts | 34 +++++-- .../src/diagnostics/TraceDiagnostics.ts | 98 +++++++++++-------- apps/server/src/server.test.ts | 12 ++- .../settings/DiagnosticsSettings.tsx | 
44 +++++---- .../settings/SettingsPanels.browser.tsx | 11 ++- packages/contracts/src/server.ts | 26 ++--- 8 files changed, 168 insertions(+), 99 deletions(-) diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts index 5567ab0d758..18a54326de1 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.test.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.test.ts @@ -1,6 +1,8 @@ import { describe, expect, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as Sink from "effect/Sink"; import * as Stream from "effect/Stream"; import { ChildProcessSpawner } from "effect/unstable/process"; @@ -68,7 +70,7 @@ describe("ProcessDiagnostics", () => { Effect.sync(() => { const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ serverPid: 100, - readAt: new Date("2026-05-05T10:00:00.000Z"), + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), rows: [ { pid: 100, @@ -124,12 +126,13 @@ describe("ProcessDiagnostics", () => { }); expect(diagnostics.serverPid).toBe(100); - expect(diagnostics.readAt).toBe("2026-05-05T10:00:00.000Z"); + expect(DateTime.formatIso(diagnostics.readAt)).toBe("2026-05-05T10:00:00.000Z"); expect(diagnostics.processCount).toBe(2); expect(diagnostics.totalRssBytes).toBe(6_000); expect(diagnostics.totalCpuPercent).toBe(4.75); expect(diagnostics.processes.map((process) => process.pid)).toEqual([101, 102]); expect(diagnostics.processes.map((process) => process.depth)).toEqual([0, 1]); + expect(Option.getOrNull(diagnostics.processes[0]!.pgid)).toBe(100); expect(diagnostics.processes[0]?.childPids).toEqual([102]); }), ); @@ -138,6 +141,7 @@ describe("ProcessDiagnostics", () => { Effect.sync(() => { const diagnostics = ProcessDiagnostics.aggregateProcessDiagnostics({ serverPid: 100, + readAt: 
DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), rows: [ { pid: 101, @@ -241,7 +245,7 @@ describe("ProcessDiagnostics", () => { pid: 4242, signal: "SIGINT", signaled: false, - message: "Process 4242 is not a live descendant of the T3 server.", + message: Option.some("Process 4242 is not a live descendant of the T3 server."), }); }), ); diff --git a/apps/server/src/diagnostics/ProcessDiagnostics.ts b/apps/server/src/diagnostics/ProcessDiagnostics.ts index de59d92cb50..2e1b255c303 100644 --- a/apps/server/src/diagnostics/ProcessDiagnostics.ts +++ b/apps/server/src/diagnostics/ProcessDiagnostics.ts @@ -5,6 +5,7 @@ import type { ServerSignalProcessResult, } from "@t3tools/contracts"; import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; import * as Duration from "effect/Duration"; import * as Effect from "effect/Effect"; import * as Layer from "effect/Layer"; @@ -212,7 +213,7 @@ function buildDescendantEntries( entries.push({ pid: item.row.pid, ppid: item.row.ppid, - pgid: item.row.pgid, + pgid: Option.fromNullishOr(item.row.pgid), status: item.row.status, cpuPercent: item.row.cpuPercent, rssBytes: item.row.rssBytes, @@ -242,10 +243,10 @@ function isDiagnosticsQueryProcess(row: ProcessRow, serverPid: number): boolean function makeResult(input: { readonly serverPid: number; readonly rows: ReadonlyArray; - readonly readAt?: Date; + readonly readAt: DateTime.Utc; readonly error?: string; }): ServerProcessDiagnosticsResult { - const readAt = input.readAt ?? 
new Date(); + const readAt = input.readAt; const rows = input.rows.filter((row) => !isDiagnosticsQueryProcess(row, input.serverPid)); const processes = buildDescendantEntries(rows, input.serverPid); const totalRssBytes = processes.reduce((total, process) => total + process.rssBytes, 0); @@ -253,12 +254,12 @@ function makeResult(input: { return { serverPid: input.serverPid, - readAt: readAt.toISOString(), + readAt, processCount: processes.length, totalRssBytes, totalCpuPercent, processes, - ...(input.error ? { error: { message: input.error } } : {}), + error: input.error ? Option.some({ message: input.error }) : Option.none(), }; } @@ -374,7 +375,7 @@ const readProcessRows = (platform = process.platform) => export function aggregateProcessDiagnostics(input: { readonly serverPid: number; readonly rows: ReadonlyArray; - readonly readAt?: Date; + readonly readAt: DateTime.Utc; }): ServerProcessDiagnosticsResult { return makeResult(input); } @@ -404,11 +405,19 @@ function assertDescendantPid( export const make = Effect.fn("makeProcessDiagnostics")(function* () { const spawner = yield* ChildProcessSpawner.ChildProcessSpawner; - const read: ProcessDiagnosticsShape["read"] = readProcessRows().pipe( - Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), - Effect.map((rows) => makeResult({ serverPid: process.pid, rows })), + const read: ProcessDiagnosticsShape["read"] = Effect.gen(function* () { + const readAt = yield* DateTime.now; + const rows = yield* readProcessRows().pipe( + Effect.provideService(ChildProcessSpawner.ChildProcessSpawner, spawner), + ); + return makeResult({ serverPid: process.pid, rows, readAt }); + }).pipe( Effect.catch((error: ProcessDiagnosticsError) => - Effect.succeed(makeResult({ serverPid: process.pid, rows: [], error: error.message })), + DateTime.now.pipe( + Effect.map((readAt) => + makeResult({ serverPid: process.pid, rows: [], readAt, error: error.message }), + ), + ), ), ); @@ -424,6 +433,7 @@ export const make = 
Effect.fn("makeProcessDiagnostics")(function* () { pid: input.pid, signal: input.signal, signaled: true, + message: Option.none(), }; }, catch: (cause) => @@ -438,7 +448,7 @@ export const make = Effect.fn("makeProcessDiagnostics")(function* () { pid: input.pid, signal: input.signal, signaled: false, - message: error.message, + message: Option.some(error.message), }), ), ); diff --git a/apps/server/src/diagnostics/TraceDiagnostics.test.ts b/apps/server/src/diagnostics/TraceDiagnostics.test.ts index f0e011ee805..d4ffa4a5fc2 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.test.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.test.ts @@ -1,7 +1,9 @@ import { assert, describe, it } from "@effect/vitest"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as PlatformError from "effect/PlatformError"; import * as TraceDiagnostics from "./TraceDiagnostics.ts"; @@ -41,7 +43,7 @@ describe("TraceDiagnostics", () => { Effect.sync(() => { const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ traceFilePath: "/tmp/server.trace.ndjson", - readAt: new Date("2026-05-05T10:00:00.000Z"), + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), slowSpanThresholdMs: 1_000, files: [ { @@ -104,6 +106,21 @@ describe("TraceDiagnostics", () => { }); assert.equal(diagnostics.recordCount, 4); + assert.equal(DateTime.formatIso(diagnostics.readAt), "2026-05-05T10:00:00.000Z"); + assert.equal( + Option.match(diagnostics.firstSpanAt, { + onNone: () => null, + onSome: DateTime.formatIso, + }), + "1970-01-01T00:00:01.000Z", + ); + assert.equal( + Option.match(diagnostics.lastSpanAt, { + onNone: () => null, + onSome: DateTime.formatIso, + }), + "1970-01-01T00:00:05.025Z", + ); assert.equal(diagnostics.parseErrorCount, 1); assert.equal(diagnostics.failureCount, 2); 
assert.equal(diagnostics.interruptionCount, 1); @@ -123,12 +140,12 @@ describe("TraceDiagnostics", () => { Effect.sync(() => { const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ traceFilePath: "/tmp/missing.trace.ndjson", - readAt: new Date("2026-05-05T10:00:00.000Z"), + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), files: [], }); assert.equal(diagnostics.recordCount, 0); - assert.equal(diagnostics.error?.kind, "trace-file-not-found"); + assert.equal(Option.getOrUndefined(diagnostics.error)?.kind, "trace-file-not-found"); }), ); @@ -138,6 +155,7 @@ describe("TraceDiagnostics", () => { const longMessage = `provider warning: ${"retrying command ".repeat(80)}`.trim(); const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ traceFilePath: "/tmp/server.trace.ndjson", + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), files: [ { path: "/tmp/server.trace.ndjson", @@ -195,12 +213,15 @@ describe("TraceDiagnostics", () => { const diagnostics = yield* TraceDiagnostics.readTraceDiagnostics({ traceFilePath, maxFiles: 1, - readAt: new Date("2026-05-05T10:00:00.000Z"), + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), }).pipe(Effect.provide(TraceDiagnostics.layer.pipe(Layer.provide(fileSystemLayer)))); assert.equal(diagnostics.recordCount, 1); - assert.equal(diagnostics.partialFailure, true); - assert.equal(diagnostics.error?.kind, "trace-file-read-failed"); + assert.equal( + Option.getOrElse(diagnostics.partialFailure, () => false), + true, + ); + assert.equal(Option.getOrUndefined(diagnostics.error)?.kind, "trace-file-read-failed"); assert.deepStrictEqual(diagnostics.scannedFilePaths, [`${traceFilePath}.1`, traceFilePath]); }), ); @@ -209,6 +230,7 @@ describe("TraceDiagnostics", () => { Effect.sync(() => { const diagnostics = TraceDiagnostics.aggregateTraceDiagnostics({ traceFilePath: "/tmp/server.trace.ndjson", + readAt: DateTime.makeUnsafe("2026-05-05T10:00:00.000Z"), files: [ { path: "/tmp/server.trace.ndjson", diff --git 
a/apps/server/src/diagnostics/TraceDiagnostics.ts b/apps/server/src/diagnostics/TraceDiagnostics.ts index 6d92db0f203..d90186a8647 100644 --- a/apps/server/src/diagnostics/TraceDiagnostics.ts +++ b/apps/server/src/diagnostics/TraceDiagnostics.ts @@ -1,4 +1,5 @@ import type { + ServerTraceDiagnosticsErrorKind, ServerTraceDiagnosticsFailureSummary, ServerTraceDiagnosticsLogEvent, ServerTraceDiagnosticsRecentFailure, @@ -7,9 +8,11 @@ import type { ServerTraceDiagnosticsSpanSummary, } from "@t3tools/contracts"; import * as Context from "effect/Context"; +import * as DateTime from "effect/DateTime"; import * as Effect from "effect/Effect"; import * as FileSystem from "effect/FileSystem"; import * as Layer from "effect/Layer"; +import * as Option from "effect/Option"; import * as PlatformError from "effect/PlatformError"; interface TraceRecordLike { @@ -33,7 +36,7 @@ export interface TraceDiagnosticsOptions { readonly traceFilePath: string; readonly maxFiles: number; readonly slowSpanThresholdMs?: number; - readonly readAt?: Date; + readonly readAt?: DateTime.Utc; } export interface TraceDiagnosticsShape { @@ -49,11 +52,16 @@ interface TraceDiagnosticsInput { readonly files: ReadonlyArray<{ readonly path: string; readonly text: string }>; readonly scannedFilePaths?: ReadonlyArray; readonly slowSpanThresholdMs?: number; - readonly readAt?: Date; - readonly error?: ServerTraceDiagnosticsResult["error"]; + readonly readAt: DateTime.Utc; + readonly error?: TraceDiagnosticsErrorSummary; readonly partialFailure?: boolean; } +interface TraceDiagnosticsErrorSummary { + readonly kind: ServerTraceDiagnosticsErrorKind; + readonly message: string; +} + const DEFAULT_SLOW_SPAN_THRESHOLD_MS = 1_000; const TOP_LIMIT = 10; const RECENT_LIMIT = 20; @@ -78,19 +86,27 @@ function toNumberValue(value: unknown): number | null { return typeof value === "number" && Number.isFinite(value) ? 
value : null; } -function unixNanoToIso(value: unknown): string | null { +function unixNanoToDateTime(value: unknown): DateTime.Utc | null { const text = toStringValue(value); if (!text) return null; try { const millis = Number(BigInt(text) / 1_000_000n); if (!Number.isFinite(millis)) return null; - return new Date(millis).toISOString(); + return DateTime.makeUnsafe(millis); } catch { return null; } } +function isAfter(left: DateTime.Utc, right: DateTime.Utc): boolean { + return DateTime.toEpochMillis(left) > DateTime.toEpochMillis(right); +} + +function isBefore(left: DateTime.Utc, right: DateTime.Utc): boolean { + return DateTime.toEpochMillis(left) < DateTime.toEpochMillis(right); +} + function readExitTag(exit: unknown): string | null { if (!isRecordObject(exit) || !("_tag" in exit)) return null; return toStringValue(exit._tag); @@ -114,19 +130,19 @@ function readEventAttributes(event: TraceEventLike): Readonly; - readonly readAt: Date; + readonly readAt: DateTime.Utc; readonly slowSpanThresholdMs: number; - readonly error?: ServerTraceDiagnosticsResult["error"]; + readonly error?: TraceDiagnosticsErrorSummary; readonly partialFailure?: boolean; }): ServerTraceDiagnosticsResult { return { traceFilePath: input.traceFilePath, scannedFilePaths: [...input.scannedFilePaths], - readAt: input.readAt.toISOString(), + readAt: input.readAt, recordCount: 0, parseErrorCount: 0, - firstSpanAt: null, - lastSpanAt: null, + firstSpanAt: Option.none(), + lastSpanAt: Option.none(), failureCount: 0, interruptionCount: 0, slowSpanThresholdMs: input.slowSpanThresholdMs, @@ -137,8 +153,8 @@ function makeEmptyDiagnostics(input: { commonFailures: [], latestFailures: [], latestWarningAndErrorLogs: [], - ...(input.partialFailure ? { partialFailure: true } : {}), - ...(input.error ? { error: input.error } : {}), + partialFailure: input.partialFailure ? 
Option.some(true) : Option.none(), + error: Option.fromNullishOr(input.error), }; } @@ -171,7 +187,7 @@ function insertBoundedSlowestSpan( export function aggregateTraceDiagnostics( input: TraceDiagnosticsInput, ): ServerTraceDiagnosticsResult { - const readAt = input.readAt ?? new Date(); + const readAt = input.readAt; const slowSpanThresholdMs = input.slowSpanThresholdMs ?? DEFAULT_SLOW_SPAN_THRESHOLD_MS; const scannedFilePaths = input.scannedFilePaths ?? input.files.map((file) => file.path); if (input.files.length === 0) { @@ -193,8 +209,8 @@ export function aggregateTraceDiagnostics( let failureCount = 0; let interruptionCount = 0; let slowSpanCount = 0; - let firstSpanAt: string | null = null; - let lastSpanAt: string | null = null; + let firstSpanAt: DateTime.Utc | null = null; + let lastSpanAt: DateTime.Utc | null = null; const spansByName = new Map< string, @@ -228,8 +244,8 @@ export function aggregateTraceDiagnostics( const traceId = toStringValue(parsed.traceId); const spanId = toStringValue(parsed.spanId); const durationMs = toNumberValue(parsed.durationMs); - const endedAt = unixNanoToIso(parsed.endTimeUnixNano); - const startedAt = unixNanoToIso(parsed.startTimeUnixNano); + const endedAt = unixNanoToDateTime(parsed.endTimeUnixNano); + const startedAt = unixNanoToDateTime(parsed.startTimeUnixNano); if (!name || !traceId || !spanId || durationMs === null || !endedAt) { parseErrorCount += 1; @@ -238,10 +254,10 @@ export function aggregateTraceDiagnostics( recordCount += 1; firstSpanAt = - startedAt && (firstSpanAt === null || startedAt.localeCompare(firstSpanAt) < 0) + startedAt && (firstSpanAt === null || isBefore(startedAt, firstSpanAt)) ? startedAt : firstSpanAt; - lastSpanAt = endedAt.localeCompare(lastSpanAt ?? "") > 0 ? endedAt : lastSpanAt; + lastSpanAt = lastSpanAt === null || isAfter(endedAt, lastSpanAt) ? 
endedAt : lastSpanAt; const exitTag = readExitTag(parsed.exit); const isFailure = exitTag === "Failure"; @@ -273,20 +289,14 @@ export function aggregateTraceDiagnostics( const failureKey = `${name}\0${cause}`; const existing = failuresByKey.get(failureKey); + const isLatestFailure = !existing || isAfter(endedAt, existing.lastSeenAt); failuresByKey.set(failureKey, { name, cause, count: (existing?.count ?? 0) + 1, - lastSeenAt: - !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 - ? endedAt - : existing.lastSeenAt, - traceId: - !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 - ? traceId - : existing.traceId, - spanId: - !existing || endedAt.localeCompare(existing.lastSeenAt) > 0 ? spanId : existing.spanId, + lastSeenAt: isLatestFailure ? endedAt : existing!.lastSeenAt, + traceId: isLatestFailure ? traceId : existing!.traceId, + spanId: isLatestFailure ? spanId : existing!.spanId, }); } @@ -308,7 +318,7 @@ export function aggregateTraceDiagnostics( continue; } - const seenAt = unixNanoToIso(rawEvent.timeUnixNano) ?? endedAt; + const seenAt = unixNanoToDateTime(rawEvent.timeUnixNano) ?? endedAt; const message = toStringValue(rawEvent.name)?.trim() ?? 
"Log event"; latestWarningAndErrorLogs.push({ spanName: name, @@ -338,11 +348,11 @@ export function aggregateTraceDiagnostics( return { traceFilePath: input.traceFilePath, scannedFilePaths, - readAt: readAt.toISOString(), + readAt, recordCount, parseErrorCount, - firstSpanAt, - lastSpanAt, + firstSpanAt: Option.fromNullishOr(firstSpanAt), + lastSpanAt: Option.fromNullishOr(lastSpanAt), failureCount, interruptionCount, slowSpanThresholdMs, @@ -353,17 +363,23 @@ export function aggregateTraceDiagnostics( commonFailures: [...failuresByKey.values()] .toSorted( (left, right) => - right.count - left.count || right.lastSeenAt.localeCompare(left.lastSeenAt), + right.count - left.count || + DateTime.toEpochMillis(right.lastSeenAt) - DateTime.toEpochMillis(left.lastSeenAt), ) .slice(0, TOP_LIMIT), latestFailures: latestFailures - .toSorted((left, right) => right.endedAt.localeCompare(left.endedAt)) + .toSorted( + (left, right) => + DateTime.toEpochMillis(right.endedAt) - DateTime.toEpochMillis(left.endedAt), + ) .slice(0, RECENT_LIMIT), latestWarningAndErrorLogs: latestWarningAndErrorLogs - .toSorted((left, right) => right.seenAt.localeCompare(left.seenAt)) + .toSorted( + (left, right) => DateTime.toEpochMillis(right.seenAt) - DateTime.toEpochMillis(left.seenAt), + ) .slice(0, RECENT_LIMIT), - ...(input.partialFailure ? { partialFailure: true } : {}), - ...(input.error ? { error: input.error } : {}), + partialFailure: input.partialFailure ? Option.some(true) : Option.none(), + error: Option.fromNullishOr(input.error), }; } @@ -393,7 +409,7 @@ export const make = Effect.fn("makeTraceDiagnostics")(function* () { const read: TraceDiagnosticsShape["read"] = Effect.fn("TraceDiagnostics.read")( function* (options) { - const readAt = options.readAt ?? new Date(); + const readAt = options.readAt ?? (yield* DateTime.now); const slowSpanThresholdMs = options.slowSpanThresholdMs ?? 
DEFAULT_SLOW_SPAN_THRESHOLD_MS; const paths = toRotatedTracePaths(options.traceFilePath, options.maxFiles); const results = yield* Effect.all( @@ -410,7 +426,7 @@ export const make = Effect.fn("makeTraceDiagnostics")(function* () { ? ({ kind: "trace-file-read-failed", message: readFailure.message.trim() || `Failed to read ${readFailure.path}.`, - } satisfies ServerTraceDiagnosticsResult["error"]) + } satisfies TraceDiagnosticsErrorSummary) : undefined; if (files.length === 0) { @@ -424,7 +440,7 @@ export const make = Effect.fn("makeTraceDiagnostics")(function* () { ({ kind: "trace-file-not-found", message: "No local trace files were found.", - } satisfies ServerTraceDiagnosticsResult["error"]), + } satisfies TraceDiagnosticsErrorSummary), }); } diff --git a/apps/server/src/server.test.ts b/apps/server/src/server.test.ts index e49a5a7c7c7..32261dd618b 100644 --- a/apps/server/src/server.test.ts +++ b/apps/server/src/server.test.ts @@ -549,17 +549,19 @@ const buildAppUnderTest = (options?: { Layer.mock(ProcessDiagnostics.ProcessDiagnostics)({ read: Effect.succeed({ serverPid: process.pid, - readAt: TEST_EPOCH.toString(), + readAt: TEST_EPOCH, processCount: 0, totalRssBytes: 0, totalCpuPercent: 0, processes: [], + error: Option.none(), }), signal: (input) => Effect.succeed({ pid: input.pid, signal: input.signal, signaled: true, + message: Option.none(), }), }), ), @@ -569,11 +571,11 @@ const buildAppUnderTest = (options?: { Effect.succeed({ traceFilePath: "", scannedFilePaths: [], - readAt: TEST_EPOCH.toString(), + readAt: TEST_EPOCH, recordCount: 0, parseErrorCount: 0, - firstSpanAt: null, - lastSpanAt: null, + firstSpanAt: Option.none(), + lastSpanAt: Option.none(), failureCount: 0, interruptionCount: 0, slowSpanThresholdMs: 1_000, @@ -584,6 +586,8 @@ const buildAppUnderTest = (options?: { commonFailures: [], latestFailures: [], latestWarningAndErrorLogs: [], + partialFailure: Option.none(), + error: Option.none(), }), }), ), diff --git 
a/apps/web/src/components/settings/DiagnosticsSettings.tsx b/apps/web/src/components/settings/DiagnosticsSettings.tsx index 7e56952098b..bd590a1242c 100644 --- a/apps/web/src/components/settings/DiagnosticsSettings.tsx +++ b/apps/web/src/components/settings/DiagnosticsSettings.tsx @@ -9,6 +9,7 @@ import { } from "lucide-react"; import { useCallback, useMemo, useState, type ReactNode } from "react"; import type { ServerProcessDiagnosticsEntry, ServerProcessSignal } from "@t3tools/contracts"; +import { DateTime, Option } from "effect"; import { ensureLocalApi } from "../../localApi"; import { cn } from "../../lib/utils"; @@ -46,13 +47,13 @@ function formatBytes(value: number): string { return `${next.toFixed(next >= 10 ? 1 : 2)} ${units[unitIndex]}`; } -function formatRelative(value: string | null): string { +function formatRelative(value: DateTime.Utc | null): string { if (!value) return "No trace records"; - const relative = formatRelativeTime(value); + const relative = formatRelativeTime(DateTime.formatIso(value)); return relative.suffix ? `${relative.value} ${relative.suffix}` : relative.value; } -function formatRelativeNoWrap(value: string | null): string { +function formatRelativeNoWrap(value: DateTime.Utc | null): string { return formatRelative(value).replaceAll(" ", "\u00a0"); } @@ -500,9 +501,9 @@ function ProcessDiagnosticsTable({ ); } -function DiagnosticsLastChecked({ checkedAt }: { checkedAt: string | null }) { +function DiagnosticsLastChecked({ checkedAt }: { checkedAt: DateTime.Utc | null }) { useRelativeTimeTick(); - const relative = checkedAt ? formatRelativeTime(checkedAt) : null; + const relative = checkedAt ? 
formatRelativeTime(DateTime.formatIso(checkedAt)) : null; if (!relative) { return Checking; @@ -605,8 +606,9 @@ export function DiagnosticsSettingsPanel() { .server.signalProcess({ pid, signal }) .then((result) => { if (!result.signaled) { + const message = Option.getOrUndefined(result.message); refreshProcesses(); - if (isStaleProcessSignalMessage(result.message)) { + if (isStaleProcessSignalMessage(message)) { toastManager.add({ type: "info", title: "Process already exited", @@ -619,7 +621,7 @@ export function DiagnosticsSettingsPanel() { toastManager.add({ type: "error", title: `Could not send ${signal}`, - description: result.message ?? `Failed to send ${signal}.`, + description: message ?? `Failed to send ${signal}.`, }); return; } @@ -639,6 +641,12 @@ export function DiagnosticsSettingsPanel() { [refreshProcesses], ); + const processDiagnosticsError = processData ? Option.getOrNull(processData.error) : null; + const traceDiagnosticsError = data ? Option.getOrNull(data.error) : null; + const traceDiagnosticsPartialFailure = data + ? Option.getOrElse(data.partialFailure, () => false) + : false; + return ( - {processData?.error || processError ? ( + {processDiagnosticsError || processError ? (
- {processData?.error ? ( + {processDiagnosticsError ? (
- {processData.error.message} + {processDiagnosticsError.message}
) : null} {processError ? ( @@ -755,7 +763,7 @@ export function DiagnosticsSettingsPanel() { tone={data && data.parseErrorCount > 0 ? "warning" : "default"} /> - {openLogsDirectoryError || data?.error || error ? ( + {openLogsDirectoryError || traceDiagnosticsError || error ? (
{openLogsDirectoryError ? (
@@ -763,18 +771,20 @@ export function DiagnosticsSettingsPanel() { {openLogsDirectoryError}
) : null} - {data?.error ? ( + {traceDiagnosticsError ? (
- {data.partialFailure - ? `Some trace files could not be read, so diagnostics may be incomplete. ${data.error.message}` - : data.error.message} + {traceDiagnosticsPartialFailure + ? `Some trace files could not be read, so diagnostics may be incomplete. ${traceDiagnosticsError.message}` + : traceDiagnosticsError.message}
) : null} @@ -900,7 +910,7 @@ export function DiagnosticsSettingsPanel() { {data.latestWarningAndErrorLogs.map((event) => ( diff --git a/apps/web/src/components/settings/SettingsPanels.browser.tsx b/apps/web/src/components/settings/SettingsPanels.browser.tsx index 68515f71977..9e3d52e89c9 100644 --- a/apps/web/src/components/settings/SettingsPanels.browser.tsx +++ b/apps/web/src/components/settings/SettingsPanels.browser.tsx @@ -1054,20 +1054,21 @@ describe("GeneralSettingsPanel observability", () => { server: { getProcessDiagnostics: vi.fn().mockResolvedValue({ serverPid: 1234, - readAt: "2036-04-07T00:00:00.000Z", + readAt: makeUtc("2036-04-07T00:00:00.000Z"), processCount: 0, totalRssBytes: 0, totalCpuPercent: 0, processes: [], + error: Option.none(), }), getTraceDiagnostics: vi.fn().mockResolvedValue({ traceFilePath: "/repo/project/.t3/traces.jsonl", scannedFilePaths: ["/repo/project/.t3/traces.jsonl"], - readAt: "2036-04-07T00:00:00.000Z", + readAt: makeUtc("2036-04-07T00:00:00.000Z"), recordCount: 0, parseErrorCount: 0, - firstSpanAt: null, - lastSpanAt: null, + firstSpanAt: Option.none(), + lastSpanAt: Option.none(), failureCount: 0, interruptionCount: 0, slowSpanThresholdMs: 5_000, @@ -1078,6 +1079,8 @@ describe("GeneralSettingsPanel observability", () => { commonFailures: [], latestFailures: [], latestWarningAndErrorLogs: [], + partialFailure: Option.none(), + error: Option.none(), }), }, } as unknown as LocalApi; diff --git a/packages/contracts/src/server.ts b/packages/contracts/src/server.ts index 3a6e4f9055b..15afea93ad9 100644 --- a/packages/contracts/src/server.ts +++ b/packages/contracts/src/server.ts @@ -232,7 +232,7 @@ export const ServerTraceDiagnosticsFailureSummary = Schema.Struct({ name: TrimmedNonEmptyString, cause: TrimmedNonEmptyString, count: NonNegativeInt, - lastSeenAt: IsoDateTime, + lastSeenAt: Schema.DateTimeUtc, traceId: TrimmedNonEmptyString, spanId: TrimmedNonEmptyString, }); @@ -242,7 +242,7 @@ export const 
ServerTraceDiagnosticsRecentFailure = Schema.Struct({ name: TrimmedNonEmptyString, cause: TrimmedNonEmptyString, durationMs: Schema.Number, - endedAt: IsoDateTime, + endedAt: Schema.DateTimeUtc, traceId: TrimmedNonEmptyString, spanId: TrimmedNonEmptyString, }); @@ -251,7 +251,7 @@ export type ServerTraceDiagnosticsRecentFailure = typeof ServerTraceDiagnosticsR export const ServerTraceDiagnosticsSpanOccurrence = Schema.Struct({ name: TrimmedNonEmptyString, durationMs: Schema.Number, - endedAt: IsoDateTime, + endedAt: Schema.DateTimeUtc, traceId: TrimmedNonEmptyString, spanId: TrimmedNonEmptyString, }); @@ -261,7 +261,7 @@ export const ServerTraceDiagnosticsLogEvent = Schema.Struct({ spanName: TrimmedNonEmptyString, level: TrimmedNonEmptyString, message: TrimmedNonEmptyString, - seenAt: IsoDateTime, + seenAt: Schema.DateTimeUtc, traceId: TrimmedNonEmptyString, spanId: TrimmedNonEmptyString, }); @@ -270,11 +270,11 @@ export type ServerTraceDiagnosticsLogEvent = typeof ServerTraceDiagnosticsLogEve export const ServerTraceDiagnosticsResult = Schema.Struct({ traceFilePath: TrimmedNonEmptyString, scannedFilePaths: Schema.Array(TrimmedNonEmptyString), - readAt: IsoDateTime, + readAt: Schema.DateTimeUtc, recordCount: NonNegativeInt, parseErrorCount: NonNegativeInt, - firstSpanAt: Schema.NullOr(IsoDateTime), - lastSpanAt: Schema.NullOr(IsoDateTime), + firstSpanAt: Schema.Option(Schema.DateTimeUtc), + lastSpanAt: Schema.Option(Schema.DateTimeUtc), failureCount: NonNegativeInt, interruptionCount: NonNegativeInt, slowSpanThresholdMs: NonNegativeInt, @@ -285,8 +285,8 @@ export const ServerTraceDiagnosticsResult = Schema.Struct({ commonFailures: Schema.Array(ServerTraceDiagnosticsFailureSummary), latestFailures: Schema.Array(ServerTraceDiagnosticsRecentFailure), latestWarningAndErrorLogs: Schema.Array(ServerTraceDiagnosticsLogEvent), - partialFailure: Schema.optional(Schema.Boolean), - error: Schema.optional( + partialFailure: Schema.Option(Schema.Boolean), + error: 
Schema.Option( Schema.Struct({ kind: ServerTraceDiagnosticsErrorKind, message: TrimmedNonEmptyString, @@ -301,7 +301,7 @@ export type ServerProcessSignal = typeof ServerProcessSignal.Type; export const ServerProcessDiagnosticsEntry = Schema.Struct({ pid: PositiveInt, ppid: NonNegativeInt, - pgid: Schema.NullOr(Schema.Int), + pgid: Schema.Option(Schema.Int), status: TrimmedNonEmptyString, cpuPercent: Schema.Number, rssBytes: NonNegativeInt, @@ -314,12 +314,12 @@ export type ServerProcessDiagnosticsEntry = typeof ServerProcessDiagnosticsEntry export const ServerProcessDiagnosticsResult = Schema.Struct({ serverPid: PositiveInt, - readAt: IsoDateTime, + readAt: Schema.DateTimeUtc, processCount: NonNegativeInt, totalRssBytes: NonNegativeInt, totalCpuPercent: Schema.Number, processes: Schema.Array(ServerProcessDiagnosticsEntry), - error: Schema.optional( + error: Schema.Option( Schema.Struct({ message: TrimmedNonEmptyString, }), @@ -337,7 +337,7 @@ export const ServerSignalProcessResult = Schema.Struct({ pid: PositiveInt, signal: ServerProcessSignal, signaled: Schema.Boolean, - message: Schema.optional(TrimmedNonEmptyString), + message: Schema.Option(TrimmedNonEmptyString), }); export type ServerSignalProcessResult = typeof ServerSignalProcessResult.Type;