From 8da0de7d2d6d036cce4be4ba88431dcda0aa7f2d Mon Sep 17 00:00:00 2001
From: Minsu Lee
Date: Fri, 29 May 2026 00:21:01 +0900
Subject: [PATCH 1/2] feat(stats): port savings.jsonl telemetry from semble

---
 src/stats.test.ts | 255 +++++++++++++++++++++++++++++++++++++++++
 src/stats.ts      | 281 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 536 insertions(+)
 create mode 100644 src/stats.test.ts
 create mode 100644 src/stats.ts

diff --git a/src/stats.test.ts b/src/stats.test.ts
new file mode 100644
index 0000000..42a4b26
--- /dev/null
+++ b/src/stats.test.ts
@@ -0,0 +1,255 @@
+// Tests for src/stats.ts — port of src/semble/stats.py
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { appendFileSync, existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import path from 'node:path'
+import {
+  BucketStats,
+  buildSavingsSummary,
+  formatSavingsReport,
+  resetStatsFile,
+  saveSearchStats,
+  setStatsFile,
+  type StatsSearchResult,
+} from './stats.ts'
+
+function makeResult(content: string, filePath: string): StatsSearchResult {
+  return { chunk: { content, filePath } }
+}
+
+let tmpDir: string
+let statsFile: string
+
+beforeEach(() => {
+  tmpDir = mkdtempSync(path.join(tmpdir(), 'csp-stats-'))
+  statsFile = path.join(tmpDir, 'savings.jsonl')
+  setStatsFile(statsFile)
+})
+
+afterEach(() => {
+  resetStatsFile()
+  rmSync(tmpDir, { recursive: true, force: true })
+})
+
+describe('BucketStats', () => {
+  test('add accumulates fields and clamps savedChars to >= 0', () => {
+    const b = new BucketStats()
+    b.add(100, 400)
+    b.add(100, 400)
+    expect(b.calls).toBe(2)
+    expect(b.snippetChars).toBe(200)
+    expect(b.fileChars).toBe(800)
+    expect(b.savedChars).toBe(600)
+  })
+
+  test('add does not produce negative savedChars when snippet > file', () => {
+    const b = new BucketStats()
+    b.add(500, 100)
+    expect(b.savedChars).toBe(0)
+    expect(b.snippetChars).toBe(500)
+    expect(b.fileChars).toBe(100)
+  })
+})
+
+describe('saveSearchStats', () => {
+  test('appends one valid JSONL line per call', () => {
+    const results = [makeResult('hello world', '/a.ts'), makeResult('foo bar baz', '/b.ts')]
+    saveSearchStats(results, 'search', { '/a.ts': 100, '/b.ts': 200 })
+
+    const content = readFileSync(statsFile, 'utf8')
+    const lines = content.split('\n').filter(l => l.length > 0)
+    expect(lines).toHaveLength(1)
+    const record = JSON.parse(lines[0]!) as Record<string, unknown>
+    expect(record.call).toBe('search')
+    expect(record.results).toBe(2)
+    expect(record.snippet_chars).toBe('hello world'.length + 'foo bar baz'.length)
+    expect(record.file_chars).toBe(300)
+    expect(typeof record.ts).toBe('number')
+  })
+
+  test('two calls produce two lines', () => {
+    saveSearchStats([makeResult('abc', '/a.ts')], 'search', { '/a.ts': 50 })
+    saveSearchStats([makeResult('xy', '/b.ts')], 'find_related', { '/b.ts': 20 })
+
+    const content = readFileSync(statsFile, 'utf8')
+    const lines = content.split('\n').filter(l => l.length > 0)
+    expect(lines).toHaveLength(2)
+    const r1 = JSON.parse(lines[0]!) as Record<string, unknown>
+    const r2 = JSON.parse(lines[1]!) as Record<string, unknown>
+    expect(r1.call).toBe('search')
+    expect(r2.call).toBe('find_related')
+  })
+
+  test('deduplicates file_chars per unique filePath', () => {
+    // Same path twice — file should only count once toward file_chars.
+    const results = [makeResult('aaa', '/a.ts'), makeResult('bbb', '/a.ts')]
+    saveSearchStats(results, 'search', { '/a.ts': 100 })
+
+    const content = readFileSync(statsFile, 'utf8')
+    const lines = content.split('\n').filter(l => l.length > 0)
+    const record = JSON.parse(lines[0]!) as Record<string, unknown>
+    expect(record.file_chars).toBe(100)
+    expect(record.snippet_chars).toBe(6)
+  })
+
+  test('ignores paths missing from fileSizes', () => {
+    const results = [makeResult('aaa', '/a.ts'), makeResult('bbb', '/missing.ts')]
+    saveSearchStats(results, 'search', { '/a.ts': 100 })
+
+    const content = readFileSync(statsFile, 'utf8')
+    const lines = content.split('\n').filter(l => l.length > 0)
+    const record = JSON.parse(lines[0]!) as Record<string, unknown>
+    expect(record.file_chars).toBe(100)
+  })
+
+  test('never throws on I/O error', () => {
+    // Point stats file at a path whose parent cannot be created (a regular
+    // file used as a directory). saveSearchStats must swallow the error.
+    const conflictFile = path.join(tmpDir, 'conflict')
+    writeFileSync(conflictFile, 'not a directory')
+    setStatsFile(path.join(conflictFile, 'nested', 'savings.jsonl'))
+
+    expect(() => {
+      saveSearchStats([makeResult('x', '/a.ts')], 'search', { '/a.ts': 10 })
+    }).not.toThrow()
+  })
+})
+
+describe('buildSavingsSummary', () => {
+  test('parses all valid lines and skips malformed ones', () => {
+    const now = Date.now() / 1000
+    const lines = [
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+      'this is not json',
+      JSON.stringify({ ts: now, call: 'find_related', results: 1, snippet_chars: 50, file_chars: 200 }),
+      '{"incomplete": ',
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+    ]
+    writeFileSync(statsFile, `${lines.join('\n')}\n`)
+
+    const summary = buildSavingsSummary()
+    expect(summary.buckets['All time']!.calls).toBe(3)
+    expect(summary.callTypeCounts).toEqual({ search: 2, find_related: 1 })
+  })
+
+  test('bucket math: 2 search calls with snippet=100/file=400 → savedChars=600, ratio 0.75', () => {
+    const now = Date.now() / 1000
+    const lines = [
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+    ]
+    writeFileSync(statsFile, `${lines.join('\n')}\n`)
+
+    const summary = buildSavingsSummary()
+    const all = summary.buckets['All time']!
+    expect(all.calls).toBe(2)
+    expect(all.snippetChars).toBe(200)
+    expect(all.fileChars).toBe(800)
+    expect(all.savedChars).toBe(600)
+    expect(all.savedChars / all.fileChars).toBe(0.75)
+
+    expect(summary.buckets['Today']!.calls).toBe(2)
+    expect(summary.buckets['Last 7 days']!.calls).toBe(2)
+  })
+
+  test('older entries fall outside Today and Last 7 days buckets', () => {
+    const now = Date.now() / 1000
+    const tenDaysAgo = now - 10 * 24 * 60 * 60
+    const lines = [
+      JSON.stringify({ ts: tenDaysAgo, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+    ]
+    writeFileSync(statsFile, `${lines.join('\n')}\n`)
+
+    const summary = buildSavingsSummary()
+    expect(summary.buckets['All time']!.calls).toBe(2)
+    expect(summary.buckets['Last 7 days']!.calls).toBe(1)
+    expect(summary.buckets['Today']!.calls).toBe(1)
+  })
+
+  test('missing stats file returns empty summary', () => {
+    const summary = buildSavingsSummary(path.join(tmpDir, 'does-not-exist.jsonl'))
+    expect(summary.buckets['All time']!.calls).toBe(0)
+    expect(summary.callTypeCounts).toEqual({})
+  })
+})
+
+describe('formatSavingsReport', () => {
+  test('shows "Csp Token Savings" header and bucket labels', () => {
+    const now = Date.now() / 1000
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`,
+    )
+
+    const report = formatSavingsReport()
+    expect(report).toContain('Csp Token Savings')
+    expect(report).toContain('Today')
+    expect(report).toContain('Last 7 days')
+    expect(report).toContain('All time')
+    expect(report).not.toContain('Semble Token Savings')
+  })
+
+  test('empty / missing stats file returns the "no stats yet" message', () => {
+    expect(existsSync(statsFile)).toBe(false)
+    expect(formatSavingsReport()).toBe('No stats yet. Run a search first.')
+  })
+
+  test('formats saved tokens with ~Nk suffix at 1500 → ~1.5k', () => {
+    // file=6400, snippet=400 ⇒ saved=6000 chars ⇒ 6000/4 = 1500 tokens ⇒ "~1.5k"
+    const now = Date.now() / 1000
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 400, file_chars: 6400 })}\n`,
+    )
+
+    const report = formatSavingsReport()
+    expect(report).toContain('~1.5k')
+  })
+
+  test('verbose appends Usage Breakdown section with sorted call types', () => {
+    const now = Date.now() / 1000
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`,
+    )
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'find_related', results: 1, snippet_chars: 50, file_chars: 200 })}\n`,
+    )
+
+    const report = formatSavingsReport({ verbose: true })
+    expect(report).toContain('Usage Breakdown')
+    expect(report).toContain('Call type')
+    expect(report).toContain('search')
+    expect(report).toContain('find_related')
+    // Sorted alphabetically — find_related should appear before search.
+    const findIdx = report.indexOf('find_related')
+    const searchHeadingsStripped = report.replace('Csp Token Savings', '')
+    const searchIdx = searchHeadingsStripped.indexOf('search')
+    expect(findIdx).toBeLessThan(searchIdx + 'Csp Token Savings'.length)
+  })
+
+  test('renders bar with filled blocks proportional to ratio', () => {
+    const now = Date.now() / 1000
+    // ratio = 0.75 ⇒ 12 filled / 4 empty out of 16.
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 })}\n`,
+    )
+    const report = formatSavingsReport()
+    expect(report).toContain('[████████████░░░░]')
+    expect(report).toContain('(75%)')
+  })
+
+  test('formats saved tokens with ~N.NM suffix at 1M+ tokens', () => {
+    // saved_chars = 4_000_000 ⇒ tokens = 1_000_000 ⇒ "~1.0M"
+    const now = Date.now() / 1000
+    appendFileSync(
+      statsFile,
+      `${JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 0, file_chars: 4_000_000 })}\n`,
+    )
+    const report = formatSavingsReport()
+    expect(report).toContain('~1.0M')
+  })
+})
diff --git a/src/stats.ts b/src/stats.ts
new file mode 100644
index 0000000..a619add
--- /dev/null
+++ b/src/stats.ts
@@ -0,0 +1,281 @@
+// Port of src/semble/stats.py
+import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs'
+import { homedir } from 'node:os'
+import path from 'node:path'
+
+/**
+ * Call type for token-savings tracking.
+ *
+ * Mirrors `CallType` from `src/semble/types.py`. Defined here as a minimal
+ * type to avoid creating a cross-unit dependency before `src/types.ts`
+ * lands. Once that exists, this should be re-exported from there.
+ */
+export type CallType = 'search' | 'find_related'
+
+/**
+ * Minimal chunk shape needed by `saveSearchStats`.
+ *
+ * Uses camelCase fields per the csp public API surface.
+ */
+export interface StatsChunk {
+  content: string
+  filePath: string
+}
+
+/**
+ * Minimal search-result shape needed by `saveSearchStats`.
+ */
+export interface StatsSearchResult {
+  chunk: StatsChunk
+}
+
+/**
+ * Per-bucket aggregate counters for the savings report.
+ */
+export class BucketStats {
+  calls: number = 0
+  snippetChars: number = 0
+  fileChars: number = 0
+  savedChars: number = 0
+
+  /** Update stats with a call and its character counts. */
+  add(snippetChars: number, fileChars: number): void {
+    this.calls += 1
+    this.snippetChars += snippetChars
+    this.fileChars += fileChars
+    this.savedChars += Math.max(0, fileChars - snippetChars)
+  }
+}
+
+/**
+ * Aggregated savings, grouped into time buckets plus per-call-type counts.
+ */
+export interface SavingsSummary {
+  buckets: Record<string, BucketStats>
+  callTypeCounts: Record<string, number>
+}
+
+const DEFAULT_STATS_FILE = path.join(homedir(), '.csp', 'savings.jsonl')
+
+let _STATS_FILE = DEFAULT_STATS_FILE
+
+/**
+ * Override the stats file location. Intended for tests only — production
+ * callers should leave the default in place so behavior matches semble.
+ */
+export function setStatsFile(filePath: string): void {
+  _STATS_FILE = filePath
+}
+
+/** Return the current stats file path. */
+export function getStatsFile(): string {
+  return _STATS_FILE
+}
+
+/** Reset the stats file path back to the default `~/.csp/savings.jsonl`. */
+export function resetStatsFile(): void {
+  _STATS_FILE = DEFAULT_STATS_FILE
+}
+
+/**
+ * Save stats about a search or find_related call to the stats file.
+ *
+ * Best-effort: any I/O error is silently swallowed so stats writes never
+ * impact a live search.
+ */
+export function saveSearchStats(
+  results: StatsSearchResult[],
+  callType: CallType,
+  fileSizes: Record<string, number>,
+): void {
+  try {
+    const snippetChars = results.reduce((sum, r) => sum + r.chunk.content.length, 0)
+    const uniquePaths = new Set(results.map(r => r.chunk.filePath))
+    let fileChars = 0
+    for (const p of uniquePaths) {
+      if (Object.prototype.hasOwnProperty.call(fileSizes, p)) {
+        fileChars += fileSizes[p] ?? 0
+      }
+    }
+
+    const record = {
+      ts: Date.now() / 1000,
+      call: callType,
+      results: results.length,
+      snippet_chars: snippetChars,
+      file_chars: fileChars,
+    }
+    const dir = path.dirname(_STATS_FILE)
+    mkdirSync(dir, { recursive: true })
+    appendFileSync(_STATS_FILE, `${JSON.stringify(record)}\n`)
+  }
+  catch {
+    // Swallow — stats writes must never throw.
+  }
+}
+
+interface StatsRecord {
+  ts: number
+  call: string
+  results: number
+  snippet_chars: number
+  file_chars: number
+}
+
+function isStatsRecord(value: unknown): value is StatsRecord {
+  if (value === null || typeof value !== 'object')
+    return false
+  const v = value as Record<string, unknown>
+  return (
+    typeof v.ts === 'number'
+    && typeof v.call === 'string'
+    && typeof v.snippet_chars === 'number'
+    && typeof v.file_chars === 'number'
+  )
+}
+
+function ymdUtc(timestampSeconds: number): string {
+  const d = new Date(timestampSeconds * 1000)
+  const y = d.getUTCFullYear()
+  const m = String(d.getUTCMonth() + 1).padStart(2, '0')
+  const day = String(d.getUTCDate()).padStart(2, '0')
+  return `${y}-${m}-${day}`
+}
+
+/**
+ * Read `savings.jsonl` and return a {@link SavingsSummary}.
+ *
+ * Malformed lines are skipped silently. If the file is missing, an empty
+ * summary is returned.
+ */
+export function buildSavingsSummary(filePath?: string): SavingsSummary {
+  const target = filePath ?? _STATS_FILE
+  const now = new Date()
+  const nowSec = now.getTime() / 1000
+  const today = ymdUtc(nowSec)
+  const sevenDaysAgo = ymdUtc(nowSec - 7 * 24 * 60 * 60)
+
+  const buckets: Record<string, BucketStats> = {
+    'Today': new BucketStats(),
+    'Last 7 days': new BucketStats(),
+    'All time': new BucketStats(),
+  }
+  const callTypeCounts: Record<string, number> = {}
+
+  if (!existsSync(target))
+    return { buckets, callTypeCounts }
+
+  const raw = readFileSync(target, 'utf8')
+  const lines = raw.split('\n')
+  for (const line of lines) {
+    if (line.length === 0)
+      continue
+    let record: unknown
+    try {
+      record = JSON.parse(line)
+    }
+    catch {
+      // Match semble: skip malformed lines silently (semble logs a warning;
+      // we omit the warning to keep stats imports side-effect-free).
+      continue
+    }
+    if (!isStatsRecord(record))
+      continue
+
+    const snippetChars = record.snippet_chars
+    const fileChars = record.file_chars
+    const callType = record.call
+    callTypeCounts[callType] = (callTypeCounts[callType] ?? 0) + 1
+
+    const day = ymdUtc(record.ts)
+    const inToday = day === today
+    const inLast7 = day > sevenDaysAgo
+
+    buckets['All time']!.add(snippetChars, fileChars)
+    if (inLast7)
+      buckets['Last 7 days']!.add(snippetChars, fileChars)
+    if (inToday)
+      buckets['Today']!.add(snippetChars, fileChars)
+  }
+
+  return { buckets, callTypeCounts }
+}
+
+function padRight(s: string, width: number): string {
+  if (s.length >= width)
+    return s
+  return s + ' '.repeat(width - s.length)
+}
+
+function formatSavedTokens(savedTokens: number): string {
+  if (savedTokens >= 1_000_000)
+    return `~${(savedTokens / 1_000_000).toFixed(1)}M`
+  if (savedTokens >= 1000)
+    return `~${(savedTokens / 1000).toFixed(1)}k`
+  return `~${savedTokens}`
+}
+
+function formatCalls(calls: number): string {
+  return calls >= 1000 ? `${(calls / 1000).toFixed(1)}k` : String(calls)
+}
+
+export interface FormatSavingsReportOptions {
+  path?: string
+  verbose?: boolean
+}
+
+/**
+ * Return a formatted token-savings report.
+ *
+ * Output mirrors semble's ASCII bar chart byte-for-byte, with the header
+ * swapped from "Semble Token Savings" → "Csp Token Savings".
+ */
+export function formatSavingsReport(options: FormatSavingsReportOptions = {}): string {
+  const target = options.path ?? _STATS_FILE
+  const verbose = options.verbose ?? false
+
+  if (!existsSync(target))
+    return 'No stats yet. Run a search first.'
+
+  const summary = buildSavingsSummary(target)
+  const barWidth = 16
+  const heavyLine = ` ${'═'.repeat(64)}`
+  const lightLine = ` ${'─'.repeat(64)}`
+
+  const lines: string[] = [
+    '',
+    ' Csp Token Savings',
+    heavyLine,
+    ` ${padRight('Period', 12)} ${padRight('Calls', 6)} Savings`,
+    lightLine,
+  ]
+
+  for (const [label, bucket] of Object.entries(summary.buckets)) {
+    const savedTokens = Math.floor(bucket.savedChars / 4) // ~4 chars/token approximation
+    const savedStr = formatSavedTokens(savedTokens)
+    const callsStr = formatCalls(bucket.calls)
+    if (bucket.fileChars > 0) {
+      const ratio = bucket.savedChars / bucket.fileChars
+      const filled = Math.round(ratio * barWidth)
+      const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled)
+      const pct = Math.round(ratio * 100)
+      lines.push(` ${padRight(label, 12)} ${padRight(callsStr, 6)} [${bar}] ${savedStr} tokens (${pct}%)`)
+    }
+    else {
+      lines.push(` ${padRight(label, 12)} ${padRight(callsStr, 6)} [${'░'.repeat(barWidth)}] ${savedStr} tokens`)
+    }
+  }
+
+  const callTypeEntries = Object.entries(summary.callTypeCounts)
+  if (verbose && callTypeEntries.length > 0) {
+    lines.push('', ' Usage Breakdown', lightLine, ` ${padRight('Call type', 16)} Calls`)
+    const sorted = callTypeEntries.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
+    for (const [callType, count] of sorted) {
+      const countStr = count >= 1000 ? `${(count / 1000).toFixed(1)}k` : String(count)
+      lines.push(` ${padRight(callType, 16)} ${countStr}`)
+    }
+    lines.push(heavyLine)
+  }
+  lines.push('')
+  return lines.join('\n')
+}

From 1c244f1c67345c8aaea1a9947b3533710c807eba Mon Sep 17 00:00:00 2001
From: Minsu Lee
Date: Fri, 29 May 2026 00:44:15 +0900
Subject: [PATCH 2/2] review(stats): apply gemini-code-assist feedback

- Reject non-finite numbers (NaN, +/-Infinity) in the isStatsRecord type
  guard. JSON.parse never yields NaN, but an overflowing literal such as
  1e999 parses to Infinity; typeof alone would let it through and
  propagate into date formatting ('NaN-NaN-NaN') and bucket arithmetic.
- Initialize callTypeCounts with Object.create(null) so JSONL call values
  matching built-in object properties (toString, __proto__) don't collide
  with prototype methods.
- Add tests covering non-finite rejection and call-type/prototype
  collision.
---
 src/stats.test.ts | 35 +++++++++++++++++++++++++++++++++++
 src/stats.ts      | 10 +++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/stats.test.ts b/src/stats.test.ts
index 42a4b26..cb08457 100644
--- a/src/stats.test.ts
+++ b/src/stats.test.ts
@@ -172,6 +172,41 @@ describe('buildSavingsSummary', () => {
     expect(summary.buckets['All time']!.calls).toBe(0)
     expect(summary.callTypeCounts).toEqual({})
   })
+
+  test('skips records with non-finite numeric fields', () => {
+    // `typeof Infinity === 'number'` would otherwise let these through and
+    // poison date formatting / bucket math with NaN or Infinity.
+    const now = Date.now() / 1000
+    const lines = [
+      // JSON has no NaN/Infinity literal, but 1e999 overflows to Infinity on parse.
+      '{"ts": 1e999, "call": "search", "results": 1, "snippet_chars": 0, "file_chars": 0}',
+      '{"ts": 0, "call": "search", "results": 1, "snippet_chars": 1e999, "file_chars": 0}',
+      '{"ts": 0, "call": "search", "results": 1, "snippet_chars": 0, "file_chars": -1e999}',
+      JSON.stringify({ ts: now, call: 'search', results: 1, snippet_chars: 100, file_chars: 400 }),
+    ]
+    writeFileSync(statsFile, `${lines.join('\n')}\n`)
+
+    const summary = buildSavingsSummary()
+    // Only the last valid record is counted.
+    expect(summary.buckets['All time']!.calls).toBe(1)
+    expect(summary.callTypeCounts).toEqual({ search: 1 })
+  })
+
+  test('call types matching built-in object properties do not collide', () => {
+    // Without Object.create(null), `callTypeCounts["toString"]` would
+    // resolve to Object.prototype.toString and arithmetic would coerce
+    // it to a string instead of incrementing.
+    const now = Date.now() / 1000
+    const lines = [
+      JSON.stringify({ ts: now, call: 'toString', results: 1, snippet_chars: 1, file_chars: 1 }),
+      JSON.stringify({ ts: now, call: 'toString', results: 1, snippet_chars: 1, file_chars: 1 }),
+      JSON.stringify({ ts: now, call: 'hasOwnProperty', results: 1, snippet_chars: 1, file_chars: 1 }),
+    ]
+    writeFileSync(statsFile, `${lines.join('\n')}\n`)
+
+    const summary = buildSavingsSummary()
+    expect(summary.callTypeCounts).toEqual({ toString: 2, hasOwnProperty: 1 })
+  })
 })
 
 describe('formatSavingsReport', () => {
diff --git a/src/stats.ts b/src/stats.ts
index a619add..02ea0c9 100644
--- a/src/stats.ts
+++ b/src/stats.ts
@@ -126,11 +126,17 @@ function isStatsRecord(value: unknown): value is StatsRecord {
   if (value === null || typeof value !== 'object')
     return false
   const v = value as Record<string, unknown>
+  // Require finite numbers: `typeof` alone accepts NaN and ±Infinity (an
+  // overflowing JSON literal like 1e999 parses to Infinity), which would
+  // reach date formatting ("NaN-NaN-NaN") and bucket arithmetic.
   return (
     typeof v.ts === 'number'
+    && Number.isFinite(v.ts)
     && typeof v.call === 'string'
     && typeof v.snippet_chars === 'number'
+    && Number.isFinite(v.snippet_chars)
     && typeof v.file_chars === 'number'
+    && Number.isFinite(v.file_chars)
   )
 }
 
@@ -160,7 +166,9 @@ export function buildSavingsSummary(filePath?: string): SavingsSummary {
     'Last 7 days': new BucketStats(),
     'All time': new BucketStats(),
   }
-  const callTypeCounts: Record<string, number> = {}
+  // Use a prototype-less object so JSONL `call` values like "toString" or
+  // "__proto__" can't collide with built-in object properties.
+  const callTypeCounts: Record<string, number> = Object.create(null) as Record<string, number>
 
   if (!existsSync(target))
     return { buckets, callTypeCounts }