From 1e237090d4891361cdf8823eed829c3a0dce72cc Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Sun, 15 Mar 2026 04:20:58 +0000 Subject: [PATCH] feat(yaml-parser): add input_files shorthand to EVAL.yaml test cases Adds `input_files:` as a shorthand at the test level that expands to type:file content blocks prepended before the type:text block in the user message. Only supported with a string `input` in v1. Expansion rule (file blocks first, text block last): input_files: - path/to/file.csv input: "Summarize this." expands to: input: - role: user content: - type: file value: path/to/file.csv - type: text value: "Summarize this." Paths are resolved using the same convention as explicit type:file blocks. Multi-turn array inputs are not supported in v1 (returns undefined). --- .../evals/dataset.eval.yaml | 76 ++++++++++ .../input-files-shorthand/fixtures/sales.csv | 7 + .../evaluation/loaders/shorthand-expansion.ts | 69 +++++++++ packages/core/src/evaluation/yaml-parser.ts | 2 + .../evaluation/input-files-shorthand.test.ts | 141 ++++++++++++++++++ .../loaders/shorthand-expansion.test.ts | 125 ++++++++++++++++ 6 files changed, 420 insertions(+) create mode 100644 examples/features/input-files-shorthand/evals/dataset.eval.yaml create mode 100644 examples/features/input-files-shorthand/fixtures/sales.csv create mode 100644 packages/core/test/evaluation/input-files-shorthand.test.ts diff --git a/examples/features/input-files-shorthand/evals/dataset.eval.yaml b/examples/features/input-files-shorthand/evals/dataset.eval.yaml new file mode 100644 index 000000000..b209b359b --- /dev/null +++ b/examples/features/input-files-shorthand/evals/dataset.eval.yaml @@ -0,0 +1,76 @@ +# input_files shorthand example +# +# `input_files` is a shorthand at the test level that expands to type:file content +# blocks prepended before the text in the user message. This avoids repeating the +# verbose content-block syntax when you just want to attach one or more files. +# +# Shorthand form: +# +# input_files: +# - fixtures/sales.csv +# input: "Summarize the monthly trends in this CSV." +# +# Expands to: +# +# input: +# - role: user +# content: +# - type: file +# value: fixtures/sales.csv +# - type: text +# value: "Summarize the monthly trends in this CSV." +# +# Rules: +# - File blocks come first, text block last +# - Only supported with a string `input` (not multi-turn arrays) +# - Paths are resolved the same way as explicit type:file blocks + +description: Demonstrates input_files shorthand for attaching files to test inputs + +execution: + target: default + +tests: + # ========================================== + # Example 1: Single file with string input (shorthand) + # ========================================== + - id: summarize-sales-shorthand + criteria: > + Agent summarizes the monthly revenue trends from the CSV file, + identifying which product performed better each month and overall direction. + + # Shorthand form — equivalent to explicit type:file + type:text content blocks + input_files: + - ../fixtures/sales.csv + input: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?" + + # ========================================== + # Example 2: Equivalent explicit form for reference + # Both forms produce identical runtime behaviour. + # ========================================== + - id: summarize-sales-explicit + criteria: > + Agent summarizes the monthly revenue trends from the CSV file, + identifying which product performed better each month and overall direction. + + # Explicit type:file + type:text form (same runtime result as shorthand above) + input: + - role: user + content: + - type: file + value: ../fixtures/sales.csv + - type: text + value: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?" + + # ========================================== + # Example 3: Multiple files with input_files + # ========================================== + - id: compare-two-files + criteria: > + Agent compares the two data files and identifies key differences + between the datasets. + + input_files: + - ../fixtures/sales.csv + - ../fixtures/sales.csv + input: "Compare these two data files and describe any differences." diff --git a/examples/features/input-files-shorthand/fixtures/sales.csv b/examples/features/input-files-shorthand/fixtures/sales.csv new file mode 100644 index 000000000..d9225a810 --- /dev/null +++ b/examples/features/input-files-shorthand/fixtures/sales.csv @@ -0,0 +1,7 @@ +month,product,revenue +Jan,Widget A,12000 +Jan,Widget B,8500 +Feb,Widget A,13500 +Feb,Widget B,9200 +Mar,Widget A,11800 +Mar,Widget B,10100 diff --git a/packages/core/src/evaluation/loaders/shorthand-expansion.ts b/packages/core/src/evaluation/loaders/shorthand-expansion.ts index b614c486d..e52c3c701 100644 --- a/packages/core/src/evaluation/loaders/shorthand-expansion.ts +++ b/packages/core/src/evaluation/loaders/shorthand-expansion.ts @@ -3,6 +3,7 @@ * * Supports: * - `input` with string shorthand or message array + * - `input_files` shorthand (string input only): expands to type:file + type:text content blocks * - `expected_output` with string/object shorthand or message array */ @@ -85,13 +86,81 @@ export function expandExpectedOutputShorthand( return undefined; } +/** + * Expand `input_files` shorthand combined with a string `input` into a single user message + * whose content is an array of type:file blocks (one per path) followed by a type:text block. + * + * Only supported when `input` is a string. Returns undefined if: + * - `inputFiles` is undefined/null or not an array of strings + * - `inputText` is not a string (multi-turn array inputs are not supported in v1) + * + * Example YAML: + * ```yaml + * input_files: + * - evals/files/sales.csv + * input: "Summarize the monthly trends in this CSV." + * ``` + * + * Expands to: + * ```yaml + * input: + * - role: user + * content: + * - type: file + * value: evals/files/sales.csv + * - type: text + * value: "Summarize the monthly trends in this CSV." + * ``` + * + * @param inputFiles The raw `input_files` value from YAML + * @param inputText The raw `input` value from YAML (must be a string) + * @returns Expanded message array or undefined if preconditions not met + */ +export function expandInputFilesShorthand( + inputFiles: JsonValue | undefined, + inputText: JsonValue | undefined, +): TestMessage[] | undefined { + if (inputFiles === undefined || inputFiles === null) { + return undefined; + } + + // input_files must be an array of strings + if (!Array.isArray(inputFiles)) { + return undefined; + } + + const filePaths = inputFiles.filter((f): f is string => typeof f === 'string'); + if (filePaths.length === 0) { + return undefined; + } + + // input must be a string (multi-turn arrays not supported in v1) + if (typeof inputText !== 'string') { + return undefined; + } + + const contentBlocks: JsonObject[] = [ + ...filePaths.map((filePath): JsonObject => ({ type: 'file', value: filePath })), + { type: 'text', value: inputText }, + ]; + + return [{ role: 'user', content: contentBlocks }]; +} + /** * Resolve input from raw eval case data. * + * When `input_files` is present alongside a string `input`, the shorthand is expanded + * into a user message with type:file content blocks followed by a type:text block. + * Otherwise, `input` is expanded via the standard shorthand rules. + * * @param raw Raw eval case object from YAML/JSONL * @returns Resolved input messages array or undefined if none found */ export function resolveInputMessages(raw: JsonObject): TestMessage[] | undefined { + if (raw.input_files !== undefined) { + return expandInputFilesShorthand(raw.input_files, raw.input); + } return expandInputShorthand(raw.input); } diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 05c49cf63..fc861d5d2 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -106,6 +106,8 @@ type RawEvalCase = JsonObject & { /** @deprecated Use `criteria` instead */ readonly expected_outcome?: JsonValue; readonly input?: JsonValue; + /** Shorthand: list of file paths to prepend as type:file content blocks in the user message. */ + readonly input_files?: JsonValue; readonly expected_output?: JsonValue; readonly execution?: JsonValue; readonly evaluators?: JsonValue; diff --git a/packages/core/test/evaluation/input-files-shorthand.test.ts b/packages/core/test/evaluation/input-files-shorthand.test.ts new file mode 100644 index 000000000..c086a67f3 --- /dev/null +++ b/packages/core/test/evaluation/input-files-shorthand.test.ts @@ -0,0 +1,141 @@ +import { afterAll, beforeAll, describe, expect, it } from 'bun:test'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +import { loadTests } from '../../src/evaluation/yaml-parser.js'; + +describe('input_files shorthand', () => { + let tempDir: string; + + beforeAll(async () => { + tempDir = path.join(os.tmpdir(), `agentv-input-files-${Date.now()}`); + await mkdir(tempDir, { recursive: true }); + // Create a dummy fixture file for file resolution tests + await writeFile(path.join(tempDir, 'sales.csv'), 'month,revenue\nJan,100\nFeb,200\n'); + }); + + afterAll(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('expands input_files + string input to type:file + type:text content blocks', async () => { + await writeFile( + path.join(tempDir, 'input-files-basic.eval.yaml'), + `tests: + - id: summarize-csv + criteria: "Summarizes monthly trends" + input_files: + - ./sales.csv + input: "Summarize the monthly trends in this CSV." +`, + ); + + const tests = await loadTests(path.join(tempDir, 'input-files-basic.eval.yaml'), tempDir); + + expect(tests).toHaveLength(1); + expect(tests[0].id).toBe('summarize-csv'); + + // The test should have a single user message with content blocks + expect(tests[0].input).toHaveLength(1); + const message = tests[0].input[0]; + expect(message.role).toBe('user'); + + // Content should be an array of content blocks + const content = message.content; + expect(Array.isArray(content)).toBe(true); + const blocks = content as Array<{ type: string; value: string }>; + expect(blocks).toHaveLength(2); + expect(blocks[0].type).toBe('file'); + expect(blocks[0].value).toBe('./sales.csv'); + expect(blocks[1].type).toBe('text'); + expect(blocks[1].value).toBe('Summarize the monthly trends in this CSV.'); + }); + + it('places multiple file blocks before text block', async () => { + await writeFile(path.join(tempDir, 'b.csv'), 'month,revenue\nMar,300\n'); + + await writeFile( + path.join(tempDir, 'input-files-multi.eval.yaml'), + `tests: + - id: compare-csvs + criteria: "Compares two CSV files" + input_files: + - ./sales.csv + - ./b.csv + input: "Compare these two files." +`, + ); + + const tests = await loadTests(path.join(tempDir, 'input-files-multi.eval.yaml'), tempDir); + + expect(tests).toHaveLength(1); + const message = tests[0].input[0]; + const content = message.content as Array<{ type: string; value: string }>; + expect(content).toHaveLength(3); + expect(content[0]).toEqual({ type: 'file', value: './sales.csv' }); + expect(content[1]).toEqual({ type: 'file', value: './b.csv' }); + expect(content[2]).toEqual({ type: 'text', value: 'Compare these two files.' }); + }); + + it('produces identical runtime behaviour to explicit type:file + type:text form', async () => { + await writeFile( + path.join(tempDir, 'input-files-shorthand.eval.yaml'), + `tests: + - id: shorthand-form + criteria: "Shorthand form works" + input_files: + - ./sales.csv + input: "Summarize this." +`, + ); + + await writeFile( + path.join(tempDir, 'input-files-explicit.eval.yaml'), + `tests: + - id: explicit-form + criteria: "Explicit form works" + input: + - role: user + content: + - type: file + value: ./sales.csv + - type: text + value: "Summarize this." +`, + ); + + const [shorthandTests, explicitTests] = await Promise.all([ + loadTests(path.join(tempDir, 'input-files-shorthand.eval.yaml'), tempDir), + loadTests(path.join(tempDir, 'input-files-explicit.eval.yaml'), tempDir), + ]); + + expect(shorthandTests).toHaveLength(1); + expect(explicitTests).toHaveLength(1); + + // Both forms should resolve to the same input structure + const shorthandMsg = shorthandTests[0].input[0]; + const explicitMsg = explicitTests[0].input[0]; + expect(shorthandMsg.role).toBe(explicitMsg.role); + expect(shorthandMsg.content).toEqual(explicitMsg.content); + + // Both should produce the same file_paths resolution + expect(shorthandTests[0].file_paths).toEqual(explicitTests[0].file_paths); + }); + + it('is skipped and falls back to plain input when input_files is absent', async () => { + await writeFile( + path.join(tempDir, 'no-input-files.eval.yaml'), + `tests: + - id: plain-input + criteria: "Uses plain string input" + input: "What is 2+2?" +`, + ); + + const tests = await loadTests(path.join(tempDir, 'no-input-files.eval.yaml'), tempDir); + + expect(tests).toHaveLength(1); + expect(tests[0].input[0]).toEqual({ role: 'user', content: 'What is 2+2?' }); + }); +}); diff --git a/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts b/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts index 597e98756..95f28200d 100644 --- a/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts +++ b/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from 'bun:test'; import { expandExpectedOutputShorthand, + expandInputFilesShorthand, expandInputShorthand, resolveExpectedMessages, resolveInputMessages, @@ -118,6 +119,86 @@ describe('expandExpectedOutputShorthand', () => { }); }); +describe('expandInputFilesShorthand', () => { + it('expands single file path + string input to user message with content blocks', () => { + const result = expandInputFilesShorthand( + ['evals/files/sales.csv'], + 'Summarize the monthly trends in this CSV.', + ); + + expect(result).toEqual([ + { + role: 'user', + content: [ + { type: 'file', value: 'evals/files/sales.csv' }, + { type: 'text', value: 'Summarize the monthly trends in this CSV.' }, + ], + }, + ]); + }); + + it('places multiple file blocks before the text block', () => { + const result = expandInputFilesShorthand( + ['evals/files/a.csv', 'evals/files/b.csv'], + 'Compare these two files.', + ); + + expect(result).toEqual([ + { + role: 'user', + content: [ + { type: 'file', value: 'evals/files/a.csv' }, + { type: 'file', value: 'evals/files/b.csv' }, + { type: 'text', value: 'Compare these two files.' }, + ], + }, + ]); + }); + + it('returns undefined when input_files is undefined', () => { + expect(expandInputFilesShorthand(undefined, 'hello')).toBeUndefined(); + }); + + it('returns undefined when input_files is null', () => { + expect(expandInputFilesShorthand(null, 'hello')).toBeUndefined(); + }); + + it('returns undefined when input_files is not an array', () => { + expect(expandInputFilesShorthand('not-an-array', 'hello')).toBeUndefined(); + }); + + it('returns undefined when input_files array is empty after filtering non-strings', () => { + expect(expandInputFilesShorthand([42, true, null], 'hello')).toBeUndefined(); + }); + + it('returns undefined when input is not a string (multi-turn not supported in v1)', () => { + const multiTurn = [{ role: 'user', content: 'Hello' }]; + expect(expandInputFilesShorthand(['file.csv'], multiTurn)).toBeUndefined(); + }); + + it('returns undefined when input is undefined', () => { + expect(expandInputFilesShorthand(['file.csv'], undefined)).toBeUndefined(); + }); + + it('filters non-string entries from input_files array', () => { + const result = expandInputFilesShorthand( + ['valid.csv', 42, null, 'also-valid.txt'], + 'Analyze these files.', + ); + + expect(result).toEqual([ + { + role: 'user', + content: [ + { type: 'file', value: 'valid.csv' }, + { type: 'file', value: 'also-valid.txt' }, + { type: 'text', value: 'Analyze these files.' }, + ], + }, + ]); + }); +}); + describe('resolveInputMessages', () => { it('resolves input message array', () => { const raw = { @@ -161,6 +242,50 @@ describe('resolveInputMessages', () => { expect(result).toBeUndefined(); }); + + it('expands input_files shorthand with string input', () => { + const raw = { + input_files: ['evals/files/sales.csv'], + input: 'Summarize the monthly trends in this CSV.', + }; + + const result = resolveInputMessages(raw); + + expect(result).toEqual([ + { + role: 'user', + content: [ + { type: 'file', value: 'evals/files/sales.csv' }, + { type: 'text', value: 'Summarize the monthly trends in this CSV.' }, + ], + }, + ]); + }); + + it('prefers input_files expansion over plain input when input_files is present', () => { + const raw = { + input_files: ['data.csv'], + input: 'What does this show?', + }; + + const result = resolveInputMessages(raw); + + expect(result).toHaveLength(1); + expect(result?.[0].role).toBe('user'); + const content = result?.[0].content; + expect(Array.isArray(content)).toBe(true); + }); + + it('returns undefined when input_files is present but input is a multi-turn array', () => { + const raw = { + input_files: ['file.csv'], + input: [{ role: 'user', content: 'Hello' }], + }; + + const result = resolveInputMessages(raw); + + expect(result).toBeUndefined(); + }); }); describe('resolveExpectedMessages', () => {