Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions examples/features/input-files-shorthand/evals/dataset.eval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# input_files shorthand example
#
# `input_files` is a shorthand at the test level that expands to type:file content
# blocks prepended before the text in the user message. This avoids repeating the
# verbose content-block syntax when you just want to attach one or more files.
#
# Shorthand form:
#
# input_files:
# - fixtures/sales.csv
# input: "Summarize the monthly trends in this CSV."
#
# Expands to:
#
# input:
# - role: user
# content:
# - type: file
# value: fixtures/sales.csv
# - type: text
# value: "Summarize the monthly trends in this CSV."
#
# Rules:
# - File blocks come first, text block last
# - Only supported with a string `input` (not multi-turn arrays)
# - Paths are resolved the same way as explicit type:file blocks

description: Demonstrates input_files shorthand for attaching files to test inputs

execution:
target: default

tests:
# ==========================================
# Example 1: Single file with string input (shorthand)
# ==========================================
- id: summarize-sales-shorthand
criteria: >
Agent summarizes the monthly revenue trends from the CSV file,
identifying which product performed better each month and overall direction.

# Shorthand form — equivalent to explicit type:file + type:text content blocks
input_files:
- ../fixtures/sales.csv
input: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?"

# ==========================================
# Example 2: Equivalent explicit form for reference
# Both forms produce identical runtime behaviour.
# ==========================================
- id: summarize-sales-explicit
criteria: >
Agent summarizes the monthly revenue trends from the CSV file,
identifying which product performed better each month and overall direction.

# Explicit type:file + type:text form (same runtime result as shorthand above)
input:
- role: user
content:
- type: file
value: ../fixtures/sales.csv
- type: text
value: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?"

# ==========================================
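# Example 3: Multiple files with input_files
# NOTE: both entries below currently point to the same fixture, so the two
# attached files are identical; swap one path for a second fixture to get a
# meaningful comparison.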
# ==========================================
- id: compare-two-files
criteria: >
Agent compares the two data files and identifies key differences
between the datasets.

input_files:
- ../fixtures/sales.csv
- ../fixtures/sales.csv
input: "Compare these two data files and describe any differences."
7 changes: 7 additions & 0 deletions examples/features/input-files-shorthand/fixtures/sales.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
month,product,revenue
Jan,Widget A,12000
Jan,Widget B,8500
Feb,Widget A,13500
Feb,Widget B,9200
Mar,Widget A,11800
Mar,Widget B,10100
69 changes: 69 additions & 0 deletions packages/core/src/evaluation/loaders/shorthand-expansion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*
* Supports:
* - `input` with string shorthand or message array
* - `input_files` shorthand (string input only): expands to type:file + type:text content blocks
* - `expected_output` with string/object shorthand or message array
*/

Expand Down Expand Up @@ -85,13 +86,81 @@ export function expandExpectedOutputShorthand(
return undefined;
}

/**
 * Build a single user message from the `input_files` shorthand plus a string `input`.
 *
 * The resulting message content is a list of content blocks: one `type: file`
 * block per path (in the order given), followed by exactly one `type: text`
 * block carrying the prompt.
 *
 * Given this YAML:
 * ```yaml
 * input_files:
 *   - evals/files/sales.csv
 * input: "Summarize the monthly trends in this CSV."
 * ```
 *
 * the result is equivalent to:
 * ```yaml
 * input:
 *   - role: user
 *     content:
 *       - type: file
 *         value: evals/files/sales.csv
 *       - type: text
 *         value: "Summarize the monthly trends in this CSV."
 * ```
 *
 * `undefined` is returned when any precondition fails:
 * - `inputFiles` is undefined, null, or not an array
 * - `inputFiles` contains no string entries (non-string entries are ignored)
 * - `inputText` is not a string (multi-turn array inputs are not supported in v1)
 *
 * @param inputFiles The raw `input_files` value from YAML
 * @param inputText The raw `input` value from YAML (must be a string)
 * @returns A one-element message array, or undefined if preconditions are not met
 */
export function expandInputFilesShorthand(
  inputFiles: JsonValue | undefined,
  inputText: JsonValue | undefined,
): TestMessage[] | undefined {
  // Array.isArray is false for undefined/null, so one guard covers every
  // "not a list" case; the prompt must be a plain string as well.
  if (!Array.isArray(inputFiles) || typeof inputText !== 'string') {
    return undefined;
  }

  // One file block per string entry; anything that is not a string is skipped.
  const fileBlocks: JsonObject[] = [];
  for (const entry of inputFiles) {
    if (typeof entry === 'string') {
      fileBlocks.push({ type: 'file', value: entry });
    }
  }

  if (fileBlocks.length === 0) {
    return undefined;
  }

  // The text block always comes last, after every file block.
  const textBlock: JsonObject = { type: 'text', value: inputText };
  return [{ role: 'user', content: [...fileBlocks, textBlock] }];
}

/**
 * Resolve input from raw eval case data.
 *
 * When `input_files` lists at least one string path, the shorthand is expanded
 * into a user message with type:file content blocks followed by a type:text block.
 * That expansion requires a string `input`; combining file paths with any other
 * `input` shape yields undefined rather than silently dropping the attachments.
 *
 * When `input_files` is absent, or carries no usable paths (null, not an array,
 * or an array with no string entries), `input` is expanded via the standard
 * shorthand rules so that an empty `input_files` key does not discard a valid
 * `input`.
 *
 * @param raw Raw eval case object from YAML/JSONL
 * @returns Resolved input messages array or undefined if none found
 */
export function resolveInputMessages(raw: JsonObject): TestMessage[] | undefined {
  const inputFiles = raw.input_files;

  // Only take the shorthand path when there is something to attach. Checking
  // merely for `!== undefined` would route null / empty lists into the files
  // expansion, which returns undefined and loses the plain `input`.
  const listsFilePaths =
    Array.isArray(inputFiles) && inputFiles.some((entry) => typeof entry === 'string');

  if (!listsFilePaths) {
    return expandInputShorthand(raw.input);
  }

  return expandInputFilesShorthand(inputFiles, raw.input);
}

Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/evaluation/yaml-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ type RawEvalCase = JsonObject & {
/** @deprecated Use `criteria` instead */
readonly expected_outcome?: JsonValue;
readonly input?: JsonValue;
/** Shorthand: list of file paths to prepend as type:file content blocks in the user message. */
readonly input_files?: JsonValue;
readonly expected_output?: JsonValue;
readonly execution?: JsonValue;
readonly evaluators?: JsonValue;
Expand Down
141 changes: 141 additions & 0 deletions packages/core/test/evaluation/input-files-shorthand.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { afterAll, beforeAll, describe, expect, it } from 'bun:test';
import { mkdir, rm, writeFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';

import { loadTests } from '../../src/evaluation/yaml-parser.js';

// End-to-end tests for the `input_files` shorthand: each case writes an eval
// YAML file into a temporary directory, loads it through `loadTests`, and
// inspects the resulting `input` messages.
describe('input_files shorthand', () => {
// Scratch directory shared by every test in this suite; assigned in beforeAll.
let tempDir: string;

beforeAll(async () => {
// Timestamp suffix keeps the directory name distinct between runs.
tempDir = path.join(os.tmpdir(), `agentv-input-files-${Date.now()}`);
await mkdir(tempDir, { recursive: true });
// Create a dummy fixture file for file resolution tests
await writeFile(path.join(tempDir, 'sales.csv'), 'month,revenue\nJan,100\nFeb,200\n');
});

afterAll(async () => {
// Remove the scratch directory and everything written into it; `force`
// avoids an error if it is already gone.
await rm(tempDir, { recursive: true, force: true });
});

// One file + string input -> one user message with [file block, text block].
it('expands input_files + string input to type:file + type:text content blocks', async () => {
await writeFile(
path.join(tempDir, 'input-files-basic.eval.yaml'),
`tests:
- id: summarize-csv
criteria: "Summarizes monthly trends"
input_files:
- ./sales.csv
input: "Summarize the monthly trends in this CSV."
`,
);

const tests = await loadTests(path.join(tempDir, 'input-files-basic.eval.yaml'), tempDir);

expect(tests).toHaveLength(1);
expect(tests[0].id).toBe('summarize-csv');

// The test should have a single user message with content blocks
expect(tests[0].input).toHaveLength(1);
const message = tests[0].input[0];
expect(message.role).toBe('user');

// Content should be an array of content blocks
const content = message.content;
expect(Array.isArray(content)).toBe(true);
const blocks = content as Array<{ type: string; value: string }>;
expect(blocks).toHaveLength(2);
expect(blocks[0].type).toBe('file');
// The block value is expected to be the path exactly as written in the YAML.
expect(blocks[0].value).toBe('./sales.csv');
expect(blocks[1].type).toBe('text');
expect(blocks[1].value).toBe('Summarize the monthly trends in this CSV.');
});

// Two files + string input -> file blocks in listed order, text block last.
it('places multiple file blocks before text block', async () => {
// Second fixture so the two listed paths refer to different files.
await writeFile(path.join(tempDir, 'b.csv'), 'month,revenue\nMar,300\n');

await writeFile(
path.join(tempDir, 'input-files-multi.eval.yaml'),
`tests:
- id: compare-csvs
criteria: "Compares two CSV files"
input_files:
- ./sales.csv
- ./b.csv
input: "Compare these two files."
`,
);

const tests = await loadTests(path.join(tempDir, 'input-files-multi.eval.yaml'), tempDir);

expect(tests).toHaveLength(1);
const message = tests[0].input[0];
const content = message.content as Array<{ type: string; value: string }>;
expect(content).toHaveLength(3);
expect(content[0]).toEqual({ type: 'file', value: './sales.csv' });
expect(content[1]).toEqual({ type: 'file', value: './b.csv' });
expect(content[2]).toEqual({ type: 'text', value: 'Compare these two files.' });
});

// Loads the shorthand form and the hand-written explicit form side by side
// and checks that both yield the same message and the same `file_paths`.
it('produces identical runtime behaviour to explicit type:file + type:text form', async () => {
await writeFile(
path.join(tempDir, 'input-files-shorthand.eval.yaml'),
`tests:
- id: shorthand-form
criteria: "Shorthand form works"
input_files:
- ./sales.csv
input: "Summarize this."
`,
);

await writeFile(
path.join(tempDir, 'input-files-explicit.eval.yaml'),
`tests:
- id: explicit-form
criteria: "Explicit form works"
input:
- role: user
content:
- type: file
value: ./sales.csv
- type: text
value: "Summarize this."
`,
);

// The two files are independent, so they are loaded concurrently.
const [shorthandTests, explicitTests] = await Promise.all([
loadTests(path.join(tempDir, 'input-files-shorthand.eval.yaml'), tempDir),
loadTests(path.join(tempDir, 'input-files-explicit.eval.yaml'), tempDir),
]);

expect(shorthandTests).toHaveLength(1);
expect(explicitTests).toHaveLength(1);

// Both forms should resolve to the same input structure
const shorthandMsg = shorthandTests[0].input[0];
const explicitMsg = explicitTests[0].input[0];
expect(shorthandMsg.role).toBe(explicitMsg.role);
expect(shorthandMsg.content).toEqual(explicitMsg.content);

// Both should produce the same file_paths resolution
expect(shorthandTests[0].file_paths).toEqual(explicitTests[0].file_paths);
});

// Without `input_files`, a string input stays a plain user message whose
// content is the string itself (no content-block array).
it('is skipped and falls back to plain input when input_files is absent', async () => {
await writeFile(
path.join(tempDir, 'no-input-files.eval.yaml'),
`tests:
- id: plain-input
criteria: "Uses plain string input"
input: "What is 2+2?"
`,
);

const tests = await loadTests(path.join(tempDir, 'no-input-files.eval.yaml'), tempDir);

expect(tests).toHaveLength(1);
expect(tests[0].input[0]).toEqual({ role: 'user', content: 'What is 2+2?' });
});
});
Loading