From 1e237090d4891361cdf8823eed829c3a0dce72cc Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Sun, 15 Mar 2026 04:20:58 +0000
Subject: [PATCH] feat(yaml-parser): add input_files shorthand to EVAL.yaml
 test cases

Adds `input_files:` as a shorthand at the test level that expands to
type:file content blocks prepended before the type:text block in the
user message. Only supported with a string `input` in v1.

Expansion rule (file blocks first, text block last):

  input_files:
    - path/to/file.csv
  input: "Summarize this."

expands to:

  input:
    - role: user
      content:
        - type: file
          value: path/to/file.csv
        - type: text
          value: "Summarize this."

Paths are resolved using the same convention as explicit type:file blocks.
Multi-turn array inputs are not supported in v1: when input_files is set and
input is not a string, input resolution returns undefined (no fallback).
---
 .../evals/dataset.eval.yaml                   |  76 ++++++++++
 .../input-files-shorthand/fixtures/sales.csv  |   7 +
 .../evaluation/loaders/shorthand-expansion.ts |  69 +++++++++
 packages/core/src/evaluation/yaml-parser.ts   |   2 +
 .../evaluation/input-files-shorthand.test.ts  | 141 ++++++++++++++++++
 .../loaders/shorthand-expansion.test.ts       | 125 ++++++++++++++++
 6 files changed, 420 insertions(+)
 create mode 100644 examples/features/input-files-shorthand/evals/dataset.eval.yaml
 create mode 100644 examples/features/input-files-shorthand/fixtures/sales.csv
 create mode 100644 packages/core/test/evaluation/input-files-shorthand.test.ts

diff --git a/examples/features/input-files-shorthand/evals/dataset.eval.yaml b/examples/features/input-files-shorthand/evals/dataset.eval.yaml
new file mode 100644
index 000000000..b209b359b
--- /dev/null
+++ b/examples/features/input-files-shorthand/evals/dataset.eval.yaml
@@ -0,0 +1,76 @@
+# input_files shorthand example
+#
+# `input_files` is a shorthand at the test level that expands to type:file content
+# blocks prepended before the text in the user message. This avoids repeating the
+# verbose content-block syntax when you just want to attach one or more files.
+#
+# Shorthand form:
+#
+#   input_files:
+#     - ../fixtures/sales.csv
+#   input: "Summarize the monthly trends in this CSV."
+#
+# Expands to:
+#
+#   input:
+#     - role: user
+#       content:
+#         - type: file
+#           value: ../fixtures/sales.csv
+#         - type: text
+#           value: "Summarize the monthly trends in this CSV."
+#
+# Rules:
+#   - File blocks come first, text block last
+#   - Only supported with a string `input` (not multi-turn arrays)
+#   - Paths are resolved the same way as explicit type:file blocks
+
+description: Demonstrates input_files shorthand for attaching files to test inputs
+
+execution:
+  target: default
+
+tests:
+  # ==========================================
+  # Example 1: Single file with string input (shorthand)
+  # ==========================================
+  - id: summarize-sales-shorthand
+    criteria: >
+      Agent summarizes the monthly revenue trends from the CSV file,
+      identifying which product performed better each month and overall direction.
+
+    # Shorthand form — equivalent to explicit type:file + type:text content blocks
+    input_files:
+      - ../fixtures/sales.csv
+    input: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?"
+
+  # ==========================================
+  # Example 2: Equivalent explicit form for reference
+  # Both forms produce identical runtime behaviour.
+  # ==========================================
+  - id: summarize-sales-explicit
+    criteria: >
+      Agent summarizes the monthly revenue trends from the CSV file,
+      identifying which product performed better each month and overall direction.
+
+    # Explicit type:file + type:text form (same runtime result as shorthand above)
+    input:
+      - role: user
+        content:
+          - type: file
+            value: ../fixtures/sales.csv
+          - type: text
+            value: "Summarize the monthly revenue trends in this CSV. Which product is growing faster?"
+
+  # ==========================================
+  # Example 3: Multiple files with input_files
+  # ==========================================
+  - id: compare-two-files
+    criteria: >
+      Agent compares the two data files and identifies key differences
+      between the datasets.
+
+    input_files:
+      - ../fixtures/sales.csv
+      - ../fixtures/sales.csv
+    input: "Compare these two data files and describe any differences."
diff --git a/examples/features/input-files-shorthand/fixtures/sales.csv b/examples/features/input-files-shorthand/fixtures/sales.csv
new file mode 100644
index 000000000..d9225a810
--- /dev/null
+++ b/examples/features/input-files-shorthand/fixtures/sales.csv
@@ -0,0 +1,7 @@
+month,product,revenue
+Jan,Widget A,12000
+Jan,Widget B,8500
+Feb,Widget A,13500
+Feb,Widget B,9200
+Mar,Widget A,11800
+Mar,Widget B,10100
diff --git a/packages/core/src/evaluation/loaders/shorthand-expansion.ts b/packages/core/src/evaluation/loaders/shorthand-expansion.ts
index b614c486d..e52c3c701 100644
--- a/packages/core/src/evaluation/loaders/shorthand-expansion.ts
+++ b/packages/core/src/evaluation/loaders/shorthand-expansion.ts
@@ -3,6 +3,7 @@
  *
  * Supports:
  * - `input` with string shorthand or message array
+ * - `input_files` shorthand (string input only): expands to type:file + type:text content blocks
  * - `expected_output` with string/object shorthand or message array
  */
 
@@ -85,13 +86,81 @@ export function expandExpectedOutputShorthand(
   return undefined;
 }
 
+/**
+ * Expand `input_files` shorthand combined with a string `input` into a single user message
+ * whose content is an array of type:file blocks (one per path) followed by a type:text block.
+ *
+ * Only supported when `input` is a string. Returns undefined if:
+ * - `inputFiles` is undefined/null, not an array, or holds no strings (non-strings are dropped)
+ * - `inputText` is not a string (multi-turn array inputs are not supported in v1)
+ *
+ * Example YAML:
+ * ```yaml
+ * input_files:
+ *   - evals/files/sales.csv
+ * input: "Summarize the monthly trends in this CSV."
+ * ```
+ *
+ * Expands to:
+ * ```yaml
+ * input:
+ *   - role: user
+ *     content:
+ *       - type: file
+ *         value: evals/files/sales.csv
+ *       - type: text
+ *         value: "Summarize the monthly trends in this CSV."
+ * ```
+ *
+ * @param inputFiles The raw `input_files` value from YAML
+ * @param inputText The raw `input` value from YAML (must be a string)
+ * @returns Expanded message array or undefined if preconditions not met
+ */
+export function expandInputFilesShorthand(
+  inputFiles: JsonValue | undefined,
+  inputText: JsonValue | undefined,
+): TestMessage[] | undefined {
+  if (inputFiles === undefined || inputFiles === null) {
+    return undefined;
+  }
+
+  // input_files must be an array; non-string entries are filtered out below
+  if (!Array.isArray(inputFiles)) {
+    return undefined;
+  }
+
+  const filePaths = inputFiles.filter((f): f is string => typeof f === 'string');
+  if (filePaths.length === 0) {
+    return undefined;
+  }
+
+  // input must be a string (multi-turn arrays not supported in v1)
+  if (typeof inputText !== 'string') {
+    return undefined;
+  }
+
+  const contentBlocks: JsonObject[] = [
+    ...filePaths.map((filePath): JsonObject => ({ type: 'file', value: filePath })),
+    { type: 'text', value: inputText },
+  ];
+
+  return [{ role: 'user', content: contentBlocks }];
+}
+
 /**
  * Resolve input from raw eval case data.
  *
+ * When `input_files` is present, it is expanded with a string `input` into a user message of
+ * type:file blocks followed by a type:text block; if that expansion fails (e.g. a multi-turn
+ * `input`), the result is undefined. Without `input_files`, standard `input` shorthand applies.
+ *
  * @param raw Raw eval case object from YAML/JSONL
  * @returns Resolved input messages array or undefined if none found
  */
 export function resolveInputMessages(raw: JsonObject): TestMessage[] | undefined {
+  if (raw.input_files !== undefined) {
+    return expandInputFilesShorthand(raw.input_files, raw.input);
+  }
   return expandInputShorthand(raw.input);
 }
 
diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
index 05c49cf63..fc861d5d2 100644
--- a/packages/core/src/evaluation/yaml-parser.ts
+++ b/packages/core/src/evaluation/yaml-parser.ts
@@ -106,6 +106,8 @@ type RawEvalCase = JsonObject & {
   /** @deprecated Use `criteria` instead */
   readonly expected_outcome?: JsonValue;
   readonly input?: JsonValue;
+  /** Shorthand: list of file paths to prepend as type:file content blocks in the user message. */
+  readonly input_files?: JsonValue;
   readonly expected_output?: JsonValue;
   readonly execution?: JsonValue;
   readonly evaluators?: JsonValue;
diff --git a/packages/core/test/evaluation/input-files-shorthand.test.ts b/packages/core/test/evaluation/input-files-shorthand.test.ts
new file mode 100644
index 000000000..c086a67f3
--- /dev/null
+++ b/packages/core/test/evaluation/input-files-shorthand.test.ts
@@ -0,0 +1,141 @@
+import { afterAll, beforeAll, describe, expect, it } from 'bun:test';
+import { mkdir, rm, writeFile } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+
+import { loadTests } from '../../src/evaluation/yaml-parser.js';
+
+describe('input_files shorthand', () => {
+  let tempDir: string;
+
+  beforeAll(async () => {
+    tempDir = path.join(os.tmpdir(), `agentv-input-files-${Date.now()}`);
+    await mkdir(tempDir, { recursive: true });
+    // Create a dummy fixture file for file resolution tests
+    await writeFile(path.join(tempDir, 'sales.csv'), 'month,revenue\nJan,100\nFeb,200\n');
+  });
+
+  afterAll(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  it('expands input_files + string input to type:file + type:text content blocks', async () => {
+    await writeFile(
+      path.join(tempDir, 'input-files-basic.eval.yaml'),
+      `tests:
+  - id: summarize-csv
+    criteria: "Summarizes monthly trends"
+    input_files:
+      - ./sales.csv
+    input: "Summarize the monthly trends in this CSV."
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'input-files-basic.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].id).toBe('summarize-csv');
+
+    // The test should have a single user message with content blocks
+    expect(tests[0].input).toHaveLength(1);
+    const message = tests[0].input[0];
+    expect(message.role).toBe('user');
+
+    // Content should be an array of content blocks
+    const content = message.content;
+    expect(Array.isArray(content)).toBe(true);
+    const blocks = content as Array<{ type: string; value: string }>;
+    expect(blocks).toHaveLength(2);
+    expect(blocks[0].type).toBe('file');
+    expect(blocks[0].value).toBe('./sales.csv');
+    expect(blocks[1].type).toBe('text');
+    expect(blocks[1].value).toBe('Summarize the monthly trends in this CSV.');
+  });
+
+  it('places multiple file blocks before text block', async () => {
+    await writeFile(path.join(tempDir, 'b.csv'), 'month,revenue\nMar,300\n');
+
+    await writeFile(
+      path.join(tempDir, 'input-files-multi.eval.yaml'),
+      `tests:
+  - id: compare-csvs
+    criteria: "Compares two CSV files"
+    input_files:
+      - ./sales.csv
+      - ./b.csv
+    input: "Compare these two files."
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'input-files-multi.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    const message = tests[0].input[0];
+    const content = message.content as Array<{ type: string; value: string }>;
+    expect(content).toHaveLength(3);
+    expect(content[0]).toEqual({ type: 'file', value: './sales.csv' });
+    expect(content[1]).toEqual({ type: 'file', value: './b.csv' });
+    expect(content[2]).toEqual({ type: 'text', value: 'Compare these two files.' });
+  });
+
+  it('produces identical runtime behaviour to explicit type:file + type:text form', async () => {
+    await writeFile(
+      path.join(tempDir, 'input-files-shorthand.eval.yaml'),
+      `tests:
+  - id: shorthand-form
+    criteria: "Shorthand form works"
+    input_files:
+      - ./sales.csv
+    input: "Summarize this."
+`,
+    );
+
+    await writeFile(
+      path.join(tempDir, 'input-files-explicit.eval.yaml'),
+      `tests:
+  - id: explicit-form
+    criteria: "Explicit form works"
+    input:
+      - role: user
+        content:
+          - type: file
+            value: ./sales.csv
+          - type: text
+            value: "Summarize this."
+`,
+    );
+
+    const [shorthandTests, explicitTests] = await Promise.all([
+      loadTests(path.join(tempDir, 'input-files-shorthand.eval.yaml'), tempDir),
+      loadTests(path.join(tempDir, 'input-files-explicit.eval.yaml'), tempDir),
+    ]);
+
+    expect(shorthandTests).toHaveLength(1);
+    expect(explicitTests).toHaveLength(1);
+
+    // Both forms should resolve to the same input structure
+    const shorthandMsg = shorthandTests[0].input[0];
+    const explicitMsg = explicitTests[0].input[0];
+    expect(shorthandMsg.role).toBe(explicitMsg.role);
+    expect(shorthandMsg.content).toEqual(explicitMsg.content);
+
+    // Both should produce the same file_paths resolution
+    expect(shorthandTests[0].file_paths).toEqual(explicitTests[0].file_paths);
+  });
+
+  it('is skipped and falls back to plain input when input_files is absent', async () => {
+    await writeFile(
+      path.join(tempDir, 'no-input-files.eval.yaml'),
+      `tests:
+  - id: plain-input
+    criteria: "Uses plain string input"
+    input: "What is 2+2?"
+`,
+    );
+
+    const tests = await loadTests(path.join(tempDir, 'no-input-files.eval.yaml'), tempDir);
+
+    expect(tests).toHaveLength(1);
+    expect(tests[0].input[0]).toEqual({ role: 'user', content: 'What is 2+2?' });
+  });
+});
diff --git a/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts b/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts
index 597e98756..95f28200d 100644
--- a/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts
+++ b/packages/core/test/evaluation/loaders/shorthand-expansion.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from 'bun:test';
 
 import {
   expandExpectedOutputShorthand,
+  expandInputFilesShorthand,
   expandInputShorthand,
   resolveExpectedMessages,
   resolveInputMessages,
@@ -118,6 +119,86 @@ describe('expandExpectedOutputShorthand', () => {
   });
 });
 
+describe('expandInputFilesShorthand', () => {
+  it('expands single file path + string input to user message with content blocks', () => {
+    const result = expandInputFilesShorthand(
+      ['evals/files/sales.csv'],
+      'Summarize the monthly trends in this CSV.',
+    );
+
+    expect(result).toEqual([
+      {
+        role: 'user',
+        content: [
+          { type: 'file', value: 'evals/files/sales.csv' },
+          { type: 'text', value: 'Summarize the monthly trends in this CSV.' },
+        ],
+      },
+    ]);
+  });
+
+  it('places multiple file blocks before the text block', () => {
+    const result = expandInputFilesShorthand(
+      ['evals/files/a.csv', 'evals/files/b.csv'],
+      'Compare these two files.',
+    );
+
+    expect(result).toEqual([
+      {
+        role: 'user',
+        content: [
+          { type: 'file', value: 'evals/files/a.csv' },
+          { type: 'file', value: 'evals/files/b.csv' },
+          { type: 'text', value: 'Compare these two files.' },
+        ],
+      },
+    ]);
+  });
+
+  it('returns undefined when input_files is undefined', () => {
+    expect(expandInputFilesShorthand(undefined, 'hello')).toBeUndefined();
+  });
+
+  it('returns undefined when input_files is null', () => {
+    expect(expandInputFilesShorthand(null, 'hello')).toBeUndefined();
+  });
+
+  it('returns undefined when input_files is not an array', () => {
+    expect(expandInputFilesShorthand('not-an-array', 'hello')).toBeUndefined();
+  });
+
+  it('returns undefined when input_files array is empty after filtering non-strings', () => {
+    expect(expandInputFilesShorthand([42, true, null], 'hello')).toBeUndefined();
+  });
+
+  it('returns undefined when input is not a string (multi-turn not supported in v1)', () => {
+    const multiTurn = [{ role: 'user', content: 'Hello' }];
+    expect(expandInputFilesShorthand(['file.csv'], multiTurn)).toBeUndefined();
+  });
+
+  it('returns undefined when input is undefined', () => {
+    expect(expandInputFilesShorthand(['file.csv'], undefined)).toBeUndefined();
+  });
+
+  it('filters non-string entries from input_files array', () => {
+    const result = expandInputFilesShorthand(
+      ['valid.csv', 42, null, 'also-valid.txt'],
+      'Analyze these files.',
+    );
+
+    expect(result).toEqual([
+      {
+        role: 'user',
+        content: [
+          { type: 'file', value: 'valid.csv' },
+          { type: 'file', value: 'also-valid.txt' },
+          { type: 'text', value: 'Analyze these files.' },
+        ],
+      },
+    ]);
+  });
+});
+
 describe('resolveInputMessages', () => {
   it('resolves input message array', () => {
     const raw = {
@@ -161,6 +242,50 @@ describe('resolveInputMessages', () => {
 
     expect(result).toBeUndefined();
   });
+
+  it('expands input_files shorthand with string input', () => {
+    const raw = {
+      input_files: ['evals/files/sales.csv'],
+      input: 'Summarize the monthly trends in this CSV.',
+    };
+
+    const result = resolveInputMessages(raw);
+
+    expect(result).toEqual([
+      {
+        role: 'user',
+        content: [
+          { type: 'file', value: 'evals/files/sales.csv' },
+          { type: 'text', value: 'Summarize the monthly trends in this CSV.' },
+        ],
+      },
+    ]);
+  });
+
+  it('prefers input_files expansion over plain input when input_files is present', () => {
+    const raw = {
+      input_files: ['data.csv'],
+      input: 'What does this show?',
+    };
+
+    const result = resolveInputMessages(raw);
+
+    expect(result).toHaveLength(1);
+    expect(result?.[0].role).toBe('user');
+    const content = result?.[0].content;
+    expect(Array.isArray(content)).toBe(true);
+  });
+
+  it('returns undefined when input_files is present but input is a multi-turn array', () => {
+    const raw = {
+      input_files: ['file.csv'],
+      input: [{ role: 'user', content: 'Hello' }],
+    };
+
+    const result = resolveInputMessages(raw);
+
+    expect(result).toBeUndefined();
+  });
 });
 
 describe('resolveExpectedMessages', () => {