diff --git a/packages/core/package.json b/packages/core/package.json index 36a5fcdd4ede..d1744a5fa079 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -91,6 +91,7 @@ "@opentelemetry/exporter-trace-otlp-http": "0.214.0", "@opentelemetry/sdk-trace-base": "2.6.1", "@parcel/watcher": "2.5.1", + "@silvia-odwyer/photon-node": "0.3.4", "@openrouter/ai-sdk-provider": "2.9.0", "ai-gateway-provider": "3.1.2", "bun-pty": "0.4.8", diff --git a/packages/core/src/filesystem.ts b/packages/core/src/filesystem.ts index af3bf2de58e8..83288b421c15 100644 --- a/packages/core/src/filesystem.ts +++ b/packages/core/src/filesystem.ts @@ -22,6 +22,7 @@ export type ReadInput = typeof ReadInput.Type export const MAX_READ_LINES = 2_000 export const MAX_READ_BYTES = 50 * 1024 +export const READ_SAMPLE_BYTES = 4 * 1024 const MAX_LINE_LENGTH = 2_000 const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)` @@ -179,6 +180,7 @@ export interface Interface { readonly resolveReadPath: (input: ReadInput) => Effect.Effect readonly resolveRead: (input: ReadInput) => Effect.Effect readonly readResolved: (target: ReadTarget, maximumBytes?: number) => Effect.Effect + readonly readSampleResolved: (target: ReadTarget, maximumBytes: number) => Effect.Effect readonly readTextPageResolved: (target: ReadTarget, page?: TextPageInput) => Effect.Effect readonly list: (input?: ListInput) => Effect.Effect /** Select a contained canonical read root without asserting leaf policy. */ @@ -345,6 +347,21 @@ export const layer = Layer.effect( }), ) }) + const readSampleResolved = Effect.fn("FileSystem.readSampleResolved")(function* ( + target: ReadTarget, + maximumBytes: number, + ) { + return yield* Effect.scoped( + Effect.gen(function* () { + const file = yield* fs.open(target.real, { flag: "r" }).pipe(Effect.orDie) + const info = yield* file.stat.pipe(Effect.orDie) + if (info.type !== "File") return yield* Effect.die(new Error("Path is not a file")) + if (info.dev !== target.dev || Option.getOrUndefined(info.ino) !== target.ino) + return yield* Effect.die(new Error("File changed after permission approval")) + return Option.getOrElse(yield* file.readAlloc(maximumBytes).pipe(Effect.orDie), () => new Uint8Array()) + }), + ) + }) const readTextPageResolved = Effect.fn("FileSystem.readTextPageResolved")(function* ( target: ReadTarget, page: TextPageInput = {}, @@ -534,6 +551,7 @@ export const layer = Layer.effect( resolveReadPath, resolveRead, readResolved, + readSampleResolved, readTextPageResolved, list: Effect.fn("FileSystem.list")(function* (input) { return yield* listResolved(yield* resolveList(input)) diff --git a/packages/core/src/tool/read.ts b/packages/core/src/tool/read.ts index bd7b75f5efbb..c25d9b837869 100644 --- a/packages/core/src/tool/read.ts +++ b/packages/core/src/tool/read.ts @@ -2,13 +2,33 @@ export * as ReadTool from "./read" import { Tool, ToolFailure } from "@opencode-ai/llm" import { Cause, Effect, Layer, Schema } from "effect" +import path from "node:path" +import { fileURLToPath } from "node:url" +import { Config } from "../config" import { FileSystem } from "../filesystem" import { NonNegativeInt, PositiveInt } from "../schema" import { PermissionV2 } from "../permission" import { ToolOutputStore } from "../tool-output-store" +import { FSUtil } from "../fs-util" import { ToolRegistry } from "./registry" export const name = "read" +const MAX_IMAGE_BASE64_BYTES = 5 * 1024 * 1024 +const MAX_IMAGE_WIDTH = 2_000 +const MAX_IMAGE_HEIGHT = 2_000 +const JPEG_QUALITIES = [80, 85, 70, 55, 40] +const SUPPORTED_IMAGE_MIMES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]) +const startsWith = (bytes: Uint8Array, prefix: number[]) => prefix.every((value, index) => bytes[index] === value) +const imageMime = (bytes: Uint8Array, fallback: string) => { + if (startsWith(bytes, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) return "image/png" + if (startsWith(bytes, [0xff, 0xd8, 0xff])) return "image/jpeg" + if (startsWith(bytes, [0x47, 0x49, 0x46, 0x38])) return "image/gif" + if (startsWith(bytes, [0x52, 0x49, 0x46, 0x46]) && startsWith(bytes.subarray(8), [0x57, 0x45, 0x42, 0x50])) + return "image/webp" + return fallback +} + +class ImageSizeError extends Error {} const LocationInput = Schema.Struct({ ...FileSystem.ReadInput.fields, offset: FileSystem.ListPageInput.fields.offset.annotate({ @@ -28,9 +48,21 @@ const Success = Schema.Union([FileSystem.Content, FileSystem.TextPage, FileSyste const definition = Tool.make({ description: - "Read a text or binary file, page through a large UTF-8 text file by line offset, list a directory page relative to the current location, or page through a managed tool-output resource by opaque URI.", + "Read a text file or supported image, page through a large UTF-8 text file by line offset, list a directory page relative to the current location, or page through a managed tool-output resource by opaque URI.", parameters: Input, success: Success, + toModelOutput: ({ parameters, output }) => { + if (!("type" in output) || output.type !== "binary" || !SUPPORTED_IMAGE_MIMES.has(output.mime)) return [] + return [ + { type: "text", text: "Image read successfully" }, + { + type: "file", + source: { type: "data", data: output.content }, + mime: output.mime, + ...(parameters && "path" in parameters ? { name: parameters.path } : {}), + }, + ] + }, }) export const layer = Layer.effectDiscard( @@ -38,6 +70,14 @@ export const layer = Layer.effectDiscard( const registry = yield* ToolRegistry.Service const filesystem = yield* FileSystem.Service const resources = yield* ToolOutputStore.Service + const config = yield* Config.Service + const loadPhoton = yield* Effect.cached( + Effect.sync(() => { + const photonWasm = fileURLToPath(import.meta.resolve("@silvia-odwyer/photon-node/photon_rs_bg.wasm")) + ;(globalThis as typeof globalThis & { __OPENCODE_PHOTON_WASM_PATH?: string }).__OPENCODE_PHOTON_WASM_PATH = + path.isAbsolute(photonWasm) ? photonWasm : fileURLToPath(new URL(photonWasm, import.meta.url)) + }).pipe(Effect.andThen(() => Effect.promise(() => import("@silvia-odwyer/photon-node")))), + ) yield* registry.contribute((editor) => editor.set(name, { @@ -70,19 +110,101 @@ export const layer = Layer.effectDiscard( const final = yield* filesystem.resolveReadPath(input) if (final.type !== "file" || final.target.resource !== target.resource || final.target.real !== target.real) return yield* Effect.die(new Error("File changed after permission approval")) - if ( - final.target.size > FileSystem.MAX_READ_BYTES || - input.offset !== undefined || - input.limit !== undefined + const mime = imageMime( + yield* filesystem.readSampleResolved(final.target, FileSystem.READ_SAMPLE_BYTES), + FSUtil.mimeType(final.target.real), + ) + if (!SUPPORTED_IMAGE_MIMES.has(mime)) { + if ( + final.target.size > FileSystem.MAX_READ_BYTES || + input.offset !== undefined || + input.limit !== undefined + ) + return yield* filesystem.readTextPageResolved(final.target, { offset: input.offset, limit: input.limit }) + return yield* filesystem.readResolved(final.target, FileSystem.MAX_READ_BYTES) + } + const content = yield* filesystem.readResolved(final.target) + if (content.type !== "binary") return content + const image = Object.assign( + {}, + ...(yield* config.entries()).flatMap((entry) => + entry.type === "document" && entry.info.attachments?.image ? [entry.info.attachments.image] : [], + ), ) - return yield* filesystem.readTextPageResolved(final.target, { offset: input.offset, limit: input.limit }) - return yield* filesystem.readResolved(final.target, FileSystem.MAX_READ_BYTES) + const limits = { + autoResize: image.auto_resize ?? true, + maxWidth: image.max_width ?? MAX_IMAGE_WIDTH, + maxHeight: image.max_height ?? MAX_IMAGE_HEIGHT, + maxBase64Bytes: image.max_base64_bytes ?? MAX_IMAGE_BASE64_BYTES, + } + const photon = yield* loadPhoton + const decoded = yield* Effect.sync(() => + photon.PhotonImage.new_from_byteslice(Buffer.from(content.content, "base64")), + ) + try { + const width = decoded.get_width() + const height = decoded.get_height() + if ( + width <= limits.maxWidth && + height <= limits.maxHeight && + Buffer.byteLength(content.content, "utf8") <= limits.maxBase64Bytes + ) + return new FileSystem.BinaryContent({ ...content, mime }) + if (!limits.autoResize) + return yield* Effect.die( + new ImageSizeError( + `Image ${width}x${height} with base64 size ${Buffer.byteLength(content.content, "utf8")} exceeds configured limits ${limits.maxWidth}x${limits.maxHeight}/${limits.maxBase64Bytes} bytes`, + ), + ) + const scale = Math.min(1, limits.maxWidth / width, limits.maxHeight / height) + const sizes = Array.from({ length: 32 }).reduce>((acc) => { + const previous = acc.at(-1) ?? { + width: Math.max(1, Math.round(width * scale)), + height: Math.max(1, Math.round(height * scale)), + } + const next = + acc.length === 0 + ? previous + : { + width: previous.width === 1 ? 1 : Math.max(1, Math.floor(previous.width * 0.75)), + height: previous.height === 1 ? 1 : Math.max(1, Math.floor(previous.height * 0.75)), + } + return acc.some((item) => item.width === next.width && item.height === next.height) ? acc : [...acc, next] + }, []) + for (const size of sizes) { + const resized = photon.resize(decoded, size.width, size.height, photon.SamplingFilter.Lanczos3) + const candidate = [ + { content: Buffer.from(resized.get_bytes()).toString("base64"), mime: "image/png" }, + ...JPEG_QUALITIES.map((quality) => ({ + content: Buffer.from(resized.get_bytes_jpeg(quality)).toString("base64"), + mime: "image/jpeg", + })), + ].find((item) => Buffer.byteLength(item.content, "utf8") <= limits.maxBase64Bytes) + resized.free() + if (candidate) + return new FileSystem.BinaryContent({ + type: "binary", + content: candidate.content, + encoding: "base64", + mime: candidate.mime, + }) + } + return yield* Effect.die( + new ImageSizeError( + `Image ${width}x${height} with base64 size ${Buffer.byteLength(content.content, "utf8")} exceeds configured limits and could not be resized below ${limits.maxWidth}x${limits.maxHeight}/${limits.maxBase64Bytes} bytes`, + ), + ) + } finally { + decoded.free() + } }).pipe( Effect.catchCause((cause) => Effect.gen(function* () { const error = Cause.squash(cause) const message = - error instanceof FileSystem.BinaryFileError || error instanceof FileSystem.ReadLimitError + error instanceof FileSystem.BinaryFileError || + error instanceof FileSystem.ReadLimitError || + error instanceof ImageSizeError ? error.message : `Unable to read ${"resource" in input ? input.resource : input.path}` return yield* new ToolFailure({ message, error }) diff --git a/packages/core/test/tool-glob.test.ts b/packages/core/test/tool-glob.test.ts index 6e49af3612f9..ee75e3ae04ad 100644 --- a/packages/core/test/tool-glob.test.ts +++ b/packages/core/test/tool-glob.test.ts @@ -39,6 +39,7 @@ const filesystem = Layer.succeed( resolveReadPath: () => Effect.die("unused"), resolveRead: () => Effect.die("unused"), readResolved: () => Effect.die("unused"), + readSampleResolved: () => Effect.die("unused"), readTextPageResolved: () => Effect.die("unused"), list: () => Effect.die("unused"), resolveRoot: (input = {}) => diff --git a/packages/core/test/tool-grep.test.ts b/packages/core/test/tool-grep.test.ts index 2a038736d819..adebfe48d0b4 100644 --- a/packages/core/test/tool-grep.test.ts +++ b/packages/core/test/tool-grep.test.ts @@ -34,6 +34,7 @@ const filesystem = Layer.succeed( resolveReadPath: () => Effect.die("unused"), resolveRead: () => Effect.die("unused"), readResolved: () => Effect.die("unused"), + readSampleResolved: () => Effect.die("unused"), readTextPageResolved: () => Effect.die("unused"), list: () => Effect.die("unused"), resolveRoot: (input = {}) => diff --git a/packages/core/test/tool-read.test.ts b/packages/core/test/tool-read.test.ts index ab5800b31271..bfed17dde5dc 100644 --- a/packages/core/test/tool-read.test.ts +++ b/packages/core/test/tool-read.test.ts @@ -1,5 +1,7 @@ import { describe, expect } from "bun:test" import { Effect, Layer } from "effect" +import { Config } from "@opencode-ai/core/config" +import { ConfigAttachments } from "@opencode-ai/core/config/attachments" import { FileSystem } from "@opencode-ai/core/filesystem" import { PermissionV2 } from "@opencode-ai/core/permission" import { SessionV2 } from "@opencode-ai/core/session" @@ -11,6 +13,7 @@ import { testEffect } from "./lib/effect" const assertions: PermissionV2.AssertInput[] = [] const reads: FileSystem.ReadInput[] = [] +const samples: number[] = [] const textPageInputs: FileSystem.TextPageInput[] = [] const pages: FileSystem.ListTarget[] = [] const pageInputs: Pick[] = [] @@ -22,6 +25,13 @@ let size = 5 let real = "/project/README.md" let afterApproval = () => {} let readFailure: unknown +let readContent: FileSystem.Content = new FileSystem.TextContent({ + type: "text", + content: "hello", + mime: "text/plain", +}) +let sample = new TextEncoder().encode("hello") +let configEntries: Config.Entry[] = [] const resourceReads: ToolOutputStore.ReadInput[] = [] const filesystem = Layer.succeed( FileSystem.Service, @@ -71,9 +81,14 @@ const filesystem = Layer.succeed( readFailure === undefined ? Effect.sync(() => { reads.push({ path: RelativePath.make("README.md") }) - return new FileSystem.TextContent({ type: "text", content: "hello", mime: "text/plain" }) + return readContent }) : Effect.die(readFailure), + readSampleResolved: (_target, maximumBytes) => + Effect.sync(() => { + samples.push(maximumBytes) + return sample.slice(0, maximumBytes) + }), readTextPageResolved: (_target, page = {}) => readFailure === undefined ? Effect.sync(() => { @@ -152,13 +167,15 @@ const resources = Layer.succeed( }), }), ) +const config = Layer.succeed(Config.Service, Config.Service.of({ entries: () => Effect.succeed(configEntries) })) const read = ReadTool.layer.pipe( Layer.provide(registry), Layer.provide(filesystem), Layer.provide(permission), Layer.provide(resources), + Layer.provide(config), ) -const it = testEffect(Layer.mergeAll(registry, filesystem, permission, resources, read)) +const it = testEffect(Layer.mergeAll(registry, filesystem, permission, resources, config, read)) const sessionID = SessionV2.ID.make("ses_read_tool_test") describe("ReadTool", () => { @@ -173,6 +190,9 @@ describe("ReadTool", () => { real = "/project/README.md" afterApproval = () => {} readFailure = undefined + readContent = new FileSystem.TextContent({ type: "text", content: "hello", mime: "text/plain" }) + sample = new TextEncoder().encode("hello") + configEntries = [] resolvedInput = undefined const registry = yield* ToolRegistry.Service @@ -241,6 +261,90 @@ describe("ReadTool", () => { }), ) + it.effect("returns supported images as model-native media", () => + Effect.gen(function* () { + const photon = yield* Effect.promise(() => import("@silvia-odwyer/photon-node")) + const source = new photon.PhotonImage(new Uint8Array(Array.from({ length: 4 }, () => 255)), 1, 1) + const content = Buffer.from(source.get_bytes()).toString("base64") + source.free() + allow = true + resolveFailure = undefined + listResolveFailure = new Error("not a directory") + size = 4 + real = "/project/image.png" + afterApproval = () => {} + readFailure = undefined + readContent = new FileSystem.BinaryContent({ + type: "binary", + content, + encoding: "base64", + mime: "image/png", + }) + sample = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]) + const registry = yield* ToolRegistry.Service + + expect( + yield* registry.execute({ + sessionID, + call: { type: "tool-call", id: "call-image", name: "read", input: { path: "image.png" } }, + }), + ).toEqual({ + type: "content", + value: [ + { type: "text", text: "Image read successfully" }, + { type: "media", mediaType: "image/png", data: content, filename: "image.png" }, + ], + }) + expect(samples.at(-1)).toBe(FileSystem.READ_SAMPLE_BYTES) + }), + ) + + it.effect("applies configured image dimension limits before returning media", () => + Effect.gen(function* () { + const photon = yield* Effect.promise(() => import("@silvia-odwyer/photon-node")) + const source = new photon.PhotonImage(new Uint8Array(Array.from({ length: 16 * 4 }, () => 255)), 16, 1) + allow = true + resolveFailure = undefined + listResolveFailure = new Error("not a directory") + size = source.get_bytes().length + real = "/project/wide.png" + afterApproval = () => {} + readFailure = undefined + readContent = new FileSystem.BinaryContent({ + type: "binary", + content: Buffer.from(source.get_bytes()).toString("base64"), + encoding: "base64", + mime: "image/png", + }) + source.free() + sample = new Uint8Array([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]) + configEntries = [ + new Config.Document({ + type: "document", + info: new Config.Info({ + attachments: new ConfigAttachments.Info({ + image: new ConfigAttachments.Image({ max_width: 4, max_height: 4 }), + }), + }), + }), + ] + const registry = yield* ToolRegistry.Service + const result = yield* registry.execute({ + sessionID, + call: { type: "tool-call", id: "call-resize-image", name: "read", input: { path: "wide.png" } }, + }) + expect(result.type).toBe("content") + if (result.type !== "content") return + const media = result.value[1] + expect(media?.type).toBe("media") + if (media?.type !== "media") return + const resized = photon.PhotonImage.new_from_byteslice(Buffer.from(media.data, "base64")) + expect(resized.get_width()).toBeLessThanOrEqual(4) + expect(resized.get_height()).toBeLessThanOrEqual(4) + resized.free() + }), + ) + it.effect("lists a bounded directory page through read", () => Effect.gen(function* () { assertions.length = 0 @@ -364,6 +468,9 @@ describe("ReadTool", () => { real = "/project/large.txt" afterApproval = () => {} readFailure = undefined + readContent = new FileSystem.TextContent({ type: "text", content: "hello", mime: "text/plain" }) + sample = new TextEncoder().encode("hello") + configEntries = [] const registry = yield* ToolRegistry.Service expect(