From 3da71da68924f90550d3ac5ff2b0ef97dda5e412 Mon Sep 17 00:00:00 2001
From: Reflex
Date: Sat, 27 Jun 2026 01:24:24 +0000
Subject: [PATCH] fix(files): stream file uploads from disk instead of buffering in memory

upload_file (and any PathLike file input) read the entire file into memory
via read_bytes() before the request was built, contradicting the streaming
behavior these large-file endpoints imply. Hand httpx an open file handle
so its multipart encoder reads lazily in chunks. Tests assert an io.IOBase
handle is returned and close it to avoid resource warnings.

Co-Authored-By: Claude Opus 4.8
---
Review notes. This section sits between the three-dash line and the
diffstat, so it is not part of the commit message.

* Both _transform_file and _async_transform_file now return
  (path.name, path.open("rb")). Nothing in this patch closes that handle;
  the only close() call added is the test helper _close_file_handles.
  NOTE(review): confirm the request path closes the handle once the body
  has been sent. That code is not visible in this patch.
* _async_transform_file replaces the awaited anyio.Path(file).read_bytes()
  with pathlib.Path(file).open("rb"), which is a plain synchronous call
  made inside an async def. NOTE(review): presumably the later reads from
  this handle are synchronous as well; verify that is acceptable on the
  event loop for large files.
* The tests drop print(result) and replace IsBytes() with
  IsInstance(io.IOBase). They check only the type of the returned object;
  no test reads the handle, so file content is no longer covered.
* NOTE(review): if the same request body can be sent more than once, the
  handle has to be readable from offset 0 again. Confirm against the
  client's retry behaviour, which is not shown here.

 src/runloop_api_client/_files.py | 13 +++++++--
 tests/test_files.py              | 49 +++++++++++++++++++++++++-------
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/src/runloop_api_client/_files.py b/src/runloop_api_client/_files.py
index 1a013e434..6afd86ad1 100644
--- a/src/runloop_api_client/_files.py
+++ b/src/runloop_api_client/_files.py
@@ -66,7 +66,11 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes:
     if is_file_content(file):
         if isinstance(file, os.PathLike):
             path = pathlib.Path(file)
-            return (path.name, path.read_bytes())
+            # Hand httpx an open file handle so its multipart encoder reads the
+            # file lazily in chunks. read_bytes() would buffer the entire file
+            # in memory up front, which is wasteful for the large files this
+            # endpoint targets.
+            return (path.name, path.open("rb"))
 
         return file
 
@@ -107,8 +111,11 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles
 async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
     if is_file_content(file):
         if isinstance(file, os.PathLike):
-            path = anyio.Path(file)
-            return (path.name, await path.read_bytes())
+            path = pathlib.Path(file)
+            # Same rationale as the sync path: stream from an open handle rather
+            # than buffering the whole file. httpx's multipart encoder reads from
+            # this handle in chunks as it serializes the request body.
+            return (path.name, path.open("rb"))
 
         return file
 
diff --git a/tests/test_files.py b/tests/test_files.py
index b5ee73ae2..84b3c41c8 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -1,8 +1,9 @@
+import io
 from pathlib import Path
 
 import anyio
 import pytest
-from dirty_equals import IsDict, IsList, IsBytes, IsTuple
+from dirty_equals import IsDict, IsList, IsTuple, IsInstance
 
 from runloop_api_client._files import to_httpx_files, deepcopy_with_paths, async_to_httpx_files
 from runloop_api_client._utils import extract_files
@@ -10,37 +11,63 @@
 readme_path = Path(__file__).parent.parent.joinpath("README.md")
 
 
+def _close_file_handles(value: object) -> None:
+    """Recursively close any open file handles in to_httpx_files output.
+
+    The transform returns streaming file handles, so tests that don't actually
+    issue a request must close them to avoid resource warnings.
+    """
+    if isinstance(value, io.IOBase):
+        value.close()
+    elif isinstance(value, dict):
+        for v in value.values():
+            _close_file_handles(v)
+    elif isinstance(value, (list, tuple)):
+        for v in value:
+            _close_file_handles(v)
+
+
 def test_pathlib_includes_file_name() -> None:
     result = to_httpx_files({"file": readme_path})
-    print(result)
-    assert result == IsDict({"file": IsTuple("README.md", IsBytes())})
+    try:
+        assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))})
+    finally:
+        _close_file_handles(result)
 
 
 def test_tuple_input() -> None:
     result = to_httpx_files([("file", readme_path)])
-    print(result)
-    assert result == IsList(IsTuple("file", IsTuple("README.md", IsBytes())))
+    try:
+        assert result == IsList(IsTuple("file", IsTuple("README.md", IsInstance(io.IOBase))))
+    finally:
+        _close_file_handles(result)
 
 
 @pytest.mark.asyncio
 async def test_async_pathlib_includes_file_name() -> None:
     result = await async_to_httpx_files({"file": readme_path})
-    print(result)
-    assert result == IsDict({"file": IsTuple("README.md", IsBytes())})
+    try:
+        assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))})
+    finally:
+        _close_file_handles(result)
 
 
 @pytest.mark.asyncio
 async def test_async_supports_anyio_path() -> None:
     result = await async_to_httpx_files({"file": anyio.Path(readme_path)})
-    print(result)
-    assert result == IsDict({"file": IsTuple("README.md", IsBytes())})
+    try:
+        assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))})
+    finally:
+        _close_file_handles(result)
 
 
 @pytest.mark.asyncio
 async def test_async_tuple_input() -> None:
     result = await async_to_httpx_files([("file", readme_path)])
-    print(result)
-    assert result == IsList(IsTuple("file", IsTuple("README.md", IsBytes())))
+    try:
+        assert result == IsList(IsTuple("file", IsTuple("README.md", IsInstance(io.IOBase))))
+    finally:
+        _close_file_handles(result)
 
 
 def test_string_not_allowed() -> None: