diff --git a/src/runloop_api_client/_files.py b/src/runloop_api_client/_files.py index 1a013e434..6afd86ad1 100644 --- a/src/runloop_api_client/_files.py +++ b/src/runloop_api_client/_files.py @@ -66,7 +66,11 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes: if is_file_content(file): if isinstance(file, os.PathLike): path = pathlib.Path(file) - return (path.name, path.read_bytes()) + # Hand httpx an open file handle so its multipart encoder reads the + # file lazily in chunks. read_bytes() would buffer the entire file + # in memory up front, which is wasteful for the large files this + # endpoint targets. + return (path.name, path.open("rb")) return file @@ -107,8 +111,11 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles async def _async_transform_file(file: FileTypes) -> HttpxFileTypes: if is_file_content(file): if isinstance(file, os.PathLike): - path = anyio.Path(file) - return (path.name, await path.read_bytes()) + path = pathlib.Path(file) + # Same rationale as the sync path: stream from an open handle rather + # than buffering the whole file. httpx's multipart encoder reads from + # this handle in chunks as it serializes the request body. + return (path.name, path.open("rb")) return file diff --git a/tests/test_files.py b/tests/test_files.py index b5ee73ae2..84b3c41c8 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -1,8 +1,9 @@ +import io from pathlib import Path import anyio import pytest -from dirty_equals import IsDict, IsList, IsBytes, IsTuple +from dirty_equals import IsDict, IsList, IsTuple, IsInstance from runloop_api_client._files import to_httpx_files, deepcopy_with_paths, async_to_httpx_files from runloop_api_client._utils import extract_files @@ -10,37 +11,63 @@ readme_path = Path(__file__).parent.parent.joinpath("README.md") +def _close_file_handles(value: object) -> None: + """Recursively close any open file handles in to_httpx_files output. + + The transform returns streaming file handles, so tests that don't actually + issue a request must close them to avoid resource warnings. + """ + if isinstance(value, io.IOBase): + value.close() + elif isinstance(value, dict): + for v in value.values(): + _close_file_handles(v) + elif isinstance(value, (list, tuple)): + for v in value: + _close_file_handles(v) + + def test_pathlib_includes_file_name() -> None: result = to_httpx_files({"file": readme_path}) - print(result) - assert result == IsDict({"file": IsTuple("README.md", IsBytes())}) + try: + assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))}) + finally: + _close_file_handles(result) def test_tuple_input() -> None: result = to_httpx_files([("file", readme_path)]) - print(result) - assert result == IsList(IsTuple("file", IsTuple("README.md", IsBytes()))) + try: + assert result == IsList(IsTuple("file", IsTuple("README.md", IsInstance(io.IOBase)))) + finally: + _close_file_handles(result) @pytest.mark.asyncio async def test_async_pathlib_includes_file_name() -> None: result = await async_to_httpx_files({"file": readme_path}) - print(result) - assert result == IsDict({"file": IsTuple("README.md", IsBytes())}) + try: + assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))}) + finally: + _close_file_handles(result) @pytest.mark.asyncio async def test_async_supports_anyio_path() -> None: result = await async_to_httpx_files({"file": anyio.Path(readme_path)}) - print(result) - assert result == IsDict({"file": IsTuple("README.md", IsBytes())}) + try: + assert result == IsDict({"file": IsTuple("README.md", IsInstance(io.IOBase))}) + finally: + _close_file_handles(result) @pytest.mark.asyncio async def test_async_tuple_input() -> None: result = await async_to_httpx_files([("file", readme_path)]) - print(result) - assert result == IsList(IsTuple("file", IsTuple("README.md", IsBytes()))) + try: + assert result == IsList(IsTuple("file", IsTuple("README.md", IsInstance(io.IOBase)))) + finally: + _close_file_handles(result) def test_string_not_allowed() -> None: