From 9850c246e977f72b0371c37c0e51d05005579cae Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Mon, 15 Jun 2026 16:27:33 +0300 Subject: [PATCH 1/7] fix: retry stream read errors in SimpleDownloader --- .../transfer/inbound/downloader/simple.py | 30 ++-- changelog.d/+read-error-retry.fixed.md | 1 + .../transfer/downloader/test_simple.py | 136 ++++++++++++++++++ 3 files changed, 155 insertions(+), 12 deletions(-) create mode 100644 changelog.d/+read-error-retry.fixed.md create mode 100644 test/unit/internal/transfer/downloader/test_simple.py diff --git a/b2sdk/_internal/transfer/inbound/downloader/simple.py b/b2sdk/_internal/transfer/inbound/downloader/simple.py index 87fabac05..31821abc8 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/simple.py +++ b/b2sdk/_internal/transfer/inbound/downloader/simple.py @@ -12,6 +12,7 @@ import logging from io import IOBase +from requests.exceptions import ChunkedEncodingError, ContentDecodingError from requests.models import Response from b2sdk._internal.encryption.setting import EncryptionSetting @@ -43,10 +44,13 @@ def _download( chunk_size = self._get_chunk_size(actual_size) decoded_bytes_read = 0 - for data in response.iter_content(chunk_size=chunk_size): - file.write(data) - digest.update(data) - decoded_bytes_read += len(data) + try: + for data in response.iter_content(chunk_size=chunk_size): + file.write(data) + digest.update(data) + decoded_bytes_read += len(data) + except (ChunkedEncodingError, ContentDecodingError) as exc: + logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read = response.raw.tell() response.close() @@ -58,8 +62,7 @@ def _download( # or something and the server closes connection, while neither tcp or http have a problem # with the truncated output, so we detect it here and try to continue - num_tries = 5 # this is hardcoded because we are going to replace the entire retry interface soon, so we'll avoid deprecation here and keep it private - retries_left = num_tries - 1 + retries_left = 4 # this is hardcoded because we are going to replace the entire retry interface soon, so we'll avoid deprecation here and keep it private while retries_left and bytes_read < download_version.content_length: new_range = self._get_remote_range( response, @@ -79,12 +82,15 @@ def _download( new_range.as_tuple(), encryption=encryption, ) as followup_response: - for data in followup_response.iter_content( - chunk_size=self._get_chunk_size(actual_size) - ): - file.write(data) - digest.update(data) - decoded_bytes_read += len(data) + try: + for data in followup_response.iter_content( + chunk_size=self._get_chunk_size(actual_size) + ): + file.write(data) + digest.update(data) + decoded_bytes_read += len(data) + except (ChunkedEncodingError, ContentDecodingError) as exc: + logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read += followup_response.raw.tell() retries_left -= 1 return bytes_read, digest.hexdigest() diff --git a/changelog.d/+read-error-retry.fixed.md b/changelog.d/+read-error-retry.fixed.md new file mode 100644 index 000000000..241a89682 --- /dev/null +++ b/changelog.d/+read-error-retry.fixed.md @@ -0,0 +1 @@ +Retry stream read errors during download in `SimpleDownloader`. diff --git a/test/unit/internal/transfer/downloader/test_simple.py b/test/unit/internal/transfer/downloader/test_simple.py new file mode 100644 index 000000000..a4b18bc95 --- /dev/null +++ b/test/unit/internal/transfer/downloader/test_simple.py @@ -0,0 +1,136 @@ +###################################################################### +# +# File: test/unit/internal/transfer/downloader/test_simple.py +# +# Copyright 2026 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import os +from collections.abc import Callable, Iterator +from io import BytesIO +from itertools import count +from types import ModuleType +from typing import Any + +import pytest +from apiver_deps import B2Api, Bucket, DownloadVersion, SimpleDownloader +from requests.exceptions import ChunkedEncodingError, ContentDecodingError +from requests.models import Response +from urllib3.exceptions import DecodeError, IncompleteRead, ProtocolError + +CHUNKED_ENCODING_ERROR = ChunkedEncodingError( + ProtocolError( + 'Connection broken: IncompleteRead(1 bytes read, 99 more expected)', + IncompleteRead(1, 99), + ) +) +CONTENT_DECODING_ERROR = ContentDecodingError( + DecodeError('Error -3 while decompressing data: incorrect header check') +) + + +@pytest.fixture +def file_size() -> int: + return 100 + + +@pytest.fixture +def file_content(file_size: int) -> bytes: + return os.urandom(file_size) + + +@pytest.fixture +def mock_download_response( + apiver_module: ModuleType, + bucket: Bucket, + file_content: bytes, +) -> tuple[Response, DownloadVersion]: + file_version = bucket.upload_bytes(file_content, f'dummy_file_{len(file_content)}.txt') + + url = bucket.api.session.get_download_url_by_name(bucket.name, file_version.file_name) + response = bucket.api.services.session.download_file_from_url(url).__enter__() + + return ( + response, + apiver_module.DownloadVersionFactory(bucket.api).from_response_headers(response.headers), + ) + + +@pytest.fixture +def output_file() -> BytesIO: + return BytesIO() + + +@pytest.fixture +def downloader(apiver_module: ModuleType) -> SimpleDownloader: + return apiver_module.SimpleDownloader(force_chunk_size=5) + + +def _make_iter_content( + response: Response, + attempts: Iterator[int], + fail_count: int, + stream_error: ChunkedEncodingError | ContentDecodingError, +) -> Callable[..., Iterator[bytes]]: + def iter_content(chunk_size: int = 1, decode_unicode: bool = False) -> Iterator[bytes]: + attempt = next(attempts) + chunk = response.raw.read(1) + if chunk: + yield chunk + if attempt <= fail_count: + raise stream_error + while True: + chunk = response.raw.read(chunk_size) + if not chunk: + break + yield chunk + + return iter_content + + +@pytest.mark.parametrize('fail_count', [0, 1, 2, 4, 5]) +@pytest.mark.parametrize( + 'stream_error', + [ + pytest.param(CHUNKED_ENCODING_ERROR, id='ChunkedEncodingError'), + pytest.param(CONTENT_DECODING_ERROR, id='ContentDecodingError'), + ], +) +def test_download_file__stream_read_error( + b2api: B2Api, + bucket: Bucket, + downloader: SimpleDownloader, + output_file: BytesIO, + file_size: int, + file_content: bytes, + mock_download_response: tuple[Response, DownloadVersion], + fail_count: int, + stream_error: ChunkedEncodingError | ContentDecodingError, +) -> None: + mock_response, download_version = mock_download_response + + attempts = count(1) + mock_response.iter_content = _make_iter_content( + mock_response, attempts, fail_count, stream_error + ) + + download_func = bucket.api.services.session.download_file_from_url + + def download_func_mock(*args: Any, **kwargs: Any) -> Response: + response = download_func(*args, **kwargs).__enter__() + response.iter_content = _make_iter_content(response, attempts, fail_count, stream_error) + return response + + bucket.api.services.session.download_file_from_url = download_func_mock + + bytes_written, _ = downloader.download( + output_file, mock_response, download_version, b2api.session + ) + + if fail_count < 5: + assert bytes_written == file_size + assert output_file.getvalue() == file_content + else: + assert bytes_written == fail_count From bcf8297ffcd3244078ba43c0b8365f35a354bba7 Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Mon, 29 Jun 2026 14:32:29 +0300 Subject: [PATCH 2/7] fix: handle ConnectionError mid-stream in SimpleDownloader --- b2sdk/_internal/transfer/inbound/downloader/simple.py | 6 +++--- test/unit/internal/transfer/downloader/test_simple.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/b2sdk/_internal/transfer/inbound/downloader/simple.py b/b2sdk/_internal/transfer/inbound/downloader/simple.py index 31821abc8..0d0cf2d75 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/simple.py +++ b/b2sdk/_internal/transfer/inbound/downloader/simple.py @@ -12,7 +12,7 @@ import logging from io import IOBase -from requests.exceptions import ChunkedEncodingError, ContentDecodingError +from requests.exceptions import ChunkedEncodingError, ConnectionError, ContentDecodingError from requests.models import Response from b2sdk._internal.encryption.setting import EncryptionSetting @@ -49,7 +49,7 @@ def _download( file.write(data) digest.update(data) decoded_bytes_read += len(data) - except (ChunkedEncodingError, ContentDecodingError) as exc: + except (ChunkedEncodingError, ConnectionError, ContentDecodingError) as exc: logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read = response.raw.tell() response.close() @@ -89,7 +89,7 @@ def _download( file.write(data) digest.update(data) decoded_bytes_read += len(data) - except (ChunkedEncodingError, ContentDecodingError) as exc: + except (ChunkedEncodingError, ConnectionError, ContentDecodingError) as exc: logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read += followup_response.raw.tell() retries_left -= 1 diff --git a/test/unit/internal/transfer/downloader/test_simple.py b/test/unit/internal/transfer/downloader/test_simple.py index a4b18bc95..7758c10c5 100644 --- a/test/unit/internal/transfer/downloader/test_simple.py +++ b/test/unit/internal/transfer/downloader/test_simple.py @@ -16,9 +16,9 @@ import pytest from apiver_deps import B2Api, Bucket, DownloadVersion, SimpleDownloader -from requests.exceptions import ChunkedEncodingError, ContentDecodingError +from requests.exceptions import ChunkedEncodingError, ConnectionError, ContentDecodingError from requests.models import Response -from urllib3.exceptions import DecodeError, IncompleteRead, ProtocolError +from urllib3.exceptions import DecodeError, IncompleteRead, ProtocolError, ReadTimeoutError CHUNKED_ENCODING_ERROR = ChunkedEncodingError( ProtocolError( @@ -29,6 +29,7 @@ CONTENT_DECODING_ERROR = ContentDecodingError( DecodeError('Error -3 while decompressing data: incorrect header check') ) +CONNECTION_ERROR = ConnectionError(ReadTimeoutError(None, None, 'Read timed out.')) @pytest.fixture @@ -72,7 +73,7 @@ def _make_iter_content( response: Response, attempts: Iterator[int], fail_count: int, - stream_error: ChunkedEncodingError | ContentDecodingError, + stream_error: ChunkedEncodingError | ConnectionError | ContentDecodingError, ) -> Callable[..., Iterator[bytes]]: def iter_content(chunk_size: int = 1, decode_unicode: bool = False) -> Iterator[bytes]: attempt = next(attempts) @@ -95,6 +96,7 @@ def iter_content(chunk_size: int = 1, decode_unicode: bool = False) -> Iterator[ 'stream_error', [ pytest.param(CHUNKED_ENCODING_ERROR, id='ChunkedEncodingError'), + pytest.param(CONNECTION_ERROR, id='ConnectionError'), pytest.param(CONTENT_DECODING_ERROR, id='ContentDecodingError'), ], ) @@ -107,7 +109,7 @@ def test_download_file__stream_read_error( file_content: bytes, mock_download_response: tuple[Response, DownloadVersion], fail_count: int, - stream_error: ChunkedEncodingError | ContentDecodingError, + stream_error: ChunkedEncodingError | ConnectionError | ContentDecodingError, ) -> None: mock_response, download_version = mock_download_response From 24b3e57b9a3c87edd1bcc48782c2d57c92722be6 Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Mon, 29 Jun 2026 18:54:40 +0300 Subject: [PATCH 3/7] fix: do not try to resume download in SimpleDownloader when content is decoded --- b2sdk/_internal/file_version.py | 4 ++ .../transfer/inbound/downloaded_file.py | 35 ++++------ .../transfer/inbound/downloader/abstract.py | 3 +- .../transfer/inbound/downloader/simple.py | 7 +- .../transfer/downloader/test_simple.py | 50 ++++++++++++++ .../internal/transfer/test_downloaded_file.py | 65 +++++++++++++++++++ 6 files changed, 140 insertions(+), 24 deletions(-) create mode 100644 test/unit/internal/transfer/test_downloaded_file.py diff --git a/b2sdk/_internal/file_version.py b/b2sdk/_internal/file_version.py index ae0ccf699..655d9df69 100644 --- a/b2sdk/_internal/file_version.py +++ b/b2sdk/_internal/file_version.py @@ -471,6 +471,10 @@ def expires_parsed(self) -> dt.datetime | None: return None return parse_http_date(self.expires) + @property + def _should_be_decoded(self) -> bool: + return bool(self.content_encoding and self.api.api_config.decode_content) + def as_dict(self) -> dict: result = super().as_dict() if self.cache_control is not None: diff --git a/b2sdk/_internal/transfer/inbound/downloaded_file.py b/b2sdk/_internal/transfer/inbound/downloaded_file.py index 7242b19df..3abf8dd56 100644 --- a/b2sdk/_internal/transfer/inbound/downloaded_file.py +++ b/b2sdk/_internal/transfer/inbound/downloaded_file.py @@ -170,29 +170,22 @@ def __init__( self.check_hash = check_hash def _validate_download(self, bytes_read, actual_sha1): + desired_length = self.range_[1] - self.range_[0] + 1 if self.range_ is not None else self.download_version.content_length + if bytes_read != desired_length: + raise TruncatedOutput(bytes_read, desired_length) + if ( - self.download_version.content_encoding is not None - and self.download_version.api.api_config.decode_content + not self.download_version._should_be_decoded + and self.check_hash + and self.range_ is None + and self.download_version.content_sha1 != 'none' + and actual_sha1 != self.download_version.content_sha1 ): - return - if self.range_ is None: - if bytes_read != self.download_version.content_length: - raise TruncatedOutput(bytes_read, self.download_version.content_length) - - if ( - self.check_hash - and self.download_version.content_sha1 != 'none' - and actual_sha1 != self.download_version.content_sha1 - ): - raise ChecksumMismatch( - checksum_type='sha1', - expected=self.download_version.content_sha1, - actual=actual_sha1, - ) - else: - desired_length = self.range_[1] - self.range_[0] + 1 - if bytes_read != desired_length: - raise TruncatedOutput(bytes_read, desired_length) + raise ChecksumMismatch( + checksum_type='sha1', + expected=self.download_version.content_sha1, + actual=actual_sha1, + ) def save(self, file: BinaryIO, allow_seeking: bool | None = None) -> None: """ diff --git a/b2sdk/_internal/transfer/inbound/downloader/abstract.py b/b2sdk/_internal/transfer/inbound/downloader/abstract.py index 86c71d22d..360026c4e 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/abstract.py +++ b/b2sdk/_internal/transfer/inbound/downloader/abstract.py @@ -118,8 +118,7 @@ def is_suitable(self, download_version: DownloadVersion, allow_seeking: bool): return False if ( not self.SUPPORTS_DECODE_CONTENT - and download_version.content_encoding - and download_version.api.api_config.decode_content + and download_version._should_be_decoded ): return False return True diff --git a/b2sdk/_internal/transfer/inbound/downloader/simple.py b/b2sdk/_internal/transfer/inbound/downloader/simple.py index 0d0cf2d75..2452bb1ad 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/simple.py +++ b/b2sdk/_internal/transfer/inbound/downloader/simple.py @@ -42,6 +42,7 @@ def _download( response.close() return 0, digest.hexdigest() chunk_size = self._get_chunk_size(actual_size) + should_be_decoded = download_version._should_be_decoded decoded_bytes_read = 0 try: @@ -50,6 +51,8 @@ def _download( digest.update(data) decoded_bytes_read += len(data) except (ChunkedEncodingError, ConnectionError, ContentDecodingError) as exc: + if should_be_decoded: + raise # cannot resume a partially decoded stream logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read = response.raw.tell() response.close() @@ -63,7 +66,9 @@ def _download( # with the truncated output, so we detect it here and try to continue retries_left = 4 # this is hardcoded because we are going to replace the entire retry interface soon, so we'll avoid deprecation here and keep it private - while retries_left and bytes_read < download_version.content_length: + while ( + bytes_read < download_version.content_length and not should_be_decoded and retries_left + ): new_range = self._get_remote_range( response, download_version, diff --git a/test/unit/internal/transfer/downloader/test_simple.py b/test/unit/internal/transfer/downloader/test_simple.py index 7758c10c5..b510aefb2 100644 --- a/test/unit/internal/transfer/downloader/test_simple.py +++ b/test/unit/internal/transfer/downloader/test_simple.py @@ -136,3 +136,53 @@ def download_func_mock(*args: Any, **kwargs: Any) -> Response: assert output_file.getvalue() == file_content else: assert bytes_written == fail_count + + +@pytest.mark.parametrize( + 'stream_error', + [ + pytest.param(CHUNKED_ENCODING_ERROR, id='ChunkedEncodingError'), + pytest.param(CONNECTION_ERROR, id='ConnectionError'), + pytest.param(CONTENT_DECODING_ERROR, id='ContentDecodingError'), + ], +) +def test_download_file__decoded_stream_stream_read_error_reraises( + b2api: B2Api, + bucket: Bucket, + downloader: SimpleDownloader, + output_file: BytesIO, + file_content: bytes, + mock_download_response: tuple[Response, DownloadVersion], + stream_error: ChunkedEncodingError | ConnectionError | ContentDecodingError, +) -> None: + """ + Test that a stream read error during a decoded stream download is re-raised and not retried + """ + + mock_response, download_version = mock_download_response + download_version.content_encoding = 'gzip' + download_version.api.api_config.decode_content = True + + attempts = count(1) + mock_response.iter_content = _make_iter_content( + mock_response, attempts, 1, stream_error + ) + + followup_calls = 0 + download_func = bucket.api.services.session.download_file_from_url + + def download_func_mock(*args: Any, **kwargs: Any) -> Response: + nonlocal followup_calls + followup_calls += 1 + response = download_func(*args, **kwargs).__enter__() + response.iter_content = _make_iter_content(response, attempts, 1, stream_error) + return response + + bucket.api.services.session.download_file_from_url = download_func_mock + + with pytest.raises(type(stream_error)): + downloader.download( + output_file, mock_response, download_version, b2api.session + ) + + assert followup_calls == 0 diff --git a/test/unit/internal/transfer/test_downloaded_file.py b/test/unit/internal/transfer/test_downloaded_file.py new file mode 100644 index 000000000..88f4277ba --- /dev/null +++ b/test/unit/internal/transfer/test_downloaded_file.py @@ -0,0 +1,65 @@ +###################################################################### +# +# File: test/unit/internal/transfer/test_downloaded_file.py +# +# Copyright 2026 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +from unittest.mock import Mock + +import pytest +from apiver_deps import DownloadedFile +from apiver_deps_exception import ChecksumMismatch, TruncatedOutput + + +def _generate_downloaded_file( + *, + decode_content: bool, + content_length: int = 100, + content_sha1: str = 'abc', + range_: tuple[int, int] | None = None, + check_hash: bool = True, +): + download_version = Mock() + download_version.content_encoding = 'gzip' if decode_content else None + download_version.content_length = content_length + download_version.content_sha1 = content_sha1 + download_version.api.api_config.decode_content = decode_content + download_version._should_be_decoded = decode_content + return DownloadedFile( + download_version=download_version, + download_manager=Mock(), + range_=range_, + response=Mock(), + encryption=None, + progress_listener=Mock(), + check_hash=check_hash, + ) + + +@pytest.mark.parametrize('decode_content', [True, False]) +def test_validate_download_truncated_full_download(decode_content): + # range not set, length doesn't match + downloaded_file = _generate_downloaded_file(decode_content=decode_content) + with pytest.raises(TruncatedOutput): + downloaded_file._validate_download(99, 'abc') + + +@pytest.mark.parametrize('decode_content', [True, False]) +def test_validate_download_truncated_range_download(decode_content): + # range set, length doesn't match + downloaded_file = _generate_downloaded_file(decode_content=decode_content, range_=(10, 19)) + with pytest.raises(TruncatedOutput): + downloaded_file._validate_download(9, 'abc') + + +@pytest.mark.parametrize('decode_content', [True, False]) +def test_validate_download_hash_check(decode_content): + downloaded_file = _generate_downloaded_file(decode_content=decode_content, check_hash=True) + if decode_content: + downloaded_file._validate_download(100, 'wrong') + else: + with pytest.raises(ChecksumMismatch): + downloaded_file._validate_download(100, 'wrong') From 3db2dd0c148b9158fd06ed89dbe99d0b9dbbedaa Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Wed, 1 Jul 2026 21:42:46 +0300 Subject: [PATCH 4/7] fix: preserve `api_config` in v1.B2Api --- b2sdk/v1/api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/b2sdk/v1/api.py b/b2sdk/v1/api.py index fafbe7d88..64d820f9e 100644 --- a/b2sdk/v1/api.py +++ b/b2sdk/v1/api.py @@ -68,6 +68,7 @@ def __init__( raw_api=raw_api, api_config=api_config, ) + self.api_config = api_config self.file_version_factory = self.FILE_VERSION_FACTORY_CLASS(self) self.download_version_factory = self.DOWNLOAD_VERSION_FACTORY_CLASS(self) self.services = Services( From 86b5ead9e4f3f659ae5f477f381c9c94813a96b0 Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Thu, 2 Jul 2026 17:06:31 +0300 Subject: [PATCH 5/7] fix: fix inaccessible `v1.B2Api.api_config` when not passed explicitly --- .../transfer/inbound/downloader/abstract.py | 5 +- .../transfer/inbound/downloader/simple.py | 9 +-- b2sdk/v1/api.py | 2 +- b2sdk/v1/session.py | 5 +- changelog.d/+read-error-retry.fixed.md | 4 ++ test/unit/v_all/test_api.py | 16 +++++ test/unit/v_all/test_download_version.py | 69 +++++++++++++++++++ 7 files changed, 96 insertions(+), 14 deletions(-) create mode 100644 test/unit/v_all/test_download_version.py diff --git a/b2sdk/_internal/transfer/inbound/downloader/abstract.py b/b2sdk/_internal/transfer/inbound/downloader/abstract.py index 360026c4e..78400bc70 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/abstract.py +++ b/b2sdk/_internal/transfer/inbound/downloader/abstract.py @@ -116,10 +116,7 @@ def is_suitable(self, download_version: DownloadVersion, allow_seeking: bool): """ if self.REQUIRES_SEEKING and not allow_seeking: return False - if ( - not self.SUPPORTS_DECODE_CONTENT - and download_version._should_be_decoded - ): + if not self.SUPPORTS_DECODE_CONTENT and download_version._should_be_decoded: return False return True diff --git a/b2sdk/_internal/transfer/inbound/downloader/simple.py b/b2sdk/_internal/transfer/inbound/downloader/simple.py index 2452bb1ad..d953a47cf 100644 --- a/b2sdk/_internal/transfer/inbound/downloader/simple.py +++ b/b2sdk/_internal/transfer/inbound/downloader/simple.py @@ -27,6 +27,7 @@ class SimpleDownloader(AbstractDownloader): REQUIRES_SEEKING = False SUPPORTS_DECODE_CONTENT = True + MAX_DOWNLOAD_ATTEMPTS = 5 def _download( self, @@ -44,12 +45,10 @@ def _download( chunk_size = self._get_chunk_size(actual_size) should_be_decoded = download_version._should_be_decoded - decoded_bytes_read = 0 try: for data in response.iter_content(chunk_size=chunk_size): file.write(data) digest.update(data) - decoded_bytes_read += len(data) except (ChunkedEncodingError, ConnectionError, ContentDecodingError) as exc: if should_be_decoded: raise # cannot resume a partially decoded stream @@ -65,7 +64,7 @@ def _download( # or something and the server closes connection, while neither tcp or http have a problem # with the truncated output, so we detect it here and try to continue - retries_left = 4 # this is hardcoded because we are going to replace the entire retry interface soon, so we'll avoid deprecation here and keep it private + retries_left = self.MAX_DOWNLOAD_ATTEMPTS - 1 while ( bytes_read < download_version.content_length and not should_be_decoded and retries_left ): @@ -76,10 +75,9 @@ def _download( # original response is not closed at this point yet, as another layer is responsible for closing it, so a new socket might be allocated, # but this is a very rare case and so it is not worth the optimization logger.debug( - 're-download attempts remaining: %i, bytes read: %i (decoded: %i). Getting range %s now.', + 're-download attempts remaining: %i, bytes read: %i. Getting range %s now.', retries_left, bytes_read, - decoded_bytes_read, new_range, ) with session.download_file_from_url( @@ -93,7 +91,6 @@ def _download( ): file.write(data) digest.update(data) - decoded_bytes_read += len(data) except (ChunkedEncodingError, ConnectionError, ContentDecodingError) as exc: logger.debug('Stream read error during download, will retry if needed: %s', exc) bytes_read += followup_response.raw.tell() diff --git a/b2sdk/v1/api.py b/b2sdk/v1/api.py index 64d820f9e..80d49c95a 100644 --- a/b2sdk/v1/api.py +++ b/b2sdk/v1/api.py @@ -68,7 +68,7 @@ def __init__( raw_api=raw_api, api_config=api_config, ) - self.api_config = api_config + self.api_config = self.session.api_config self.file_version_factory = self.FILE_VERSION_FACTORY_CLASS(self) self.download_version_factory = self.DOWNLOAD_VERSION_FACTORY_CLASS(self) self.services = Services( diff --git a/b2sdk/v1/session.py b/b2sdk/v1/session.py index 075feab8a..2225ff8cd 100644 --- a/b2sdk/v1/session.py +++ b/b2sdk/v1/session.py @@ -31,9 +31,8 @@ def __init__( 'raw_api,api_config', 'Provide at most one of: raw_api, api_config' ) - if api_config is None: - api_config = v2.DEFAULT_HTTP_API_CONFIG - super().__init__(account_info=account_info, cache=cache, api_config=api_config) + self.api_config = api_config or v2.DEFAULT_HTTP_API_CONFIG + super().__init__(account_info=account_info, cache=cache, api_config=self.api_config) if raw_api is not None: self.raw_api = raw_api diff --git a/changelog.d/+read-error-retry.fixed.md b/changelog.d/+read-error-retry.fixed.md index 241a89682..c0fd1387b 100644 --- a/changelog.d/+read-error-retry.fixed.md +++ b/changelog.d/+read-error-retry.fixed.md @@ -1 +1,5 @@ Retry stream read errors during download in `SimpleDownloader`. + +Decoded downloads with `decode_content=True` now validate truncation; previously all post-download checks were skipped for decoded streams. + +Fix `b2sdk.v1.B2Api` not exposing `api_config`. diff --git a/test/unit/v_all/test_api.py b/test/unit/v_all/test_api.py index d40741e31..56a1b0ede 100644 --- a/test/unit/v_all/test_api.py +++ b/test/unit/v_all/test_api.py @@ -21,6 +21,7 @@ InMemoryAccountInfo, InMemoryCache, RawSimulator, + StubAccountInfo, ) from apiver_deps_exception import BucketIdNotFound @@ -37,6 +38,21 @@ def __init__(self, *args, **kwargs): pass +def test_b2api_stores_api_config_when_specified(): + api_config = B2HttpApiConfig(decode_content=True) + api = B2Api(StubAccountInfo(), api_config=api_config) + assert api.api_config is api_config + if apiver_deps.V <= 1: + assert api.api_config is api.session.api_config + + +@pytest.mark.apiver(to_ver=1) +def test_b2api_stores_api_config_when_not_specified(): + api = B2Api(StubAccountInfo()) + assert isinstance(api.api_config, B2HttpApiConfig) + assert api.api_config is api.session.api_config + + class TestServices: @pytest.mark.apiver(from_ver=2) @pytest.mark.parametrize( diff --git a/test/unit/v_all/test_download_version.py b/test/unit/v_all/test_download_version.py new file mode 100644 index 000000000..14f35b688 --- /dev/null +++ b/test/unit/v_all/test_download_version.py @@ -0,0 +1,69 @@ +###################################################################### +# +# File: test/unit/v_all/test_download_version.py +# +# Copyright 2026 Backblaze Inc. All Rights Reserved. +# +# License https://www.backblaze.com/using_b2_code.html +# +###################################################################### +import pytest +from apiver_deps import ( + B2Api, + B2HttpApiConfig, + DownloadVersion, + EMPTY_RANGE, + SSE_NONE, + StubAccountInfo, +) + + +def _make_download_version(api, *, content_encoding): + return DownloadVersion( + api=api, + id_='file_id', + file_name='file_name', + size=100, + content_type='text/plain', + content_sha1='abc', + file_info={}, + upload_timestamp=0, + server_side_encryption=SSE_NONE, + range_=EMPTY_RANGE, + content_disposition=None, + content_length=100, + content_language=None, + expires=None, + cache_control=None, + content_encoding=content_encoding, + ) + + +@pytest.mark.parametrize( + 'content_encoding, decode_content, expected', + [ + (None, False, False), + (None, True, False), + ('gzip', False, False), + ('gzip', True, True), + ], +) +def test_should_be_decoded(content_encoding, decode_content, expected): + api = B2Api(StubAccountInfo(), api_config=B2HttpApiConfig(decode_content=decode_content)) + download_version = _make_download_version(api, content_encoding=content_encoding) + assert download_version._should_be_decoded is expected + + +@pytest.mark.apiver(to_ver=1) +@pytest.mark.parametrize( + 'content_encoding, decode_content, expected', + [ + (None, False, False), + ('gzip', True, True), + ], +) +def test_should_be_decoded_without_api_config_kwarg(content_encoding, decode_content, expected): + api = B2Api(StubAccountInfo()) + api.api_config.decode_content = decode_content + download_version = _make_download_version(api, content_encoding=content_encoding) + assert download_version._should_be_decoded is expected From 3aee93be3b14ca320fa30460f80302a6fa2f429b Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Thu, 2 Jul 2026 17:55:03 +0300 Subject: [PATCH 6/7] fix: lint fixes --- b2sdk/_internal/transfer/inbound/downloaded_file.py | 6 +++++- noxfile.py | 2 +- test/unit/internal/transfer/downloader/test_simple.py | 8 ++------ test/unit/v_all/test_download_version.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/b2sdk/_internal/transfer/inbound/downloaded_file.py b/b2sdk/_internal/transfer/inbound/downloaded_file.py index 3abf8dd56..c67081f2b 100644 --- a/b2sdk/_internal/transfer/inbound/downloaded_file.py +++ b/b2sdk/_internal/transfer/inbound/downloaded_file.py @@ -170,7 +170,11 @@ def __init__( self.check_hash = check_hash def _validate_download(self, bytes_read, actual_sha1): - desired_length = self.range_[1] - self.range_[0] + 1 if self.range_ is not None else self.download_version.content_length + desired_length = ( + self.range_[1] - self.range_[0] + 1 + if self.range_ is not None + else self.download_version.content_length + ) if bytes_read != desired_length: raise TruncatedOutput(bytes_read, desired_length) diff --git a/noxfile.py b/noxfile.py index fedc60dda..0e65bcd98 100644 --- a/noxfile.py +++ b/noxfile.py @@ -210,7 +210,7 @@ def build(session): "'v2': pathlib.Path(v2.__file__).resolve(), " "'v3': pathlib.Path(v3.__file__).resolve()}; " 'print(module_files); ' - "assert all(not path.is_relative_to(source_root) for path in module_files.values()), " + 'assert all(not path.is_relative_to(source_root) for path in module_files.values()), ' "f'Imported modules from checkout: {module_files!r}'; " "assert all('site-packages' in path.parts for path in module_files.values()), " "f'Imported modules from an unexpected location: {module_files!r}'" diff --git a/test/unit/internal/transfer/downloader/test_simple.py b/test/unit/internal/transfer/downloader/test_simple.py index b510aefb2..120383d60 100644 --- a/test/unit/internal/transfer/downloader/test_simple.py +++ b/test/unit/internal/transfer/downloader/test_simple.py @@ -164,9 +164,7 @@ def test_download_file__decoded_stream_stream_read_error_reraises( download_version.api.api_config.decode_content = True attempts = count(1) - mock_response.iter_content = _make_iter_content( - mock_response, attempts, 1, stream_error - ) + mock_response.iter_content = _make_iter_content(mock_response, attempts, 1, stream_error) followup_calls = 0 download_func = bucket.api.services.session.download_file_from_url @@ -181,8 +179,6 @@ def download_func_mock(*args: Any, **kwargs: Any) -> Response: bucket.api.services.session.download_file_from_url = download_func_mock with pytest.raises(type(stream_error)): - downloader.download( - output_file, mock_response, download_version, b2api.session - ) + downloader.download(output_file, mock_response, download_version, b2api.session) assert followup_calls == 0 diff --git a/test/unit/v_all/test_download_version.py b/test/unit/v_all/test_download_version.py index 14f35b688..68195a1d2 100644 --- a/test/unit/v_all/test_download_version.py +++ b/test/unit/v_all/test_download_version.py @@ -9,11 +9,11 @@ ###################################################################### import pytest from apiver_deps import ( + EMPTY_RANGE, + SSE_NONE, B2Api, B2HttpApiConfig, DownloadVersion, - EMPTY_RANGE, - SSE_NONE, StubAccountInfo, ) From 6b1d2382d04c4bf1b3e7558fd806b2a96a0f815a Mon Sep 17 00:00:00 2001 From: Aleksandr Goncharov Date: Thu, 2 Jul 2026 18:04:45 +0300 Subject: [PATCH 7/7] feat: add test to verify that range downloads do not perform hash check --- test/unit/internal/transfer/test_downloaded_file.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/unit/internal/transfer/test_downloaded_file.py b/test/unit/internal/transfer/test_downloaded_file.py index 88f4277ba..45cc2a3ef 100644 --- a/test/unit/internal/transfer/test_downloaded_file.py +++ b/test/unit/internal/transfer/test_downloaded_file.py @@ -63,3 +63,13 @@ def test_validate_download_hash_check(decode_content): else: with pytest.raises(ChecksumMismatch): downloaded_file._validate_download(100, 'wrong') + + +@pytest.mark.parametrize('decode_content', [True, False]) +def test_validate_download_hash_check_skipped_for_range_download(decode_content): + downloaded_file = _generate_downloaded_file( + decode_content=decode_content, + range_=(10, 19), + check_hash=True, + ) + downloaded_file._validate_download(10, 'wrong')