Skip to content

Commit 7a4c824

Browse files
SEC: Use zlib decompression limit when retrieving XFA data (#3658)
1 parent 4f1260f commit 7a4c824

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

pypdf/_doc_common.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
# POSSIBILITY OF SUCH DAMAGE.
3030

3131
import struct
32-
import zlib
3332
from abc import abstractmethod
3433
from collections.abc import Generator, Iterable, Iterator, Mapping
3534
from datetime import datetime
@@ -61,6 +60,7 @@
6160
from .constants import FieldDictionaryAttributes as FA
6261
from .constants import PageAttributes as PG
6362
from .errors import PdfReadError, PyPdfError
63+
from .filters import _decompress_with_limit
6464
from .generic import (
6565
ArrayObject,
6666
BooleanObject,
@@ -1324,7 +1324,6 @@ def is_encrypted(self) -> bool:
13241324

13251325
@property
13261326
def xfa(self) -> Optional[dict[str, Any]]:
1327-
tree: Optional[TreeObject] = None
13281327
retval: dict[str, Any] = {}
13291328
catalog = self.root_object
13301329

@@ -1342,7 +1341,7 @@ def xfa(self) -> Optional[dict[str, Any]]:
13421341
if isinstance(f, IndirectObject):
13431342
field = cast(Optional[EncodedStreamObject], f.get_object())
13441343
if field:
1345-
es = zlib.decompress(field._data)
1344+
es = _decompress_with_limit(field._data)
13461345
retval[tag] = es
13471346
return retval
13481347

tests/test_doc_common.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
import pytest
1212

1313
from pypdf import PdfReader, PdfWriter
14-
from pypdf.errors import PdfReadError
14+
from pypdf.errors import LimitReachedError, PdfReadError
15+
from pypdf.filters import FlateDecode
1516
from pypdf.generic import (
1617
ArrayObject,
1718
DictionaryObject,
1819
EmbeddedFile,
20+
EncodedStreamObject,
1921
NameObject,
2022
NullObject,
2123
TextStringObject,
@@ -552,3 +554,30 @@ def test_get_outline__cyclic_references__nested_handling(caplog):
552554
]
553555
]
554556
assert caplog.messages[0].startswith("Detected cycle in outline structure for {")
557+
558+
559+
def test_xfa__decompression_limit():
    """Reading ``xfa`` must fail once the zlib output limit is exceeded.

    Builds an in-memory PDF whose ``/AcroForm`` ``/XFA`` array points at a
    Flate-compressed stream of 1,000,000 bytes, lowers
    ``pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH`` to 75,000 and checks that
    accessing ``PdfReader.xfa`` raises ``LimitReachedError``.
    """
    # One million identical bytes: far larger than the patched limit below.
    payload = b"A" * 1_000_000
    compressed = FlateDecode.encode(payload, 9)

    writer = PdfWriter()
    writer.add_blank_page(width=72, height=72)

    # Store the already-compressed bytes directly so the stream is written
    # as-is and has to be inflated again when the XFA data is read.
    stream = EncodedStreamObject()
    stream._data = compressed
    stream[NameObject("/Filter")] = NameObject("/FlateDecode")
    stream_reference = writer._add_object(stream)

    # /XFA is an array of (tag, stream reference) pairs.
    acro = DictionaryObject()
    acro[NameObject("/XFA")] = ArrayObject([TextStringObject("datasets"), stream_reference])
    writer.root_object[NameObject("/AcroForm")] = writer._add_object(acro)

    data = BytesIO()
    writer.write(data)

    reader = PdfReader(data)
    # The dots are escaped so the pattern pins the literal message instead of
    # accepting any character in their place.
    with mock.patch("pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH", 75_000), pytest.raises(
        expected_exception=LimitReachedError,
        match=r"^Limit reached while decompressing\. 902 bytes remaining\.$",
    ):
        _ = reader.xfa

0 commit comments

Comments
 (0)