From 1cea9bbef5764ef2db5cfdfacf872ce2b37eed76 Mon Sep 17 00:00:00 2001
From: Kushagar Garg
Date: Fri, 16 Jan 2026 23:16:48 +0530
Subject: [PATCH] Fix #41: Fix PDF binary detection and apply formatting

Signed-off-by: Kushagar Garg
---
Reviewer notes (text between the three-dash line and the diff is ignored by
git am):

* Line structure and indentation were reconstructed from a copy of this patch
  whose newlines had been collapsed. Verify the context-line indentation
  against the target files before applying (or apply with
  --ignore-whitespace) -- TODO confirm.
* The "%PDF-" signature fallback now catches OSError only instead of
  swallowing every Exception: an unreadable file keeps the earlier verdict,
  while unrelated errors still surface.
* The test payload no longer contains NUL/0xFF bytes, so the assertion has to
  rely on the "%PDF-" signature instead of on raw binary bytes.
* The "index" line is kept from the original submission; its post-image blob
  id is stale after the two edits above.

 src/typecode/contenttype.py | 8 ++++++++
 tests/test_contenttype.py   | 7 +++++++
 2 files changed, 15 insertions(+)

diff --git a/src/typecode/contenttype.py b/src/typecode/contenttype.py
index 3243e33..cfb83aa 100644
--- a/src/typecode/contenttype.py
+++ b/src/typecode/contenttype.py
@@ -362,6 +362,14 @@ def is_binary(self):
         self._is_binary = False
         if self.is_file is True:
             self._is_binary = is_binary(self.location)
+            if not self._is_binary:
+                try:
+                    with open(self.location, "rb") as f:
+                        if f.read(5) == b"%PDF-":
+                            self._is_binary = True
+                except OSError:
+                    pass  # unreadable file: keep the earlier verdict
+
         return self._is_binary
 
     @property
diff --git a/tests/test_contenttype.py b/tests/test_contenttype.py
index 1e389c6..563f8c4 100644
--- a/tests/test_contenttype.py
+++ b/tests/test_contenttype.py
@@ -395,3 +395,10 @@ def test_size(self):
         test_dir = self.get_test_loc("contenttype/size")
         result = size(test_dir)
         assert result == 18
+
+    def test_is_binary_handles_pdf_signature(self):
+        test_dir = self.get_temp_dir()
+        test_file = os.path.join(test_dir, "test_pdf.pdf")
+        with open(test_file, "wb") as f:
+            f.write(b"%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n")
+        assert is_binary(test_file) is True