Skip to content

pymupdf.Document.scrub raises AttributeError for a document with annotations #4928

@mahlzahn

Description

@mahlzahn

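Description of the bug

Calling `pymupdf.Document.scrub()` on a PDF that contains annotations raises `AttributeError: 'NoneType' object has no attribute 'm_internal'`. The traceback below shows that, while `scrub()` iterates over `page.annots()`, the annotation lookup inside `_load_annot` yields `None`, so the subsequent `annot.m_internal` check fails.

Traceback output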
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-2-df56e242e8d3> in ?()
      1 import pymupdf
----> 2 pymupdf.Document('annotated_pdf.pdf').scrub()

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(doc, attached_files, clean_pages, embedded_files, hidden_text, javascript, metadata, redactions, redact_images, remove_links, reset_fields, reset_responses, thumbnails, xml_metadata)
   6677                 for link in links:  # remove all links
   6678                     page.delete_link(link)
   6679 
   6680             found_redacts = False
-> 6681             for annot in page.annots():
   6682                 if annot.type[0] == mupdf.PDF_ANNOT_FILE_ATTACHMENT and attached_files:
   6683                     annot.update_file(buffer_=b" ")  # set file content to empty
   6684                 if reset_responses:

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, types)
  10687             annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] not in skip_types]
  10688         else:
  10689             annot_xrefs = [a[0] for a in self.annot_xrefs() if a[1] in types and a[1] not in skip_types]
  10690         for xref in annot_xrefs:
> 10691             annot = self.load_annot(xref)
  10692             annot._yielded=True
  10693             yield annot

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, ident)
  12760             xref = ident
  12761             name = None
  12762         else:
  12763             raise ValueError("identifier must be a string or integer")
> 12764         val = self._load_annot(name, xref)
  12765         if not val:
  12766             return val
  12767         val.thisown = True

/usr/lib/python3.14/site-packages/pymupdf/__init__.py in ?(self, name, xref)
  10219         if xref == 0:
  10220             annot = JM_get_annot_by_name(page, name)
  10221         else:
  10222             annot = JM_get_annot_by_xref(page, xref)
> 10223         if annot.m_internal:
  10224             return Annot(annot)

AttributeError: 'NoneType' object has no attribute 'm_internal'

How to reproduce the bug

  1. Download the sample file:
    wget https://github.com/py-pdf/sample-files/raw/main/024-annotations/annotated_pdf.pdf
  2. Run the following commands in a Python console or program:
    import pymupdf
    pymupdf.Document('annotated_pdf.pdf').scrub()

PyMuPDF version

1.27.1

Operating system

Linux

Python version

3.14

Metadata

Metadata

Assignees

No one assigned

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions