From 256be6471ec5d696922ea37a4424c5f29c582a59 Mon Sep 17 00:00:00 2001 From: Jamie Lemon Date: Wed, 17 Jun 2026 14:48:03 +0100 Subject: [PATCH] Docs: Updates with an example on how to check for black & white pages. Also some updates to file conversion section & docs README --- docs/README.md | 2 +- docs/the-basics.rst | 51 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index 9792bae72..09efb894a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -89,7 +89,7 @@ Note: subsequent runs can omit the `-a` parameter to speed up builds (it will ju Note: When build the corresponding `.mo` binary files will also be generated - these updated binaries should also be committed to Git. -### Depoloying +### Deploying Docs will be automatically deployed to RTD once pushes are made to relevant branches. diff --git a/docs/the-basics.rst b/docs/the-basics.rst index 36a9f276c..927934ab7 100644 --- a/docs/the-basics.rst +++ b/docs/the-basics.rst @@ -1085,9 +1085,58 @@ Another example could be redacting an area of a page, but not to redact any line Converting PDF Documents ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-docx** library to provide simple document conversion from |PDF| to **DOCX** format. +See :doc:`converting-files` for more information. +.. note:: + + **PDF -> DOCX** + + We recommend the pdf2docx_ library which uses |PyMuPDF| and the **python-docx** library to provide simple document conversion from |PDF| to **DOCX** format. + + +.. _The Basics_Checking_Black_and_White: + +Detecting if a page is black and white +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A simple way to determine whether a page is black and white or contains color is to check whether the page is monochrome.
+ +This can be done by using the :meth:`Page.get_pixmap` method to get a pixmap of the page and then checking the :attr:`Pixmap.is_monochrome` attribute of the pixmap. Anti-aliasing smooths edges with intermediate gray pixels, which would prevent an otherwise black and white page from being reported as monochrome, so we first set the anti-aliasing level to 0 using :meth:`Tools.set_aa_level`. + + +The following example demonstrates how to do this for each page in a |PDF| document: + +.. code-block:: python + + import pymupdf + + pymupdf.TOOLS.set_aa_level(0) # prevent anti-aliasing + + def analyze_pdf(path): + + results = [] + + with pymupdf.open(path) as doc: + for i, page in enumerate(doc): + pix=page.get_pixmap(colorspace=pymupdf.csGRAY) + bw = pix.is_monochrome + + results.append(bw) + label = "black & white" if bw==1 else "has color" + print(f"Page {i + 1}/{len(doc)}: {label}") + + return results + + if __name__ == "__main__": + import sys + + if len(sys.argv) < 2: + print("Usage: python detect_bw_pages.py <pdf_path>") + sys.exit(1) + + pdf_path = sys.argv[1] + analyze_pdf(pdf_path) .. include:: footer.rst