From c0ceaf5f67cb186c3698f18c14838520516ae07d Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 22:59:58 +0100 Subject: [PATCH 1/6] chore: add no-unused-vars rule --- .eslintrc.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.eslintrc.json b/.eslintrc.json index dc7ee2a..e387905 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -36,7 +36,11 @@ "allowSingleLine": true } ], - "@typescript-eslint/no-empty-interface": 0 + "@typescript-eslint/no-empty-interface": 0, + "@typescript-eslint/no-unused-vars": ["error", { + "argsIgnorePattern": "^_", + "varsIgnorePattern": "^_" + }] } } ] From fc647725c7415cb34870ff05d4610f149a353fc3 Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 23:00:45 +0100 Subject: [PATCH 2/6] feat: add PDF repair check functionality --- packages/repair/src/PDFRepair.ts | 38 +++++++++++++++++++++++- packages/repair/src/PDFRepairRegistry.ts | 7 +++++ packages/repair/src/PDFRepairStatus.ts | 19 ++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 packages/repair/src/PDFRepairStatus.ts diff --git a/packages/repair/src/PDFRepair.ts b/packages/repair/src/PDFRepair.ts index f46eb64..03e8d55 100644 --- a/packages/repair/src/PDFRepair.ts +++ b/packages/repair/src/PDFRepair.ts @@ -1,14 +1,25 @@ import { PDFDocument } from "@peculiarventures/pdf-doc"; import { PDFRepairRegistry, IRepairRule, globalRepairRegistry } from "./PDFRepairRegistry"; +import { PDFRepairStatus } from "./PDFRepairStatus"; /** * An object that records repair notes for a PDF file. The keys are rule IDs and the values are arrays of strings * containing the repair notes. */ -interface RepairNotes { +export interface RepairNotes { [key: string]: string[]; } +export interface RepairCheckRule { + status: PDFRepairStatus; + description: string; +} + +export interface RepairCheck { + status: PDFRepairStatus; + rules: Record<string, RepairCheckRule>; +} + /** * A class for repairing PDF documents by applying a set of rules to them. 
*/ @@ -39,4 +50,29 @@ export class PDFRepair { return repairNotes; } + + /** + * Checks if a PDF document needs to be repaired. + * @param doc - The PDF document to check. + * @returns The overall repair status together with the status and description of every checked rule. + */ + async checkDocument(doc: PDFDocument): Promise<RepairCheck> { + const check: RepairCheck = { + status: PDFRepairStatus.notNeeded, + rules: {}, + }; + + for (const rule of this.rules) { + const status = await rule.check(doc); + if (check.status !== PDFRepairStatus.requireClone && status !== PDFRepairStatus.notNeeded) { + check.status = status; + } + check.rules[rule.id] = { + status, + description: rule.description, + }; + } + + return check; + } } diff --git a/packages/repair/src/PDFRepairRegistry.ts b/packages/repair/src/PDFRepairRegistry.ts index 53f2128..4f8c1fa 100644 --- a/packages/repair/src/PDFRepairRegistry.ts +++ b/packages/repair/src/PDFRepairRegistry.ts @@ -1,4 +1,5 @@ import { PDFDocument } from "@peculiarventures/pdf-doc"; +import { PDFRepairStatus } from "./PDFRepairStatus"; /** * An interface representing a repair rule for a PDF document. @@ -18,6 +19,12 @@ export interface IRepairRule { * @returns A string representing the repair note generated by the rule, or null if no repair was performed. */ apply: (doc: PDFDocument) => Promise<string[]>; + /** + * Checks if the repair rule is applicable to a PDF document. + * @param doc - The PDF document to check. + * @returns An enum value indicating the repair status of the PDF document. + */ + check: (doc: PDFDocument) => Promise<PDFRepairStatus>; } /** diff --git a/packages/repair/src/PDFRepairStatus.ts b/packages/repair/src/PDFRepairStatus.ts new file mode 100644 index 0000000..6b89614 --- /dev/null +++ b/packages/repair/src/PDFRepairStatus.ts @@ -0,0 +1,19 @@ + +/** + * Represents the repair status of a PDF document. + */ +export enum PDFRepairStatus { + /** + * The PDF document cannot be repaired in place. It is either corrupt or malformed. + * The document should be fixed via the `PDFDocument.clone()` method. 
+ */ + requireClone = "RequireClone", + /** + * The PDF document is repairable and doesn't need to be rewritten. + */ + repairable = "Repairable", + /** + * The PDF document is correctly formatted and does not need to be repaired. + */ + notNeeded = "NotNeeded", +} From e40023f7bf0d3ec6f03368f3d52c74d547145fa2 Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 23:02:25 +0100 Subject: [PATCH 3/6] feat: detect incorrect EOL for cross-reference table entries --- .../core/src/structure/CrossReferenceTable.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/core/src/structure/CrossReferenceTable.ts b/packages/core/src/structure/CrossReferenceTable.ts index a4af49a..1d95b61 100644 --- a/packages/core/src/structure/CrossReferenceTable.ts +++ b/packages/core/src/structure/CrossReferenceTable.ts @@ -12,9 +12,25 @@ import type { CrossReferenceStream } from "./CrossReferenceStream"; export class CrossReferenceTable extends TrailerDictionary implements CrossReference { + /** + * End of line for cross-reference table entries. This property allows + * changing the EOL for cross-reference table entries during writing. It + * should be used for testing purposes only. + * @internal + */ + public static EOL = "\r\n"; + public objects: PDFDocumentObject[] = []; public xrefStream?: CrossReferenceStream; + /** + * Indicates that the cross-reference table has an incorrect EOL for some entries. + * + * @remarks + * Some documents use 19 bytes for each entry instead of 20 bytes. 
+ */ + public hasIncorrectEol: boolean = false; + protected override onFromPDF(reader: ViewReader): void { reader.findIndex(c => !CharSet.whiteSpaceChars.includes(c)); if (!CharSet.xrefChars.every(c => c === reader.readByte())) { @@ -47,6 +63,7 @@ export class CrossReferenceTable extends TrailerDictionary implements CrossRefer } else { // In some cases, the line has 19 characters reader.read(1); + this.hasIncorrectEol = true; } const matches = /([0-9]{10}) ([0-9]{5}) ([fn])/.exec(line); @@ -113,7 +130,7 @@ export class CrossReferenceTable extends TrailerDictionary implements CrossRefer } const offset = item.offset.toString().padStart(10, "0"); const generation = item.generation.toString().padStart(5, "0"); - writer.writeString(`${offset} ${generation} ${item.type}\r\n`); + writer.writeString(`${offset} ${generation} ${item.type}${CrossReferenceTable.EOL}`); } } From f6c10c38d00d1d2e86d7da799902613af9a245f1 Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 23:05:47 +0100 Subject: [PATCH 4/6] feat: add xrefHybrid and xrefTableEol rules --- packages/repair/src/rules/index.ts | 2 + packages/repair/src/rules/xrefHybrid.spec.ts | 41 +++++++++++++++++++ packages/repair/src/rules/xrefHybrid.ts | 18 ++++++++ .../repair/src/rules/xrefTableEol.spec.ts | 37 +++++++++++++++++ packages/repair/src/rules/xrefTableEol.ts | 31 ++++++++++++++ 5 files changed, 129 insertions(+) create mode 100644 packages/repair/src/rules/xrefHybrid.spec.ts create mode 100644 packages/repair/src/rules/xrefHybrid.ts create mode 100644 packages/repair/src/rules/xrefTableEol.spec.ts create mode 100644 packages/repair/src/rules/xrefTableEol.ts diff --git a/packages/repair/src/rules/index.ts b/packages/repair/src/rules/index.ts index da593fa..c4e6041 100644 --- a/packages/repair/src/rules/index.ts +++ b/packages/repair/src/rules/index.ts @@ -1,4 +1,6 @@ // NOTE: Rules will be executed in the order they are imported. 
+import "./xrefTableEol"; +import "./xrefHybrid"; import "./annotationHasPage"; import "./removeNeedAppearances"; diff --git a/packages/repair/src/rules/xrefHybrid.spec.ts b/packages/repair/src/rules/xrefHybrid.spec.ts new file mode 100644 index 0000000..bb46bca --- /dev/null +++ b/packages/repair/src/rules/xrefHybrid.spec.ts @@ -0,0 +1,41 @@ +import * as assert from "node:assert"; +import { CrossReferenceStream, CrossReferenceTable, PDFIndirectObject } from "@peculiarventures/pdf-core"; +import { PDFDocument } from "@peculiarventures/pdf-doc"; +import { PDFRepair, PDFRepairStatus, globalRepairRegistry } from "@peculiarventures/pdf-repair"; + +context("PDFRepair:xrefHybrid", () => { + it("should return requireClone", async () => { + const doc = await PDFDocument.create({ + useXrefTable: true, + }); + doc.pages.create(); + const xref = doc.target.update.xref; + assert(xref instanceof CrossReferenceTable); + const xRefStm = doc.target.createNumber(0, 5); + xref.set("XRefStm", xRefStm); + const xRefStream = CrossReferenceStream.create(doc.target).makeIndirect(); + xRefStream.Size = xref.Size; + xRefStream.objects = xref.objects; + xRefStream.W = [1, 2, 1]; + const raw = await doc.save(); + + const xRefStreamOffset = (xRefStream.getIndirect() as PDFIndirectObject).view.byteOffset.toString(); + xRefStm.view.set(Buffer.from(xRefStreamOffset), 0); + + const doc2 = await PDFDocument.load(raw); + const repair = new PDFRepair(globalRepairRegistry, ["xrefHybrid"]); + const report = await repair.checkDocument(doc2); + assert.strictEqual(report.status, PDFRepairStatus.requireClone); + }); + + it("should return notNeeded", async () => { + const doc = await PDFDocument.create(); + doc.pages.create(); + const raw = await doc.save(); + + const doc2 = await PDFDocument.load(raw); + const repair = new PDFRepair(globalRepairRegistry, ["xrefHybrid"]); + const report = await repair.checkDocument(doc2); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + }); +}); diff --git 
a/packages/repair/src/rules/xrefHybrid.ts b/packages/repair/src/rules/xrefHybrid.ts new file mode 100644 index 0000000..44036c3 --- /dev/null +++ b/packages/repair/src/rules/xrefHybrid.ts @@ -0,0 +1,18 @@ +import { PDFDocument } from "@peculiarventures/pdf-doc"; +import { globalRepairRegistry } from "../PDFRepairRegistry"; +import { PDFRepairStatus } from "../PDFRepairStatus"; + +globalRepairRegistry.addRule({ + id: "xrefHybrid", + description: "Checks if the PDF document uses hybrid xref tables", + apply: async (_doc: PDFDocument) => { + return []; + }, + check: async (doc: PDFDocument) => { + if (doc.hasHybridReference()) { + return PDFRepairStatus.requireClone; + } + + return PDFRepairStatus.notNeeded; + }, +}); diff --git a/packages/repair/src/rules/xrefTableEol.spec.ts b/packages/repair/src/rules/xrefTableEol.spec.ts new file mode 100644 index 0000000..38faf47 --- /dev/null +++ b/packages/repair/src/rules/xrefTableEol.spec.ts @@ -0,0 +1,37 @@ +import * as assert from "node:assert"; +import { CrossReferenceTable } from "@peculiarventures/pdf-core"; +import { PDFDocument } from "@peculiarventures/pdf-doc"; +import { PDFRepair, PDFRepairStatus, globalRepairRegistry } from "@peculiarventures/pdf-repair"; + +context("PDFRepair:xrefTableEol", () => { + after(() => { + CrossReferenceTable.EOL = "\r\n"; + }); + it("should return requireClone", async () => { + CrossReferenceTable.EOL = "\n"; // incorrect EOL + const doc = await PDFDocument.create({ + useXrefTable: true, + }); + doc.pages.create(); + const raw = await doc.save(); + + const doc2 = await PDFDocument.load(raw); + const repair = new PDFRepair(globalRepairRegistry, ["xrefTableEol"]); + const report = await repair.checkDocument(doc2); + assert.strictEqual(report.status, PDFRepairStatus.requireClone); + }); + + it("should return notNeeded", async () => { + CrossReferenceTable.EOL = "\r\n"; // correct EOL + const doc = await PDFDocument.create({ + useXrefTable: true, + }); + doc.pages.create(); + const raw = 
await doc.save(); + + const doc2 = await PDFDocument.load(raw); + const repair = new PDFRepair(globalRepairRegistry, ["xrefTableEol"]); + const report = await repair.checkDocument(doc2); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + }); +}); diff --git a/packages/repair/src/rules/xrefTableEol.ts b/packages/repair/src/rules/xrefTableEol.ts new file mode 100644 index 0000000..8de1567 --- /dev/null +++ b/packages/repair/src/rules/xrefTableEol.ts @@ -0,0 +1,31 @@ +import { PDFDocument } from "@peculiarventures/pdf-doc"; +import { CrossReferenceTable, PDFDocumentUpdate } from "@peculiarventures/pdf-core"; +import { globalRepairRegistry } from "../PDFRepairRegistry"; +import { PDFRepairStatus } from "../PDFRepairStatus"; + +/** + * Some PDF documents use 19 bytes for each entry instead of 20 bytes. In this case Acrobat Reader edits the file + * on opening and shows a save dialog on closing. This also breaks the verification result for signed documents. + * It's impossible to fix this issue without rewriting the whole file. 
+ */ + +globalRepairRegistry.addRule({ + id: "xrefTableEol", + description: "Checks if xref table item has incorrect size of cross-reference entries", + apply: async (_doc: PDFDocument) => { + return []; + }, + check: async (doc: PDFDocument) => { + let update: PDFDocumentUpdate | null = doc.target.update; + while (update) { + const xref = update.xref; + if (xref instanceof CrossReferenceTable && xref.hasIncorrectEol) { + return PDFRepairStatus.requireClone; + } + + update = update.previous; + } + + return PDFRepairStatus.notNeeded; + }, +}); From f9aed4794040e7be233ab374f60e2f98f898e6aa Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 23:06:16 +0100 Subject: [PATCH 5/6] fix: add PDFRepairStatus export --- packages/repair/src/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/repair/src/index.ts b/packages/repair/src/index.ts index e7b111b..095eee0 100644 --- a/packages/repair/src/index.ts +++ b/packages/repair/src/index.ts @@ -2,3 +2,4 @@ import "./rules"; export * from "./PDFRepairRegistry"; export * from "./PDFRepair"; +export * from "./PDFRepairStatus"; From 61bf9bcceb1dd1063b8b14b5c7b7668ce4860dd2 Mon Sep 17 00:00:00 2001 From: microshine Date: Tue, 23 Jan 2024 23:07:12 +0100 Subject: [PATCH 6/6] feat: implement check methods for old rules and test them --- .../repair/src/rules/annotationHasPage.spec.ts | 10 +++++++++- packages/repair/src/rules/annotationHasPage.ts | 16 ++++++++++++++++ .../src/rules/removeNeedAppearances.spec.ts | 16 +++++++++++++++- .../repair/src/rules/removeNeedAppearances.ts | 15 ++++++++++++++- 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/packages/repair/src/rules/annotationHasPage.spec.ts b/packages/repair/src/rules/annotationHasPage.spec.ts index 47579c3..8375378 100644 --- a/packages/repair/src/rules/annotationHasPage.spec.ts +++ b/packages/repair/src/rules/annotationHasPage.spec.ts @@ -1,6 +1,6 @@ import * as assert from "node:assert"; import { PDFDocument } from 
"@peculiarventures/pdf-doc"; -import { PDFRepair, globalRepairRegistry } from "@peculiarventures/pdf-repair"; +import { PDFRepair, PDFRepairStatus, globalRepairRegistry } from "@peculiarventures/pdf-repair"; context("PDFRepair:AnnotationHasPage", () => { it("should add page reference to annotation", async () => { @@ -11,6 +11,10 @@ context("PDFRepair:AnnotationHasPage", () => { assert.strictEqual(annot.target.has("P"), false); const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === "annotationHasPage")); + + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.repairable); + const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 1); assert.strictEqual(/Annotation '(\d+) (\d+) R' has no P. Set P to page '(\d+) (\d+) R'/.test(notes.annotationHasPage[0]), true); @@ -24,6 +28,10 @@ context("PDFRepair:AnnotationHasPage", () => { assert.strictEqual(annot.target.has("P"), true); const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === "annotationHasPage")); + + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 0); }); diff --git a/packages/repair/src/rules/annotationHasPage.ts b/packages/repair/src/rules/annotationHasPage.ts index 9eff82b..4b29528 100644 --- a/packages/repair/src/rules/annotationHasPage.ts +++ b/packages/repair/src/rules/annotationHasPage.ts @@ -1,6 +1,7 @@ import { PDFDocument } from "@peculiarventures/pdf-doc"; import { globalRepairRegistry } from "../PDFRepairRegistry"; import { PDFDictionary } from "@peculiarventures/pdf-core"; +import { PDFRepairStatus } from "../PDFRepairStatus"; globalRepairRegistry.addRule({ id: "annotationHasPage", @@ -25,4 +26,19 @@ globalRepairRegistry.addRule({ return notes; }, + check: async (doc: PDFDocument) => { + for (const page of doc.pages) { + for 
(const annot of page.target.annots || []) { + if (!(annot instanceof PDFDictionary)) { + continue; + } + + if (!annot.has("P")) { + return PDFRepairStatus.repairable; + } + } + } + + return PDFRepairStatus.notNeeded; + }, }); diff --git a/packages/repair/src/rules/removeNeedAppearances.spec.ts b/packages/repair/src/rules/removeNeedAppearances.spec.ts index f32b2bc..d89194c 100644 --- a/packages/repair/src/rules/removeNeedAppearances.spec.ts +++ b/packages/repair/src/rules/removeNeedAppearances.spec.ts @@ -1,6 +1,6 @@ import * as assert from "node:assert"; import { PDFDocument } from "@peculiarventures/pdf-doc"; -import { PDFRepair, globalRepairRegistry } from "@peculiarventures/pdf-repair"; +import { PDFRepair, PDFRepairStatus, globalRepairRegistry } from "@peculiarventures/pdf-repair"; context("PDFRepair:RemoveNeedAppearances", () => { it("should remove NeedAppearances flag", async () => { @@ -11,6 +11,8 @@ context("PDFRepair:RemoveNeedAppearances", () => { acroForm.needAppearances = true; const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === "removeNeedAppearances")); + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.repairable); const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 1); assert.strictEqual(notes.removeNeedAppearances[0], "Removed NeedAppearances from AcroForm."); @@ -26,6 +28,10 @@ context("PDFRepair:RemoveNeedAppearances", () => { acroForm.needAppearances = false; const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === "removeNeedAppearances")); + + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 0); }); @@ -35,6 +41,10 @@ context("PDFRepair:RemoveNeedAppearances", () => { doc.pages.create(); const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === 
"removeNeedAppearances")); + + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 0); }); @@ -44,6 +54,10 @@ context("PDFRepair:RemoveNeedAppearances", () => { doc.pages.create(); const repair = new PDFRepair(globalRepairRegistry.filter(o => o.id === "removeNeedAppearances")); + + const report = await repair.checkDocument(doc); + assert.strictEqual(report.status, PDFRepairStatus.notNeeded); + const notes = await repair.repairDocument(doc); assert.strictEqual(Object.keys(notes).length, 0); }); diff --git a/packages/repair/src/rules/removeNeedAppearances.ts b/packages/repair/src/rules/removeNeedAppearances.ts index a411270..9b3f4e1 100644 --- a/packages/repair/src/rules/removeNeedAppearances.ts +++ b/packages/repair/src/rules/removeNeedAppearances.ts @@ -1,5 +1,6 @@ import { PDFDocument } from "@peculiarventures/pdf-doc"; import { globalRepairRegistry } from "../PDFRepairRegistry"; +import { PDFRepairStatus } from "../PDFRepairStatus"; globalRepairRegistry.addRule({ id: "removeNeedAppearances", @@ -18,7 +19,19 @@ globalRepairRegistry.addRule({ } return notes; - } + }, + check: async (doc: PDFDocument) => { + const catalog = doc.target.update.catalog; + if (!catalog || !catalog.AcroForm.has()) { + return PDFRepairStatus.notNeeded; + } + const acroForm = catalog.AcroForm.get(); + if (acroForm.needAppearances) { + return PDFRepairStatus.repairable; + } + + return PDFRepairStatus.notNeeded; + }, });