Skip to content

Commit 3d62d4f

Browse files
committed
Add Byte Offsets to the XREF Table. RES-713
1 parent 69cda41 commit 3d62d4f

9 files changed

Lines changed: 155 additions & 4 deletions

File tree

src/main/java/com/itextpdf/rups/controller/PdfReaderController.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ This file is part of the iText (R) project.
4444

4545
import com.itextpdf.kernel.pdf.PdfArray;
4646
import com.itextpdf.kernel.pdf.PdfDictionary;
47+
import com.itextpdf.kernel.pdf.PdfIndirectReference;
4748
import com.itextpdf.kernel.pdf.PdfName;
4849
import com.itextpdf.kernel.pdf.PdfObject;
4950
import com.itextpdf.kernel.pdf.PdfStream;
@@ -55,6 +56,7 @@ This file is part of the iText (R) project.
5556
import com.itextpdf.kernel.utils.objectpathitems.LocalPathItem;
5657
import com.itextpdf.kernel.utils.objectpathitems.ObjectPath;
5758
import com.itextpdf.rups.io.listeners.PdfTreeNavigationListener;
59+
import com.itextpdf.rups.model.IndirectObjectFactory;
5860
import com.itextpdf.rups.model.ObjectLoader;
5961
import com.itextpdf.rups.model.PdfSyntaxParser;
6062
import com.itextpdf.rups.model.TreeNodeFactory;
@@ -75,12 +77,16 @@ This file is part of the iText (R) project.
7577
import com.itextpdf.rups.view.itext.StructureTree;
7678
import com.itextpdf.rups.view.itext.SyntaxHighlightedStreamPane;
7779
import com.itextpdf.rups.view.itext.XRefTable;
80+
import com.itextpdf.rups.view.itext.treenodes.ObjectStreamTreeNode;
7881
import com.itextpdf.rups.view.itext.treenodes.PdfObjectTreeNode;
7982

8083
import java.awt.Color;
8184
import java.awt.event.KeyListener;
8285
import java.util.ArrayDeque;
8386
import java.util.Deque;
87+
import java.util.List;
88+
import java.util.Observable;
89+
import java.util.Observer;
8490
import java.util.Stack;
8591
import java.util.function.Consumer;
8692
import javax.swing.JPanel;

src/main/java/com/itextpdf/rups/model/IndirectObjectFactory.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,18 @@ This file is part of the iText (R) project.
4646
import com.itextpdf.kernel.exceptions.PdfException;
4747
import com.itextpdf.kernel.pdf.PdfDictionary;
4848
import com.itextpdf.kernel.pdf.PdfDocument;
49+
import com.itextpdf.kernel.pdf.PdfIndirectReference;
4950
import com.itextpdf.kernel.pdf.PdfName;
5051
import com.itextpdf.kernel.pdf.PdfNull;
5152
import com.itextpdf.kernel.pdf.PdfObject;
53+
import com.itextpdf.kernel.pdf.PdfStream;
5254
import com.itextpdf.rups.view.Language;
5355

5456
import java.lang.reflect.Field;
5557
import java.lang.reflect.InvocationTargetException;
5658
import java.lang.reflect.Method;
5759
import java.util.ArrayList;
60+
import java.util.List;
5861

5962
/**
6063
* A factory that can produce all the indirect objects in a PDF file.
@@ -77,6 +80,11 @@ public class IndirectObjectFactory {
7780
* A list of all the indirect objects in a PDF file.
7881
*/
7982
protected ArrayList<PdfObject> objects = new ArrayList<>();
83+
84+
/**
85+
* List of all Object Streams in a PDF file.
86+
*/
87+
protected List<PdfIndirectReference> objectStreams = new ArrayList<>();
8088
/**
8189
* Mapping between the index in the objects list and the reference number in the xref table.
8290
*/
@@ -145,6 +153,14 @@ public boolean storeNextObject() {
145153
final int idx = size();
146154
idxToRef.put(idx, current);
147155
refToIdx.put(current, idx);
156+
157+
if ( object.getType() == PdfObject.STREAM ) {
158+
PdfStream stream = (PdfStream) object;
159+
if ( PdfName.ObjStm.equals(stream.get(PdfName.Type) )) {
160+
this.objectStreams.add(stream.getIndirectReference());
161+
}
162+
}
163+
148164
store(object);
149165
return true;
150166
}
@@ -238,6 +254,10 @@ public boolean isLoadedByReference(int ref) {
238254
return isLoaded.get(getIndexByRef(ref));
239255
}
240256

257+
public List<PdfIndirectReference> getObjectStreams() {
258+
return objectStreams;
259+
}
260+
241261
/**
242262
* Loads an object based on its reference number in the xref table.
243263
*

src/main/java/com/itextpdf/rups/model/ObjectLoader.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ public TreeNodeFactory getNodes() {
120120
return nodes;
121121
}
122122

123+
123124
/**
124125
* getter for a human readable name representing this loader
125126
*

src/main/java/com/itextpdf/rups/view/Language.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ public enum Language {
201201
PAGES,
202202
PAGES_TABLE_OBJECT,
203203
PDF_READING,
204+
PDF_OBJECT_STREAMS_TREE_NODE,
204205
PDF_OBJECT_TREE,
205206
PLAINTEXT,
206207
PLAINTEXT_DESCRIPTION,
@@ -246,10 +247,15 @@ public enum Language {
246247
WARNING_OID_NAME_NOT_FOUND,
247248

248249
XREF,
250+
XREF_BYTE_OFFSET,
251+
XREF_BYTE_OFFSET_OBJECT_STREAM,
249252
XREF_DESCRIPTION,
253+
XREF_NA,
254+
XREF_NOT_LOADED_YET,
250255
XREF_NUMBER,
251256
XREF_OBJECT,
252-
XREF_READING;
257+
XREF_READING
258+
;
253259

254260
/**
255261
* The location of the resource bundles.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package com.itextpdf.rups.view.itext;
2+
3+
import com.itextpdf.io.source.PdfTokenizer;
4+
import com.itextpdf.io.source.RandomAccessFileOrArray;
5+
import com.itextpdf.io.source.RandomAccessSourceFactory;
6+
import com.itextpdf.kernel.pdf.PdfName;
7+
import com.itextpdf.kernel.pdf.PdfStream;
8+
import com.itextpdf.rups.model.LoggerHelper;
9+
import com.itextpdf.rups.view.Console;
10+
11+
import java.io.IOException;
12+
import java.util.Arrays;
13+
14+
/**
15+
* Utility class to parse ObjectStreams to extract the offset of a given object id within the stream.
16+
*/
17+
public class ObjectStreamParser {
18+
/**
19+
* Parses an ObjectStream to find the offset to the passed parameter, compressedObjectNumber. This offset is
20+
* relative to the ObjectStream and not to the complete file, as described in the specification.
21+
*
22+
* If an Object ID isn't found inside the ObjectStream, this will return -1.
23+
*
24+
* @param objStm The ObjectStream to parse
25+
* @param compressedObjectNumber the ID of the object of which you want the offset
26+
* @return the offset of the object or -1 if the object is not found
27+
*/
28+
public static int parseObjectStream(PdfStream objStm, int compressedObjectNumber) {
29+
byte[] objStmBytes = objStm.getBytes(true);
30+
int byteOffsetOfFirst = objStm.getAsInt(PdfName.First);
31+
32+
PdfTokenizer pdfTokenizer = new PdfTokenizer(
33+
new RandomAccessFileOrArray(
34+
new RandomAccessSourceFactory()
35+
.createSource(
36+
Arrays.copyOfRange(objStmBytes, 0, byteOffsetOfFirst))));
37+
38+
try {
39+
while (pdfTokenizer.nextToken()) {
40+
if ( pdfTokenizer.getTokenType().equals(PdfTokenizer.TokenType.Number )) {
41+
int objNumber = pdfTokenizer.getIntValue();
42+
pdfTokenizer.nextToken();
43+
if ( objNumber == compressedObjectNumber ) {
44+
return pdfTokenizer.getIntValue();
45+
}
46+
}
47+
}
48+
} catch (IOException e) {
49+
LoggerHelper.error(e.getMessage(), e, ObjectStreamParser.class);
50+
return -1;
51+
}
52+
53+
return -1;
54+
}
55+
}

src/main/java/com/itextpdf/rups/view/itext/XRefTable.java

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ This file is part of the iText (R) project.
4242
*/
4343
package com.itextpdf.rups.view.itext;
4444

45-
import com.itextpdf.kernel.pdf.PdfNull;
46-
import com.itextpdf.kernel.pdf.PdfObject;
45+
import com.itextpdf.kernel.pdf.*;
4746
import com.itextpdf.rups.controller.PdfReaderController;
4847
import com.itextpdf.rups.model.IndirectObjectFactory;
4948
import com.itextpdf.rups.model.ObjectLoader;
@@ -93,7 +92,7 @@ public XRefTable(PdfReaderController controller) {
9392
*/
9493
@Override
9594
public int getColumnCount() {
96-
return 2;
95+
return 3;
9796
}
9897

9998
/**
@@ -117,6 +116,8 @@ public Object getValueAt(int rowIndex, int columnIndex) {
117116
return getObjectReferenceByRow(rowIndex);
118117
case OBJECT_COLUMN_INDEX:
119118
return getObjectDescriptionByRow(rowIndex);
119+
case 2:
120+
return getByteOffSetByRow(rowIndex);
120121
default:
121122
return null;
122123
}
@@ -147,6 +148,47 @@ private String getObjectDescriptionByRow(int rowIndex) {
147148
return PdfObjectTreeNode.getCaption(object);
148149
}
149150

151+
/**
152+
* Returns the byte offset of the selected XREF entry. If the entry has no real, actual offset.
153+
* i.e. it is compressed in a PDF Object Stream, then this shall return The ID of the Object Stream
154+
* and the offset of the Object Stream.
155+
*
156+
* @param rowIndex the index of the selected XREF entry
157+
* @return byte offset of the XREF entry or the ID and byte offset of the encompassing Object Stream
158+
*/
159+
private String getByteOffSetByRow(int rowIndex) {
160+
final PdfObject object = objects.getObjectByIndex(rowIndex);
161+
PdfIndirectReference indirectReference = object.getIndirectReference();
162+
if ( indirectReference != null ) {
163+
if (isObjectStream(indirectReference)) {
164+
int compressedObjectNumber = indirectReference.getObjNumber();
165+
PdfStream objStm = getObjectStream(indirectReference);
166+
int internalCompressedObjectOffset
167+
= ObjectStreamParser.parseObjectStream(objStm, compressedObjectNumber);
168+
169+
return String.format(
170+
Language.XREF_BYTE_OFFSET_OBJECT_STREAM.getString(),
171+
objStm.getIndirectReference().getObjNumber(), internalCompressedObjectOffset
172+
);
173+
}
174+
175+
return String.valueOf(indirectReference.getOffset());
176+
}
177+
return Language.XREF_NOT_LOADED_YET.getString();
178+
}
179+
180+
private PdfStream getObjectStream(PdfIndirectReference indirectReference) {
181+
int objStreamNumber = indirectReference.getObjStreamNumber();
182+
PdfObject objectByIndex = objects.loadObjectByReference(objStreamNumber);
183+
184+
return (PdfStream) objectByIndex;
185+
}
186+
187+
private boolean isObjectStream(PdfIndirectReference indirectReference) {
188+
return indirectReference.getOffset() == -1;
189+
}
190+
191+
150192
/**
151193
* @see javax.swing.JTable#getColumnName(int)
152194
*/
@@ -157,6 +199,8 @@ public String getColumnName(int columnIndex) {
157199
return Language.XREF_NUMBER.getString();
158200
case OBJECT_COLUMN_INDEX:
159201
return Language.XREF_OBJECT.getString();
202+
case 2:
203+
return Language.XREF_BYTE_OFFSET.getString();
160204
default:
161205
return null;
162206
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package com.itextpdf.rups.view.itext.treenodes;
2+
3+
import com.itextpdf.kernel.pdf.PdfObject;
4+
import com.itextpdf.rups.view.Language;
5+
6+
public class ObjectStreamTreeNode extends PdfObjectTreeNode {
7+
8+
public ObjectStreamTreeNode(PdfObject object) {
9+
super(object);
10+
setUserObject(Language.PDF_OBJECT_STREAMS_TREE_NODE.getString());
11+
}
12+
13+
}

src/main/resources/bundles/rups-lang.properties

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ PAGES=Pages
167167
PAGES_TABLE_OBJECT=Object %d
168168
169169
PDF_READING=Reading PDF document...
170+
PDF_OBJECT_STREAMS_TREE_NODE=PDF Object Streams
170171
PDF_OBJECT_TREE=PDF Object Tree (%s)
171172
172173
PLAINTEXT=Plain Text
@@ -220,7 +221,11 @@ WARNING_OPENED_IN_READ_ONLY_MODE=Document is opened in a read-only mode. For edi
220221
WARNING_OID_NAME_NOT_FOUND=Full name for object identifier was not found: %s.
221222
222223
XREF=XREF
224+
XREF_BYTE_OFFSET=Byte Offset
225+
XREF_BYTE_OFFSET_OBJECT_STREAM=Object Stream #%d (%d)
223226
XREF_DESCRIPTION=Cross-reference table
227+
XREF_NA=N/A
228+
XREF_NOT_LOADED_YET=Offset not loaded yet
224229
XREF_NUMBER=Number
225230
XREF_OBJECT=Object
226231
XREF_READING=Reading the Cross-Reference table

src/main/resources/bundles/rups-lang_nl_NL.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ WARNING_OID_NAME_NOT_FOUND=Volledige naam voor object identifier niet gevonden:
221221
222222
XREF=XREF
223223
XREF_DESCRIPTION=Cross-reference tabel
224+
XREF_NOT_LOADED_YET=Offset is nog niet ingeladen
224225
XREF_NUMBER=Nummer
225226
XREF_OBJECT=Object
226227
XREF_READING=Lezen van de Cross-Reference tabel

0 commit comments

Comments
 (0)