From 8604732d8aad81628e60822cb34141353ceb2253 Mon Sep 17 00:00:00 2001 From: Oliver Schmidtmer Date: Thu, 23 Sep 2021 23:31:03 +0200 Subject: [PATCH 1/4] PDFBOX-5283: ignore wrong object number in lenient mode --- .../apache/pdfbox/pdfparser/COSParser.java | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index 31b9a341bc6..fc9d2de8cad 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -1235,19 +1235,22 @@ private COSObjectKey findObjectKey(COSObjectKey objectKey, long offset) throws I { source.seek(offset); // try to read the given object/generation number - if (objectKey.getNumber() == readObjectNumber()) + long foundObjectNumber = readObjectNumber(); + if(objectKey.getNumber() != foundObjectNumber){ + LOG.warn("found wrong object number. 
expected [" + objectKey.getNumber() +"] found ["+ foundObjectNumber + "]"); + if(!isLenient) return null; + } + + int genNumber = readGenerationNumber(); + // finally try to read the object marker + readExpectedString(OBJ_MARKER, true); + if (genNumber == objectKey.getGeneration()) { - int genNumber = readGenerationNumber(); - // finally try to read the object marker - readExpectedString(OBJ_MARKER, true); - if (genNumber == objectKey.getGeneration()) - { - return objectKey; - } - else if (isLenient && genNumber > objectKey.getGeneration()) - { - return new COSObjectKey(objectKey.getNumber(), genNumber); - } + return objectKey; + } + else if (isLenient && genNumber > objectKey.getGeneration()) + { + return new COSObjectKey(objectKey.getNumber(), genNumber); } } catch (IOException exception) @@ -1322,7 +1325,7 @@ private Map bfSearchForObjects() throws IOException // add the former object ID only if there was a subsequent object ID bfCOSObjectKeyOffsets.put( new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); - } + } lastObjectId = objectId; lastGenID = genID; lastObjOffset = tempOffset + 1; From ea0a6e0ca319021129d6c9c99ed96b50a26c28cb Mon Sep 17 00:00:00 2001 From: Oliver Schmidtmer Date: Tue, 5 Oct 2021 16:51:30 +0200 Subject: [PATCH 2/4] only replace xRef with corrected entries, if the original entry was not valid --- .../java/org/apache/pdfbox/pdfparser/COSParser.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index fc9d2de8cad..f334a431bda 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -1159,6 +1159,7 @@ private boolean validateXrefOffsets(Map xrefOffset) throws I return true; } Map correctedKeys = new HashMap<>(); + Map validKeys = new HashMap<>(); for (Entry objectEntry : 
xrefOffset.entrySet()) { COSObjectKey objectKey = objectEntry.getKey(); @@ -1178,13 +1179,18 @@ else if (foundObjectKey != objectKey) { // Generation was fixed - need to update map later, after iteration correctedKeys.put(objectKey, foundObjectKey); + } else { + validKeys.put(objectKey, foundObjectKey); } } } for (Entry correctedKeyEntry : correctedKeys.entrySet()) { - xrefOffset.put(correctedKeyEntry.getValue(), - xrefOffset.remove(correctedKeyEntry.getKey())); + if(!validKeys.containsKey(correctedKeyEntry.getValue())) { + // Only replacy entries, if the original entry does not point to a valid object + xrefOffset.put(correctedKeyEntry.getValue(), + xrefOffset.remove(correctedKeyEntry.getKey())); + } } return true; } @@ -1239,6 +1245,7 @@ private COSObjectKey findObjectKey(COSObjectKey objectKey, long offset) throws I if(objectKey.getNumber() != foundObjectNumber){ LOG.warn("found wrong object number. expected [" + objectKey.getNumber() +"] found ["+ foundObjectNumber + "]"); if(!isLenient) return null; + else objectKey = new COSObjectKey(foundObjectNumber, objectKey.getGeneration()); } int genNumber = readGenerationNumber(); From cd2c0ac68f141b707fa863a72e658dc373e82c95 Mon Sep 17 00:00:00 2001 From: Oliver Schmidtmer Date: Wed, 6 Oct 2021 17:39:05 +0200 Subject: [PATCH 3/4] HashSet instead of HashMap for valid keys --- .../main/java/org/apache/pdfbox/pdfparser/COSParser.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index f334a431bda..9beba85300f 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -1159,7 +1159,7 @@ private boolean validateXrefOffsets(Map xrefOffset) throws I return true; } Map correctedKeys = new HashMap<>(); - Map validKeys = new HashMap<>(); + HashSet validKeys = new HashSet<>(); for 
(Entry objectEntry : xrefOffset.entrySet()) { COSObjectKey objectKey = objectEntry.getKey(); @@ -1180,13 +1180,13 @@ else if (foundObjectKey != objectKey) // Generation was fixed - need to update map later, after iteration correctedKeys.put(objectKey, foundObjectKey); } else { - validKeys.put(objectKey, foundObjectKey); + validKeys.add(objectKey); } } } for (Entry correctedKeyEntry : correctedKeys.entrySet()) { - if(!validKeys.containsKey(correctedKeyEntry.getValue())) { + if(!validKeys.contains(correctedKeyEntry.getValue())) { // Only replacy entries, if the original entry does not point to a valid object xrefOffset.put(correctedKeyEntry.getValue(), xrefOffset.remove(correctedKeyEntry.getKey())); @@ -1332,7 +1332,7 @@ private Map bfSearchForObjects() throws IOException // add the former object ID only if there was a subsequent object ID bfCOSObjectKeyOffsets.put( new COSObjectKey(lastObjectId, lastGenID), lastObjOffset); - } + } lastObjectId = objectId; lastGenID = genID; lastObjOffset = tempOffset + 1; From 5ea04921149d67d249560e1654cc5959fb8f7729 Mon Sep 17 00:00:00 2001 From: Oliver Schmidtmer Date: Thu, 10 Mar 2022 21:51:08 +0100 Subject: [PATCH 4/4] read pointers before replacing --- .../org/apache/pdfbox/pdfparser/COSParser.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java index 9beba85300f..f79597cc15b 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java @@ -1184,14 +1184,20 @@ else if (foundObjectKey != objectKey) } } } - for (Entry correctedKeyEntry : correctedKeys.entrySet()) - { - if(!validKeys.contains(correctedKeyEntry.getValue())) { - // Only replacy entries, if the original entry does not point to a valid object - xrefOffset.put(correctedKeyEntry.getValue(), - 
xrefOffset.remove(correctedKeyEntry.getKey())); + Map correctedPointers = new HashMap(); + for (Entry correctedKeyEntry : correctedKeys.entrySet()) { + if (!validKeys.contains(correctedKeyEntry.getValue())) { + // Only replace entries, if the original entry does not point to a valid object + correctedPointers.put(correctedKeyEntry.getValue(), xrefOffset.get(correctedKeyEntry.getKey())); } } + for (Entry correctedKeyEntry : correctedKeys.entrySet()) { + // remove old invalid, as some might not be replaced + xrefOffset.remove(correctedKeyEntry.getKey()); + } + for (Entry pointer : correctedPointers.entrySet()) { + xrefOffset.put(pointer.getKey(), pointer.getValue()); + } return true; }