From a717f7da03fcda5d6a0c3a771fa86d253daabc87 Mon Sep 17 00:00:00 2001 From: Lee Passey Date: Sun, 23 Feb 2025 12:51:28 -0800 Subject: [PATCH 1/3] Refine COS package to isolate COSObject code into it's own jar --- pdfcos/pom.xml | 93 + .../java/org/apache/pdfbox/cos/COSArray.java | 859 +++++++++ .../org/apache/pdfbox/cos/COSArrayList.java | 587 +++++++ .../java/org/apache/pdfbox/cos/COSBase.java | 98 ++ .../org/apache/pdfbox/cos/COSBoolean.java | 142 ++ .../org/apache/pdfbox/cos/COSDictionary.java | 1561 +++++++++++++++++ .../apache/pdfbox/cos/COSDictionaryMap.java | 263 +++ .../org/apache/pdfbox/cos/COSDocument.java | 633 +++++++ .../apache/pdfbox/cos/COSDocumentState.java | 58 + .../java/org/apache/pdfbox/cos/COSFloat.java | 225 +++ .../org/apache/pdfbox/cos/COSIncrement.java | 358 ++++ .../org/apache/pdfbox/cos/COSInteger.java | 205 +++ .../java/org/apache/pdfbox/cos/COSName.java | 766 ++++++++ .../java/org/apache/pdfbox/cos/COSNull.java | 66 + .../java/org/apache/pdfbox/cos/COSNumber.java | 110 ++ .../java/org/apache/pdfbox/cos/COSObject.java | 210 +++ .../apache/pdfbox/cos/COSObjectGetter.java | 32 + .../org/apache/pdfbox/cos/COSObjectKey.java | 152 ++ .../java/org/apache/pdfbox/cos/COSStream.java | 449 +++++ .../java/org/apache/pdfbox/cos/COSString.java | 274 +++ .../org/apache/pdfbox/cos/COSUpdateInfo.java | 63 + .../org/apache/pdfbox/cos/COSUpdateState.java | 341 ++++ .../org/apache/pdfbox/cos/ICOSParser.java | 51 + .../org/apache/pdfbox/cos/ICOSVisitor.java | 107 ++ .../org/apache/pdfbox/cos/PDFDocEncoding.java | 163 ++ .../pdfbox/cos/UnmodifiableCOSDictionary.java | 45 + .../pdfbox/cos/filter/ASCII85Filter.java | 52 + .../pdfbox/cos/filter/ASCII85InputStream.java | 274 +++ .../cos/filter/ASCII85OutputStream.java | 246 +++ .../pdfbox/cos/filter/ASCIIHexFilter.java | 145 ++ .../cos/filter/CCITTFaxDecoderStream.java | 813 +++++++++ .../cos/filter/CCITTFaxEncoderStream.java | 325 ++++ .../pdfbox/cos/filter/CCITTFaxFilter.java | 158 ++ 
.../cos/filter/COSCryptFilterDictionary.java | 137 ++ .../filter/COSEncryptFilterDictionary.java | 136 ++ .../pdfbox/cos/filter/COSInputStream.java | 111 ++ .../pdfbox/cos/filter/COSOutputStream.java | 212 +++ .../apache/pdfbox/cos/filter/CryptFilter.java | 62 + .../apache/pdfbox/cos/filter/DCTFilter.java | 344 ++++ .../pdfbox/cos/filter/DecodeOptions.java | 264 +++ .../pdfbox/cos/filter/DecodeResult.java | 89 + .../org/apache/pdfbox/cos/filter/Filter.java | 299 ++++ .../pdfbox/cos/filter/FilterFactory.java | 103 ++ .../apache/pdfbox/cos/filter/FlateFilter.java | 63 + .../cos/filter/FlateFilterDecoderStream.java | 243 +++ .../pdfbox/cos/filter/IdentityFilter.java | 49 + .../apache/pdfbox/cos/filter/JBIG2Filter.java | 153 ++ .../apache/pdfbox/cos/filter/JPXFilter.java | 211 +++ .../apache/pdfbox/cos/filter/LZWFilter.java | 296 ++++ .../filter/MissingImageReaderException.java | 37 + .../apache/pdfbox/cos/filter/Predictor.java | 366 ++++ .../cos/filter/RunLengthDecodeFilter.java | 189 ++ .../pdfbox/cos/filter/TIFFExtension.java | 106 ++ .../org/apache/pdfbox/cos/filter/package.html | 25 + .../java/org/apache/pdfbox/cos/package.html | 72 + .../apache/pdfbox/cos/util/DateConverter.java | 737 ++++++++ .../java/org/apache/pdfbox/cos/util/Hex.java | 247 +++ .../apache/pdfbox/cos/util/StringUtil.java | 42 + ...rg.apache.pdfbox.cos.encryption.properties | 0 .../apache/pdfbox/cos/COSDictionaryTest.java | 38 + .../apache/pdfbox/cos/PDFDocEncodingTest.java | 111 ++ .../org/apache/pdfbox/cos/TestCOSArray.java | 290 +++ .../org/apache/pdfbox/cos/TestCOSBase.java | 77 + .../org/apache/pdfbox/cos/TestCOSBoolean.java | 116 ++ .../org/apache/pdfbox/cos/TestCOSFloat.java | 435 +++++ .../org/apache/pdfbox/cos/TestCOSInteger.java | 177 ++ .../org/apache/pdfbox/cos/TestCOSName.java | 60 + .../org/apache/pdfbox/cos/TestCOSNumber.java | 132 ++ .../org/apache/pdfbox/cos/TestCOSObject.java | 160 ++ .../org/apache/pdfbox/cos/TestCOSStream.java | 215 +++ 
.../org/apache/pdfbox/cos/TestCOSString.java | 360 ++++ .../apache/pdfbox/cos/TestCOSUpdateInfo.java | 61 + .../org/apache/pdfbox/cos/TestVisitor.java | 96 + .../cos/UnmodifiableCOSDictionaryTest.java | 352 ++++ .../pdfbox/cos/filter/PredictorTest.java | 89 + .../apache/pdfbox/cos/filter/TestFilters.java | 219 +++ .../org/apache/pdfbox/cos/filter/package.html | 25 + .../java/org/apache/pdfbox/cos/package.html | 25 + .../pdfbox/cos/util/StringUtilTest.java | 73 + .../apache/pdfbox/cos/util/TestDateUtil.java | 424 +++++ .../apache/pdfbox/cos/util/TestHexUtil.java | 87 + .../apache/pdfbox/cos/filter/PDFBOX-1977.bin | Bin 0 -> 19321 bytes 82 files changed, 18139 insertions(+) create mode 100644 pdfcos/pom.xml create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java create mode 100644 
pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeOptions.java create 
mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/package.html create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java create mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java create mode 100644 pdfcos/src/test/java/org.apache.pdfbox.cos.encryption.properties create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java create mode 100644 
pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/package.html create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java create mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java create mode 100644 pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin diff --git a/pdfcos/pom.xml b/pdfcos/pom.xml new file mode 100644 index 00000000000..f5572923c63 --- /dev/null +++ b/pdfcos/pom.xml @@ -0,0 +1,93 @@ + + + 4.0.0 + + + org.apache.pdfbox + pdfbox-parent + 4.0.0-SNAPSHOT + ../parent/pom.xml + + + pdfcos + 4.0.0-SNAPSHOT + bundle + Apache PDFBOX COS system + + + 11 + 11 + UTF-8 + + + + + org.apache.pdfbox + pdfbox-io + ${project.version} + + + org.apache.pdfbox + encryption + 
${project.version} + + + org.apache.logging.log4j + log4j-api + + + org.bouncycastle + bcprov-jdk18on + ${bouncycastle.version} + compile + true + + + org.bouncycastle + bcpkix-jdk18on + ${bouncycastle.version} + compile + true + + + + org.apache.logging.log4j + log4j-core + test + + + org.junit.jupiter + junit-jupiter + ${junit.version} + test + + + + + + + org.apache.felix + maven-bundle-plugin + true + + + org.apache.pdfbox.pdfcos + org.apache.pdfbox.pdfcos + + + + + org.apache.rat + apache-rat-plugin + + + src/test/resources/org/apache/pdfbox/pdfcos/*.txt + + + + + + + \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java new file mode 100644 index 00000000000..3fc23a27949 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java @@ -0,0 +1,859 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +/** + * An array of PDFBase objects as part of the PDF document. 
+ * + * @author Ben Litchfield + */ +public class COSArray extends COSBase implements Iterable, COSUpdateInfo +{ + private final ArrayList objects; + private final COSUpdateState updateState; + + public static COSArray of(float... floats) + { + ArrayList objects = new ArrayList<>(floats.length); + for (float f : floats) + { + objects.add(new COSFloat(f)); + } + return new COSArray(objects, true); + } + + /** + * Constructor. + */ + public COSArray() + { + this(new ArrayList<>(), true); + } + + /** + * Use the given list to initialize the COSArray. + * + * @param COSObjectGetters the initial list of COSObjectGetters + */ + public COSArray(List COSObjectGetters) + { + this( + COSObjectGetters.stream() + .map(co -> co == null ? null : co.getCOSObject()) + .collect(Collectors.toCollection(ArrayList::new)), + true + ); + } + + private COSArray(ArrayList cosObjects, boolean direct) + { + objects = cosObjects; + updateState = new COSUpdateState(this); + setDirect(direct); + } + + /** + * This will add an object to the array. + * + * @param object The object to add to the array. + */ + public void add( COSBase object ) + { + if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() + && object.getKey() != null) + { + COSObject cosObject = new COSObject(object, object.getKey()); + objects.add(cosObject); + getUpdateState().update(cosObject); + } + else + { + objects.add(object); + getUpdateState().update(object); + } + } + + /** + * This will add an object to the array. + * + * @param object The object to add to the array. + */ + public void add( COSObjectGetter object ) + { + COSBase base = null; + if (object != null) + { + base = object.getCOSObject(); + } + add(base); + } + + /** + * Add the specified object at the ith location and push the rest to the + * right. + * + * @param i The index to add at. + * @param object The object to add at that index. 
+ */ + public void add( int i, COSBase object) + { + if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() + && object.getKey() != null) + { + COSObject cosObject = new COSObject(object, object.getKey()); + objects.add(i, cosObject); + getUpdateState().update(cosObject); + } + else + { + objects.add(i, object); + getUpdateState().update(object); + } + } + + /** + * This will remove all of the objects in the collection. + */ + public void clear() + { + objects.clear(); + getUpdateState().update(); + } + + /** + * This will remove all of the objects in the collection. + * + * @param objectsList The list of objects to remove from the collection. + */ + public void removeAll( Collection objectsList ) + { + objects.removeAll( objectsList ); + getUpdateState().update(); + } + + /** + * This will retain all of the objects in the collection. + * + * @param objectsList The list of objects to retain from the collection. + */ + public void retainAll( Collection objectsList ) + { + if (objects.retainAll(objectsList)) + { + getUpdateState().update(); + } + } + + /** + * This will add an object to the array. + * + * @param objectsList The object to add to the array. + */ + public void addAll( Collection objectsList ) + { + if (objects.addAll(objectsList)) + { + getUpdateState().update(objectsList); + } + } + + /** + * This will add all objects to this array. + * + * @param objectList The list of objects to add. + */ + public void addAll( COSArray objectList ) + { + if( objectList != null ) + { + if (objects.addAll(objectList.objects)) + { + getUpdateState().update(objectList); + } + } + } + + /** + * Add the specified object at the ith location and push the rest to the + * right. + * + * @param i The index to add at. + * @param objectList The object to add at that index. 
+ */ + public void addAll( int i, Collection objectList ) + { + if (objects.addAll(i, objectList)) + { + getUpdateState().update(objectList); + } + } + + /** + * This will set an object at a specific index. + * + * @param index zero based index into array. + * @param object The object to set. + */ + public void set( int index, COSBase object ) + { + if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() + && object.getKey() != null) + { + COSObject cosObject = new COSObject(object, object.getKey()); + objects.set(index, cosObject); + getUpdateState().update(cosObject); + } + else + { + objects.set(index, object); + getUpdateState().update(object); + } + } + + /** + * This will set an object at a specific index. + * + * @param index zero based index into array. + * @param intVal The object to set. + */ + public void set( int index, int intVal ) + { + objects.set( index, COSInteger.get(intVal)); + getUpdateState().update(); + } + + /** + * This will set an object at a specific index. + * + * @param index zero based index into array. + * @param object The object to set. + */ + public void set( int index, COSObjectGetter object ) + { + COSBase base = null; + if( object != null ) + { + base = object.getCOSObject(); + } + set(index, base); + } + + /** + * This will get an object from the array. This will dereference the object. + * If the object is COSNull then null will be returned. + * + * @param index The index into the array to get the object. + * + * @return The object at the requested index. + */ + public COSBase getObject( int index ) + { + COSBase obj = objects.get( index ); + if( obj instanceof COSObject ) + { + obj = ((COSObject)obj).getObject(); + } + if (obj instanceof COSNull) + { + obj = null; + } + return obj; + } + + /** + * This will get an object from the array. This will NOT dereference + * the COS object. + * + * @param index The index into the array to get the object. 
+ * + * @return The object at the requested index. + */ + public COSBase get( int index ) + { + return objects.get( index ); + } + + /** + * Get the value of the array as an integer. + * + * @param index The index into the list. + * + * @return The value at that index or -1 if does not exist. + */ + public int getInt( int index ) + { + return getInt( index, -1 ); + } + + /** + * Get the value of the array as an integer, return the default if it does not exist. + * + * @param index The value of the array. + * @param defaultValue The value to return if the value is null. + * @return The value at the index or the defaultValue. + */ + public int getInt( int index, int defaultValue ) + { + int retval = defaultValue; + if ( index < size() ) + { + Object obj = objects.get( index ); + if( obj instanceof COSNumber ) + { + retval = ((COSNumber)obj).intValue(); + } + } + return retval; + } + + /** + * Set the value in the array as an integer. + * + * @param index The index into the array. + * @param value The value to set. + */ + public void setInt( int index, int value ) + { + set( index, COSInteger.get( value ) ); + } + + /** + * Set the value in the array as a name. + * @param index The index into the array. + * @param name The name to set in the array. + */ + public void setName( int index, String name ) + { + set( index, COSName.getPDFName( name ) ); + } + + /** + * Get the value of the array as a string. + * + * @param index The index into the array. + * @return The name converted to a string or null if it does not exist. + */ + public String getName( int index ) + { + return getName( index, null ); + } + + /** + * Get an entry in the array that is expected to be a COSName. + * @param index The index into the array. + * @param defaultValue The value to return if it is null. + * @return The value at the index or defaultValue if none is found. 
+ */ + public String getName( int index, String defaultValue ) + { + String retval = defaultValue; + if( index < size() ) + { + Object obj = objects.get( index ); + if( obj instanceof COSName ) + { + retval = ((COSName)obj).getName(); + } + } + return retval; + } + + /** + * Set the value in the array as a string. + * @param index The index into the array. + * @param string The string to set in the array. + */ + public void setString( int index, String string ) + { + if ( string != null ) + { + set( index, new COSString( string ) ); + } + else + { + set( index, null ); + } + } + + /** + * Get the value of the array as a string. + * + * @param index The index into the array. + * @return The string or null if it does not exist. + */ + public String getString( int index ) + { + return getString( index, null ); + } + + /** + * Get an entry in the array that is expected to be a COSName. + * @param index The index into the array. + * @param defaultValue The value to return if it is null. + * @return The value at the index or defaultValue if none is found. + */ + public String getString( int index, String defaultValue ) + { + String retval = defaultValue; + if( index < size() ) + { + Object obj = objects.get( index ); + if( obj instanceof COSString ) + { + retval = ((COSString)obj).getString(); + } + } + return retval; + } + + /** + * This will get the size of this array. + * + * @return The number of elements in the array. + */ + public int size() + { + return objects.size(); + } + + /** + * Returns true if the container is empty, false otherwise. + * + * @return true if the container is empty, false otherwise + */ + public boolean isEmpty() + { + return objects.isEmpty(); + } + + /** + * This will remove an element from the array. + * + * @param i The index of the object to remove. + * + * @return The object that was removed. 
+ */ + public COSBase remove( int i ) + { + COSBase removedEntry = objects.remove( i ); + getUpdateState().update(); + return removedEntry; + } + + /** + * This will remove an element from the array. + * + * @param o The object to remove. + * + * @return true if the object was removed, false + * otherwise + */ + public boolean remove( COSBase o ) + { + boolean removed = objects.remove(o); + if (removed) + { + getUpdateState().update(); + } + return removed; + } + + /** + * This will remove an element from the array. + * This method will also remove a reference to the object. + * + * @param o The object to remove. + * @return true if the object was removed, false + * otherwise + */ + public boolean removeObject(COSBase o) + { + boolean removed = this.remove(o); + if (!removed) + { + for (int i = 0; i < this.size(); i++) + { + COSBase entry = this.get(i); + if (entry instanceof COSObject) + { + COSObject objEntry = (COSObject) entry; + if (objEntry.getObject().equals(o)) + { + return this.remove(entry); + } + } + } + } + return removed; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return "COSArray{" + objects + "}"; + } + + /** + * Get access to the list. + * + * @return an iterator over the array elements + */ + @Override + public Iterator iterator() + { + return objects.iterator(); + } + + /** + * This will return the index of the entry or -1 if it is not found. + * + * @param object The object to search for. + * @return The index of the object or -1. + */ + public int indexOf(COSBase object) + { + for (int i = 0; i < size(); i++) + { + COSBase item = get(i); + if (item == null) + { + if (object == null) + { + return i; + } + } + else if (item.equals(object)) + { + return i; + } + } + return -1; + } + + /** + * This will return the index of the entry or -1 if it is not found. + * This method will also find references to indirect objects. + * + * @param object The object to search for. + * @return The index of the object or -1. 
+ */ + public int indexOfObject(COSBase object) + { + for (int i = 0; i < this.size(); i++) + { + COSBase item = this.get(i); + if (item == null) + { + if (item == object) + { + return i; + } + } + else if (item.equals(object)) + { + return i; + } + else if (item instanceof COSObject && ((COSObject) item).getObject() != null && + ((COSObject) item).getObject().equals(object)) + { + return i; + } + } + return -1; + } + + /** + * This will add null values until the size of the array is at least + * as large as the parameter. If the array is already larger than the + * parameter then nothing is done. + * + * @param size The desired size of the array. + */ + public void growToSize( int size ) + { + growToSize( size, null ); + } + + /** + * This will add the object until the size of the array is at least + * as large as the parameter. If the array is already larger than the + * parameter then nothing is done. + * + * @param size The desired size of the array. + * @param object The object to fill the array with. + */ + public void growToSize( int size, COSBase object ) + { + objects.ensureCapacity(size); + while( size() < size ) + { + add( object ); + getUpdateState().update(object); + } + getUpdateState().update(); + } + + /** + * Visitor pattern double dispatch method. + * + * @param visitor The object to notify when visiting this object. + * @throws IOException If an error occurs while visiting this object. + */ + @Override + public void accept(ICOSVisitor visitor) throws IOException + { + visitor.visitFromArray(this); + } + + /** + * This will take an COSArray of numbers and convert it to a float[]. + * + * @return This COSArray as an array of float numbers. + */ + public float[] toFloatArray() + { + float[] retval = new float[size()]; + for (int i = 0; i < retval.length; i++) + { + COSBase base = getObject(i); + retval[i] = base instanceof COSNumber ? 
((COSNumber) base).floatValue() : 0; + } + return retval; + } + + /** + * Clear the current contents of the COSArray and set it with the float[]. + * + * @param value The new value of the float array. + */ + public void setFloatArray( float[] value ) + { + this.clear(); + for (float aValue : value) + { + add(new COSFloat(aValue)); + } + } + + /** + * Return contents of COSArray as a Java List. + * + * @return the COSArray as List + */ + public List toList() + { + return new ArrayList<>(objects); + } + + /** + * This will return a list of names if the COSArray consists of COSNames only. + * + * @return the list of names of the COSArray of COSNames + */ + public List toCOSNameStringList() + { + return objects.stream() // + .map(o -> ((COSName) o).getName()) // + .collect(Collectors.toList()); + } + + /** + * This will return a list of names if the COSArray consists of COSStrings only. + * + * @return the list of names of the COSArray of COSStrings + */ + public List toCOSStringStringList() + { + return objects.stream() // + .map(o -> ((COSString) o).getString()) // + .collect(Collectors.toList()); + } + + /** + * This will return a list of float values if the COSArray consists of COSNumbers only. + * + * @return the list of float values of the COSArray of COSNumbers + */ + public List toCOSNumberFloatList() + { + List numbers = new ArrayList<>(size()); + for (int i = 0; i < size(); i++) + { + COSBase num = getObject(i); + if (num instanceof COSNumber) + { + numbers.add(((COSNumber) num).floatValue()); + } + else + { + numbers.add(null); + } + } + return numbers; + } + + /** + * This will return a list of int values if the COSArray consists of COSNumbers only. 
+ * + * @return the list of int values of the COSArray of COSNumbers + */ + public List toCOSNumberIntegerList() + { + List numbers = new ArrayList<>(size()); + for (int i = 0; i < size(); i++) + { + COSBase num = getObject(i); + if (num instanceof COSNumber) + { + numbers.add(((COSNumber) num).intValue()); + } + else + { + numbers.add(null); + } + } + return numbers; + } + + /** + * This will take a list of integer objects and return a COSArray of COSInteger objects. + * + * @param integer A list of integers + * + * @return An array of COSInteger objects + */ + public static COSArray ofCOSIntegers(List integer) + { + COSArray retval = new COSArray(); + integer.forEach(s -> retval.add(COSInteger.get(s.longValue()))); + return retval; + } + + /** + * This will take a list of string objects and return a COSArray of COSName objects. + * + * @param strings A list of strings + * + * @return An array of COSName objects + */ + public static COSArray ofCOSNames(List strings) + { + COSArray retval = new COSArray(); + strings.forEach(s -> retval.add(COSName.getPDFName(s))); + return retval; + } + + /** + * This will take a list of string objects and return a COSArray of COSName objects. + * + * @param strings A list of strings + * + * @return An array of COSName objects + */ + public static COSArray ofCOSStrings(List strings) + { + COSArray retval = new COSArray(); + strings.forEach(s -> retval.add(new COSString(s))); + return retval; + } + + /** + * Returns the current {@link COSUpdateState} of this {@link COSArray}. + * + * @return The current {@link COSUpdateState} of this {@link COSArray}. + * @see COSUpdateState + */ + @Override + public COSUpdateState getUpdateState() + { + return updateState; + } + + /** + * Collects all indirect objects numbers within this COSArray and all included dictionaries. It is used to avoid + * mixed up object numbers when importing an existing page to another pdf. + * + * Expert use only. 
You might run into an endless recursion if choosing a wrong starting point. + * + * @param indirectObjects a collection of already found indirect objects. + * + */ + public void getIndirectObjectKeys(Collection indirectObjects) + { + if (indirectObjects == null) + { + return; + } + COSObjectKey key = getKey(); + if (key != null) + { + // avoid endless recursions + if (indirectObjects.contains(key)) + { + return; + } + else + { + indirectObjects.add(key); + } + } + + for (COSBase cosBase : objects) + { + if (cosBase == null) + { + continue; + } + COSObjectKey cosBaseKey = cosBase.getKey(); + if (cosBaseKey != null && indirectObjects.contains(cosBaseKey)) + { + continue; + } + if (cosBase instanceof COSObject) + { + // dereference object + cosBase = ((COSObject) cosBase).getObject(); + } + if (cosBase instanceof COSDictionary) + { + // descend to included dictionary to collect all included indirect objects + ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects); + } + else if (cosBase instanceof COSArray) + { + // descend to included array to collect all included indirect objects + ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects); + } + else if (cosBaseKey != null) + { + // add key for all indirect objects other than COSDictionary/COSArray + indirectObjects.add(cosBaseKey); + } + } + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java new file mode 100644 index 00000000000..86dedf4839e --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java @@ -0,0 +1,587 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; + +/** + * This is an implementation of a List that will sync its contents to a COSArray. + * + * @author Ben Litchfield + * @param Element type. + */ +public class COSArrayList implements List +{ + private final COSArray array; + private final List actual; + + // indicates that the list has been filtered + // i.e. the number of entries in array and actual differ + private boolean isFiltered = false; + + private COSDictionary parentDict; + private COSName dictKey; + + /** + * Default constructor. + */ + public COSArrayList() + { + array = new COSArray(); + actual = new ArrayList<>(); + } + + /** + * Create the COSArrayList specifying the List and the backing COSArray. + * + *

Users of this constructor need to ensure that the entries in the List and + * the backing COSArray match, i.e. the COSObject of each List entry is + * included in the COSArray. + * + *

If the number of entries in the List and the COSArray differ + * it is assumed that the List has been filtered. In that case the COSArrayList + * shall only be used for reading purposes and no longer for updating. + * + * @param actualList The list of standard java objects + * @param cosArray The COS array object to sync to. + */ + public COSArrayList( List actualList, COSArray cosArray ) + { + actual = actualList; + array = cosArray; + + // if the number of entries differs this may come from a filter being + // applied at the PDModel level + if (actual.size() != array.size()) { + isFiltered = true; + } + } + + /** + * This constructor is to be used if the array doesn't exist, but is to be created and added to + * the parent dictionary as soon as the first element is added to the array. + * + * @param dictionary The dictionary that holds the item, and will hold the array if an item is + * added. + * @param dictionaryKey The key into the dictionary to set the item. + */ + public COSArrayList(COSDictionary dictionary, COSName dictionaryKey) + { + array = new COSArray(); + actual = new ArrayList<>(); + parentDict = dictionary; + dictKey = dictionaryKey; + } + + /** + * This is a really special constructor. Sometimes the PDF spec says + * that a dictionary entry can either be a single item or an array of those + * items. But in the PDModel interface we really just want to always return + * a java.util.List. In the case where we get the list and never modify it + * we don't want to convert to COSArray and put one element, unless we append + * to the list. So here we are going to create this object with a single + * item instead of a list, but allow more items to be added and then converted + * to an array. + * + * @param actualObject The PDModel object. + * @param item The COS Model object. + * @param dictionary The dictionary that holds the item, and will hold the array if an item is added. + * @param dictionaryKey The key into the dictionary to set the item.
+ */ + public COSArrayList( E actualObject, COSBase item, COSDictionary dictionary, COSName dictionaryKey ) + { + array = new COSArray(); + array.add( item ); + actual = new ArrayList<>(); + actual.add( actualObject ); + + parentDict = dictionary; + dictKey = dictionaryKey; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return actual.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return actual.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(Object o) + { + return actual.contains(o); + } + + /** + * {@inheritDoc} + */ + @Override + public Iterator iterator() + { + return actual.iterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public Object[] toArray() + { + return actual.toArray(); + } + + /** + * {@inheritDoc} + */ + @Override + public X[] toArray(X[] a) + { + return actual.toArray(a); + + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(E o) + { + //when adding if there is a parentDict then change the item + //in the dictionary from a single item to an array. + if( parentDict != null ) + { + parentDict.setItem( dictKey, array ); + //clear the parent dict so it doesn't happen again, there might be + //a usecase for keeping the parentDict around but not now. 
+ parentDict = null; + } + //string is a special case because we can't subclass to be COSObjectGetter + if( o instanceof String ) + { + array.add( new COSString( (String)o ) ); + } + else + { + if(array != null) + { + array.add(((COSObjectGetter)o).getCOSObject()); + } + } + return actual.add(o); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(Object o) + { + + if (isFiltered) { + throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); + } + + boolean retval = true; + int index = actual.indexOf( o ); + if( index >= 0 ) + { + actual.remove( index ); + array.remove( index ); + } + else + { + retval = false; + } + return retval; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll( Collection c) + { + return actual.containsAll( c ); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + if (isFiltered) { + throw new UnsupportedOperationException("Adding to a filtered List is not permitted"); + } + + //when adding if there is a parentDict then change the item + //in the dictionary from a single item to an array. + if( parentDict != null && !c.isEmpty()) + { + parentDict.setItem( dictKey, array ); + //clear the parent dict so it doesn't happen again, there might be + //a usecase for keeping the parentDict around but not now. + parentDict = null; + } + array.addAll( toCOSObjectList( c ) ); + return actual.addAll( c ); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(int index, Collection c) + { + if (isFiltered) { + throw new UnsupportedOperationException("Inserting to a filtered List is not permitted"); + } + + //when adding if there is a parentDict then change the item + //in the dictionary from a single item to an array. 
+ if( parentDict != null && !c.isEmpty()) + { + parentDict.setItem( dictKey, array ); + //clear the parent dict so it doesn't happen again, there might be + //a usecase for keeping the parentDict around but not now. + parentDict = null; + } + + array.addAll( index, toCOSObjectList( c ) ); + return actual.addAll( index, c ); + } + + /** + * This will convert a list of Strings, Integers/Longs, Floats/Doubles, COSObjectGetters and nulls to a COSArray of COSBase objects. + * + * @param COSObjectGetterList The list to convert; if it is a COSArrayList its backing COSArray is reused. + * + * @return A COSArray of COSBase objects, or null if the given list is null. + * @throws IllegalArgumentException if an object type is not supported for conversion to a + * COSBase object. + */ + public static COSArray converterToCOSArray(List COSObjectGetterList) + { + COSArray array = null; + if( COSObjectGetterList != null ) + { + if( COSObjectGetterList instanceof COSArrayList ) + { + //if it is already a COSArrayList then we don't want to recreate the array, we want to reuse it. + array = ((COSArrayList)COSObjectGetterList).array; + } + else + { + array = new COSArray(); + for (Object next : COSObjectGetterList) + { + if( next instanceof String ) + { + array.add( new COSString( (String)next ) ); + } + else if( next instanceof Integer || next instanceof Long ) + { + array.add( COSInteger.get( ((Number)next).longValue() ) ); + } + else if( next instanceof Float || next instanceof Double ) + { + array.add( new COSFloat( ((Number)next).floatValue() ) ); + } + else if( next instanceof COSObjectGetter) + { + COSObjectGetter object = (COSObjectGetter)next; + array.add( object.getCOSObject() ); + } + else if( next == null ) + { + array.add( COSNull.NULL ); + } + else + { + throw new IllegalArgumentException( "Error: Don't know how to convert type to COSBase '" + + next.getClass().getName() + "'" ); + } + } + } + } + return array; + } + + private List toCOSObjectList( Collection list ) + { + List cosObjects = new ArrayList<>(list.size()); + list.forEach(next -> + { + if( next instanceof String ) + { + cosObjects.add( new COSString(
(String)next ) ); + } + else + { + COSObjectGetter cos = (COSObjectGetter)next; + cosObjects.add( cos.getCOSObject() ); + } + }); + return cosObjects; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + c.forEach(item -> { + COSBase itemCOSBase = ((COSObjectGetter)item).getCOSObject(); + // remove all indirect objects too by dereferencing them + // before doing the comparison + for (int i=array.size()-1; i>=0; i--) + { + if (itemCOSBase.equals(array.getObject(i))) + { + array.remove(i); + } + } + }); + + return actual.removeAll( c ); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + c.forEach(item -> { + COSBase itemCOSBase = ((COSObjectGetter)item).getCOSObject(); + // remove all indirect objects too by dereferencing them + // before doing the comparison + for (int i=array.size()-1; i>=0; i--) + { + if (!itemCOSBase.equals(array.getObject(i))) + { + array.remove(i); + } + } + }); + + return actual.retainAll( c ); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + //when adding if there is a parentDict then change the item + //in the dictionary from a single item to an array. 
+ if( parentDict != null ) + { + parentDict.setItem( dictKey, null ); + } + actual.clear(); + array.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) + { + return actual.equals( o ); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return actual.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public E get(int index) + { + return actual.get( index ); + + } + + /** + * {@inheritDoc} + */ + @Override + public E set(int index, E element) + { + if (isFiltered) { + throw new UnsupportedOperationException("Replacing an element in a filtered List is not permitted"); + } + + if( element instanceof String ) + { + COSString item = new COSString( (String)element ); + if( parentDict != null && index == 0 ) + { + parentDict.setItem( dictKey, item ); + } + array.set( index, item ); + } + else + { + if( parentDict != null && index == 0 ) + { + parentDict.setItem( dictKey, ((COSObjectGetter)element).getCOSObject() ); + } + array.set( index, ((COSObjectGetter)element).getCOSObject() ); + } + return actual.set( index, element ); + } + + /** + * {@inheritDoc} + */ + @Override + public void add(int index, E element) + { + if (isFiltered) { + throw new UnsupportedOperationException("Adding an element in a filtered List is not permitted"); + } + + //when adding if there is a parentDict then change the item + //in the dictionary from a single item to an array. + if( parentDict != null ) + { + parentDict.setItem( dictKey, array ); + //clear the parent dict so it doesn't happen again, there might be + //a usecase for keeping the parentDict around but not now. 
+ parentDict = null; + } + actual.add( index, element ); + if( element instanceof String ) + { + array.add( index, new COSString( (String)element ) ); + } + else + { + array.add( index, ((COSObjectGetter)element).getCOSObject() ); + } + } + + /** + * {@inheritDoc} + */ + @Override + public E remove(int index) + { + if (isFiltered) { + throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); + } + + array.remove( index ); + return actual.remove( index ); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(Object o) + { + return actual.indexOf( o ); + } + + /** + * {@inheritDoc} + */ + @Override + public int lastIndexOf(Object o) + { + return actual.lastIndexOf( o ); + + } + + /** + * {@inheritDoc} + */ + @Override + public ListIterator listIterator() + { + return actual.listIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public ListIterator listIterator(int index) + { + return actual.listIterator( index ); + } + + /** + * {@inheritDoc} + */ + @Override + public List subList(int fromIndex, int toIndex) + { + return actual.subList( fromIndex, toIndex ); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return "COSArrayList{" + array.toString() + "}"; + } + + /** + * This will return the underlying COSArray. + * + * @return the COSArray + */ + public COSArray toList() + { + return array; + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java new file mode 100644 index 00000000000..6ce7e4b8e8e --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * The base object that all objects in the PDF document will extend. + * + * @author Ben Litchfield + */ +public abstract class COSBase implements COSObjectGetter +{ + protected boolean direct; // probably unnecessary + private COSObjectKey key; + + /** + * Constructor. + */ + public COSBase() + { + } + + /** + * Convert this standard java object to a COS object. + * + * @return The cos object that matches this Java object. + */ + @Override + public COSBase getCOSObject() + { + return this; + } + + /** + * visitor pattern double dispatch method. + * + * @param visitor The object to notify when visiting this object. + * @throws IOException If an error occurs while visiting this object. + */ + public abstract void accept(ICOSVisitor visitor) throws IOException; + + /** + * If the state is set true, the dictionary will be written direct into the called object. + * This means, no indirect object will be created. + * + * @return the state + */ + public boolean isDirect() + { + return direct; + } + + /** + * Set the state true, if the dictionary should be written as a direct object and not indirect. + * + * @param direct set it true, for writing direct object + */ + public void setDirect(boolean direct) + { + this.direct = direct; + } + + /** + * This will return the COSObjectKey of an indirect object. 
+ * + * @return the COSObjectKey + */ + public COSObjectKey getKey() + { + return key; + } + + /** + * Set the COSObjectKey of an indirect object. + * + * @param key the COSObjectKey of the indirect object + */ + public void setKey(COSObjectKey key) + { + this.key = key; + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java new file mode 100644 index 00000000000..2e7dbd7fb3a --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * This class represents a boolean value in the PDF document. + * + * @author Ben Litchfield + */ +public final class COSBoolean extends COSBase +{ + /** + * The true boolean token. + */ + public static final byte[] TRUE_BYTES = { 116, 114, 117, 101 }; // "true".getBytes("ISO-8859-1") + /** + * The false boolean token. + */ + public static final byte[] FALSE_BYTES = { 102, 97, 108, 115, 101 }; // "false".getBytes("ISO-8859-1") + + /** + * The PDF true value. 
+ */ + public static final COSBoolean TRUE = new COSBoolean( true ); + + /** + * The PDF false value. + */ + public static final COSBoolean FALSE = new COSBoolean( false ); + + private final boolean value; + + /** + * Constructor. + * + * @param aValue The boolean value. + */ + private COSBoolean(boolean aValue) + { + value = aValue; + } + + /** + * This will get the value that this object wraps. + * + * @return The boolean value of this object. + */ + public boolean getValue() + { + return value; + } + + /** + * This will get the value that this object wraps. + * + * @return The boolean value of this object. + */ + public Boolean getValueAsObject() + { + return value ? Boolean.TRUE : Boolean.FALSE; + } + + /** + * This will get the boolean value. + * + * @param value Parameter telling which boolean value to get. + * + * @return The single boolean instance that matches the parameter. + */ + public static COSBoolean getBoolean( boolean value ) + { + return value ? TRUE : FALSE; + } + + /** + * This will get the boolean value. + * + * @param value Parameter telling which boolean value to get. + * + * @return The single boolean instance that matches the parameter. + */ + public static COSBoolean getBoolean( Boolean value ) + { + return getBoolean( value.booleanValue() ); + } + + /** + * visitor pattern double dispatch method. + * + * @param visitor The object to notify when visiting this object. + * @throws IOException If an error occurs while visiting this object. + */ + @Override + public void accept(ICOSVisitor visitor) throws IOException + { + visitor.visitFromBoolean(this); + } + + /** + * Return a string representation of this object. + * + * @return The string value of this object. + */ + @Override + public String toString() + { + return String.valueOf( value ); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + //taken from java.lang.Boolean + return value ? 
1231 : 1237; + } + + /** + * {@inheritDoc} + */ + public boolean equals(Object obj) + { + return this == obj; // this is correct because there are only two COSBoolean objects. + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java new file mode 100644 index 00000000000..b259486d473 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java @@ -0,0 +1,1561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.InputStream; +import java.util.ArrayList; +// import java.util.Arrays; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.util.DateConverter; + +import java.io.IOException; +// import java.io.InputStream; +import java.util.function.BiConsumer; + +import static org.apache.pdfbox.cos.COSName.getPDFName; + +/** + * This class represents a dictionary where name/value pairs reside. 
+ * + * @author Ben Litchfield + * + */ +public class COSDictionary extends COSBase implements COSUpdateInfo +{ + + /** + * Log instance. + */ + private static final Logger LOG = LogManager.getLogger(COSDictionary.class); + + private static final String PATH_SEPARATOR = "/"; + + /** + * The name-value pairs of this dictionary. The pairs are kept in the order they were added to the dictionary. + */ + protected Map items = new LinkedHashMap<>(); + private final COSUpdateState updateState; + + /** + * Constructor. + */ + public COSDictionary() + { + updateState = new COSUpdateState(this); + } + + /** + * Copy Constructor. This will make a shallow copy of this dictionary. + * + * @param dict The dictionary to copy. + */ + public COSDictionary(COSDictionary dict) + { + updateState = new COSUpdateState(this); + addAll(dict); + } + + /** + * @see Map#containsValue(Object) + * + * @param value The value to find in the map. + * + * @return true if the map contains this value. + */ + public boolean containsValue(Object value) + { + boolean contains = items.containsValue(value); + if (!contains && value instanceof COSObject) + { + contains = items.containsValue(((COSObject) value).getObject()); + } + return contains; + } + + /** + * Search in the map for the value that matches the parameter and + * return the first key that maps to that value. + * + * @param value The value to search for in the map. + * @return The key for the value in the map or null if it does not exist. + */ + public COSName getKeyForValue( Object value ) + { + for (Entry entry : items.entrySet()) + { + Object nextValue = entry.getValue(); + if (nextValue.equals(value) + || (nextValue instanceof COSObject && ((COSObject) nextValue).getObject() + .equals(value))) + { + return entry.getKey(); + } + } + return null; + } + + /** + * This will return the number of elements in this dictionary. + * + * @return The number of elements in the dictionary. 
+ */ + public int size() + { + return items.size(); + } + + /** + * This will clear all items in the map. + */ + public void clear() + { + items.clear(); + getUpdateState().update(); + } + + /** + * This will get an object from this dictionary based on its key. If the + * object is a proxy object ({@link COSObject}) then it will dereference + * it and get it from the document. If the object is COSNull then + * null will be returned. + * + * @param key The key to the object that we are getting. + * + * @return The object that matches the key. + */ + public COSBase getObjectFromDictionary( String key) + { + return getObjectFromDictionary( getPDFName( key)); + } + + /** + * This is a special case of getObjectFromDictionary that takes multiple keys. + * It will handle the situation where multiple keys could get the same value, + * e.g. if either CS or ColorSpace is used to get the colorspace. This will + * get an object from this dictionary. If the object is a proxy object then + * it will dereference it and get the underlying concrete object. + * If the object is COSNull then null will be returned. + * + * @param firstKey The first key to try. + * @param secondKey The second key to try; only used if the first key yields null and this key is not null. + * + * @return The object that matches the first key, otherwise the object that matches the second key. + */ + public COSBase getAlternateObjectFromDictionary( COSName firstKey, COSName secondKey) + { + COSBase retval = getObjectFromDictionary( firstKey); + if (retval == null && secondKey != null) + { + retval = getObjectFromDictionary( secondKey); + } + return retval; + } + + /** + * This will get an object from this dictionary. If the object is a reference + * then it will dereference it and return the concrete object. If the object + * is COSNull then null will be returned. + * + * @param key The key to the object that we are getting. + * + * @return The object that matches the key.
+ */ + public COSBase getObjectFromDictionary( COSName key) + { + COSBase retval = items.get(key); + if (retval instanceof COSObject) + { + retval = ((COSObject) retval).getObject(); + } + else if (retval instanceof COSNull) + { + retval = null; + } + return retval; + } + + /** + * This will set an item in this dictionary. If value is null then the result + * will be the same as removeItem( key ). + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. + */ + public void setItem(COSName key, COSBase value) + { + if (value == null) + { + removeItem(key); + } + else + { + if ((value instanceof COSDictionary || value instanceof COSArray) && !value.isDirect() + && value.getKey() != null) + { + COSObject cosObject = new COSObject(value, value.getKey()); + items.put(key, cosObject); + getUpdateState().update(cosObject); + } + else + { + items.put(key, value); + getUpdateState().update(value); + } + } + } + + /** + * This will set an item in this dictionary. If value is null + * then the result will be the same as removeItem( key ). + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. + */ + public void setItem(COSName key, COSObjectGetter value) + { + COSBase base = null; + if (value != null) + { + base = value.getCOSObject(); + } + setItem(key, base); + } + + /** + * This will set an item in this dictionary. If value is null then the + * result will be the same as removeItem( key ). + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. + */ + public void setItem(String key, COSObjectGetter value) + { + setItem( getPDFName(key), value); + } + + /** + * This will set an item in this dictionary. + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. 
+ */ + public void setBoolean(String key, boolean value) + { + setItem( getPDFName(key), COSBoolean.getBoolean(value)); + } + + /** + * This will set an item in this dictionary. + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. + */ + public void setBoolean(COSName key, boolean value) + { + setItem(key, COSBoolean.getBoolean(value)); + } + + /** + * This will set an item in the dictionary. If value is null then the result will be the same as removeItem( key ). + * + * @param key The key to the dictionary object. + * @param value The value to the dictionary object. + */ + public void setItem( String key, COSBase value ) + { + setItem( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSName + * object. If it is null then the object will be removed. + * + * @param key The key to the object, + * @param value The string value for the name. + */ + public void setName(String key, String value) + { + setName( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSName object. + * If it is null then the object will be removed. + * + * @param key The key to the object, + * @param value The string value for the name. + */ + public void setName(COSName key, String value) + { + COSName name = null; + if (value != null) + { + name = getPDFName(value); + } + setItem(key, name); + } + + /** + * Set the value of a date entry in this dictionary. + * + * @param key The key to the date value. + * @param date The date value. + */ + public void setDate(String key, Calendar date) + { + setDate( getPDFName(key), date); + } + + /** + * Set the date object. + * + * @param key The key to the date. + * @param date The date to set. + */ + public void setDate(COSName key, Calendar date) + { + setString(key, DateConverter.toString(date)); + } + + /** + * Set the date object. + * + * @param embedded The embedded dictionary. 
+ * @param key The key to the date. + * @param date The date to set. + */ + public void setEmbeddedDate(COSName embedded, COSName key, Calendar date) + { + COSDictionary dic = getCOSDictionary(embedded); + if (dic == null && date != null) + { + dic = new COSDictionary(); + setItem(embedded, dic); + } + if (dic != null) + { + dic.setDate(key, date); + } + } + + /** + * This is a convenience method that will convert the value to a COSString object. If it is null then the object + * will be removed. + * + * @param key The key to the object, + * @param value The string value for the name. + */ + public void setString(String key, String value) + { + setString( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSString object. If it is null then the object + * will be removed. + * + * @param key The key to the object, + * @param value The string value for the name. + */ + public void setString(COSName key, String value) + { + COSString name = null; + if (value != null) + { + name = new COSString(value); + } + setItem(key, name); + } + + /** + * This is a convenience method that will convert the value to a COSString object. If it is null then the object + * will be removed. + * + * @param embedded The embedded dictionary to set the item in. + * @param key The key to the object, + * @param value The string value for the name. + */ + public void setEmbeddedString(COSName embedded, COSName key, String value) + { + COSDictionary dic = getCOSDictionary(embedded); + if (dic == null && value != null) + { + dic = new COSDictionary(); + setItem(embedded, dic); + } + if (dic != null) + { + dic.setString(key, value); + } + } + + /** + * This is a convenience method that will convert the value to a COSInteger object. + * + * @param key The key to the object, + * @param value The int value for the name. 
+ */ + public void setInt(String key, int value) + { + setInt( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSInteger object. + * + * @param key The key to the object, + * @param value The int value to store. + */ + public void setInt(COSName key, int value) + { + setItem(key, COSInteger.get(value)); + } + + /** + * This is a convenience method that will convert the value to a COSInteger object. + * + * @param key The key to the object, + * @param value The long value to store. + */ + public void setLong(String key, long value) + { + setLong( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSInteger object. + * + * @param key The key to the object, + * @param value The long value to store. + */ + public void setLong(COSName key, long value) + { + COSInteger intVal = COSInteger.get(value); + setItem(key, intVal); + } + + /** + * This is a convenience method that will convert the value to a COSInteger object. + * + * @param embeddedDictionary The key of the embedded dictionary; the dictionary is created if it does not exist yet. + * @param key The key to the object, + * @param value The int value to store. + */ + public void setEmbeddedInt(COSName embeddedDictionary, COSName key, int value) + { + COSDictionary embedded = getCOSDictionary(embeddedDictionary); + if (embedded == null) + { + embedded = new COSDictionary(); + setItem(embeddedDictionary, embedded); + } + embedded.setInt(key, value); + } + + /** + * This is a convenience method that will convert the value to a COSFloat object. + * + * @param key The key to the object, + * @param value The float value to store. + */ + public void setFloat(String key, float value) + { + setFloat( getPDFName(key), value); + } + + /** + * This is a convenience method that will convert the value to a COSFloat object. + * + * @param key The key to the object, + * @param value The float value to store.
+ */ + public void setFloat(COSName key, float value) + { + COSFloat fltVal = new COSFloat(value); + setItem(key, fltVal); + } + + /** + * Sets or clears the bits of the given mask in the integer flags stored under the given field. + * The current flags are read with {@code getInt(field, 0)}, so a missing value is treated as 0. + * + * @param field The COSName of the field to set the value into. + * @param bitFlag the bit mask to set or clear; it is combined with the current flags as-is, not used as a bit index. + * @param value true to set the bits of the mask, false to clear them. + */ + public void setFlag(COSName field, int bitFlag, boolean value) + { + int currentFlags = getInt(field, 0); + if (value) + { + currentFlags = currentFlags | bitFlag; + } + else + { + currentFlags &= ~bitFlag; + } + setInt(field, currentFlags); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a COSName. Null is returned if the entry does not + * exist in the dictionary, or if the referenced object is not a COSName. + * + * @param key The key to the item in the dictionary. + * @return The COS name, or null. + */ + public COSName getCOSName(COSName key) + { + COSBase name = getObjectFromDictionary( key); + if (name instanceof COSName) + { + return (COSName) name; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a proxy object ({@link COSObject}). The stored item + * is read directly with {@link #getItem(COSName)}. Null is returned + * if the entry does not exist in the dictionary or if the stored item + * is not a COSObject. + * + * @param key The key to the item in the dictionary. + * @return The COSObject, or null. + */ + public COSObject getCOSObject(COSName key) + { + COSBase object = getItem(key); + if (object instanceof COSObject) + { + return (COSObject) object; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a COSDictionary. Null is returned if the entry + * does not exist in the dictionary or if the referenced object is not a + * COSDictionary. + * + * @param key The key to the item in the dictionary.
+ * @return The COSDictionary, or null. + */ + public COSDictionary getCOSDictionary(COSName key) + { + COSBase dictionary = getObjectFromDictionary( key); + if (dictionary instanceof COSDictionary) + { + return (COSDictionary) dictionary; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a COSDictionary. The lookup is delegated to + * {@code getAlternateObjectFromDictionary} with both keys. + * Null is returned if that lookup yields nothing, or if the + * entry found is not an instance of COSDictionary. + * + * @param firstKey The first key to the item in the dictionary. + * @param secondKey The second key to the item in the dictionary. + * @return The COSDictionary, or null. + */ + public COSDictionary getAlternateCOSDictionary(COSName firstKey, COSName secondKey) + { + COSBase dictionary = getAlternateObjectFromDictionary( firstKey, secondKey ); + if (dictionary instanceof COSDictionary) + { + return (COSDictionary) dictionary; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSStream}. Null is returned if the entry + * does not exist in the dictionary or if the object found is not an + * instance of COSStream. + * + * @param key The key to the item in the dictionary. + * @return The COSStream, or null. + */ + public COSStream getCOSStream(COSName key) + { + COSBase base = getObjectFromDictionary( key); + if (base instanceof COSStream) + { + return (COSStream) base; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSArray}. Null is returned if the entry + * does not exist in the dictionary or if the object found is not an + * instance of COSArray. + * + * @param key The key to the item in the dictionary. + * @return The COSArray, or null.
+ */ + public COSArray getCOSArray(COSName key) + { + COSBase array = getObjectFromDictionary( key); + if (array instanceof COSArray) + { + return (COSArray) array; + } + return null; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSName}. The default name is + * returned if the entry does not exist in the dictionary or if the + * object found is not a COSName. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no COSName is found under the key. + * @return The COS name, or the default value. + */ + public COSName getCOSName(COSName key, COSName defaultValue) + { + COSBase name = getObjectFromDictionary( key); + if (name instanceof COSName) + { + return (COSName) name; + } + return defaultValue; + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getNameAsString(COSName)}. + * Null is returned if the entry does not exist in the dictionary or if the + * object is neither a COSName nor a COSString. + * + * @param key The key to the item in the dictionary. + * @return The name converted to a string, or null. + */ + public String getNameAsString(String key) + { + return getNameAsString( getPDFName(key)); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a name and convert it to a Java string. + * A COSString entry is accepted as well and returned as its string value. + * Null is returned if the entry does not exist in the dictionary or if the + * object is neither a COSName nor a COSString. + * + * @param key The key to the item in the dictionary. + * @return The name converted to a string, or null.
+ */ + public String getNameAsString( COSName key ) + { + String retval = null; + COSBase name = getObjectFromDictionary( key); + if (name instanceof COSName) + { + retval = ((COSName) name).getName(); + } + else if (name instanceof COSString) + { + retval = ((COSString) name).getString(); + } + return retval; + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getNameAsString(COSName, String)}. + * The default value is returned if the entry does not exist in the dictionary + * or if the object is neither a COSName nor a COSString. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no name or string is found under the key. + * @return The name converted to a string, or the default value. + */ + public String getNameAsString( String key, String defaultValue) + { + return getNameAsString( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a name and convert it to a Java string. + * The default value is returned whenever {@link #getNameAsString(COSName)} + * returns null, i.e. if the entry does not exist in the dictionary or if the + * object is neither a COSName nor a COSString. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no name or string is found under the key. + * @return The name converted to a string, or the default value. + */ + public String getNameAsString( COSName key, String defaultValue ) + { + String retval = getNameAsString(key); + if (retval == null) + { + retval = defaultValue; + } + return retval; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSString} and convert it to a Java string. + * The key is converted with {@code getPDFName}. Null is returned if + * the entry does not exist in the dictionary or if it is not a COSString. + * + * @param key The key to the item in the dictionary. + * @return The string value, or null.
+ */ + public String getString(String key) + { + return getString( getPDFName(key)); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSString}. Null is returned if the entry + * does not exist in the dictionary or if it is not an instance of COSString. + * + * @param key The key to the item in the dictionary. + * @return The string value, or null. + */ + public String getString(COSName key) + { + String retval = null; + COSBase value = getObjectFromDictionary( key); + if (value instanceof COSString) + { + retval = ((COSString) value).getString(); + } + return retval; + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getString(COSName, String)}. If the entry does not + * exist in the dictionary, or is not a COSString, the supplied default value + * will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The default value to return. + * @return The string value, or the default value. + */ + public String getString(String key, String defaultValue) + { + return getString( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a {@link COSString} and convert it to a Java string. + * The supplied default value will be returned whenever {@link #getString(COSName)} + * returns null, i.e. if the entry does not exist or is not a COSString. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The default value to return. + * @return The string value, or the default value. + */ + public String getString(COSName key, String defaultValue) + { + String retval = getString(key); + if (retval == null) + { + retval = defaultValue; + } + return retval; + } + + /** + * This is a convenience method that will get a string from an embedded + * dictionary of this dictionary. Null is returned if the embedded dictionary + * does not exist or if no string is found under the key.
+ * + * @param embedded The name of the embedded dictionary. + * @param key The key to the item in the embedded dictionary. + * @return The string value, or null. + */ + public String getEmbeddedString(COSName embedded, COSName key) + { + return getEmbeddedString(embedded, key, null); + } + + /** + * This is a convenience method that will get a string from an embedded + * dictionary of this dictionary. The embedded dictionary is looked up with + * {@code getCOSDictionary} and the value is read from it with + * {@code getString(key, defaultValue)}. If the embedded dictionary does not + * exist the supplied default value will be returned. + * + * @param embedded The name of the embedded dictionary. + * @param key The key to the item in the embedded dictionary. + * @param defaultValue The default value to return. + * @return The string value, or the default value. + */ + public String getEmbeddedString( COSName embedded, COSName key, String defaultValue ) + { + COSDictionary eDic = getCOSDictionary(embedded); + return eDic != null ? eDic.getString(key, defaultValue) : defaultValue; + } + + /** + * This is a convenience method that will get a {@link COSString} object + * from this dictionary, which is expected to be a date expression, and + * convert it to a Java {@link Calendar}. The key is converted with + * {@code getPDFName}. Null is returned if the entry + * does not exist in the dictionary or if the date was invalid. + * + * @param key The key to the item in the dictionary. + * @return The entry converted to a date, or null. + */ + public Calendar getDate(String key) + { + return getDate( getPDFName(key)); + } + + /** + * This is a convenience method that will get a {@link COSString} object + * from this dictionary, which is expected to be a date expression, and + * convert it to a Java {@link Calendar} using {@code DateConverter.toCalendar}. + * Null is returned if the entry is not a COSString; otherwise the result of + * the conversion is returned. + * + * @param key The key to the item in the dictionary. + * @return The entry converted to a date, or null.
+ */ + public Calendar getDate(COSName key) + { + COSBase base = getObjectFromDictionary( key); + if (base instanceof COSString) + { + return DateConverter.toCalendar((COSString) base); + } + return null; + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getDate(COSName, Calendar)}. The provided default + * value will be returned whenever no date can be obtained for the key. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The default value to return if no date can be obtained for the key. + * @return The entry converted to a date, or the default value. + */ + public Calendar getDate(String key, Calendar defaultValue) + { + return getDate( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get a {@link COSString} object + * from this dictionary, which is expected to be a date expression, and + * convert it to a Java {@link Calendar}. The provided default value + * will be returned whenever {@link #getDate(COSName)} returns null. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The default value to return if {@link #getDate(COSName)} returns null. + * @return The entry converted to a date, or the default value. + */ + public Calendar getDate(COSName key, Calendar defaultValue) + { + Calendar retval = getDate(key); + if (retval == null) + { + retval = defaultValue; + } + return retval; + } + + /** + * This is a convenience method that will get a {@link COSString} object + * from an embedded dictionary, which is expected to be a date expression, + * and convert it to a Java {@link Calendar}. Null is returned if the + * embedded dictionary does not exist or if no date can be obtained for the key. + * + * @param embedded The name of the embedded dictionary to use.
+ * @param key The key to the item in the embedded dictionary. + * @return The entry converted to a date, or null. + */ + public Calendar getEmbeddedDate( COSName embedded, COSName key ) + { + return getEmbeddedDate(embedded, key, null); + } + + /** + * This is a convenience method that will get a {@link COSString} object + * from an embedded dictionary, which is expected to be a date expression, + * and convert it to a Java {@link Calendar}. If the embedded dictionary does + * not exist the provided default value will be returned; otherwise the result + * of {@code getDate(key, defaultValue)} on the embedded dictionary is returned. + * + * @param embedded The name of the embedded dictionary to use. + * @param key The key to the item in the embedded dictionary. + * @param defaultValue The default value to return if the embedded dictionary does not exist or no date can be + * obtained for the key. + * @return The entry converted to a date, or the default value. + */ + public Calendar getEmbeddedDate(COSName embedded, COSName key, Calendar defaultValue) + { + COSDictionary eDic = getCOSDictionary(embedded); + return eDic != null ? eDic.getDate(key, defaultValue) : defaultValue; + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getBoolean(COSName, boolean)}, which is expected + * to find a COSBoolean and convert it to a primitive boolean. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value returned if no COSBoolean is found under the key. + * + * @return The value converted to a boolean. + */ + public boolean getBoolean(String key, boolean defaultValue) + { + return getBoolean( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a COSBoolean and convert it to a primitive boolean. + * If the entry does not exist, or if it is not a valid boolean value, the + * provided default value will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value returned if no COSBoolean is found under the key.
+ * + * @return The entry converted to a boolean. + */ + public boolean getBoolean(COSName key, boolean defaultValue) + { + return getBoolean(key, null, defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a COSBoolean and convert it to a primitive boolean. + * The lookup is delegated to {@code getAlternateObjectFromDictionary} with both keys. + * If the object found is not a COSBoolean, the provided default value will be + * returned; otherwise the result is true only for {@code COSBoolean.TRUE}. + * + * @param firstKey The first key to the item in the dictionary. + * @param secondKey The second key to the item in the dictionary. + * @param defaultValue The value returned if no COSBoolean is found. + * + * @return The entry converted to a boolean. + */ + public boolean getBoolean(COSName firstKey, COSName secondKey, boolean defaultValue) + { + boolean retval = defaultValue; + COSBase bool = getAlternateObjectFromDictionary( firstKey, secondKey); + if (bool instanceof COSBoolean) + { + retval = bool == COSBoolean.TRUE; + } + return retval; + } + + /** + * Get an integer from an embedded dictionary. Useful for 1-1 mappings. + * -1 is used as the default value. + * + * @param embeddedDictionary The name of the embedded dictionary. + * @param key The key in the embedded dictionary. + * + * @return The value of the embedded integer, or -1. + */ + public int getEmbeddedInt( COSName embeddedDictionary, COSName key ) + { + return getEmbeddedInt(embeddedDictionary, key, -1); + } + + /** + * Get an integer from an embedded dictionary. Useful for 1-1 mappings. + * + * @param embeddedDictionary The name of the embedded dictionary. + * @param key The key in the embedded dictionary. + * @param defaultValue The value if there is no embedded dictionary or it does not hold a number under the key. + * + * @return The value of the embedded integer, or the default value. + */ + public int getEmbeddedInt( COSName embeddedDictionary, COSName key, int defaultValue ) + { + COSDictionary embedded = getCOSDictionary(embeddedDictionary); + return embedded != null ?
embedded.getInt(key, defaultValue) : defaultValue; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be an int. The key is converted with {@code getPDFName}. + * -1 is returned if there is no value. + * + * @param key The key to the item in the dictionary. + * @return The integer value, or -1. + */ + public int getInt( String key ) + { + return getInt( getPDFName(key), -1); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be an int. -1 is returned if + * there is no value. + * + * @param key The key to the item in the dictionary. + * @return The integer value, or -1. + */ + public int getInt( COSName key ) + { + return getInt(key, -1); + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getInt(COSName, int)}. If no number is found under + * the key then the provided default value will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The integer value, or the default value. + */ + public int getInt(String key, int defaultValue) + { + return getInt( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be an integer. It delegates to + * {@link #getInt(COSName, COSName, int)} with a null second key. If no number + * is found then the provided default value will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The integer value, or the default value. + */ + public int getInt(COSName key, int defaultValue) + { + return getInt(key, null, defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be an integer. If no number is found then + * -1 will be returned. + * + * @param firstKey The first key to the item in the dictionary.
+ * @param secondKey The second key to the item in the dictionary. + * @return The integer value, or -1. + */ + public int getInt(COSName firstKey, COSName secondKey) + { + return getInt(firstKey, secondKey, -1); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be an integer. The lookup is delegated to + * {@code getAlternateObjectFromDictionary} with both keys. If the object found + * is not a COSNumber then the provided default value will be returned; + * otherwise the number's {@code intValue()} is returned. + * + * @param firstKey The first key to the item in the dictionary. + * @param secondKey The second key to the item in the dictionary. + * @param defaultValue The value to return if no number is found. + * @return The integer value, or the default value. + */ + public int getInt(COSName firstKey, COSName secondKey, int defaultValue) + { + int retval = defaultValue; + COSBase obj = getAlternateObjectFromDictionary( firstKey, secondKey); + if (obj instanceof COSNumber) + { + retval = ((COSNumber) obj).intValue(); + } + return retval; + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a long. The key is converted with {@code getPDFName}. + * -1 is returned if there is no value. + * + * @param key The key to the item in the dictionary. + * + * @return The long value, or -1. + */ + public long getLong(String key) + { + return getLong( getPDFName(key), -1L); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a long. -1 is returned if there is no value. + * + * @param key The key to the item in the dictionary. + * @return The long value, or -1. + */ + public long getLong(COSName key) + { + return getLong(key, -1L); + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getLong(COSName, long)}. If no number is found + * under the key then the provided default value will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The long value, or the default value.
+ */ + public long getLong( String key, long defaultValue ) + { + return getLong( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a long integer. If the object found is not a + * COSNumber then the provided default value will be returned; otherwise + * the number's {@code longValue()} is returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The long value, or the default value. + */ + public long getLong(COSName key, long defaultValue) + { + long retval = defaultValue; + COSBase obj = getObjectFromDictionary( key); + if (obj instanceof COSNumber) + { + retval = ((COSNumber) obj).longValue(); + } + return retval; + } + + /** + * This is a convenience method that will get an object from this + * dictionary that is expected to be a float. The key is converted with + * {@code getPDFName}. -1 is returned if there is no value. + * + * @param key The key to the item in the dictionary. + * @return The float value, or -1. + */ + public float getFloat(String key) + { + return getFloat( getPDFName(key), -1); + } + + /** + * This is a convenience method that will get an object from this + * dictionary that is expected to be a float. -1 is returned + * if there is no value. + * + * @param key The key to the item in the dictionary. + * @return The float value, or -1. + */ + public float getFloat(COSName key) + { + return getFloat(key, -1); + } + + /** + * This is a convenience method that converts the key with {@code getPDFName} + * and delegates to {@link #getFloat(COSName, float)}. If no number is found + * under the key then the provided default value will be returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The float value, or the default value.
+ */ + public float getFloat( String key, float defaultValue ) + { + return getFloat( getPDFName(key), defaultValue); + } + + /** + * This is a convenience method that will get an object from this dictionary + * that is expected to be a float. If the object found is not a COSNumber then the + * provided default value will be returned; otherwise the number's + * {@code floatValue()} is returned. + * + * @param key The key to the item in the dictionary. + * @param defaultValue The value to return if no number is found under the key. + * @return The float value, or the default value. + */ + public float getFloat(COSName key, float defaultValue) + { + float retval = defaultValue; + COSBase obj = getObjectFromDictionary( key); + if (obj instanceof COSNumber) + { + retval = ((COSNumber) obj).floatValue(); + } + return retval; + } + + /** + * Checks whether all bits of the given mask are set in the integer flags stored + * under the given field. The flags are read with {@code getInt(field, 0)}, so a + * missing value is treated as 0. + * + * @param field The COSName of the field to get the flag from. + * @param bitFlag the bit mask to test; it is compared against the flags as-is, not used as a bit index. + * + * @return true if every bit of the mask is set in the flags + */ + public boolean getFlag(COSName field, int bitFlag) + { + int ff = getInt(field, 0); + return (ff & bitFlag) == bitFlag; + } + + /** + * This will remove an item from this dictionary and then call + * {@code getUpdateState().update()}. The update call is made unconditionally, + * i.e. also when no entry existed for the key. + * + * @param key The key to the item to remove from this dictionary. + */ + public void removeItem(COSName key) + { + items.remove(key); + getUpdateState().update(); + } + + /** + * This will do a direct lookup in the internal items map of this dictionary. + * + * @param key The key to the object. + * + * @return The item stored under the key, as returned by {@code items.get(key)}. + */ + public COSBase getItem(COSName key) + { + return items.get(key); + } + + /** + * This will do a lookup into the dictionary. The key is converted with + * {@code getPDFName} and passed to {@link #getItem(COSName)}. + * + * @param key The key to the object. + * + * @return The item that matches the key.
+ */ + public COSBase getItem(String key) + { + return getItem( getPDFName(key)); + } + + /** + * This is a special case of getItem that takes multiple keys. It handles the situation + * where more than one key may hold the same value, e.g. if either CS or ColorSpace is used to get + * the colorspace. The second key is only tried if the first lookup returned null and the + * second key is not null. + * + * @param firstKey The first key to try. + * @param secondKey The second key to try; may be null. + * + * @return The object that matches the first key, otherwise the object that matches the second key. + */ + public COSBase getAlternateItem(COSName firstKey, COSName secondKey) + { + COSBase retval = getItem(firstKey); + if (retval == null && secondKey != null) + { + retval = getItem(secondKey); + } + return retval; + } + + /** + * Returns the names of the entries in this dictionary. The returned set is in the order the entries were added to + * the dictionary. The set is the key set of the internal items map, returned directly without copying. + * + * @since Apache PDFBox 1.1.0 + * @return names of the entries in this dictionary + */ + public Set keySet() + { + return items.keySet(); + } + + /** + * Returns the name-value entries in this dictionary. The returned set is in the order the entries were added to the + * dictionary. The set is the entry set of the internal items map, returned directly without copying. + * + * @since Apache PDFBox 1.1.0 + * @return name-value entries in this dictionary + */ + public Set> entrySet() + { + return items.entrySet(); + } + + /** + * Convenience method that calls {@link Map#forEach(BiConsumer) Map.forEach(BiConsumer)} on the internal items map. + * + * @param action The action to be performed for each entry + * + */ + public void forEach(BiConsumer action) + { + items.forEach(action); + } + + /** + * This will get all the values for the dictionary. The collection is the values view of the + * internal items map, returned directly without copying. + * + * @return All the values for the dictionary. + */ + public Collection getValues() + { + return items.values(); + } + + /** + * Visitor pattern double dispatch method. + * + * @param visitor The object to notify when visiting this object. + * @throws IOException If there is an error visiting this object.
+ */ + @Override + public void accept(ICOSVisitor visitor) throws IOException + { + visitor.visitFromDictionary(this); + } + + /** + * This will add all the dictionary's keys/values to this dictionary. + * Existing key/value pairs will be overwritten. + * NOTE(review): unlike {@code removeItem}, this does not call {@code getUpdateState().update()} - + * confirm whether that is intentional. + * + * @param dict The dictionary to get the key/value pairs from. + */ + public void addAll(COSDictionary dict) + { + items.putAll(dict.items); + } + + /** + * @see Map#containsKey(Object) + * + * @param name The key to find in the map. + * @return true if the map contains this key. + */ + public boolean containsKey(COSName name) + { + return this.items.containsKey(name); + } + + /** + * Converts the name with {@code getPDFName} and delegates to {@link #containsKey(COSName)}. + * + * @see Map#containsKey(Object) + * + * @param name The key to find in the map. + * @return true if the map contains this key. + */ + public boolean containsKey(String name) + { + return containsKey( getPDFName(name)); + } + + /** + * Resolves an object by walking a path that starts at this dictionary. The path is split on + * {@code PATH_SEPARATOR}; while the current object is a dictionary the segment is used as an entry name, + * while it is an array the segment is used as an index, optionally written in square brackets. + * Try "P/Annots/[k]/Rect" where k means the index of the Annots array. + * If the current object is neither an array nor a dictionary the remaining segments are ignored. + * + * The brackets are removed with literal {@code String.replace} calls; the targets must therefore be the + * plain characters "[" and "]" and not regex-escaped sequences, otherwise "[k]" is never reduced to "k". + * + * @param objPath the relative path to the object. + * @return the object found at the end of the path + * @throws NumberFormatException if a segment applied to an array is not an integer once the brackets are removed + */ + public COSBase getObjectFromPath(String objPath) + { + String[] path = objPath.split(PATH_SEPARATOR); + COSBase retval = this; + for (String pathString : path) + { + if (retval instanceof COSArray) + { + int idx = Integer.parseInt(pathString.replace("[", "").replace("]", "")); + retval = ((COSArray) retval).getObject(idx); + } + else if (retval instanceof COSDictionary) + { + retval = ((COSDictionary) retval).getObjectFromDictionary( pathString); + } + } + return retval; + } + + /** + * Returns an unmodifiable view of this dictionary.
+ * + * @return an unmodifiable view of this dictionary, created as a new {@code UnmodifiableCOSDictionary} wrapping this instance + */ + public COSDictionary asUnmodifiableDictionary() + { + return new UnmodifiableCOSDictionary(this); + } + + /** + * Returns a string representation of this dictionary built by {@code getDictionaryString}. + * If building it throws an IOException, the exception is logged at debug level and a string of the + * form "COSDictionary{message}" carrying the exception message is returned instead. + * + * @return the string representation of this dictionary + */ + @Override + public String toString() + { + try + { + return getDictionaryString(this, new ArrayList<>()); + } + catch (IOException e) + { + LOG.debug("An exception occurred trying - returning error message instead", e); + return "COSDictionary{" + e.getMessage() + "}"; + } + } + + /** + * Recursively builds a string representation of a COS object. Dictionaries, arrays and COSObjects are + * added to {@code objs} before their content is rendered; an object already contained in {@code objs} is + * rendered as "hash:" followed by its hash code to avoid endless recursion. For a COSStream the hash code + * of its raw stream bytes is appended after the dictionary entries. Any other object is rendered with its + * own {@code toString()}. + * + * @param base the object to render; null is rendered as "null" + * @param objs the objects already visited during this rendering + * @return the string representation of the object + * @throws IOException declared because the raw input stream of a COSStream is opened and read + */ + private static String getDictionaryString(COSBase base, List objs) throws IOException + { + if (base == null) + { + return "null"; + } + if (objs.contains(base)) + { + // avoid endless recursion + return "hash:" + base.hashCode(); + } + if (base instanceof COSDictionary) + { + objs.add(base); + StringBuilder sb = new StringBuilder("COSDictionary{"); + for (Entry x : ((COSDictionary) base).entrySet()) + { + sb.append(x.getKey()); + sb.append(":"); + sb.append(getDictionaryString(x.getValue(), objs)); + sb.append(";"); + } + sb.append("}"); + if (base instanceof COSStream) + { + try (InputStream stream = ((COSStream) base).createRawInputStream()) + { + byte[] b = stream.readAllBytes(); + sb.append("COSStream{").append( Arrays.hashCode( b)).append( "}"); + } + } + return sb.toString(); + } + if (base instanceof COSArray) + { + objs.add(base); + StringBuilder sb = new StringBuilder("COSArray{"); + for (COSBase x : (COSArray) base) + { + sb.append(getDictionaryString(x, objs)); + sb.append(";"); + } + sb.append("}"); + return sb.toString(); + } + if (base instanceof COSObject) + { + objs.add(base); + COSObject obj = (COSObject) base; + return "COSObject{" + + getDictionaryString( + obj.isObjectNull() ? COSNull.NULL : obj.getObject(), objs) + + "}"; + } + return base.toString(); + } + + /** + * Returns the current {@link COSUpdateState} of this {@link COSDictionary}. + * + * @return The current {@link COSUpdateState} of this {@link COSDictionary}.
+ * @see COSUpdateState + */ + @Override + public COSUpdateState getUpdateState() + { + return updateState; + } + + /** + * Collects the keys of all indirect objects within this dictionary and all included dictionaries and arrays. + * It is used to avoid mixed up object numbers when importing an existing page to another pdf. + * + * Nothing is done if the given collection is null. If this dictionary has a key that is already in the + * collection, the method returns immediately; otherwise the key is added. Entries stored under + * {@code COSName.PARENT} and entries whose key is already in the collection are skipped. + * + * Expert use only. You might run into an endless recursion if choosing a wrong starting point. + * + * @param indirectObjects a collection of already found indirect objects; newly found keys are added to it. + * + */ + public void getIndirectObjectKeys(Collection indirectObjects) + { + if (indirectObjects == null) + { + return; + } + COSObjectKey key = getKey(); + if (key != null) + { + // avoid endless recursions + if (indirectObjects.contains(key)) + { + return; + } + else + { + indirectObjects.add(key); + } + } + for (Entry entry : items.entrySet()) + { + COSBase cosBase = entry.getValue(); + COSObjectKey cosBaseKey = cosBase != null ? cosBase.getKey() : null; + // avoid endless recursions + if (COSName.PARENT.equals(entry.getKey()) + || (cosBaseKey != null && indirectObjects.contains(cosBaseKey))) + { + continue; + } + if (cosBase instanceof COSObject) + { + // dereference object + cosBase = ((COSObject) cosBase).getObject(); + } + if (cosBase instanceof COSDictionary) + { + // descend to included dictionary to collect all included indirect objects + ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects); + } + else if (cosBase instanceof COSArray) + { + // descend to included array to collect all included indirect objects + ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects); + } + else if (cosBaseKey != null) + { + // add key for all indirect objects other than COSDictionary/COSArray + indirectObjects.add(cosBaseKey); + } + } + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java new file mode 100644 index 00000000000..645dd20f508 --- /dev/null +++ 
b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * This is a Map that will automatically sync the contents to a COSDictionary. + * + * @author Ben Litchfield + */ +public class COSDictionaryMap implements Map +{ + private final COSDictionary map; + private final Map actuals; + + /** + * Constructor for this map. + * + * @param actualsMap The map with standard java objects as values. + * @param dicMap The map with COSBase objects as values. 
+     */
+    public COSDictionaryMap( Map<K,V> actualsMap, COSDictionary dicMap )
+    {
+        actuals = actualsMap;
+        map = dicMap;
+    }
+
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int size()
+    {
+        return map.size();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean isEmpty()
+    {
+        return size() == 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean containsKey(Object key)
+    {
+        return actuals.containsKey( key );
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean containsValue(Object value)
+    {
+        return actuals.containsValue( value );
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public V get(Object key)
+    {
+        return actuals.get( key );
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public V put(K key, V value)
+    {
+        COSObjectGetter object = (COSObjectGetter)value;
+
+        map.setItem( COSName.getPDFName( (String)key ), object.getCOSObject() );
+        return actuals.put( key, value );
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public V remove(Object key)
+    {
+        map.removeItem( COSName.getPDFName( (String)key ) );
+        return actuals.remove( key );
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void putAll(Map<? extends K, ? extends V> t)
+    {
+        throw new UnsupportedOperationException("Not yet implemented");
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear()
+    {
+        map.clear();
+        actuals.clear();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Set<K> keySet()
+    {
+        return actuals.keySet();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Collection<V> values()
+    {
+        return actuals.values();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Set<Map.Entry<K, V>> entrySet()
+    {
+        return Collections.unmodifiableSet(actuals.entrySet());
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean equals(Object o)
+    {
+        boolean retval = false;
+        if( o instanceof COSDictionaryMap )
+        {
+            COSDictionaryMap<?, ?> other = (COSDictionaryMap<?, ?>) o;
+            retval = other.map.equals( this.map );
+        }
+        return retval;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public String toString()
+    {
+        return actuals.toString();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int hashCode()
+    {
+        return map.hashCode();
+    }
+
+    /**
+     * This will take a map&lt;java.lang.String,org.apache.pdfbox.pdmodel.COSObjectGetter&gt;
+     * and convert it into a COSDictionary.
+     *
+     * @param someMap A map containing COSObjectGetters
+     *
+     * @return A proper COSDictionary
+     */
+    public static COSDictionary convert(Map<String, ?> someMap)
+    {
+        COSDictionary dic = new COSDictionary();
+        someMap.forEach((name, objectable) ->
+        {
+            COSObjectGetter object = (COSObjectGetter) objectable;
+            dic.setItem(COSName.getPDFName(name), object.getCOSObject());
+        });
+        return dic;
+    }
+
+    /**
+     * This will take a COS dictionary and convert it into COSDictionaryMap. All cos
+     * objects will be converted to their primitive form.
+     *
+     * @param map The COS mappings.
+     * @return A standard java map.
+     * @throws IOException If there is an error during the conversion.
+     */
+    public static COSDictionaryMap<String, Object> convertBasicTypesToMap( COSDictionary map ) throws IOException
+    {
+        COSDictionaryMap<String, Object> retval = null;
+        if( map != null )
+        {
+            Map<String, Object> actualMap = new HashMap<>();
+            for( COSName key : map.keySet() )
+            {
+                COSBase cosObj = map.getObjectFromDictionary( key );
+                Object actualObject = null;
+                if( cosObj instanceof COSString )
+                {
+                    actualObject = ((COSString)cosObj).getString();
+                }
+                else if( cosObj instanceof COSInteger )
+                {
+                    actualObject = ((COSInteger)cosObj).intValue();
+                }
+                else if( cosObj instanceof COSName )
+                {
+                    actualObject = ((COSName)cosObj).getName();
+                }
+                else if( cosObj instanceof COSFloat )
+                {
+                    actualObject = ((COSFloat)cosObj).floatValue();
+                }
+                else if( cosObj instanceof COSBoolean )
+                {
+                    actualObject = ((COSBoolean)cosObj).getValue() ? Boolean.TRUE : Boolean.FALSE;
+                }
+                else
+                {
+                    throw new IOException( "Error:unknown type of object to convert:" + cosObj );
+                }
+                actualMap.put( key.getName(), actualObject );
+            }
+            retval = new COSDictionaryMap<>( actualMap, map );
+        }
+
+        return retval;
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java
new file mode 100644
index 00000000000..03e4581cd4f
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java
@@ -0,0 +1,633 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessStreamCache;
+import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.stream.Collectors;
+
+/**
+ * This is the in-memory representation of the PDF document. You need to call
+ * close() on this object when you are done using it!!
+ *
+ * @author Ben Litchfield
+ *
+ */
+public class COSDocument extends COSBase implements Closeable
+{
+    /**
+     * Log instance.
+     */
+    private static final Logger LOG = LogManager.getLogger(COSDocument.class);
+
+    private float version = 1.4f;
+
+    /**
+     * Maps ObjectKeys to a COSObject. Note that references to these objects
+     * are also stored in COSDictionary objects that map a name to a specific object.
+     */
+    private final Map<COSObjectKey, COSObject> objectPool =
+        new HashMap<>();
+
+    /**
+     * Maps object and generation id to object byte offsets.
+     */
+    private final Map<COSObjectKey, Long> xrefTable =
+        new HashMap<>();
+
+    /**
+     * List containing COSStream objects which are associated with this document.
+     */
+    private final List<COSStream> cosStreamList = new ArrayList<>();
+
+    /**
+     * Document trailer dictionary.
+     */
+    private COSDictionary trailer;
+
+    /**
+     * Signal that document is already decrypted.
+     */
+    private boolean isDecrypted = false;
+
+    private long startXref;
+
+    private boolean closed = false;
+
+    private boolean isXRefStream;
+
+    private boolean hasHybridXRef = false;
+
+    /**
+     * TODO: what is this used for?
+     */
+    private final RandomAccessStreamCache streamCache ;
+
+    /**
+     * Used for incremental saving, to avoid XRef object numbers from being reused.
+     */
+    private long highestXRefObjectNumber;
+
+    /**
+     * A parser that can deference a COSProxyObject.
+     */
+    private final ICOSParser parser;
+
+    private final COSDocumentState documentState = new COSDocumentState();
+    private COSDictionary encryption;
+
+    /**
+     * Constructor. Uses main memory to buffer PDF streams.
+     */
+    public COSDocument()
+    {
+        this( IOUtils.createMemoryOnlyStreamCache());
+    }
+
+    /**
+     * Constructor. Uses main memory to buffer PDF streams.
+     *
+     * @param parser Parser to be used to parse the document on demand
+     */
+    public COSDocument(ICOSParser parser)
+    {
+        this(IOUtils.createMemoryOnlyStreamCache(), parser);
+    }
+
+    /**
+     * Constructor that will use the provided function to create a stream cache for the storage of the PDF streams.
+     *
+     * @param streamCacheCreateFunction a function to create an instance of a stream cache
+     *
+     */
+    public COSDocument( RandomAccessStreamCache.StreamCacheCreateFunction streamCacheCreateFunction)
+    {
+        this(streamCacheCreateFunction, null);
+    }
+
+    /**
+     * Constructor that will use the provided function to create a stream cache
+     * for the storage of the PDF streams. Note again that we are not talking
+     * i/o streams, but about dereferenced COSStream objects
+     *
+     * @param streamCacheCreateFunction a function to create an instance of a stream cache
+     * @param parser Parser to be used to parse the document on demand
+     *
+     */
+    public COSDocument( StreamCacheCreateFunction streamCacheCreateFunction, ICOSParser parser)
+    {
+        streamCache = getStreamCache(streamCacheCreateFunction);
+        this.parser = parser;
+    }
+
+    private RandomAccessStreamCache getStreamCache( StreamCacheCreateFunction streamCacheCreateFunction)
+    {
+        if (streamCacheCreateFunction == null)
+        {
+            return null;
+        }
+        try
+        {
+            return streamCacheCreateFunction.create();
+        }
+        catch (IOException exception1)
+        {
+            LOG.warn(
+                    "An error occured when creating stream cache. Using memory only cache as fallback.",
+                    exception1);
+        }
+        try
+        {
+            return IOUtils.createMemoryOnlyStreamCache().create();
+        }
+        catch (IOException exception2)
+        {
+            LOG.warn("An error occured when creating stream cache for fallback.", exception2);
+        }
+        return null;
+    }
+
+    /**
+     * This will get the encryption dictionary for this document. This will still return the parameters if the document
+     * was decrypted. As the encryption architecture in PDF documents is pluggable this returns an abstract class,
+     * but the only supported subclass at this time is a
+     * PDStandardEncryption object.
+     *
+     * @return The encryption dictionary(most likely a PDStandardEncryption object)
+     */
+    public COSDictionary getEncryption()
+    {
+        if (encryption == null && isEncrypted())
+        {
+            encryption = trailer.getCOSDictionary( COSName.ENCRYPT );
+        }
+        return encryption;
+    }
+
+    /**
+     * Creates a new COSStream using the current configuration for scratch files.
+     * Note that a COSStream is not a traditional stream object, but rather a wrapper
+     * for the stream object defined By adobe COS
+     *
+     * @return the new COSStream
+     */
+    public COSStream createCOSStream()
+    {
+        COSStream stream = new COSStream( streamCache );
+        // collect all COSStreams so that they can be closed when closing the COSDocument.
+        // This is limited to newly created pdfs as all COSStreams of an existing pdf are
+        // collected within the map objectPool
+        cosStreamList.add( stream);
+        return stream;
+    }
+
+    /**
+     * Creates a new COSStream using the current configuration for scratch files. Not for public use.
+     * Only COSParser should call this method.
+     *
+     * @param dictionary the corresponding dictionary
+     * @param startPosition the start position within the source
+     * @param streamLength the stream length
+     * @return the new COSStream
+     * @throws IOException if the random access view can't be read
+     */
+    public COSStream createCOSStream(COSDictionary dictionary, long startPosition,
+            long streamLength) throws IOException
+    {
+        COSStream stream = new COSStream(streamCache,
+                parser.createRandomAccessReadView(startPosition, streamLength));
+        dictionary.forEach(stream::setItem);
+        stream.setKey(dictionary.getKey());
+        return stream;
+    }
+
+    /**
+     * Get the dictionary containing the linearization information if the pdf is linearized.
+     *
+     * @return the dictionary containing the linearization information
+     */
+    public COSDictionary getLinearizedDictionary()
+    {
+        // get all keys with a positive offset in ascending order, as the linearization dictionary shall be the first
+        // within the pdf
+        List<COSObjectKey> objectKeys = xrefTable.entrySet().stream() //
+                .filter(e -> e.getValue() > 0L) //
+                .sorted(Entry.comparingByValue()) //
+                .map(Entry::getKey) //
+                .collect(Collectors.toList());
+        for (COSObjectKey objectKey : objectKeys)
+        {
+            COSObject objectFromPool = getObjectFromPool(objectKey);
+            COSBase realObject = objectFromPool.getObject();
+            if (realObject instanceof COSDictionary)
+            {
+                COSDictionary dic = (COSDictionary) realObject;
+                if (dic.getItem(COSName.LINEARIZED) != null)
+                {
+                    return dic;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * This will get all dictionaries objects by type.
+     *
+     * @param type The type of the object.
+     *
+     * @return This will return all objects with the specified type.
+     */
+    public List<COSObject> getObjectsByType(COSName type)
+    {
+        return getObjectsByType(type, null);
+    }
+
+    /**
+     * This will get all dictionaries objects by type.
+     *
+     * @param type1 The first possible type of the object, mandatory.
+     * @param type2 The second possible type of the object, usually an abbreviation, optional.
+     *
+     * @return This will return all objects with the specified type(s).
+     */
+    public List<COSObject> getObjectsByType(COSName type1, COSName type2)
+    {
+        List<COSObjectKey> originKeys = new ArrayList<>(xrefTable.keySet());
+        List<COSObject> retval = getObjectsByType(originKeys, type1, type2);
+        // there might be some additional objects if the brute force parser was triggered
+        // due to a broken cross-reference table/stream
+        if (originKeys.size() < xrefTable.size())
+        {
+            List<COSObjectKey> additionalKeys = new ArrayList<>(xrefTable.keySet());
+            additionalKeys.removeAll(originKeys);
+            retval.addAll(getObjectsByType(additionalKeys, type1, type2));
+        }
+        return retval;
+    }
+
+    private List<COSObject> getObjectsByType(List<COSObjectKey> keys, COSName type1, COSName type2)
+    {
+        List<COSObject> retval = new ArrayList<>();
+        for (COSObjectKey objectKey : keys)
+        {
+            COSObject objectFromPool = getObjectFromPool(objectKey);
+            COSBase realObject = objectFromPool.getObject();
+            if (realObject instanceof COSDictionary)
+            {
+                COSName dictType = ((COSDictionary) realObject).getCOSName(COSName.TYPE);
+                if (type1.equals(dictType) || (type2 != null && type2.equals(dictType)))
+                {
+                    retval.add(objectFromPool);
+                }
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * This will set the header version of this PDF document.
+     *
+     * @param versionValue The version of the PDF document.
+     */
+    public void setVersion( float versionValue )
+    {
+        version = versionValue;
+    }
+
+    /**
+     * This will get the version extracted from the header of this PDF document.
+     *
+     * @return The header version.
+     */
+    public float getVersion()
+    {
+        return version;
+    }
+
+    /**
+     * Signals that the document is decrypted completely.
+     */
+    public void setDecrypted()
+    {
+        isDecrypted = true;
+    }
+
+    /**
+     * Indicates if a encrypted pdf is already decrypted after parsing.
+     *
+     * @return true indicates that the pdf is decrypted.
+     */
+    public boolean isDecrypted()
+    {
+        return isDecrypted;
+    }
+
+    /**
+     * This will tell if this is an encrypted document.
+     *
+     * @return true If this document is encrypted.
+     */
+    public boolean isEncrypted()
+    {
+        return trailer != null && trailer.getCOSDictionary(COSName.ENCRYPT) != null;
+    }
+
+    /**
+     * This will get the encryption dictionary if the document is encrypted or null if the document
+     * is not encrypted.
+     *
+     * @return The encryption dictionary.
+     */
+    public COSDictionary getEncryptionDictionary()
+    {
+        return trailer.getCOSDictionary(COSName.ENCRYPT);
+    }
+
+    /**
+     * This will set the encryption dictionary, this should only be called when
+     * encrypting the document.
+     *
+     * @param encDictionary The encryption dictionary.
+     */
+    public void setEncryptionDictionary( COSDictionary encDictionary )
+    {
+        trailer.setItem( COSName.ENCRYPT, encDictionary );
+    }
+
+    /**
+     * This will get the document ID.
+     *
+     * @return The document id.
+     */
+    public COSArray getDocumentID()
+    {
+        return getTrailer().getCOSArray(COSName.ID);
+    }
+
+    /**
+     * This will set the document ID. This should be an array of two strings. This method cannot be
+     * used to remove the document id by passing null or an empty array; it will be recreated. Only
+     * the first existing string is used when writing, the second one is always recreated. If you
+     * don't want this, you'll have to modify the {@code COSWriter} class, look for {@link COSName#ID}.
+     *
+     * @param id The document id.
+     */
+    public void setDocumentID( COSArray id )
+    {
+        getTrailer().setItem(COSName.ID, id);
+    }
+
+    /**
+     * This will get the document trailer.
+     *
+     * @return the document trailer dict
+     */
+    public COSDictionary getTrailer()
+    {
+        return trailer;
+    }
+
+    /**
+     * // MIT added, maybe this should not be supported as trailer is a persistence construct.
+     * This will set the document trailer.
+     *
+     * @param newTrailer the document trailer dictionary
+     */
+    public void setTrailer(COSDictionary newTrailer)
+    {
+        trailer = newTrailer;
+        trailer.getUpdateState().setOriginDocumentState(documentState);
+    }
+
+    /**
+     * Internal PDFBox use only. Get the object number of the highest XRef stream. This is needed to
+     * avoid reusing such a number in incremental saving.
+     *
+     * @return The object number of the highest XRef stream, or 0 if there was no XRef stream.
+     */
+    public long getHighestXRefObjectNumber()
+    {
+        return highestXRefObjectNumber;
+    }
+
+    /**
+     * Internal PDFBox use only. Sets the object number of the highest XRef stream. This is needed
+     * to avoid reusing such a number in incremental saving.
+     *
+     * @param highestXRefObjectNumber The object number of the highest XRef stream.
+     */
+    public void setHighestXRefObjectNumber(long highestXRefObjectNumber)
+    {
+        this.highestXRefObjectNumber = highestXRefObjectNumber;
+    }
+
+    /**
+     * visitor pattern double dispatch method.
+     *
+     * @param visitor The object to notify when visiting this object.
+     * @throws IOException If an error occurs while visiting this object.
+     */
+    @Override
+    public void accept(ICOSVisitor visitor) throws IOException
+    {
+        visitor.visitFromDocument(this);
+    }
+
+    /**
+     * This will close all storage and delete the tmp files.
+     *
+     * @throws IOException If there is an error close resources.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        if (closed)
+        {
+            return;
+        }
+
+        // Make sure that:
+        // - first Exception is kept
+        // - all COSStreams are closed
+        // - stream cache is closed
+        // - there's a way to see which errors occurred
+        IOException firstException = null;
+
+        // close all open I/O streams
+        for (COSObject object : objectPool.values())
+        {
+            if (!object.isObjectNull())
+            {
+                COSBase cosObject = object.getObject();
+                if (cosObject instanceof COSStream)
+                {
+                    firstException = IOUtils.closeAndLogException((COSStream) cosObject, LOG,
+                            "COSStream", firstException);
+                }
+            }
+        }
+
+        for (COSStream stream : cosStreamList)
+        {
+            firstException = IOUtils.closeAndLogException(stream, LOG, "COSStream", firstException);
+        }
+
+        if (streamCache != null)
+        {
+            firstException = IOUtils.closeAndLogException(streamCache, LOG, "Stream Cache",
+                    firstException);
+        }
+        closed = true;
+
+        // rethrow first exception to keep method contract
+        if (firstException != null)
+        {
+            throw firstException;
+        }
+    }
+
+    /**
+     * Returns true if this document has been closed.
+     *
+     * @return true if the document is already closed, false otherwise
+     */
+    public boolean isClosed()
+    {
+        return closed;
+    }
+
+    /**
+     * This will get an object from the pool.
+     *
+     * @param key The object key.
+     *
+     * @return The object in the pool or a new one if it has not been parsed yet.
+     */
+    public COSObject getObjectFromPool(COSObjectKey key)
+    {
+        COSObject obj = null;
+        if( key != null )
+        {
+            // make "proxy" object if this was a forward reference
+            obj = objectPool.computeIfAbsent(key, k -> new COSObject(k, parser));
+        }
+        return obj;
+    }
+
+    /**
+     * Populate XRef HashMap with given values.
+     * Each entry maps ObjectKeys to byte offsets in the file.
+     * @param xrefTableValues xref table entries to be added
+     */
+    public void addXRefTable( Map<COSObjectKey, Long> xrefTableValues )
+    {
+        xrefTable.putAll( xrefTableValues );
+    }
+
+    /**
+     * Returns the xrefTable which is a mapping of ObjectKeys
+     * to byte offsets in the file.
+     * @return mapping of ObjectsKeys to byte offsets
+     */
+    public Map<COSObjectKey, Long> getXrefTable()
+    {
+        return xrefTable;
+    }
+
+    /**
+     * This method set the startxref value of the document. This will only
+     * be needed for incremental updates.
+     *
+     * @param startXrefValue the value for startXref
+     */
+    public void setStartXref(long startXrefValue)
+    {
+        startXref = startXrefValue;
+    }
+
+    /**
+     * Return the startXref Position of the parsed document. This will only be needed for incremental updates.
+     *
+     * @return a long with the old position of the startxref
+     */
+    public long getStartXref()
+    {
+        return startXref;
+    }
+
+    /**
+     * Determines if the trailer is a XRef stream or not.
+     *
+     * @return true if the trailer is a XRef stream
+     */
+    public boolean isXRefStream()
+    {
+        return isXRefStream;
+    }
+
+    /**
+     * Sets isXRefStream to the given value. You need to take care that the version of your PDF is
+     * 1.5 or higher.
+     *
+     * @param isXRefStreamValue the new value for isXRefStream
+     */
+    public void setIsXRefStream(boolean isXRefStreamValue)
+    {
+        isXRefStream = isXRefStreamValue;
+    }
+
+    /**
+     * Determines if the pdf has hybrid cross references, both plain tables and streams.
+     *
+     * @return true if the pdf has hybrid cross references
+     */
+    public boolean hasHybridXRef()
+    {
+        return hasHybridXRef;
+    }
+
+    /**
+     * Marks the pdf as document using hybrid cross references.
+     */
+    public void setHasHybridXRef()
+    {
+        hasHybridXRef = true;
+    }
+
+    /**
+     * Returns the {@link COSDocumentState} of this {@link COSDocument}.
+     *
+     * @return The {@link COSDocumentState} of this {@link COSDocument}.
+     * @see COSDocumentState
+     */
+    public COSDocumentState getDocumentState()
+    {
+        return documentState;
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java
new file mode 100644
index 00000000000..628148927ef
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+/**
+ * An instance of {@link COSDocumentState} collects all known states a {@link COSDocument} may have and shall allow
+ * their evaluation.
+ *
+ * @author Christian Appl
+ * @see COSDocument
+ */
+public class COSDocumentState
+{
+
+    /**
+     * The parsing state of the document.
+     * <ul>
+     * <li>{@code true}, if the document is currently being parsed. (initial state)</li>
+     * <li>{@code false}, if the document's parsing completed and it may be edited and updated.</li>
+     * </ul>
+     */
+    private boolean parsing = true;
+
+    /**
+     * Sets the {@link #parsing} state of the document.
+     *
+     * @param parsing The {@link #parsing} state to set.
+     */
+    public void setParsing(boolean parsing)
+    {
+        this.parsing = parsing;
+    }
+
+    /**
+     * Returns {@code true}, if the document´s {@link #parsing} is completed and it may be updated.
+     *
+     * @return {@code true}, if the document´s {@link #parsing} is completed and it may be updated.
+     */
+    public boolean isAcceptingUpdates()
+    {
+        return !parsing;
+    }
+
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java
new file mode 100644
index 00000000000..4f9e7dd3d51
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+
+/**
+ * This class represents a floating point number in a PDF document.
+ *
+ * @author Ben Litchfield
+ *
+ */
+public class COSFloat extends COSNumber
+{
+    private final float value;
+    private String valueAsString;
+
+    public static final COSFloat ZERO = new COSFloat(0f, "0.0");
+    public static final COSFloat ONE = new COSFloat(1f, "1.0");
+
+    /**
+     * Constructor.
+     *
+     * @param aFloat The primitive float object that this object wraps.
+     */
+    public COSFloat( float aFloat )
+    {
+        value = aFloat;
+    }
+
+    /**
+     * An internal constructor to avoid formatting for the predefined constants.
+     *
+     * @param aFloat
+     * @param valueString
+     */
+    private COSFloat(float aFloat, String valueString)
+    {
+        value = aFloat;
+        valueAsString = valueString;
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param aFloat The primitive float object that this object wraps.
+     *
+     * @throws IOException If aFloat is not a float.
+     */
+    public COSFloat( String aFloat ) throws IOException
+    {
+        float parsedValue;
+        String stringValue = null;
+        try
+        {
+            float f = Float.parseFloat(aFloat);
+            parsedValue = coerce(f);
+            stringValue = f == parsedValue ? aFloat : null;
+        }
+        catch( NumberFormatException e )
+        {
+            if (aFloat.startsWith("--"))
+            {
+                // PDFBOX-4289 has --16.33
+                aFloat = aFloat.substring(1);
+            }
+            else if (aFloat.matches("^0\\.0*-\\d+"))
+            {
+                // PDFBOX-2990 has 0.00000-33917698
+                // PDFBOX-3369 has 0.00-35095424
+                // PDFBOX-3500 has 0.-262
+                aFloat = "-" + aFloat.replaceFirst("-", "");
+            }
+            else if (aFloat.matches("^-\\d+\\.-\\d+"))
+            {
+                // PDFBOX-5829 has -12.-1
+                aFloat = "-" + aFloat.replace("-", "");
+            }
+            else
+            {
+                throw new IOException("Error expected floating point number actual='" + aFloat + "'", e);
+            }
+
+            try
+            {
+                parsedValue = coerce(Float.parseFloat(aFloat));
+            }
+            catch (NumberFormatException e2)
+            {
+                throw new IOException("Error expected floating point number actual='" + aFloat + "'", e2);
+            }
+        }
+        value = parsedValue;
+        valueAsString = stringValue;
+    }
+
+    /**
+     * Check and coerce the value field to be between MIN_NORMAL and MAX_VALUE.
+     *
+     * @param floatValue the value to be checked
+     * @return the coerced value
+     */
+    private float coerce(float floatValue)
+    {
+        if (floatValue == Float.POSITIVE_INFINITY)
+        {
+            return Float.MAX_VALUE;
+        }
+        if (floatValue == Float.NEGATIVE_INFINITY)
+        {
+            return -Float.MAX_VALUE;
+        }
+        if (Math.abs(floatValue) < Float.MIN_NORMAL)
+        {
+            // values smaller than the smallest possible float value are converted to 0
+            // see PDF spec, chapter 2 of Appendix C Implementation Limits
+            return 0f;
+        }
+        return floatValue;
+    }
+
+    /**
+     * The value of the float object that this one wraps.
+     *
+     * @return The value of this object.
+     */
+    @Override
+    public float floatValue()
+    {
+        return value;
+    }
+
+    /**
+     * This will get the long value of this object.
+     *
+     * @return The long value of this object,
+     */
+    @Override
+    public long longValue()
+    {
+        return (long) value;
+    }
+
+    /**
+     * This will get the integer value of this object.
+     *
+     * @return The int value of this object,
+     */
+    @Override
+    public int intValue()
+    {
+        return (int) value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean equals( Object o )
+    {
+        return o instanceof COSFloat &&
+                Float.floatToIntBits(((COSFloat)o).value) == Float.floatToIntBits(value);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int hashCode()
+    {
+        return Float.hashCode(value);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public String toString()
+    {
+        return "COSFloat{" + formatString() + "}";
+    }
+
+    /**
+     * Builds, if needed, and returns the string representation of the current value.
+     * @return current value as string.
+     */
+    public String formatString()
+    {
+        if (valueAsString == null)
+        {
+            String s = String.valueOf(value);
+            boolean simpleFormat = s.indexOf('E') < 0;
+            valueAsString = simpleFormat ? s
+                    : new BigDecimal(s).stripTrailingZeros().toPlainString();
+        }
+        return valueAsString;
+    }
+
+    /**
+     * Visitor pattern double dispatch method.
+     *
+     * @param visitor The object to notify when visiting this object.
+     * @throws IOException If an error occurs while visiting this object.
+     */
+    @Override
+    public void accept(ICOSVisitor visitor) throws IOException
+    {
+        visitor.visitFromFloat(this);
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java
new file mode 100644
index 00000000000..8b9d3b7590d
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.Set;
+
+/**
+ * A {@link COSIncrement} starts at a given {@link COSUpdateInfo} to collect updates, that have been made to a
+ * {@link COSDocument} and therefore should be added to it´s next increment.
+ *
+ * @author Christian Appl
+ * @see COSUpdateState
+ * @see COSUpdateInfo
+ */
+public class COSIncrement implements Iterable<COSBase>
+{
+
+    /**
+     * Contains the {@link COSBase}s, that shall be added to the increment at top level.
+ */ + private final Set objects = new LinkedHashSet<>(); + /** + * Contains the direct {@link COSBase}s, that are either contained written directly by structures contained in + * {@link #objects} or that must be excluded from being written as indirect {@link COSObject}s for other reasons. + */ + private final Set excluded = new HashSet<>(); + /** + * Contains all {@link COSObject}s, that have already been processed by this {@link COSIncrement} and shall not be + * processed again. + */ + private final Set processedObjects = new HashSet<>(); + /** + * Contains the {@link COSUpdateInfo} that this {@link COSIncrement} creates an increment for. + */ + private final COSUpdateInfo incrementOrigin; + /** + * Whether this {@link COSIncrement} has already been determined, or must still be evaluated. + */ + private boolean initialized = false; + + /** + * Creates a new {@link COSIncrement} for the given {@link COSUpdateInfo}, the increment will use it´s + * {@link COSDocumentState} as it´s own origin and shall collect all updates contained in the given + * {@link COSUpdateInfo}.
+ * Should the given object be {@code null}, the resulting increment shall be empty. + * + * @param incrementOrigin The {@link COSUpdateInfo} serving as an update source for this {@link COSIncrement}. + */ + public COSIncrement(COSUpdateInfo incrementOrigin) + { + this.incrementOrigin = incrementOrigin; + } + + /** + * Collect all updates made to the given {@link COSBase} and it's contained structures.
+ * This shall forward all {@link COSUpdateInfo} objects to the proper specialized collection methods. + * + * @param base The {@link COSBase} updates shall be collected for. + * @return Returns {@code true}, if the {@link COSBase} represents a direct child structure, that would require it´s + * parent to be updated instead. + * @see #collect(COSDictionary) + * @see #collect(COSArray) + * @see #collect(COSObject) + */ + private boolean collect(COSBase base) + { + if(contains(base)) + { + return false; + } + // handle updatable objects: + if(base instanceof COSDictionary) + { + return collect((COSDictionary) base); + } + else if(base instanceof COSObject) + { + return collect((COSObject) base); + } + else if(base instanceof COSArray) + { + return collect((COSArray) base); + } + return false; + } + + /** + * Collect all updates made to the given {@link COSDictionary} and it's contained structures. + * + * @param dictionary The {@link COSDictionary} updates shall be collected for. + * @return Returns {@code true}, if the {@link COSDictionary} represents a direct child structure, that would + * require it´s parent to be updated instead. + */ + private boolean collect(COSDictionary dictionary) + { + COSUpdateState updateState = dictionary.getUpdateState(); + // Is definitely part of the increment? + if(!isExcluded(dictionary) && !contains(dictionary) && updateState.isUpdated()) + { + add(dictionary); + } + boolean childDemandsParentUpdate = false; + // Collect children: + for(COSBase entry : dictionary.getValues()) + { + // Primitives can not be part of an increment. (on top level) + if(!(entry instanceof COSUpdateInfo) || contains(entry)) + { + continue; + } + COSUpdateInfo updatableEntry = (COSUpdateInfo) entry; + COSUpdateState entryUpdateState = updatableEntry.getUpdateState(); + // Entries with different document origin must be part of the increment! + updateDifferentOrigin(entryUpdateState); + // Always attempt to write COSArrays as direct objects. 
+ if(updatableEntry.isNeedToBeUpdated() && + ((!(entry instanceof COSObject) && entry.isDirect()) || entry instanceof COSArray)) + { + // Exclude direct entries from the increment! + exclude(entry); + childDemandsParentUpdate = true; + } + // Collect descendants: + childDemandsParentUpdate = collect(entry) || childDemandsParentUpdate; + } + + if(isExcluded(dictionary)) + { + return childDemandsParentUpdate; + } + else + { + if(childDemandsParentUpdate && !contains(dictionary)) + { + add(dictionary); + } + return false; + } + } + + /** + * Collect all updates made to the given {@link COSArray} and it's contained structures. + * + * @param array The {@link COSDictionary} updates shall be collected for. + * @return Returns {@code true}, if the {@link COSArray}´s elements changed. A {@link COSArray} shall always be + * treated as a direct structure, that would require it´s parent to be updated instead. + */ + private boolean collect(COSArray array) + { + COSUpdateState updateState = array.getUpdateState(); + boolean childDemandsParentUpdate = updateState.isUpdated(); + for(COSBase entry : array) + { + // Primitives can not be part of an increment. (on top level) + if(!(entry instanceof COSUpdateInfo) || contains(entry)) + { + continue; + } + COSUpdateState entryUpdateState = ((COSUpdateInfo) entry).getUpdateState(); + // Entries with different document origin must be part of the increment! + updateDifferentOrigin(entryUpdateState); + // Collect descendants: + childDemandsParentUpdate = collect(entry) || childDemandsParentUpdate; + } + return childDemandsParentUpdate; + } + + /** + * Collect all updates made to the given {@link COSObject} and it's contained structures. + * + * @param object The {@link COSObject} updates shall be collected for. + * @return Always returns {@code false}. {@link COSObject}s by definition are indirect and shall never cause a + * parent structure to be updated. 
+ */ + private boolean collect(COSObject object) + { + if(contains(object)) + { + return false; + } + addProcessedObject(object); + COSUpdateState updateState = object.getUpdateState(); + // Objects with different document origin must be part of the increment! + updateDifferentOrigin(updateState); + // determine actual, if necessary or possible without dereferencing: + COSUpdateInfo actual = null; + if(updateState.isUpdated() || object.isDereferenced()) + { + COSBase base = object.getObject(); + if(base instanceof COSUpdateInfo) + { + actual = (COSUpdateInfo) base; + } + } + // Skip? + if(actual == null || contains(actual.getCOSObject())) + { + return false; + } + boolean childDemandsParentUpdate = false; + COSUpdateState actualUpdateState = actual.getUpdateState(); + if(actualUpdateState.isUpdated()) + { + childDemandsParentUpdate = true; + } + exclude(actual.getCOSObject()); + childDemandsParentUpdate = collect(actual.getCOSObject()) || childDemandsParentUpdate; + if(updateState.isUpdated() || childDemandsParentUpdate) + { + add(actual.getCOSObject()); + } + return false; + } + + /** + * Returns {@code true}, if the given {@link COSBase} is already known to and has been processed by this + * {@link COSIncrement}. + * + * @param base The {@link COSBase} to check. + * @return {@code true}, if the given {@link COSBase} is already known to and has been processed by this + * {@link COSIncrement}. + * @see #objects + * @see #processedObjects + */ + public boolean contains(COSBase base) + { + return objects.contains(base) || (base instanceof COSObject && processedObjects.contains((COSObject) base)); + } + + /** + * Check whether the given {@link COSUpdateState}´s {@link COSDocumentState} differs from the {@link COSIncrement}´s + * known {@link #incrementOrigin}.
+ * Should that be the case, the {@link COSUpdateState} originates from another {@link COSDocument} and must be added + * to the {@link COSIncrement}, hence call {@link COSUpdateState#update()}. + * + * @param updateState The {@link COSUpdateState} that shall be updated, if it's originating from another + * {@link COSDocument}. + * @see #incrementOrigin + */ + private void updateDifferentOrigin(COSUpdateState updateState) + { + if(incrementOrigin != null && updateState != null && + incrementOrigin.getUpdateState().getOriginDocumentState() != updateState.getOriginDocumentState()) + { + updateState.update(); + } + } + + /** + * The given object and actual {COSBase}s shall be part of the increment and must be added to {@link #objects}, + * if possible.
+ * {@code null} values shall be skipped. + * + * @param object The {@link COSBase} to add to {@link #objects}. + * @see #objects + */ + private void add(COSBase object) + { + if(object != null) + { + objects.add(object); + } + } + + /** + * The given {@link COSObject} has been processed, or is being processed. It shall be added to + * {@link #processedObjects} to skip it, should it be encountered again.
+ * {@code null} values shall be ignored. + * + * @param base The {@link COSObject} to add to {@link #processedObjects}. + * @see #processedObjects + */ + private void addProcessedObject(COSObject base) + { + if(base != null) + { + processedObjects.add(base); + } + } + + /** + * The given {@link COSBase}s are not fit for inclusion in an increment and shall be added to {@link #excluded}.
+ * {@code null} values shall be ignored. + * + * @param base The {@link COSBase}s to add to {@link #excluded}. + * @return The {@link COSIncrement} itself, to allow method chaining. + * @see #excluded + */ + public COSIncrement exclude(COSBase... base) + { + if(base != null) + { + excluded.addAll( Arrays.asList(base)); + } + return this; + } + + /** + * Returns {@code true}, if the given {@link COSBase} has been excluded from the increment, and hence is contained + * in {@link #excluded}. + * + * @param base The {@link COSBase} to check for exclusion. + * @return {@code true}, if the given {@link COSBase} has been excluded from the increment, and hence is contained + * in {@link #excluded}. + * @see #excluded + */ + private boolean isExcluded(COSBase base) + { + return excluded.contains(base); + } + + /** + * Returns all indirect {@link COSBase}s, that shall be written to an increment as top level {@link COSObject}s.
+ * Calling this method will cause the increment to be initialized. + * + * @return All indirect {@link COSBase}s, that shall be written to an increment as top level {@link COSObject}s. + * @see #objects + */ + public Set getObjects() + { + if(!initialized && incrementOrigin != null) + { + collect(incrementOrigin.getCOSObject()); + initialized = true; + } + return objects; + } + + /** + * Return an iterator for the determined {@link #objects} contained in this {@link COSIncrement}. + * + * @return An iterator for the determined {@link #objects} contained in this {@link COSIncrement}. + */ + @Override + public Iterator iterator() + { + return getObjects().iterator(); + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java new file mode 100644 index 00000000000..14d5bf1b9df --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * This class represents an integer number in a PDF document. 
+ *
+ * @author Ben Litchfield
+ */
+public final class COSInteger extends COSNumber
+{
+
+    /**
+     * The lowest integer to be kept in the {@link #STATIC} array.
+     */
+    private static final int LOW = -100;
+
+    /**
+     * The highest integer to be kept in the {@link #STATIC} array.
+     */
+    private static final int HIGH = 256;
+
+    /**
+     * Static instances of all COSIntegers in the range from {@link #LOW}
+     * to {@link #HIGH}. Must be declared before the constants below, because
+     * their initializers call {@link #get(long)}, which accesses this array.
+     */
+    private static final COSInteger[] STATIC = new COSInteger[HIGH - LOW + 1];
+
+    /**
+     * Constant for the number zero.
+     * @since Apache PDFBox 1.1.0
+     */
+    public static final COSInteger ZERO = get(0);
+
+    /**
+     * Constant for the number one.
+     * @since Apache PDFBox 1.1.0
+     */
+    public static final COSInteger ONE = get(1);
+
+    /**
+     * Constant for the number two.
+     * @since Apache PDFBox 1.1.0
+     */
+    public static final COSInteger TWO = get(2);
+
+    /**
+     * Constant for the number three.
+     * @since Apache PDFBox 1.1.0
+     */
+    public static final COSInteger THREE = get(3);
+
+    /**
+     * Constant for an out of range value which is bigger than Long.MAX_VALUE.
+     */
+    protected static final COSInteger OUT_OF_RANGE_MAX = getInvalid(true);
+
+    /**
+     * Constant for an out of range value which is smaller than Long.MIN_VALUE.
+     */
+    protected static final COSInteger OUT_OF_RANGE_MIN = getInvalid(false);
+
+    /**
+     * Returns a COSInteger instance with the given value. Values in the range from {@link #LOW} to {@link #HIGH}
+     * are served from the {@link #STATIC} array, which is filled on demand; any other value yields a new instance.
+     *
+     * @param val integer value
+     * @return COSInteger instance
+     */
+    public static COSInteger get(long val)
+    {
+        if (LOW <= val && val <= HIGH)
+        {
+            int index = (int) val - LOW;
+            // no synchronization needed
+            if (STATIC[index] == null)
+            {
+                STATIC[index] = new COSInteger(val, true);
+            }
+            return STATIC[index];
+        }
+        return new COSInteger(val, true);
+    }
+
+    /**
+     * Creates one of the two markers for an out of range value. The marker wraps {@link Long#MAX_VALUE} or
+     * {@link Long#MIN_VALUE} and reports {@code false} from {@link #isValid()}.
+     *
+     * @param maxValue {@code true} for the upper bound marker, {@code false} for the lower bound marker
+     * @return a new invalid COSInteger instance
+     */
+    private static COSInteger getInvalid(boolean maxValue)
+    {
+        return maxValue ? new COSInteger(Long.MAX_VALUE, false)
+                : new COSInteger(Long.MIN_VALUE, false);
+    }
+
+    private final long value;
+    private final boolean isValid;
+
+    /**
+     * constructor.
+     *
+     * @param val The integer value of this object.
+     * @param valid indicates if the value is valid.
+     */
+    private COSInteger(long val, boolean valid)
+    {
+        value = val;
+        isValid = valid;
+    }
+
+    /**
+     * Two COSIntegers are equal if they wrap the same {@code long} value. The full 64 bit value is compared to
+     * stay consistent with {@link #hashCode()}: comparing only the truncated {@code int} values would treat
+     * e.g. {@code 1} and {@code 4294967297} as equal although their hash codes differ. The validity flag is not
+     * part of the comparison.
+     *
+     * @param o the object to compare with
+     * @return {@code true} if the given object is a COSInteger wrapping the same value
+     */
+    @Override
+    public boolean equals(Object o)
+    {
+        return o instanceof COSInteger && ((COSInteger) o).value == value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public int hashCode()
+    {
+        // yields the same result as the former (int)(value ^ (value >> 32))
+        return Long.hashCode(value);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public String toString()
+    {
+        return "COSInt{" + value + "}";
+    }
+
+    /**
+     * Polymorphic access to value as float.
+     *
+     * @return The float value of this object.
+     */
+    @Override
+    public float floatValue()
+    {
+        return value;
+    }
+
+    /**
+     * Polymorphic access to value as int. The {@code long} value is narrowed by a primitive cast, so values
+     * outside the {@code int} range are truncated.
+     *
+     * @return The int value of this object.
+     */
+    @Override
+    public int intValue()
+    {
+        return (int)value;
+    }
+
+    /**
+     * Polymorphic access to value as long.
+     *
+     * @return The long value of this object.
+     */
+    @Override
+    public long longValue()
+    {
+        return value;
+    }
+
+    /**
+     * Indicates whether this instance represents a valid value.
+     *
+     * @return true if the value is valid
+     */
+    public boolean isValid()
+    {
+        return isValid;
+    }
+
+    /**
+     * Visitor pattern double dispatch method.
+     *
+     * @param visitor The object to notify when visiting this object.
+     * @throws IOException If an error occurs while visiting this object.
+     */
+    @Override
+    public void accept(ICOSVisitor visitor) throws IOException
+    {
+        visitor.visitFromInt(this);
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java
new file mode 100644
index 00000000000..4be540aecdd
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java
@@ -0,0 +1,766 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+// import org.apache.pdfbox.util.Hex;
+
+import java.io.IOException;
+import java.lang.ref.Cleaner;
+import java.lang.ref.WeakReference;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * A PDF Name object.
+ * + * @author Ben Litchfield + */ +public final class COSName extends COSBase implements Comparable +{ + // using ConcurrentHashMap because this can be accessed by multiple threads + private static final Map> NAME_MAP = // + new ConcurrentHashMap<>(8192); + private static final Cleaner CLEANER = Cleaner.create(); + + // + // IMPORTANT: this list is *alphabetized* and does not need any JavaDoc + // + + // A + public static final COSName A = getPDFName("A"); + public static final COSName AA = getPDFName("AA"); + public static final COSName ABSOLUTE_COLORIMETRIC = getPDFName("AbsoluteColorimetric"); + public static final COSName AC = getPDFName("AC"); + public static final COSName ACRO_FORM = getPDFName("AcroForm"); + public static final COSName ACTUAL_TEXT = getPDFName("ActualText"); + public static final COSName ADBE = getPDFName("ADBE"); + public static final COSName ADBE_PKCS7_DETACHED = getPDFName("adbe.pkcs7.detached"); + public static final COSName ADBE_PKCS7_SHA1 = getPDFName("adbe.pkcs7.sha1"); + public static final COSName ADBE_X509_RSA_SHA1 = getPDFName("adbe.x509.rsa_sha1"); + public static final COSName ADOBE_PPKLITE = getPDFName("Adobe.PPKLite"); + public static final COSName AESV2 = getPDFName("AESV2"); + public static final COSName AESV3 = getPDFName("AESV3"); + public static final COSName AF = getPDFName("AF"); + public static final COSName AF_RELATIONSHIP = COSName.getPDFName("AFRelationship"); + public static final COSName AFTER = getPDFName("After"); + public static final COSName AI_META_DATA = getPDFName("AIMetaData"); + public static final COSName AIS = getPDFName("AIS"); + public static final COSName ALL_OFF = getPDFName("AllOff"); + public static final COSName ALL_ON = getPDFName("AllOn"); + public static final COSName ALT = getPDFName("Alt"); + public static final COSName ALPHA = getPDFName("Alpha"); + public static final COSName ALTERNATE = getPDFName("Alternate"); + public static final COSName ANNOT = getPDFName("Annot"); + public static 
final COSName ANNOTS = getPDFName("Annots"); + public static final COSName ANTI_ALIAS = getPDFName("AntiAlias"); + public static final COSName ANY_OFF = getPDFName("AnyOff"); + public static final COSName ANY_ON = getPDFName("AnyOn"); + public static final COSName AP = getPDFName("AP"); + public static final COSName AP_REF = getPDFName("APRef"); + public static final COSName APP = getPDFName("App"); + public static final COSName ART_BOX = getPDFName("ArtBox"); + public static final COSName ARTIFACT = getPDFName("Artifact"); + public static final COSName AS = getPDFName("AS"); + public static final COSName ASCENT = getPDFName("Ascent"); + public static final COSName ASCII_HEX_DECODE = getPDFName("ASCIIHexDecode"); + public static final COSName ASCII_HEX_DECODE_ABBREVIATION = getPDFName("AHx"); + public static final COSName ASCII85_DECODE = getPDFName("ASCII85Decode"); + public static final COSName ASCII85_DECODE_ABBREVIATION = getPDFName("A85"); + public static final COSName ATTACHED = getPDFName("Attached"); + public static final COSName AUTHOR = getPDFName("Author"); + public static final COSName AVG_WIDTH = getPDFName("AvgWidth"); + // B + public static final COSName B = getPDFName("B"); + public static final COSName BACKGROUND = getPDFName("Background"); + public static final COSName BASE_ENCODING = getPDFName("BaseEncoding"); + public static final COSName BASE_FONT = getPDFName("BaseFont"); + public static final COSName BASE_STATE = getPDFName("BaseState"); + public static final COSName BASE_VERSION = getPDFName("BaseVersion"); + public static final COSName BBOX = getPDFName("BBox"); + public static final COSName BC = getPDFName("BC"); + public static final COSName BE = getPDFName("BE"); + public static final COSName BEAD = getPDFName("BEAD"); + public static final COSName BEFORE = getPDFName("Before"); + public static final COSName BG = getPDFName("BG"); + public static final COSName BITS_PER_COMPONENT = getPDFName("BitsPerComponent"); + public static final 
COSName BITS_PER_COORDINATE = getPDFName("BitsPerCoordinate"); + public static final COSName BITS_PER_FLAG = getPDFName("BitsPerFlag"); + public static final COSName BITS_PER_SAMPLE = getPDFName("BitsPerSample"); + public static final COSName BL = getPDFName("Bl"); + public static final COSName BLACK_IS_1 = getPDFName("BlackIs1"); + public static final COSName BLACK_POINT = getPDFName("BlackPoint"); + public static final COSName BLEED_BOX = getPDFName("BleedBox"); + public static final COSName BM = getPDFName("BM"); + public static final COSName BORDER = getPDFName("Border"); + public static final COSName BOUNDS = getPDFName("Bounds"); + public static final COSName BPC = getPDFName("BPC"); + public static final COSName BS = getPDFName("BS"); + //** Acro form field type for button fields. + public static final COSName BTN = getPDFName("Btn"); + public static final COSName BYTERANGE = getPDFName("ByteRange"); + // C + public static final COSName C = getPDFName("C"); + public static final COSName C0 = getPDFName("C0"); + public static final COSName C1 = getPDFName("C1"); + public static final COSName CA = getPDFName("CA"); + public static final COSName CA_NS = getPDFName("ca"); + public static final COSName CALGRAY = getPDFName("CalGray"); + public static final COSName CALRGB = getPDFName("CalRGB"); + public static final COSName CAP = getPDFName("Cap"); + public static final COSName CAP_HEIGHT = getPDFName("CapHeight"); + public static final COSName CATALOG = getPDFName("Catalog"); + public static final COSName CCITTFAX_DECODE = getPDFName("CCITTFaxDecode"); + public static final COSName CCITTFAX_DECODE_ABBREVIATION = getPDFName("CCF"); + public static final COSName CENTER_WINDOW = getPDFName("CenterWindow"); + public static final COSName CERT = getPDFName("Cert"); + public static final COSName CERTS = getPDFName("Certs"); + public static final COSName CF = getPDFName("CF"); + public static final COSName CFM = getPDFName("CFM"); + //** Acro form field type for choice 
fields. + public static final COSName CH = getPDFName("Ch"); + public static final COSName CHAR_PROCS = getPDFName("CharProcs"); + public static final COSName CHAR_SET = getPDFName("CharSet"); + public static final COSName CHECK_SUM = getPDFName("CheckSum"); + public static final COSName CI = getPDFName("CI"); + public static final COSName CICI_SIGNIT = getPDFName("CICI.SignIt"); + public static final COSName CID_FONT_TYPE0 = getPDFName("CIDFontType0"); + public static final COSName CID_FONT_TYPE2 = getPDFName("CIDFontType2"); + public static final COSName CID_TO_GID_MAP = getPDFName("CIDToGIDMap"); + public static final COSName CID_SET = getPDFName("CIDSet"); + public static final COSName CIDSYSTEMINFO = getPDFName("CIDSystemInfo"); + public static final COSName CL = getPDFName("CL"); + public static final COSName CLASS_MAP = getPDFName("ClassMap"); + public static final COSName CLR_F = getPDFName("ClrF"); + public static final COSName CLR_FF = getPDFName("ClrFf"); + public static final COSName CMAP = getPDFName("CMap"); + public static final COSName CMAPNAME = getPDFName("CMapName"); + public static final COSName CMYK = getPDFName("CMYK"); + public static final COSName CO = getPDFName("CO"); + public static final COSName COLOR = getPDFName("Color"); + public static final COSName COLLECTION = getPDFName("Collection"); + public static final COSName COLLECTION_ITEM = getPDFName("CollectionItem"); + public static final COSName COLLECTION_FIELD = getPDFName("CollectionField"); + public static final COSName COLLECTION_SCHEMA = getPDFName("CollectionSchema"); + public static final COSName COLLECTION_SORT = getPDFName("CollectionSort"); + public static final COSName COLLECTION_SUBITEM = getPDFName("CollectionSubitem"); + public static final COSName COLOR_BURN = getPDFName("ColorBurn"); + public static final COSName COLOR_DODGE = getPDFName("ColorDodge"); + public static final COSName COLORANTS = getPDFName("Colorants"); + public static final COSName COLORS = 
getPDFName("Colors"); + public static final COSName COLORSPACE = getPDFName("ColorSpace"); + public static final COSName COLUMNS = getPDFName("Columns"); + public static final COSName COMPATIBLE = getPDFName("Compatible"); + public static final COSName COMPONENTS = getPDFName("Components"); + public static final COSName CONTACT_INFO = getPDFName("ContactInfo"); + public static final COSName CONTENTS = getPDFName("Contents"); + public static final COSName COORDS = getPDFName("Coords"); + public static final COSName COUNT = getPDFName("Count"); + public static final COSName CP = getPDFName("CP"); + public static final COSName CREATION_DATE = getPDFName("CreationDate"); + public static final COSName CREATOR = getPDFName("Creator"); + public static final COSName CRL = getPDFName("CRL"); + public static final COSName CRLS = getPDFName("CRLS"); + public static final COSName CROP_BOX = getPDFName("CropBox"); + public static final COSName CRYPT = getPDFName("Crypt"); + public static final COSName CS = getPDFName("CS"); + public static final COSName CYX = getPDFName("CYX"); + // D + public static final COSName D = getPDFName("D"); + public static final COSName DA = getPDFName("DA"); + public static final COSName DARKEN = getPDFName("Darken"); + public static final COSName DATE = getPDFName("Date"); + public static final COSName DCT_DECODE = getPDFName("DCTDecode"); + public static final COSName DCT_DECODE_ABBREVIATION = getPDFName("DCT"); + public static final COSName DECODE = getPDFName("Decode"); + public static final COSName DECODE_PARMS = getPDFName("DecodeParms"); + public static final COSName DEFAULT = getPDFName("default"); + public static final COSName DEFAULT_CMYK = getPDFName("DefaultCMYK"); + public static final COSName DEFAULT_CRYPT_FILTER = getPDFName("DefaultCryptFilter"); + public static final COSName DEFAULT_GRAY = getPDFName("DefaultGray"); + public static final COSName DEFAULT_RGB = getPDFName("DefaultRGB"); + public static final COSName DESC = 
getPDFName("Desc"); + public static final COSName DESCENDANT_FONTS = getPDFName("DescendantFonts"); + public static final COSName DESCENT = getPDFName("Descent"); + public static final COSName DEST = getPDFName("Dest"); + public static final COSName DEST_OUTPUT_PROFILE = getPDFName("DestOutputProfile"); + public static final COSName DESTS = getPDFName("Dests"); + public static final COSName DEVICECMYK = getPDFName("DeviceCMYK"); + public static final COSName DEVICEGRAY = getPDFName("DeviceGray"); + public static final COSName DEVICEN = getPDFName("DeviceN"); + public static final COSName DEVICERGB = getPDFName("DeviceRGB"); + public static final COSName DI = getPDFName("Di"); + public static final COSName DIFFERENCE = getPDFName("Difference"); + public static final COSName DIFFERENCES = getPDFName("Differences"); + public static final COSName DIGEST_METHOD = getPDFName("DigestMethod"); + public static final COSName DIGEST_RIPEMD160 = getPDFName("RIPEMD160"); + public static final COSName DIGEST_SHA1 = getPDFName("SHA1"); + public static final COSName DIGEST_SHA256 = getPDFName("SHA256"); + public static final COSName DIGEST_SHA384 = getPDFName("SHA384"); + public static final COSName DIGEST_SHA512 = getPDFName("SHA512"); + public static final COSName DIRECTION = getPDFName("Direction"); + public static final COSName DISPLAY_DOC_TITLE = getPDFName("DisplayDocTitle"); + public static final COSName DL = getPDFName("DL"); + public static final COSName DM = getPDFName("Dm"); + public static final COSName DOC = getPDFName("Doc"); + public static final COSName DOC_CHECKSUM = getPDFName("DocChecksum"); + public static final COSName DOC_TIME_STAMP = getPDFName("DocTimeStamp"); + public static final COSName DOCMDP = getPDFName("DocMDP"); + public static final COSName DOCUMENT = getPDFName("Document"); + public static final COSName DOMAIN = getPDFName("Domain"); + public static final COSName DOS = getPDFName("DOS"); + public static final COSName DP = getPDFName("DP"); + 
public static final COSName DR = getPDFName("DR"); + public static final COSName DS = getPDFName("DS"); + public static final COSName DSS = getPDFName("DSS"); + public static final COSName DUPLEX = getPDFName("Duplex"); + public static final COSName DUR = getPDFName("Dur"); + public static final COSName DV = getPDFName("DV"); + public static final COSName DW = getPDFName("DW"); + public static final COSName DW2 = getPDFName("DW2"); + // E + public static final COSName E = getPDFName("E"); + public static final COSName EARLY_CHANGE = getPDFName("EarlyChange"); + public static final COSName EF = getPDFName("EF"); + public static final COSName EMBEDDED_FDFS = getPDFName("EmbeddedFDFs"); + public static final COSName EMBEDDED_FILE = getPDFName("EmbeddedFile"); + public static final COSName EMBEDDED_FILES = getPDFName("EmbeddedFiles"); + public static final COSName EMPTY = getPDFName(""); + public static final COSName ENCODE = getPDFName("Encode"); + public static final COSName ENCODED_BYTE_ALIGN = getPDFName("EncodedByteAlign"); + public static final COSName ENCODING = getPDFName("Encoding"); + public static final COSName ENCODING_90MS_RKSJ_H = getPDFName("90ms-RKSJ-H"); + public static final COSName ENCODING_90MS_RKSJ_V = getPDFName("90ms-RKSJ-V"); + public static final COSName ENCODING_ETEN_B5_H = getPDFName("ETen-B5-H"); + public static final COSName ENCODING_ETEN_B5_V = getPDFName("ETen-B5-V"); + public static final COSName ENCRYPT = getPDFName("Encrypt"); + public static final COSName ENCRYPT_META_DATA = getPDFName("EncryptMetadata"); + public static final COSName ENCRYPTED_PAYLOAD = getPDFName("EncryptedPayload"); + public static final COSName END_OF_LINE = getPDFName("EndOfLine"); + public static final COSName ENTRUST_PPKEF = getPDFName("Entrust.PPKEF"); + public static final COSName EXCLUSION = getPDFName("Exclusion"); + public static final COSName EXTENSIONS = getPDFName("Extensions"); + public static final COSName EXTENSION_LEVEL = 
getPDFName("ExtensionLevel"); + public static final COSName EX_DATA = getPDFName("ExData"); + public static final COSName EXPORT = getPDFName("Export"); + public static final COSName EXPORT_STATE = getPDFName("ExportState"); + public static final COSName EXT_G_STATE = getPDFName("ExtGState"); + public static final COSName EXTEND = getPDFName("Extend"); + public static final COSName EXTENDS = getPDFName("Extends"); + // F + public static final COSName F = getPDFName("F"); + public static final COSName F_DECODE_PARMS = getPDFName("FDecodeParms"); + public static final COSName F_FILTER = getPDFName("FFilter"); + public static final COSName FB = getPDFName("FB"); + public static final COSName FDF = getPDFName("FDF"); + public static final COSName FF = getPDFName("Ff"); + public static final COSName FIELDS = getPDFName("Fields"); + public static final COSName FILESPEC = getPDFName("Filespec"); + public static final COSName FILTER = getPDFName("Filter"); + public static final COSName FIRST = getPDFName("First"); + public static final COSName FIRST_CHAR = getPDFName("FirstChar"); + public static final COSName FIT_WINDOW = getPDFName("FitWindow"); + public static final COSName FL = getPDFName("FL"); + public static final COSName FLAGS = getPDFName("Flags"); + public static final COSName FLATE_DECODE = getPDFName("FlateDecode"); + public static final COSName FLATE_DECODE_ABBREVIATION = getPDFName("Fl"); + public static final COSName FO = getPDFName("Fo"); + public static final COSName FOLDERS = getPDFName("Folders"); + public static final COSName FONT = getPDFName("Font"); + public static final COSName FONT_BBOX = getPDFName("FontBBox"); + public static final COSName FONT_DESC = getPDFName("FontDescriptor"); + public static final COSName FONT_FAMILY = getPDFName("FontFamily"); + public static final COSName FONT_FILE = getPDFName("FontFile"); + public static final COSName FONT_FILE2 = getPDFName("FontFile2"); + public static final COSName FONT_FILE3 = 
getPDFName("FontFile3"); + public static final COSName FONT_MATRIX = getPDFName("FontMatrix"); + public static final COSName FONT_NAME = getPDFName("FontName"); + public static final COSName FONT_STRETCH = getPDFName("FontStretch"); + public static final COSName FONT_WEIGHT = getPDFName("FontWeight"); + public static final COSName FORM = getPDFName("Form"); + public static final COSName FORMTYPE = getPDFName("FormType"); + public static final COSName FRM = getPDFName("FRM"); + public static final COSName FS = getPDFName("FS"); + public static final COSName FT = getPDFName("FT"); + public static final COSName FUNCTION = getPDFName("Function"); + public static final COSName FUNCTION_TYPE = getPDFName("FunctionType"); + public static final COSName FUNCTIONS = getPDFName("Functions"); + // G + public static final COSName G = getPDFName("G"); + public static final COSName GAMMA = getPDFName("Gamma"); + public static final COSName GROUP = getPDFName("Group"); + public static final COSName GTS_PDFA1 = getPDFName("GTS_PDFA1"); + // H + public static final COSName H = getPDFName("H"); + public static final COSName HARD_LIGHT = getPDFName("HardLight"); + public static final COSName HEIGHT = getPDFName("Height"); + public static final COSName HELV = getPDFName("Helv"); + public static final COSName HIDE_MENUBAR = getPDFName("HideMenubar"); + public static final COSName HIDE_TOOLBAR = getPDFName("HideToolbar"); + public static final COSName HIDE_WINDOWUI = getPDFName("HideWindowUI"); + public static final COSName HUE = getPDFName("Hue"); + // I + public static final COSName I = getPDFName("I"); + public static final COSName IC = getPDFName("IC"); + public static final COSName ICCBASED = getPDFName("ICCBased"); + public static final COSName ID = getPDFName("ID"); + public static final COSName ID_TREE = getPDFName("IDTree"); + public static final COSName IDENTITY = getPDFName("Identity"); + public static final COSName IDENTITY_H = getPDFName("Identity-H"); + public static final 
COSName IDENTITY_V = getPDFName("Identity-V"); + public static final COSName IF = getPDFName("IF"); + public static final COSName ILLUSTRATOR = getPDFName("Illustrator"); + public static final COSName IM = getPDFName("IM"); + public static final COSName IMAGE = getPDFName("Image"); + public static final COSName IMAGE_MASK = getPDFName("ImageMask"); + public static final COSName INDEX = getPDFName("Index"); + public static final COSName INDEXED = getPDFName("Indexed"); + public static final COSName INFO = getPDFName("Info"); + public static final COSName INKLIST = getPDFName("InkList"); + public static final COSName INTENT = getPDFName("Intent"); + public static final COSName INTERPOLATE = getPDFName("Interpolate"); + public static final COSName IRT = getPDFName("IRT"); + public static final COSName IT = getPDFName("IT"); + public static final COSName ITALIC_ANGLE = getPDFName("ItalicAngle"); + public static final COSName ISSUER = getPDFName("Issuer"); + public static final COSName IX = getPDFName("IX"); + + // J + public static final COSName JAVA_SCRIPT = getPDFName("JavaScript"); + public static final COSName JBIG2_DECODE = getPDFName("JBIG2Decode"); + public static final COSName JBIG2_GLOBALS = getPDFName("JBIG2Globals"); + public static final COSName JPX_DECODE = getPDFName("JPXDecode"); + public static final COSName JS = getPDFName("JS"); + // K + public static final COSName K = getPDFName("K"); + public static final COSName KEYWORDS = getPDFName("Keywords"); + public static final COSName KEY_USAGE = getPDFName("KeyUsage"); + public static final COSName KIDS = getPDFName("Kids"); + // L + public static final COSName L = getPDFName("L"); + public static final COSName LAB = getPDFName("Lab"); + public static final COSName LANG = getPDFName("Lang"); + public static final COSName LAST = getPDFName("Last"); + public static final COSName LAST_CHAR = getPDFName("LastChar"); + public static final COSName LAST_MODIFIED = getPDFName("LastModified"); + public static final 
COSName LC = getPDFName("LC"); + public static final COSName LE = getPDFName("LE"); + public static final COSName LEADING = getPDFName("Leading"); + public static final COSName LEGAL_ATTESTATION = getPDFName("LegalAttestation"); + public static final COSName LENGTH = getPDFName("Length"); + public static final COSName LENGTH1 = getPDFName("Length1"); + public static final COSName LENGTH2 = getPDFName("Length2"); + public static final COSName LENGTH3 = getPDFName("Length3"); + public static final COSName LIGHTEN = getPDFName("Lighten"); + public static final COSName LIMITS = getPDFName("Limits"); + public static final COSName LINEARIZED = getPDFName("Linearized"); + public static final COSName LINK = getPDFName("Link"); + public static final COSName LJ = getPDFName("LJ"); + public static final COSName LL = getPDFName("LL"); + public static final COSName LLE = getPDFName("LLE"); + public static final COSName LLO = getPDFName("LLO"); + public static final COSName LOCATION = getPDFName("Location"); + public static final COSName LUMINOSITY = getPDFName("Luminosity"); + public static final COSName LW = getPDFName("LW"); + public static final COSName LZW_DECODE = getPDFName("LZWDecode"); + public static final COSName LZW_DECODE_ABBREVIATION = getPDFName("LZW"); + // M + public static final COSName M = getPDFName("M"); + public static final COSName MAC = getPDFName("Mac"); + public static final COSName MAC_EXPERT_ENCODING = getPDFName("MacExpertEncoding"); + public static final COSName MAC_ROMAN_ENCODING = getPDFName("MacRomanEncoding"); + public static final COSName MARK_INFO = getPDFName("MarkInfo"); + public static final COSName MASK = getPDFName("Mask"); + public static final COSName MATRIX = getPDFName("Matrix"); + public static final COSName MATTE = getPDFName("Matte"); + public static final COSName MAX_LEN = getPDFName("MaxLen"); + public static final COSName MAX_WIDTH = getPDFName("MaxWidth"); + public static final COSName MCID = getPDFName("MCID"); + public static 
final COSName MDP = getPDFName("MDP"); + public static final COSName MEDIA_BOX = getPDFName("MediaBox"); + public static final COSName MEASURE = getPDFName("Measure"); + public static final COSName METADATA = getPDFName("Metadata"); + public static final COSName MISSING_WIDTH = getPDFName("MissingWidth"); + public static final COSName MIX = getPDFName("Mix"); + public static final COSName MK = getPDFName("MK"); + public static final COSName ML = getPDFName("ML"); + public static final COSName MM_TYPE1 = getPDFName("MMType1"); + public static final COSName MOD_DATE = getPDFName("ModDate"); + public static final COSName MULTIPLY = getPDFName("Multiply"); + // N + public static final COSName N = getPDFName("N"); + public static final COSName NAME = getPDFName("Name"); + public static final COSName NAMES = getPDFName("Names"); + public static final COSName NAVIGATOR = getPDFName("Navigator"); + public static final COSName NEED_APPEARANCES = getPDFName("NeedAppearances"); + public static final COSName NEW_WINDOW = getPDFName("NewWindow"); + public static final COSName NEXT = getPDFName("Next"); + public static final COSName NM = getPDFName("NM"); + public static final COSName NON_EFONT_NO_WARN = getPDFName("NonEFontNoWarn"); + public static final COSName NON_FULL_SCREEN_PAGE_MODE = getPDFName("NonFullScreenPageMode"); + public static final COSName NONE = getPDFName("None"); + public static final COSName NORMAL = getPDFName("Normal"); + public static final COSName NUMS = getPDFName("Nums"); + // O + public static final COSName O = getPDFName("O"); + public static final COSName OBJ = getPDFName("Obj"); + public static final COSName OBJR = getPDFName("OBJR"); + public static final COSName OBJ_STM = getPDFName("ObjStm"); + public static final COSName OC = getPDFName("OC"); + public static final COSName OCG = getPDFName("OCG"); + public static final COSName OCGS = getPDFName("OCGs"); + public static final COSName OCMD = getPDFName("OCMD"); + public static final COSName 
OCPROPERTIES = getPDFName("OCProperties"); + public static final COSName OCSP = getPDFName("OCSP"); + public static final COSName OCSPS = getPDFName("OCSPs"); + public static final COSName OE = getPDFName("OE"); + public static final COSName OID = getPDFName("OID"); + + /** + * "OFF", to be used for OCGs, not for Acroform + */ + public static final COSName OFF = getPDFName("OFF"); + + /** + * "Off", to be used for Acroform, not for OCGs + */ + public static final COSName Off = getPDFName("Off"); + + public static final COSName ON = getPDFName("ON"); + public static final COSName OP = getPDFName("OP"); + public static final COSName OP_NS = getPDFName("op"); + public static final COSName OPEN_ACTION = getPDFName("OpenAction"); + public static final COSName OPEN_TYPE = getPDFName("OpenType"); + public static final COSName OPM = getPDFName("OPM"); + public static final COSName OPT = getPDFName("Opt"); + public static final COSName ORDER = getPDFName("Order"); + public static final COSName ORDERING = getPDFName("Ordering"); + public static final COSName OS = getPDFName("OS"); + public static final COSName OUTLINES = getPDFName("Outlines"); + public static final COSName OUTPUT_CONDITION = getPDFName("OutputCondition"); + public static final COSName OUTPUT_CONDITION_IDENTIFIER = getPDFName( + "OutputConditionIdentifier"); + public static final COSName OUTPUT_INTENT = getPDFName("OutputIntent"); + public static final COSName OUTPUT_INTENTS = getPDFName("OutputIntents"); + public static final COSName OVERLAY = getPDFName("Overlay"); + // P + public static final COSName P = getPDFName("P"); + public static final COSName PA = getPDFName("PA"); + public static final COSName PAGE = getPDFName("Page"); + public static final COSName PAGE_LABELS = getPDFName("PageLabels"); + public static final COSName PAGE_LAYOUT = getPDFName("PageLayout"); + public static final COSName PAGE_MODE = getPDFName("PageMode"); + public static final COSName PAGES = getPDFName("Pages"); + public static 
final COSName PAINT_TYPE = getPDFName("PaintType"); + public static final COSName PANOSE = getPDFName("Panose"); + public static final COSName PARAMS = getPDFName("Params"); + public static final COSName PARENT = getPDFName("Parent"); + public static final COSName PARENT_TREE = getPDFName("ParentTree"); + public static final COSName PARENT_TREE_NEXT_KEY = getPDFName("ParentTreeNextKey"); + public static final COSName PART = getPDFName("Part"); + public static final COSName PATH = getPDFName("Path"); + public static final COSName PATTERN = getPDFName("Pattern"); + public static final COSName PATTERN_TYPE = getPDFName("PatternType"); + public static final COSName PC = getPDFName("PC"); + public static final COSName PDF_DOC_ENCODING = getPDFName("PDFDocEncoding"); + public static final COSName PERMS = getPDFName("Perms"); + public static final COSName PERCEPTUAL = getPDFName("Perceptual"); + public static final COSName PIECE_INFO = getPDFName("PieceInfo"); + public static final COSName PG = getPDFName("Pg"); + public static final COSName PI = getPDFName("PI"); + public static final COSName PO = getPDFName("PO"); + public static final COSName POPUP = getPDFName("Popup"); + public static final COSName PRE_RELEASE = getPDFName("PreRelease"); + public static final COSName PREDICTOR = getPDFName("Predictor"); + public static final COSName PREV = getPDFName("Prev"); + public static final COSName PRINT = getPDFName("Print"); + public static final COSName PRINT_AREA = getPDFName("PrintArea"); + public static final COSName PRINT_CLIP = getPDFName("PrintClip"); + public static final COSName PRINT_SCALING = getPDFName("PrintScaling"); + public static final COSName PRINT_STATE = getPDFName("PrintState"); + public static final COSName PRIVATE = getPDFName("Private"); + public static final COSName PROC_SET = getPDFName("ProcSet"); + public static final COSName PROCESS = getPDFName("Process"); + public static final COSName PRODUCER = getPDFName("Producer"); + public static final 
COSName PROP_BUILD = getPDFName("Prop_Build"); + public static final COSName PROPERTIES = getPDFName("Properties"); + public static final COSName PS = getPDFName("PS"); + public static final COSName PUB_SEC = getPDFName("PubSec"); + public static final COSName PV = getPDFName("PV"); + // Q + public static final COSName Q = getPDFName("Q"); + public static final COSName QUADPOINTS = getPDFName("QuadPoints"); + // R + public static final COSName R = getPDFName("R"); + public static final COSName RANGE = getPDFName("Range"); + public static final COSName RC = getPDFName("RC"); + public static final COSName RD = getPDFName("RD"); + public static final COSName REASON = getPDFName("Reason"); + public static final COSName REASONS = getPDFName("Reasons"); + public static final COSName RECIPIENTS = getPDFName("Recipients"); + public static final COSName RECT = getPDFName("Rect"); + public static final COSName REFERENCE = getPDFName("Reference"); + public static final COSName REGISTRY = getPDFName("Registry"); + public static final COSName REGISTRY_NAME = getPDFName("RegistryName"); + public static final COSName RELATIVE_COLORIMETRIC = getPDFName("RelativeColorimetric"); + public static final COSName RENAME = getPDFName("Rename"); + public static final COSName REPEAT = getPDFName("Repeat"); + public static final COSName RES_FORK = getPDFName("ResFork"); + public static final COSName RESOURCES = getPDFName("Resources"); + public static final COSName RGB = getPDFName("RGB"); + public static final COSName RI = getPDFName("RI"); + public static final COSName ROLE_MAP = getPDFName("RoleMap"); + public static final COSName ROOT = getPDFName("Root"); + public static final COSName ROTATE = getPDFName("Rotate"); + public static final COSName ROWS = getPDFName("Rows"); + public static final COSName RT = getPDFName("RT"); + public static final COSName RUN_LENGTH_DECODE = getPDFName("RunLengthDecode"); + public static final COSName RUN_LENGTH_DECODE_ABBREVIATION = getPDFName("RL"); + 
public static final COSName RV = getPDFName("RV"); + // S + public static final COSName S = getPDFName("S"); + public static final COSName SA = getPDFName("SA"); + public static final COSName SATURATION = getPDFName("Saturation"); + public static final COSName SCHEMA = getPDFName("Schema"); + public static final COSName SCREEN = getPDFName("Screen"); + public static final COSName SE = getPDFName("SE"); + public static final COSName SEPARATION = getPDFName("Separation"); + public static final COSName SET_F = getPDFName("SetF"); + public static final COSName SET_FF = getPDFName("SetFf"); + public static final COSName SHADING = getPDFName("Shading"); + public static final COSName SHADING_TYPE = getPDFName("ShadingType"); + public static final COSName SIG = getPDFName("Sig"); + public static final COSName SIG_FLAGS = getPDFName("SigFlags"); + public static final COSName SIG_REF = getPDFName("SigRef"); + public static final COSName SIZE = getPDFName("Size"); + public static final COSName SM = getPDFName("SM"); + public static final COSName SMASK = getPDFName("SMask"); + public static final COSName SMASK_IN_DATA = getPDFName("SMaskInData"); + public static final COSName SOFT_LIGHT = getPDFName("SoftLight"); + public static final COSName SORT = getPDFName("Sort"); + public static final COSName SOUND = getPDFName("Sound"); + public static final COSName SPLIT = getPDFName("Split"); + public static final COSName SS = getPDFName("SS"); + public static final COSName ST = getPDFName("St"); + public static final COSName STANDARD_ENCODING = getPDFName("StandardEncoding"); + public static final COSName STATE = getPDFName("State"); + public static final COSName STATE_MODEL = getPDFName("StateModel"); + public static final COSName STATUS = getPDFName("Status"); + public static final COSName STD_CF = getPDFName("StdCF"); + public static final COSName STEM_H = getPDFName("StemH"); + public static final COSName STEM_V = getPDFName("StemV"); + public static final COSName STM_F = 
getPDFName("StmF"); + public static final COSName STR_F = getPDFName("StrF"); + public static final COSName STRUCT_ELEM = getPDFName("StructElem"); + public static final COSName STRUCT_PARENT = getPDFName("StructParent"); + public static final COSName STRUCT_PARENTS = getPDFName("StructParents"); + public static final COSName STRUCT_TREE_ROOT = getPDFName("StructTreeRoot"); + public static final COSName STYLE = getPDFName("Style"); + public static final COSName SUB_FILTER = getPDFName("SubFilter"); + public static final COSName SUBJ = getPDFName("Subj"); + public static final COSName SUBJECT = getPDFName("Subject"); + public static final COSName SUBJECT_DN = getPDFName("SubjectDN"); + public static final COSName SUBTYPE = getPDFName("Subtype"); + public static final COSName SUPPLEMENT = getPDFName("Supplement"); + public static final COSName SV = getPDFName("SV"); + public static final COSName SV_CERT = getPDFName("SVCert"); + public static final COSName SW = getPDFName("SW"); + public static final COSName SY = getPDFName("Sy"); + public static final COSName SYNCHRONOUS = getPDFName("Synchronous"); + // T + public static final COSName T = getPDFName("T"); + public static final COSName TARGET = getPDFName("Target"); + public static final COSName TEMPLATES = getPDFName("Templates"); + public static final COSName THREAD = getPDFName("Thread"); + public static final COSName THREADS = getPDFName("Threads"); + public static final COSName THREE_DD = getPDFName("3DD"); + public static final COSName THUMB = getPDFName("Thumb"); + public static final COSName TI = getPDFName("TI"); + public static final COSName TILING_TYPE = getPDFName("TilingType"); + public static final COSName TIME_STAMP = getPDFName("TimeStamp"); + public static final COSName TITLE = getPDFName("Title"); + public static final COSName TK = getPDFName("TK"); + public static final COSName TM = getPDFName("TM"); + public static final COSName TO_UNICODE = getPDFName("ToUnicode"); + public static final COSName 
TR = getPDFName("TR"); + public static final COSName TR2 = getPDFName("TR2"); + public static final COSName TRAPPED = getPDFName("Trapped"); + public static final COSName TRANS = getPDFName("Trans"); + public static final COSName TRANSFORM_METHOD = getPDFName("TransformMethod"); + public static final COSName TRANSFORM_PARAMS = getPDFName("TransformParams"); + public static final COSName TRANSPARENCY = getPDFName("Transparency"); + public static final COSName TREF = getPDFName("TRef"); + public static final COSName TRIM_BOX = getPDFName("TrimBox"); + public static final COSName TRUE_TYPE = getPDFName("TrueType"); + public static final COSName TRUSTED_MODE = getPDFName("TrustedMode"); + public static final COSName TU = getPDFName("TU"); + /** Acro form field type for text field. */ + public static final COSName TX = getPDFName("Tx"); + public static final COSName TYPE = getPDFName("Type"); + public static final COSName TYPE0 = getPDFName("Type0"); + public static final COSName TYPE1 = getPDFName("Type1"); + public static final COSName TYPE3 = getPDFName("Type3"); + // U + public static final COSName U = getPDFName("U"); + public static final COSName UE = getPDFName("UE"); + public static final COSName UF = getPDFName("UF"); + public static final COSName UNCHANGED = getPDFName("Unchanged"); + public static final COSName UNIX = getPDFName("Unix"); + public static final COSName URI = getPDFName("URI"); + public static final COSName URL = getPDFName("URL"); + public static final COSName URL_TYPE = getPDFName("URLType"); + public static final COSName USAGE = getPDFName("Usage"); + public static final COSName USE_CMAP = getPDFName("UseCMap"); + public static final COSName USER_UNIT = getPDFName("UserUnit"); + // V + public static final COSName V = getPDFName("V"); + public static final COSName VE = getPDFName("VE"); + public static final COSName VERISIGN_PPKVS = getPDFName("VeriSign.PPKVS"); + public static final COSName VERSION = getPDFName("Version"); + public static 
final COSName VERTICES = getPDFName("Vertices"); + public static final COSName VERTICES_PER_ROW = getPDFName("VerticesPerRow"); + public static final COSName VIEW = getPDFName("View"); + public static final COSName VIEW_AREA = getPDFName("ViewArea"); + public static final COSName VIEW_CLIP = getPDFName("ViewClip"); + public static final COSName VIEW_STATE = getPDFName("ViewState"); + public static final COSName VIEWER_PREFERENCES = getPDFName("ViewerPreferences"); + public static final COSName VOLUME = getPDFName("Volume"); + public static final COSName VP = getPDFName("VP"); + public static final COSName VRI = getPDFName("VRI"); + // W + public static final COSName W = getPDFName("W"); + public static final COSName W2 = getPDFName("W2"); + public static final COSName WC = getPDFName("WC"); + public static final COSName WHITE_POINT = getPDFName("WhitePoint"); + public static final COSName WIDGET = getPDFName("Widget"); + public static final COSName WIDTH = getPDFName("Width"); + public static final COSName WIDTHS = getPDFName("Widths"); + public static final COSName WIN = getPDFName("Win"); + public static final COSName WIN_ANSI_ENCODING = getPDFName("WinAnsiEncoding"); + public static final COSName WMODE = getPDFName("WMode"); + public static final COSName WP = getPDFName("WP"); + public static final COSName WS = getPDFName("WS"); + // X + public static final COSName X = getPDFName("X"); + public static final COSName XFA = getPDFName("XFA"); + public static final COSName X_STEP = getPDFName("XStep"); + public static final COSName XHEIGHT = getPDFName("XHeight"); + public static final COSName XOBJECT = getPDFName("XObject"); + public static final COSName XREF = getPDFName("XRef"); + public static final COSName XREF_STM = getPDFName("XRefStm"); + // Y + public static final COSName Y = getPDFName("Y"); + public static final COSName Y_STEP = getPDFName("YStep"); + public static final COSName YES = getPDFName("Yes"); + + // Z + public static final COSName ZA_DB = 
getPDFName("ZaDb"); + + // fields + private final String name; + + /** + * Returns the shared COSName instance for the given name, creating and caching it on first use. + * Instances are held through weak references, so an unused name may be collected and later + * recreated. + * + * @param aName The name of the object. + * + * @return A COSName with the specified name. + */ + public static COSName getPDFName(String aName) + { + WeakReference<COSName> weakRef = NAME_MAP.get(aName); + COSName name = weakRef != null ? weakRef.get() : null; + + if (name == null) + { + // Although we use a ConcurrentHashMap, we cannot use computeIfAbsent() because the returned reference + // might be stale (even the newly created one). + // Use double checked locking to make the code thread safe. + synchronized (NAME_MAP) + { + weakRef = NAME_MAP.get(aName); + name = weakRef != null ? weakRef.get() : null; + if (name == null) + { + name = new COSName(aName); + // The cleaning action may run after a newer instance was registered for the same name, + // so it removes the mapping only if it still points at this instance's own reference. + WeakReference<COSName> ownRef = new WeakReference<>(name); + CLEANER.register(name, () -> NAME_MAP.remove(aName, ownRef)); + NAME_MAP.put(aName, ownRef); + } + } + } + + return name; + } + + /** + * Private constructor. This will limit the number of COSName objects that are created. + * + * @param aName The name of the COSName object. + */ + private COSName(String aName) + { + this.name = aName; + } + + /** + * This will get the name of this COSName object. + * + * @return The name of the object. + */ + public String getName() + { + return name; + } + + @Override + public String toString() + { + return "COSName{" + name + "}"; + } + + @Override + public boolean equals(Object object) + { + return object instanceof COSName && name.equals(((COSName) object).name); + } + + @Override + public int hashCode() + { + return name.hashCode(); + } + + @Override + public int compareTo(COSName other) + { + return name.compareTo(other.name); + } + + /** + * Returns true if the name is the empty string. + * @return true if the name is the empty string.
+ */ + public boolean isEmpty() + { + return name.isEmpty(); + } + + @Override + public void accept(ICOSVisitor visitor) throws IOException + { + visitor.visitFromName(this); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java new file mode 100644 index 00000000000..55bdf2733a5 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * This class represents a null PDF object. + * + * @author Ben Litchfield + */ +public final class COSNull extends COSBase +{ + /** + * The null token, i.e. the bytes of the keyword "null". + * The array instance is shared by all users, so callers must not modify it. + */ + public static final byte[] NULL_BYTES = {110, 117, 108, 108}; //"null".getBytes( "ISO-8859-1" ); + + /** + * The one null object in the system. + */ + public static final COSNull NULL = new COSNull(); + + /** + * Private constructor; {@link #NULL} is the only instance. + */ + private COSNull() + { + //limit creation to one instance. + } + + /** + * Visitor pattern double dispatch method. + * + * @param visitor The object to notify when visiting this object. + * @throws IOException If an error occurs while visiting this object.
+ */ + @Override + public void accept(ICOSVisitor visitor) throws IOException + { + visitor.visitFromNull(this); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return "COSNull{}"; + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java new file mode 100644 index 00000000000..97d4fcbe894 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * This class represents an abstract number in a PDF document. + * Instances are obtained from their string form through {@link #get(String)}. + * + * @author Ben Litchfield + */ +public abstract class COSNumber extends COSBase +{ + /** + * This will get the float value of this number. + * + * @return The float value of this number. + */ + public abstract float floatValue(); + + /** + * This will get the integer value of this number. + * + * @return The integer value of this number. + */ + public abstract int intValue(); + + /** + * This will get the long value of this number. + * + * @return The long value of this number.
+ */ + public abstract long longValue(); + + /** + * This factory method will get the appropriate number object. + * + * A single '-' or '.' yields COSInteger.ZERO. Strings containing '.' or 'e' are handed to COSFloat. + * Digit strings that do not fit into a long yield COSInteger.OUT_OF_RANGE_MAX or, with a leading '-', + * COSInteger.OUT_OF_RANGE_MIN. + * + * @param number The string representation of the number. + * + * @return A number object, either float or int. + * + * @throws IOException If the string is not a number, including the empty string. + */ + public static COSNumber get( String number ) throws IOException + { + if (number.length() == 1) + { + char digit = number.charAt(0); + if ('0' <= digit && digit <= '9') + { + return COSInteger.get((long) digit - '0'); + } + if (digit == '-' || digit == '.') + { + // See https://issues.apache.org/jira/browse/PDFBOX-592 + return COSInteger.ZERO; + } + throw new IOException("Not a number: " + number); + } + if (isFloat(number)) + { + return new COSFloat(number); + } + try + { + return COSInteger.get(Long.parseLong(number)); + } + catch (NumberFormatException e) + { + // check if the given string could be a number at all; at least one digit is required so that + // an empty string is rejected instead of being reported as an out of range value + String numberString = number.startsWith("+") || number.startsWith("-") + ? number.substring(1) : number; + if (!numberString.matches("[0-9]+")) + { + throw new IOException("Not a number: " + number); + } + // return a limited COSInteger value which is marked as invalid + return number.startsWith("-") ? COSInteger.OUT_OF_RANGE_MIN + : COSInteger.OUT_OF_RANGE_MAX; + } + } + + /** + * Tells whether the string has to be parsed as a float, i.e. contains a '.' or an 'e'. + */ + private static boolean isFloat( String number ) + { + int length = number.length(); + for (int i = 0; i < length; i++) + { + char digit = number.charAt(i); + if (digit == '.' || digit == 'e') + { + return true; + } + } + return false; + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java new file mode 100644 index 00000000000..9ad4fd90d25 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.IOException;
+
+/**
+ * This class represents an indirect PDF object: a wrapper holding a COSBase that may be loaded lazily.
+ *
+ * @author Ben Litchfield
+ *
+ */
+public class COSObject extends COSBase implements COSUpdateInfo
+{
+    private COSBase baseObject;
+    private ICOSParser parser;
+    private boolean isDereferenced = false;
+    private final COSUpdateState updateState;
+
+    private static final Logger LOG = LogManager.getLogger(COSObject.class);
+
+    /**
+     * Constructor for an already loaded object; no parser is attached.
+     *
+     * @param object The object that this encapsulates.
+     *
+     */
+    public COSObject(COSBase object)
+    {
+        updateState = new COSUpdateState(this);
+        baseObject = object;
+        isDereferenced = true;
+        direct = false;
+    }
+
+    /**
+     * Constructor for an already loaded object with a known key; no parser is attached.
+     *
+     * @param object The object that this encapsulates.
+     * @param objectKey The COSObjectKey of the encapsulated object
+     */
+    public COSObject(COSBase object, COSObjectKey objectKey)
+    {
+        this(objectKey, null);
+        baseObject = object;
+        isDereferenced = true;
+        direct = false;
+    }
+
+    /**
+     * Constructor; the object counts as dereferenced only if {@code object} is not null.
+     *
+     * @param object The object that this encapsulates.
+     * @param parser The parser to be used to load the object on demand
+     *
+     */
+    public COSObject(COSBase object, ICOSParser parser)
+    {
+        updateState = new COSUpdateState(this);
+        baseObject = object;
+        isDereferenced = object != null;
+        this.parser = parser;
+        direct = false;
+    }
+
+    /**
+     * Constructor for an object that is loaded on demand by the first call of {@link #getObject()}.
+     *
+     * @param key The object key (number and generation) of the encapsulated object.
+     * @param parser The parser to be used to load the object on demand
+     *
+     */
+    public COSObject(COSObjectKey key, ICOSParser parser)
+    {
+        updateState = new COSUpdateState(this);
+        this.parser = parser;
+        setKey(key);
+        direct = false;
+    }
+
+    /**
+     * Indicates if the referenced object is missing; this check does not load the object on demand.
+     *
+     * @return true if no object is currently held or the held object equals COSNull.NULL
+     */
+    public boolean isObjectNull()
+    {
+        return baseObject == null || baseObject.equals( COSNull.NULL );
+    }
+
+    /**
+     * Proxy objects can never be direct, so direct is always false
+     * @param direct ignored
+     */
+    @Override
+    public void setDirect(boolean direct)
+    {
+        this.direct = false;
+    }
+
+    /**
+     * This will get the object that this object encapsulates, loading it via the parser on first use.
+     *
+     * @return The encapsulated object; a failed dereference is only logged, so this may be null.
+     */
+    public COSBase getObject()
+    {
+        if (!isDereferenced && parser != null)
+        {
+            try
+            {
+                // mark as dereferenced to avoid endless recursions
+                isDereferenced = true;
+                baseObject = parser.dereferenceCOSObject(this);
+                getUpdateState().dereferenceChild(baseObject);
+            }
+            catch (IOException e)
+            {
+                LOG.error(() -> "Can't dereference " + this, e);
+            }
+            finally
+            {
+                parser = null;
+            }
+        }
+        return baseObject;
+    }
+
+    /**
+     * Sets the referenced object to COSNull and removes the initially assigned parser.
+     */
+    public final void setToNull()
+    {
+        if(baseObject != null)
+        {
+            getUpdateState().update();
+        }
+        baseObject = COSNull.NULL;
+        parser = null;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public String toString()
+    {
+        return "COSObject{" + getKey() + "}";
+    }
+
+    /**
+     * Visitor pattern double dispatch method; forwards to the encapsulated object, or to COSNull.NULL if there is none.
+     *
+     * @param visitor The object to notify when visiting this object.
+     * @throws IOException If an error occurs while visiting this object.
+     */
+    @Override
+    public void accept( ICOSVisitor visitor ) throws IOException
+    {
+        COSBase object = getObject();
+        if (object != null)
+        {
+            object.accept(visitor);
+        }
+        else
+        {
+            COSNull.NULL.accept(visitor);
+        }
+    }
+
+    /**
+     * Returns {@code true}, if the hereby referenced {@link COSBase} has already been parsed and loaded.
+     *
+     * @return {@code true}, if the hereby referenced {@link COSBase} has already been parsed and loaded.
+     */
+    public boolean isDereferenced()
+    {
+        return isDereferenced;
+    }
+
+    /**
+     * Returns the current {@link COSUpdateState} of this {@link COSObject}.
+     *
+     * @return The current {@link COSUpdateState} of this {@link COSObject}.
+     * @see COSUpdateState
+     */
+    @Override
+    public COSUpdateState getUpdateState()
+    {
+        return updateState;
+    }
+
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java
new file mode 100644
index 00000000000..a9465095c11
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+/**
+ * This is an interface used to get/create the underlying COSObject.
+ *
+ * @author Ben Litchfield
+ */
+public interface COSObjectGetter
+{
+    /**
+     * Convert this standard java object to a COS object.
+     *
+     * @return The cos object that matches this Java object.
+     */
+    COSBase getCOSObject();
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java
new file mode 100644
index 00000000000..73238534a44
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+/**
+ * Object representing the physical reference to an indirect pdf object.
+ *
+ * @author Michael Traut
+ *
+ */
+public final class COSObjectKey implements Comparable<COSObjectKey>
+{
+    private static final int NUMBER_OFFSET = Short.SIZE;
+    private static final long GENERATION_MASK = (1L << NUMBER_OFFSET) - 1;
+    // combined number and generation
+    // The lowest 16 bits hold the generation 0-65535
+    // The rest is used for the number (even though 34 bit are sufficient for 10 digits)
+    private final long numberAndGeneration;
+    // index within a compressed object stream if applicable otherwise -1
+    private final int streamIndex;
+
+    /**
+     * Constructor for a key without a stream index (the index is set to -1).
+     *
+     * @param num The object number.
+     * @param gen The object generation number.
+     */
+    public COSObjectKey(long num, int gen)
+    {
+        this(num, gen, -1);
+    }
+
+    /**
+     * Constructor; throws IllegalArgumentException if {@code num} or {@code gen} is negative.
+     *
+     * @param num The object number.
+     * @param gen The object generation number.
+     * @param index The index within a compressed object stream
+     */
+    public COSObjectKey(long num, int gen, int index)
+    {
+        if (num < 0)
+        {
+            throw new IllegalArgumentException("Object number must not be a negative value");
+        }
+        if (gen < 0)
+        {
+            throw new IllegalArgumentException("Generation number must not be a negative value");
+        }
+        numberAndGeneration = computeInternalHash(num, gen);
+        this.streamIndex = index;
+    }
+
+    /**
+     * Calculate the internal hash value: the number shifted left by 16 bits, or-ed with the low 16 bits of gen.
+     *
+     * @param num the object number
+     * @param gen the generation number
+     * @return the internal hash for the given values
+     */
+    public static final long computeInternalHash(long num, int gen)
+    {
+        return num << NUMBER_OFFSET | (gen & GENERATION_MASK);
+    }
+
+    /**
+     * Return the internal hash value which is based on the number and the generation.
+     *
+     * @return the internal hash value
+     */
+    public long getInternalHash()
+    {
+        return numberAndGeneration;
+    }
+
+    /**
+     * Two keys are equal if object number and generation match; the stream index is not compared.
+     */
+    @Override
+    public boolean equals(Object obj)
+    {
+        COSObjectKey objToBeCompared = obj instanceof COSObjectKey ? (COSObjectKey)obj : null;
+        return objToBeCompared != null
+                && objToBeCompared.numberAndGeneration == numberAndGeneration;
+    }
+
+    /**
+     * This will get the object generation number.
+     *
+     * @return The object generation number.
+     */
+    public int getGeneration()
+    {
+        return (int) (numberAndGeneration & GENERATION_MASK);
+    }
+
+    /**
+     * This will get the object number.
+     *
+     * @return The object number.
+     */
+    public long getNumber()
+    {
+        return numberAndGeneration >>> NUMBER_OFFSET;
+    }
+
+    /**
+     * The index within a compressed object stream.
+     *
+     * @return the index within a compressed object stream if applicable otherwise -1
+     */
+    public int getStreamIndex()
+    {
+        return streamIndex;
+    }
+
+    /**
+     * Hash of the combined number and generation, consistent with {@link #equals(Object)}.
+     */
+    @Override
+    public int hashCode()
+    {
+        return Long.hashCode(numberAndGeneration);
+    }
+
+    @Override
+    public String toString()
+    {
+        return getNumber() + " " + getGeneration() + " R";
+    }
+
+    @Override
+    public int compareTo(COSObjectKey other)
+    {
+        return Long.compare(numberAndGeneration, other.numberAndGeneration);
+    }
+
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java
new file mode 100644
index 00000000000..5af35433c27
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.Closeable;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.pdfbox.cos.filter.COSInputStream;
+// import org.apache.pdfbox.cos.filter.COSOutputStream;
+import org.apache.pdfbox.cos.filter.COSOutputStream;
+import org.apache.pdfbox.cos.filter.DecodeOptions;
+import org.apache.pdfbox.cos.filter.Filter;
+import org.apache.pdfbox.cos.filter.FilterFactory;
+
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.io.RandomAccessInputStream;
+import org.apache.pdfbox.io.RandomAccessOutputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
+import org.apache.pdfbox.io.RandomAccessStreamCache;
+import org.apache.pdfbox.io.RandomAccessReadView;
+
+/**
+ * This class represents a stream object in a PDF document. This is not an IO
+ * stream that can be written to and read from, but a COSStream as defined by
+ * the PDF specification which consists of a dictionary followed by zero or more
+ * bytes of data that must be decoded before use.
+ *
+ * The stream dictionary has a required "length" entry that indicates how many bytes
+ * are in the stream. This class holds a backing store which is the true IO stream
+ * for decoding or encoding the byte data.
+ *
+ * @author Ben Litchfield
+ */
+public class COSStream extends COSDictionary implements Closeable
+{
+    // backing store, in-memory or on-disk
+    private RandomAccess randomAccess;
+    // used as a temp buffer when creating a new stream
+    private RandomAccessStreamCache streamCache;
+    // indicates if the stream cache was created within this COSStream instance
+    private boolean closeStreamCache = false;
+    // true if there's an open OutputStream
+    private boolean isWriting;
+    // random access view to be read from
+    private RandomAccessReadView randomAccessReadView;
+
+    private static final Logger LOG = LogManager.getLogger(COSStream.class);
+
+    /**
+     * Creates a new stream with an empty dictionary.
+     * <p>
+     * Try to avoid using this constructor because it creates a new scratch file in memory. Instead,
+     * use {@link COSDocument#createCOSStream() document.getDocument().createCOSStream()} which will
+     * use the existing scratch file (in memory or in temp file) of the document.
+     * </p>
+     */
+    public COSStream()
+    {
+        this(null);
+    }
+
+    /**
+     * Creates a new stream with an empty dictionary. Data is stored in the given scratch file.
+     *
+     * @param streamCache Stream cache for writing stream data.
+     */
+    public COSStream(RandomAccessStreamCache streamCache)
+    {
+        setInt(COSName.LENGTH, 0);
+        this.streamCache = streamCache;
+    }
+
+    /**
+     * Creates a new stream with an empty dictionary. Data is read from the given random access view. Written data is
+     * stored in the given scratch file.
+     *
+     * @param streamCache Stream cache for writing stream data.
+     * @param randomAccessReadView source for the data to be read
+     * @throws IOException if the length of the random access view isn't available
+     */
+    public COSStream(RandomAccessStreamCache streamCache, RandomAccessReadView randomAccessReadView)
+            throws IOException
+    {
+        this(streamCache);
+        this.randomAccessReadView = randomAccessReadView;
+        setInt(COSName.LENGTH, (int) randomAccessReadView.length());
+    }
+
+    /**
+     * Throws if the random access backing store has been closed. Helpful for catching cases where
+     * a user tries to use a COSStream which has outlived its COSDocument.
+     */
+    private void checkClosed() throws IOException
+    {
+        if (randomAccess != null && randomAccess.isClosed())
+        {
+            throw new IOException("COSStream has been closed and cannot be read. " +
+                                  "Perhaps its enclosing PDDocument has been closed?");
+            // Tip for debugging: look at the destination file with an editor, you'll see an
+            // incomplete stream at the bottom.
+        }
+    }
+
+    private RandomAccessStreamCache getStreamCache() throws IOException
+    {
+        if (streamCache == null)
+        {
+            streamCache = IOUtils.createMemoryOnlyStreamCache().create();
+            closeStreamCache = true;
+        }
+        return streamCache;
+    }
+
+    /**
+     * Returns a new InputStream which reads the encoded PDF stream data. Experts only!
+     *
+     * @return InputStream containing raw, encoded PDF stream data.
+     * @throws IOException If the stream could not be read.
+     */
+    public InputStream createRawInputStream() throws IOException
+    {
+        checkClosed();
+        if (isWriting)
+        {
+            throw new IllegalStateException("Cannot read while there is an open stream writer");
+        }
+        if (randomAccess == null)
+        {
+            if (randomAccessReadView != null)
+            {
+                randomAccessReadView.seek(0);
+                return new RandomAccessInputStream( randomAccessReadView);
+            }
+            else
+            {
+                throw new IOException(
+                        "Create InputStream called without data being written before to stream.");
+            }
+        }
+        else
+        {
+            return new RandomAccessInputStream(randomAccess);
+        }
+    }
+
+    /**
+     * TODO: fix so that this is in a filter class, using this object as input.
+     *
+     * Returns a new InputStream which reads the decoded stream data.
+     *
+     * @return InputStream containing decoded stream data.
+     * @throws IOException If the stream could not be read.
+     */
+    public COSInputStream createInputStream() throws IOException
+    {
+        return createInputStream( DecodeOptions.DEFAULT);
+    }
+
+    public COSInputStream createInputStream(DecodeOptions options) throws IOException
+    {
+        InputStream input = createRawInputStream();
+        return COSInputStream.create(getFilterList(), this, input, options);
+    }
+
+    /**
+     * Returns a new RandomAccessRead which reads the decoded stream data.
+     *
+     * @return RandomAccessRead containing decoded stream data.
+     * @throws IOException If the stream could not be read.
+     */
+    public RandomAccessRead createView() throws IOException
+    {
+        List<Filter> filterList = getFilterList();
+        if (filterList.isEmpty())
+        {
+            if (randomAccess == null && randomAccessReadView != null)
+            {
+                return new RandomAccessReadView(randomAccessReadView, 0,
+                        randomAccessReadView.length());
+            }
+            else
+            {
+                return new RandomAccessReadBuffer( createRawInputStream());
+            }
+        }
+        return Filter.decode(createRawInputStream(), filterList, this, DecodeOptions.DEFAULT, null);
+    }
+
+    /**
+     * Returns a new OutputStream for writing stream data, using the current filters.
+     *
+     * @return OutputStream for un-encoded stream data.
+     * @throws IOException If the output stream could not be created.
+     */
+    public OutputStream createOutputStream() throws IOException
+    {
+        return createOutputStream(null);
+    }
+
+    /**
+     * Returns a new OutputStream for writing stream data, using the given filters.
+     *
+     * @param filters COSArray or COSName of filters to be used.
+     * @return OutputStream for un-encoded stream data.
+     * @throws IOException If the output stream could not be created.
+     */
+    public OutputStream createOutputStream(COSBase filters) throws IOException
+    {
+        checkClosed();
+        if (isWriting)
+        {
+            throw new IllegalStateException("Cannot have more than one open stream writer.");
+        }
+        // apply filters, if any
+        if (filters != null)
+        {
+            setItem(COSName.FILTER, filters);
+        }
+        if (randomAccess != null)
+            randomAccess.clear();
+        else
+            randomAccess = getStreamCache().createBuffer();
+        OutputStream randomOut = new RandomAccessOutputStream(randomAccess);
+        OutputStream cosOut = new COSOutputStream( getFilterList(), this, randomOut,
+                getStreamCache());
+        isWriting = true;
+        return new FilterOutputStream(cosOut)
+        {
+            @Override
+            public void write(byte[] b, int off, int len) throws IOException
+            {
+                this.out.write(b, off, len);
+            }
+
+            @Override
+            public void close() throws IOException
+            {
+                super.close();
+                setInt(COSName.LENGTH, (int)randomAccess.length());
+                isWriting = false;
+            }
+        };
+    }
+
+    /**
+     * Returns a new OutputStream for writing encoded PDF data. Experts only!
+     *
+     * @return OutputStream for raw PDF stream data.
+     * @throws IOException If the output stream could not be created.
+     */
+    public OutputStream createRawOutputStream() throws IOException
+    {
+        checkClosed();
+        if (isWriting)
+        {
+            throw new IllegalStateException("Cannot have more than one open stream writer.");
+        }
+        if (randomAccess != null)
+            randomAccess.clear();
+        else
+            randomAccess = getStreamCache().createBuffer();
+        OutputStream out = new RandomAccessOutputStream(randomAccess);
+        isWriting = true;
+        return new FilterOutputStream(out)
+        {
+            @Override
+            public void write(byte[] b, int off, int len) throws IOException
+            {
+                this.out.write(b, off, len);
+            }
+
+            @Override
+            public void close() throws IOException
+            {
+                super.close();
+                setInt(COSName.LENGTH, (int)randomAccess.length());
+                isWriting = false;
+            }
+        };
+    }
+
+    /**
+     * Returns the list of filters, which is empty (never null) when no filter entry is present.
+     */
+    public List<Filter> getFilterList() throws IOException
+    {
+        List<Filter> filterList;
+        COSBase filters = getFilters();
+        if (filters instanceof COSName)
+        {
+            filterList = new ArrayList<>(1);
+            filterList.add(FilterFactory.INSTANCE.getFilter((COSName)filters));
+        }
+        else if (filters instanceof COSArray)
+        {
+            COSArray filterArray = (COSArray)filters;
+            filterList = new ArrayList<>(filterArray.size());
+            for (int i = 0; i < filterArray.size(); i++)
+            {
+                COSBase base = filterArray.get(i);
+                if (!(base instanceof COSName))
+                {
+                    throw new IOException("Forbidden type in filter array: " +
+                            (base == null ? "null" : base.getClass().getName()));
+                }
+                filterList.add(FilterFactory.INSTANCE.getFilter((COSName) base));
+            }
+        }
+        else
+        {
+            filterList = new ArrayList<>();
+        }
+        return filterList;
+    }
+
+    /**
+     * Returns the length of the encoded stream.
+     *
+     * @return length in bytes
+     */
+    public long getLength()
+    {
+        if (isWriting)
+        {
+            throw new IllegalStateException("There is an open OutputStream associated with this " +
+                                            "COSStream. It must be closed before querying the " +
+                                            "length of this COSStream.");
+        }
+        return getInt(COSName.LENGTH, 0);
+    }
+
+    /**
+     * This will return the filters to apply to the byte stream.
+     * The method will return
+     * <ul>
+     * <li>null if no filters are to be applied
+     * <li>a COSName if one filter is to be applied
+     * <li>a COSArray containing COSNames if multiple filters are to be applied
+     * </ul>
+     *
+     * @return the COSBase object representing the filters
+     */
+    public COSBase getFilters()
+    {
+        return getObjectFromDictionary( COSName.FILTER);
+    }
+
+    /**
+     * Returns the contents of the stream as a PDF "text string".
+     *
+     * @return the PDF string representation of the stream content
+     */
+    public String toTextString()
+    {
+        try (InputStream input = COSInputStream.create(getFilterList(), this, createRawInputStream() ))
+        {
+            byte[] array = input.readAllBytes();
+            COSString string = new COSString( array);
+            return string.getString();
+        }
+        catch (IOException e)
+        {
+            LOG.debug("An exception occurred trying to get the content - returning empty string instead", e);
+            return "";
+        }
+    }
+
+    @Override
+    public void accept( ICOSVisitor visitor) throws IOException
+    {
+        visitor.visitFromStream(this);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * Called by PDFBox when the PDDocument is closed, this closes the stream and removes the data. You will usually not
+     * need this.
+     *
+     * @throws IOException if something went wrong when closing the stream
+     */
+    @Override
+    public void close() throws IOException
+    {
+        try
+        {
+            if (closeStreamCache && streamCache != null)
+            {
+                streamCache.close();
+                streamCache = null;
+            }
+        }
+        finally
+        {
+            try
+            {
+                // marks the scratch file pages as free
+                if (randomAccess != null)
+                {
+                    randomAccess.close();
+                    randomAccess = null;
+                }
+            }
+            finally
+            {
+                if (randomAccessReadView != null)
+                {
+                    randomAccessReadView.close();
+                    randomAccessReadView = null;
+                }
+            }
+        }
+    }
+
+    /**
+     * Indicates whether the stream contains any data or not.
+     *
+     * @return true if the stream contains any data
+     */
+    public boolean hasData()
+    {
+        return randomAccess != null || randomAccessReadView != null;
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java
new file mode 100644
index 00000000000..804213c4d63
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.pdfbox.cos.util.Hex;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * A string object, which may be a text string, a PDFDocEncoded string, ASCII string, or byte string.
+ *
+ * <p>Text strings are used for character strings that contain information intended to be
+ * human-readable, such as text annotations, bookmark names, article names, document information,
+ * and so forth.
+ *
+ * <p>PDFDocEncoded strings are used for characters that are represented in a single byte.
+ *
+ * <p>ASCII strings are used for characters that are represented in a single byte using ASCII
+ * encoding.
+ *
+ * <p>Byte strings are used for binary data represented as a series of bytes, but the encoding is
+ * not known. The bytes of the string need not represent characters.
+ *
+ * @author Ben Litchfield
+ * @author John Hewson
+ */
+public final class COSString extends COSBase
+{
+    private static final Logger LOG = LogManager.getLogger(COSString.class);
+
+    private final byte[] bytes;
+    private final boolean forceHexForm;
+
+    // legacy behaviour for old PDFParser
+    public static final boolean FORCE_PARSING =
+            Boolean.getBoolean("org.apache.pdfbox.forceParsing");
+
+    /**
+     * Creates a new PDF string from a byte array. This method can be used to read a string from
+     * an existing PDF file, or to create a new byte string.
+     *
+     * @param bytes The raw bytes of the PDF text string or byte string.
+     */
+    public COSString(byte[] bytes)
+    {
+        this(bytes, false);
+    }
+
+    /**
+     * Creates a new PDF string from a byte array. This method can be used to read a string from an existing PDF file,
+     * or to create a new byte string.
+     *
+     * @param bytes The raw bytes of the PDF text string or byte string; the array is copied.
+     * @param forceHex forces the hexadecimal presentation of the string if set to true
+     *
+     */
+    public COSString(byte[] bytes, boolean forceHex)
+    {
+        forceHexForm = forceHex;
+        this.bytes = Arrays.copyOf(bytes, bytes.length);
+    }
+
+    /**
+     * Creates a new text string from a Java String.
+     *
+     * @param text The string value of the object.
+     */
+    public COSString(String text)
+    {
+        this(text, false);
+    }
+
+    /**
+     * Creates a new text string from a Java String.
+     *
+     * @param text The string value of the object.
+     * @param forceHex forces the hexadecimal presentation of the string if set to true
+     *
+     */
+    public COSString(String text, boolean forceHex)
+    {
+        forceHexForm = forceHex;
+        // check whether the string uses only characters available in PDFDocEncoding
+        boolean isOnlyPDFDocEncoding = true;
+        for (char c : text.toCharArray())
+        {
+            if (!PDFDocEncoding.containsChar(c))
+            {
+                isOnlyPDFDocEncoding = false;
+                break;
+            }
+        }
+
+        if (isOnlyPDFDocEncoding)
+        {
+            // PDFDocEncoded string
+            bytes = PDFDocEncoding.getBytes(text);
+        }
+        else
+        {
+            // UTF-16BE encoded string with a leading byte order marker
+            byte[] data = text.getBytes(StandardCharsets.UTF_16BE);
+            bytes = new byte[data.length + 2];
+            bytes[0] = (byte) 0xFE;
+            bytes[1] = (byte) 0xFF;
+            System.arraycopy(data, 0, bytes, 2, data.length);
+        }
+    }
+
+    /**
+     * This will create a COS string from a string of hex characters.
+     *
+     * @param hex A hex string.
+     * @return A cos string with the hex characters converted to their actual bytes.
+     * @throws IOException If there is an error with the hex string.
+     */
+    public static COSString parseHex(String hex) throws IOException
+    {
+        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+        StringBuilder hexBuffer = new StringBuilder(hex.trim());
+
+        // if odd number then the last hex digit is assumed to be 0
+        if (hexBuffer.length() % 2 != 0)
+        {
+            hexBuffer.append('0');
+        }
+
+        int length = hexBuffer.length();
+        for (int i = 0; i < length; i += 2)
+        {
+            try
+            {
+                bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
+            }
+            catch (NumberFormatException e)
+            {
+                if (FORCE_PARSING)
+                {
+                    LOG.warn("Encountered a malformed hex string");
+                    bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
+                }
+                else
+                {
+                    throw new IOException("Invalid hex string: " + hex, e);
+                }
+            }
+        }
+
+        return new COSString(bytes.toByteArray());
+    }
+
+    /**
+     * Returns true if the string is to be written in hex form.
+     *
+     * @return true if the COSString is written in hex form
+     */
+    public boolean getForceHexForm()
+    {
+        return forceHexForm;
+    }
+
+    /**
+     * Returns the content of this string as a PDF text string.
+     *
+     * @return the PDF string representation of the COSString
+     */
+    public String getString()
+    {
+        // text string - BOM indicates Unicode
+        if (bytes.length >= 2)
+        {
+            if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF)
+            {
+                // UTF-16BE
+                return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16BE);
+            }
+            else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE)
+            {
+                // UTF-16LE - not in the PDF spec!
+                return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16LE);
+            }
+        }
+
+        // otherwise use PDFDocEncoding
+        return PDFDocEncoding.toString(bytes);
+    }
+
+    /**
+     * Returns the content of this string as a PDF ASCII string.
+     *
+     * @return the ASCII string representation of the COSString
+     */
+    public String getASCII()
+    {
+        // ASCII string
+        return new String(bytes, StandardCharsets.US_ASCII);
+    }
+
+    /**
+     * Returns the raw bytes of the string using a new byte array. Best used with a PDF byte string.
+     *
+     * @return a clone of the underlying byte[] representation of the COSString
+     */
+    public byte[] getBytes()
+    {
+        return Arrays.copyOf(bytes, bytes.length);
+    }
+
+    /**
+     * This will take this string and create a hex representation of the bytes that make the string.
+     *
+     * @return A hex string representing the bytes in this string.
+     */
+    public String toHexString()
+    {
+        return Hex.getString(bytes);
+    }
+
+    /**
+     * Visitor pattern double dispatch method.
+     *
+     * @param visitor The object to notify when visiting this object.
+     * @throws IOException If an error occurs while visiting this object.
+     */
+    @Override
+    public void accept(ICOSVisitor visitor) throws IOException
+    {
+        visitor.visitFromString(this);
+    }
+
+    @Override
+    public boolean equals(Object obj)
+    {
+        if (obj instanceof COSString)
+        {
+            COSString strObj = (COSString) obj;
+            return getString().equals(strObj.getString()) &&
+                    forceHexForm == strObj.forceHexForm;
+        }
+        return false;
+    }
+
+    @Override
+    public int hashCode()
+    {
+        int result = getString().hashCode(); // hash the decoded text, which is what equals() compares
+        return result + (forceHexForm ? 17 : 0);
+    }
+
+    @Override
+    public String toString()
+    {
+        return "COSString{" + getString() + "}";
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java
new file mode 100644
index 00000000000..0a425419c53
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+public interface COSUpdateInfo extends COSObjectGetter
+{
+
+    /**
+     * Get the update state for the COSWriter. This indicates whether an object is to be written
+     * when there is an incremental save.
+     *
+     * @return the update state.
+ */ + default boolean isNeedToBeUpdated() + { + return getUpdateState().isUpdated(); + } + + /** + * Set the update state of the dictionary for the COSWriter. This indicates whether an object is + * to be written when there is an incremental save. + * + * @param flag the update state. + */ + default void setNeedToBeUpdated( boolean flag ) + { + getUpdateState().update(flag); + } + + /** + * Uses this {@link COSUpdateInfo} as the base object of a new {@link COSIncrement}. + * + * @return A {@link COSIncrement} based on this {@link COSUpdateInfo}. + * @see COSIncrement + */ + default COSIncrement toIncrement() + { + return getUpdateState().toIncrement(); + } + + /** + * Returns the current {@link COSUpdateState} of this {@link COSUpdateInfo}. + * + * @return The current {@link COSUpdateState} of this {@link COSUpdateInfo}. + * @see COSUpdateState + */ + COSUpdateState getUpdateState(); + +} \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java new file mode 100644 index 00000000000..ad663f8e706 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +/** + * A {@link COSUpdateState} instance manages update states for a {@link COSUpdateInfo}. Such states are used to create + * a {@link COSIncrement} for the incremental saving of a {@link COSDocument}. + * + * @author Christian Appl + * @see COSDocumentState + * @see COSUpdateInfo + * @see COSIncrement + */ +public class COSUpdateState +{ + + /** + * The {@link COSUpdateInfo} the {@link COSUpdateState} does manage update states for. + */ + private final COSUpdateInfo updateInfo; + /** + * The {@link COSDocumentState} the {@link #updateInfo} is linked to. + */ + private COSDocumentState originDocumentState = null; + /** + * The actual update state of {@link #updateInfo}. + *

<ul>
+ * <li>{@code true}, if {@link #updateInfo} has been updated after the document completed parsing.</li>
+ * <li>{@code false}, if {@link #updateInfo} has remained unaltered since the document completed parsing.</li>
+ * </ul>
+ */ + private boolean updated = false; + + /** + * Creates a new {@link COSUpdateState} for the given {@link COSUpdateInfo}. + * + * @param updateInfo The {@link COSUpdateInfo}, that shall be managed by this {@link COSUpdateState}. + */ + public COSUpdateState(COSUpdateInfo updateInfo) + { + this.updateInfo = updateInfo; + } + + /** + *

+ * Links the given {@link COSDocumentState} to the {@link #updated} state of the managed {@link #updateInfo}.
+ *

+ *

+ * This shall also initialize {@link #updated} accordingly and will also set the same {@link COSDocumentState} for + * all possibly contained substructures. + *

+ *

+ * If {@link #originDocumentState} has already been set by a prior call to this method, this call shall not + * overwrite it. + *

+ *

+ * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are + * allowed. + *

+ *

+ * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. + *

+ * + * @param originDocumentState The {@link COSDocumentState} that shall be linked to this {@link COSUpdateState}. + * @see #originDocumentState + * @see #updated + */ + public void setOriginDocumentState(COSDocumentState originDocumentState) + { + setOriginDocumentState(originDocumentState, false); + } + + /** + *

+ * Links the given {@link COSDocumentState} to the {@link #updated} state of the managed {@link #updateInfo}.
+ *

+ *

+ * This shall also initialize {@link #updated} accordingly and will also set the same {@link COSDocumentState} for + * all possibly contained substructures. + *

+ *

+ * If {@link #originDocumentState} has already been set by a prior call to this method, this call shall not + * overwrite it. + *

+ *

+ * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are + * allowed. + *

+ *

+ * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. + *

+ *

+ * In addition to the behaviour of {@link #setOriginDocumentState(COSDocumentState)}, this shall leave + * {@link #updated} unchanged if the flag {@code dereferencing} indicates that the call is caused by dereferencing a + * {@link COSObject}. + *

+ * + * @param originDocumentState The {@link COSDocumentState} that shall be linked to this {@link COSUpdateState}. + * @param dereferencing {@code true}, if this update of the {@link COSDocumentState} is caused by + * dereferencing a {@link COSObject}. + * @see #originDocumentState + * @see #updated + */ + private void setOriginDocumentState(COSDocumentState originDocumentState, boolean dereferencing) + { + if(this.originDocumentState != null || originDocumentState == null) + { + return; + } + this.originDocumentState = originDocumentState; + if(!dereferencing) + { + update(); + } + + if(updateInfo instanceof COSDictionary) + { + COSDictionary dictionary = (COSDictionary) updateInfo; + for(COSBase entry : dictionary.getValues()) + { + if (entry instanceof COSUpdateInfo) + { + ((COSUpdateInfo) entry).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); + } + } + } + else if(updateInfo instanceof COSArray) + { + COSArray array = (COSArray) updateInfo; + for(COSBase entry : array) + { + if (entry instanceof COSUpdateInfo) + { + ((COSUpdateInfo) entry).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); + } + } + } + else if(updateInfo instanceof COSObject) + { + COSObject object = (COSObject) updateInfo; + COSBase reference; + if(object.isDereferenced() && (reference = object.getObject()) instanceof COSUpdateInfo) + { + ((COSUpdateInfo) reference).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); + } + } + } + + /** + *

+ * Returns the {@link #originDocumentState}, that is linked to the managed {@link #updateInfo}. + *

+ *

+ * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are + * allowed. + *

+ *

+ * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. + *

+ * + * @return The {@link COSDocumentState} linked to this {@link COSUpdateState}. + * @see #setOriginDocumentState(COSDocumentState) + */ + public COSDocumentState getOriginDocumentState() + { + return originDocumentState; + } + + /** + * Returns {@code true}, if the linked {@link #originDocumentState} {@link COSDocumentState#isAcceptingUpdates()} + * and such a {@link COSDocumentState} has been linked to this {@link COSUpdateState}. + * + * @return {@code true}, if the linked {@link #originDocumentState} {@link COSDocumentState#isAcceptingUpdates()} + * and such a {@link COSDocumentState} has been linked to this {@link COSUpdateState}. + * @see #originDocumentState + * @see COSDocumentState#isAcceptingUpdates() + */ + boolean isAcceptingUpdates() + { + return originDocumentState != null && originDocumentState.isAcceptingUpdates(); + } + + /** + * Returns the actual {@link #updated} state of the managed {@link #updateInfo}. + * + * @return The actual {@link #updated} state of the managed {@link #updateInfo} + * @see #updated + */ + public boolean isUpdated() + { + return updated; + } + + /** + * Calls {@link #update(boolean)} with {@code true} as the new update state.
+     * This only has an effect if {@link #isAcceptingUpdates()} returns {@code true}.
+     *
+     * @see #update(boolean)
+     * @see #updated
+     * @see #isAcceptingUpdates()
+     */
+    void update()
+    {
+        update(true);
+    }
+
+    /**
+     * Sets the {@link #updated} state of the managed {@link #updateInfo} to the given state.
+     * This only has an effect if {@link #isAcceptingUpdates()} returns {@code true}; otherwise the call is ignored.
+     *
+     * @param updated The state to set for {@link #updated}.
+     * @see #update(boolean)
+     * @see #updated
+     * @see #isAcceptingUpdates()
+     */
+    void update(boolean updated)
+    {
+        if(isAcceptingUpdates())
+        {
+            this.updated = updated;
+        }
+    }
+
+    /**
+     *

+ * Shall call {@link #update()} for this {@link COSUpdateState} and shall + * {@link #setOriginDocumentState(COSDocumentState)} for the given child, initializing its {@link #updated} state + * and {@link #originDocumentState}. + *

+ *

+ * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. + *

+ * + * @param child The child that shall also be updated. + * @see #update() + * @see #setOriginDocumentState(COSDocumentState) + */ + void update(COSBase child) + { + update(); + if(child instanceof COSUpdateInfo) + { + ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState); + } + } + + /** + *

+ * Shall call {@link #update()} for this {@link COSUpdateState} and shall + * {@link #setOriginDocumentState(COSDocumentState)} for the given children, initializing their {@link #updated} + * state and {@link #originDocumentState}. + *

+ *

+ * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. + *

+     *
+     * @param children The children that shall also be updated.
+     * @see #update()
+     * @see #setOriginDocumentState(COSDocumentState)
+     */
+    void update(COSArray children)
+    {
+        update((Iterable<COSBase>) children); // the cast selects the Iterable overload; without it this method would call itself
+    }
+
+    /**
+     *

+ * Shall call {@link #update()} for this {@link COSUpdateState} and shall + * {@link #setOriginDocumentState(COSDocumentState)} for the given children, initializing their {@link #updated} + * state and {@link #originDocumentState}. + *

+ *

+ * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. + *

+     *
+     * @param children The children that shall also be updated; if {@code null}, only this state is updated.
+     * @see #update()
+     * @see #setOriginDocumentState(COSDocumentState)
+     */
+    void update(Iterable<COSBase> children)
+    {
+        update();
+        if(children == null)
+        {
+            return;
+        }
+        for(COSBase child : children)
+        {
+            if(child instanceof COSUpdateInfo)
+            {
+                ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState);
+            }
+        }
+    }
+
+    /**
+     * This shall {@link #setOriginDocumentState(COSDocumentState, boolean)} for the dereferenced child,
+     * initializing its {@link #originDocumentState}.

+ * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo} and will never change + * the child's {@link #updated} state. + *

+ * + * @param child The child, that has been dereferenced. + * @see #setOriginDocumentState(COSDocumentState, boolean) + */ + void dereferenceChild(COSBase child) + { + if(child instanceof COSUpdateInfo) + { + ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState, true); + } + } + + /** + * Uses the managed {@link #updateInfo} as the base object of a new {@link COSIncrement}. + * + * @return A {@link COSIncrement} based on the managed {@link #updateInfo}. + * @see COSUpdateInfo + * @see COSIncrement + */ + COSIncrement toIncrement() + { + return new COSIncrement(updateInfo); + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java new file mode 100644 index 00000000000..34da698feb2 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import org.apache.pdfbox.io.RandomAccessReadView; + +import java.io.IOException; + +/** + * Presumably this is a parser interface which requires that implementing classes + * be able to dereference (read/decode) indirect (Proxy) objects. 
Note that this
+ * is just an interface declaration. The actual parser implementations will be
+ * in the pdfparser package.
+ */
+public interface ICOSParser
+{
+    /**
+     * Dereference the COSBase object which is referenced by the given COSObject.
+     *
+     * @param obj the COSObject which references the COSBase object to be dereferenced.
+     * @return the referenced object
+     * @throws IOException if something went wrong when dereferencing the COSBase object
+     */
+    COSBase dereferenceCOSObject( COSObject obj ) throws IOException;
+
+    /**
+     * Creates a {@link RandomAccessReadView} starting at the given position with the given length.
+     *
+     * @param startPosition start position within the underlying random access read
+     * @param streamLength stream length
+     * @return the random access read view
+     * @throws IOException if something went wrong when creating the view for the RandomAccessRead
+     */
+    RandomAccessReadView createRandomAccessReadView( long startPosition, long streamLength )
+            throws IOException;
+
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java
new file mode 100644
index 00000000000..142110322d8
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.IOException; + +/** + * An interface for visiting a PDF document at the type (COS) level. + * + * @author Michael Traut + */ +public interface ICOSVisitor +{ + /** + * Notification of visit to Array object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromArray( COSArray obj ) throws IOException; + + /** + * Notification of visit to boolean object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromBoolean( COSBoolean obj ) throws IOException; + + /** + * Notification of visit to dictionary object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromDictionary( COSDictionary obj ) throws IOException; + + /** + * Notification of visit to document object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromDocument( COSDocument obj ) throws IOException; + + /** + * Notification of visit to float object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromFloat( COSFloat obj ) throws IOException; + + /** + * Notification of visit to integer object. + * + * @param obj The Object that is being visited. 
+ * @throws IOException If there is an error while visiting this object. + */ + void visitFromInt( COSInteger obj ) throws IOException; + + /** + * Notification of visit to name object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromName( COSName obj ) throws IOException; + + /** + * Notification of visit to null object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromNull( COSNull obj ) throws IOException; + + /** + * Notification of visit to stream object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromStream( COSStream obj ) throws IOException; + + /** + * Notification of visit to string object. + * + * @param obj The Object that is being visited. + * @throws IOException If there is an error while visiting this object. + */ + void visitFromString( COSString obj ) throws IOException; +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java new file mode 100644 index 00000000000..8885550ceca --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.util.HashMap; +import java.util.Map; + +/** + * The "PDFDocEncoding" encoding. PDFDocEncoding is a predefined text encoding + * unique to PDF. It supports a superset of the ISO Latin 1 character set which + * happens, as Adobe’s PDF Reference 1.2 puts it, to be “compatible with Unicode + * in that all Unicode codes less than 256 match PDFDocEncoding.” Note that + * this is *not* a Type 1 font encoding, it is used only within PDF "text strings". + */ +final class PDFDocEncoding +{ + private static final char REPLACEMENT_CHARACTER = '\uFFFD'; + + private static final int[] CODE_TO_UNI; + private static final Map UNI_TO_CODE; + + static + { + CODE_TO_UNI = new int[256]; + UNI_TO_CODE = new HashMap<>(256); + + // initialize with basically ISO-8859-1 + for (int i = 0; i < 256; i++) + { + // skip entries not in Unicode column + if (i > 0x17 && i < 0x20) + { + continue; + } + if (i > 0x7E && i < 0xA1) + { + continue; + } + if (i == 0xAD) + { + continue; + } + + set(i, (char)i); + } + + // then do all deviations (based on the table in ISO 32000-1:2008) + // block 1 + set(0x18, '\u02D8'); // BREVE + set(0x19, '\u02C7'); // CARON + set(0x1A, '\u02C6'); // MODIFIER LETTER CIRCUMFLEX ACCENT + set(0x1B, '\u02D9'); // DOT ABOVE + set(0x1C, '\u02DD'); // DOUBLE ACUTE ACCENT + set(0x1D, '\u02DB'); // OGONEK + set(0x1E, '\u02DA'); // RING ABOVE + set(0x1F, '\u02DC'); // SMALL TILDE + // block 2 + set(0x7F, REPLACEMENT_CHARACTER); // undefined + set(0x80, '\u2022'); // BULLET + 
set(0x81, '\u2020'); // DAGGER + set(0x82, '\u2021'); // DOUBLE DAGGER + set(0x83, '\u2026'); // HORIZONTAL ELLIPSIS + set(0x84, '\u2014'); // EM DASH + set(0x85, '\u2013'); // EN DASH + set(0x86, '\u0192'); // LATIN SMALL LETTER SCRIPT F + set(0x87, '\u2044'); // FRACTION SLASH (solidus) + set(0x88, '\u2039'); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK + set(0x89, '\u203A'); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + set(0x8A, '\u2212'); // MINUS SIGN + set(0x8B, '\u2030'); // PER MILLE SIGN + set(0x8C, '\u201E'); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase) + set(0x8D, '\u201C'); // LEFT DOUBLE QUOTATION MARK (quotedblleft) + set(0x8E, '\u201D'); // RIGHT DOUBLE QUOTATION MARK (quotedblright) + set(0x8F, '\u2018'); // LEFT SINGLE QUOTATION MARK (quoteleft) + set(0x90, '\u2019'); // RIGHT SINGLE QUOTATION MARK (quoteright) + set(0x91, '\u201A'); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase) + set(0x92, '\u2122'); // TRADE MARK SIGN + set(0x93, '\uFB01'); // LATIN SMALL LIGATURE FI + set(0x94, '\uFB02'); // LATIN SMALL LIGATURE FL + set(0x95, '\u0141'); // LATIN CAPITAL LETTER L WITH STROKE + set(0x96, '\u0152'); // LATIN CAPITAL LIGATURE OE + set(0x97, '\u0160'); // LATIN CAPITAL LETTER S WITH CARON + set(0x98, '\u0178'); // LATIN CAPITAL LETTER Y WITH DIAERESIS + set(0x99, '\u017D'); // LATIN CAPITAL LETTER Z WITH CARON + set(0x9A, '\u0131'); // LATIN SMALL LETTER DOTLESS I + set(0x9B, '\u0142'); // LATIN SMALL LETTER L WITH STROKE + set(0x9C, '\u0153'); // LATIN SMALL LIGATURE OE + set(0x9D, '\u0161'); // LATIN SMALL LETTER S WITH CARON + set(0x9E, '\u017E'); // LATIN SMALL LETTER Z WITH CARON + set(0x9F, REPLACEMENT_CHARACTER); // undefined + set(0xA0, '\u20AC'); // EURO SIGN + // end of deviations + } + + private PDFDocEncoding() + { + } + + private static void set(int code, char unicode) + { + CODE_TO_UNI[code] = unicode; + UNI_TO_CODE.put(unicode, code); + } + + /** + * Returns the string representation of the given PDFDocEncoded bytes. 
+ */ + public static String toString(byte[] bytes) + { + StringBuilder sb = new StringBuilder(bytes.length); + for (byte b : bytes) + { + if ((b & 0xff) >= CODE_TO_UNI.length) + { + sb.append('?'); + } + else + { + sb.append((char)CODE_TO_UNI[b & 0xff]); + } + } + return sb.toString(); + } + + /** + * Returns the given string encoded with PDFDocEncoding. + */ + public static byte[] getBytes(String text) + { + ByteArrayOutputStream out = new ByteArrayOutputStream(text.length()); + for (char c : text.toCharArray()) + { + out.write(UNI_TO_CODE.getOrDefault(c, 0)); + } + return out.toByteArray(); + } + + /** + * Returns true if the given character is available in PDFDocEncoding. + * + * @param character UTF-16 character + */ + public static boolean containsChar(char character) + { + return UNI_TO_CODE.containsKey(character); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java new file mode 100644 index 00000000000..465a07e61b0 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.pdfbox.cos;
+
+import java.util.Collections;
+
+/**
+ * An unmodifiable COSDictionary.
+ *
+ * @author John Hewson
+ */
+final class UnmodifiableCOSDictionary extends COSDictionary
+{
+    /**
+     * Creates a read-only view of {@code dict}: its item map is wrapped by {@link Collections#unmodifiableMap}, not copied.
+     */
+    UnmodifiableCOSDictionary(COSDictionary dict)
+    {
+        super();
+        items = Collections.unmodifiableMap(dict.items);
+    }
+
+    /**
+     * Always throws {@link UnsupportedOperationException}; the update flag of this dictionary cannot be changed.
+     */
+    @Override
+    public void setNeedToBeUpdated(boolean flag)
+    {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java
new file mode 100644
index 00000000000..b53ac45702b
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import org.apache.pdfbox.cos.COSDictionary;
+
+/**
+ * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
+ * @author Ben Litchfield + */ +final class ASCII85Filter extends Filter +{ + @Override + public DecodeResult decode( InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + try (ASCII85InputStream is = new ASCII85InputStream(encoded)) + { + is.transferTo(decoded); + } + decoded.flush(); + return new DecodeResult(parameters); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + try (ASCII85OutputStream os = new ASCII85OutputStream(encoded)) + { + input.transferTo(os); + } + encoded.flush(); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java new file mode 100644 index 00000000000..cfd75c9ee95 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * This class represents an ASCII85 stream. 
+ * + * @author Ben Litchfield + * + */ +final class ASCII85InputStream extends FilterInputStream +{ + private int index; + private int n; + private boolean eof; + + private byte[] ascii; + private byte[] b; + + private static final char TERMINATOR = '~'; + private static final char OFFSET = '!'; + private static final char NEWLINE = '\n'; + private static final char RETURN = '\r'; + private static final char SPACE = ' '; + private static final char PADDING_U = 'u'; + private static final char Z = 'z'; + + /** + * Constructor. + * + * @param is The input stream to actually read from. + */ + ASCII85InputStream(InputStream is) + { + super(is); + index = 0; + n = 0; + eof = false; + ascii = new byte[5]; + b = new byte[4]; + } + + /** + * This will read the next byte from the stream. + * + * @return The next byte read from the stream. + * + * @throws IOException If there is an error reading from the wrapped stream. + */ + @Override + public int read() throws IOException + { + if (index >= n) + { + if (eof) + { + return -1; + } + index = 0; + int k; + byte z; + do + { + int zz = (byte) in.read(); + if (zz == -1) + { + eof = true; + return -1; + } + z = (byte) zz; + } while (z == NEWLINE || z == RETURN || z == SPACE); + + if (z == TERMINATOR) + { + eof = true; + ascii = b = null; + n = 0; + return -1; + } + else if (z == Z) + { + b[0] = b[1] = b[2] = b[3] = 0; + n = 4; + } + else + { + ascii[0] = z; // may be EOF here.... 
+ for (k = 1; k < 5; ++k) + { + do + { + int zz = (byte) in.read(); + if (zz == -1) + { + eof = true; + return -1; + } + z = (byte) zz; + } while (z == NEWLINE || z == RETURN || z == SPACE); + ascii[k] = z; + if (z == TERMINATOR) + { + // don't include ~ as padding byte + ascii[k] = (byte) PADDING_U; + break; + } + } + n = k - 1; + if (n == 0) + { + eof = true; + ascii = null; + b = null; + return -1; + } + if (k < 5) + { + for (++k; k < 5; ++k) + { + // use 'u' for padding + ascii[k] = (byte) PADDING_U; + } + eof = true; + } + // decode stream + long t = 0; + for (k = 0; k < 5; ++k) + { + z = (byte) (ascii[k] - OFFSET); + if (z < 0 || z > 93) + { + n = 0; + eof = true; + ascii = null; + b = null; + throw new IOException("Invalid data in Ascii85 stream"); + } + t = (t * 85L) + z; + } + for (k = 3; k >= 0; --k) + { + b[k] = (byte) (t & 0xFFL); + t >>>= 8; + } + } + } + return b[index++] & 0xFF; + } + + /** + * This will read a chunk of data. + * + * @param data The buffer to write data to. + * @param offset The offset into the data stream. + * @param len The number of byte to attempt to read. + * + * @return The number of bytes actually read. + * + * @throws IOException If there is an error reading data from the underlying stream. + */ + @Override + public int read(byte[] data, int offset, int len) throws IOException + { + if (eof && index >= n) + { + return -1; + } + for (int i = 0; i < len; i++) + { + if (index < n) + { + data[i + offset] = b[index++]; + } + else + { + int t = read(); + if (t == -1) + { + return i; + } + data[i + offset] = (byte) t; + } + } + return len; + } + + /** + * This will close the underlying stream and release any resources. + * + * @throws IOException If there is an error closing the underlying stream. + */ + @Override + public void close() throws IOException + { + ascii = null; + eof = true; + b = null; + super.close(); + } + + /** + * non supported interface methods. + * + * @return False always. 
+ */ + @Override + public boolean markSupported() + { + return false; + } + + /** + * Unsupported. + * + * @param nValue ignored. + * + * @return Always zero. + */ + @Override + public long skip(long nValue) + { + return 0; + } + + /** + * Unsupported. + * + * @return Always zero. + */ + @Override + public int available() + { + return 0; + } + + /** + * Unsupported. + * + * @param readlimit ignored. + */ + @Override + public synchronized void mark(int readlimit) + { + } + + /** + * Unsupported. + * + * @throws IOException telling that this is an unsupported action. + */ + @Override + public synchronized void reset() throws IOException + { + throw new IOException("Reset is not supported"); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java new file mode 100644 index 00000000000..6b441bc73fe --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * This class represents an ASCII85 output stream.
+ *
+ * Bytes are collected in groups of four; each full group is written as five printable
+ * characters (or as a single 'z' when all four bytes are zero). The end-of-data marker is
+ * written by {@link #flush()}, so flushing ends the encoded data.
+ *
+ * @author Ben Litchfield
+ *
+ */
+final class ASCII85OutputStream extends FilterOutputStream
+{
+
+    // number of characters that may still be written before a newline is inserted
+    private int lineBreak;
+    // number of input bytes currently buffered in indata
+    private int count;
+
+    // pending group of up to four input bytes
+    private byte[] indata;
+    // encoded form of indata; a 0 entry marks the end of a shortened ('z') group
+    private byte[] outdata;
+
+    // configured line length
+    private int maxline;
+    // true while there is nothing to flush (fresh stream or end marker already written)
+    private boolean flushed;
+    private char terminator;
+    private static final char OFFSET = '!';
+    private static final char NEWLINE = '\n';
+    private static final char Z = 'z';
+
+    /**
+     * Constructor.
+     *
+     * @param out The output stream to write to.
+     */
+    ASCII85OutputStream(OutputStream out)
+    {
+        super(out);
+        lineBreak = 36 * 2;
+        maxline = 36 * 2;
+        count = 0;
+        indata = new byte[4];
+        outdata = new byte[5];
+        flushed = true;
+        terminator = '~';
+    }
+
+    /**
+     * This will set the terminating character.
+     *
+     * @param term The terminating character.
+     *
+     * @throws IllegalArgumentException if term is outside 118-126 or is 'z'.
+     */
+    public void setTerminator(char term)
+    {
+        if (term < 118 || term > 126 || term == Z)
+        {
+            throw new IllegalArgumentException("Terminator must be 118-126 excluding z");
+        }
+        terminator = term;
+    }
+
+    /**
+     * This will get the terminating character.
+     *
+     * @return The terminating character.
+     */
+    public char getTerminator()
+    {
+        return terminator;
+    }
+
+    /**
+     * This will set the line length that will be used.
+     *
+     * If the current line has more room left than the new length allows, the remaining
+     * room is reduced to the new length.
+     *
+     * @param l The length of the line to use.
+     */
+    public void setLineLength(int l)
+    {
+        if (lineBreak > l)
+        {
+            lineBreak = l;
+        }
+        maxline = l;
+    }
+
+    /**
+     * This will get the length of the line.
+     *
+     * @return The line length attribute.
+     */
+    public int getLineLength()
+    {
+        return maxline;
+    }
+
+    /**
+     * Produces five ASCII printing characters in {@code outdata} from the four bytes of
+     * binary data in {@code indata}.
+     *
+     * An all-zero group is encoded as 'z' followed by a 0 entry that tells the caller to
+     * stop after the first character.
+     */
+    private void transformASCII85()
+    {
+        // assemble the big-endian 32 bit word and strip the sign extension
+        long word = ((((indata[0] << 8) | (indata[1] & 0xFF)) << 16) | ((indata[2] & 0xFF) << 8) | (indata[3] & 0xFF)) & 0xFFFFFFFFL;
+
+        if (word == 0)
+        {
+            outdata[0] = (byte) Z;
+            outdata[1] = 0;
+            return;
+        }
+        long x;
+        x = word / (85L * 85L * 85L * 85L);
+        outdata[0] = (byte) (x + OFFSET);
+        word -= x * 85L * 85L * 85L * 85L;
+
+        x = word / (85L * 85L * 85L);
+        outdata[1] = (byte) (x + OFFSET);
+        word -= x * 85L * 85L * 85L;
+
+        x = word / (85L * 85L);
+        outdata[2] = (byte) (x + OFFSET);
+        word -= x * 85L * 85L;
+
+        x = word / 85L;
+        outdata[3] = (byte) (x + OFFSET);
+
+        outdata[4] = (byte) ((word % 85L) + OFFSET);
+    }
+
+    /**
+     * This will write a single byte.
+     *
+     * The byte is buffered; encoded characters are written once four bytes are available.
+     *
+     * @param b The byte to write.
+     *
+     * @throws IOException If there is an error writing to the stream.
+     */
+    @Override
+    public void write(int b) throws IOException
+    {
+        flushed = false;
+        indata[count++] = (byte) b;
+        if (count < 4)
+        {
+            return;
+        }
+        transformASCII85();
+        for (int i = 0; i < 5; i++)
+        {
+            if (outdata[i] == 0)
+            {
+                // end of a shortened 'z' group
+                break;
+            }
+            out.write(outdata[i]);
+            if (--lineBreak == 0)
+            {
+                out.write(NEWLINE);
+                lineBreak = maxline;
+            }
+        }
+        count = 0;
+    }
+
+    /**
+     * This will encode any partially filled group, write the end-of-data marker (the
+     * terminator followed by '&gt;') and a newline, and flush the wrapped stream.
+     *
+     * Nothing is written if no byte was written since construction or since the last flush.
+     *
+     * @throws IOException If there is an error writing the data to the stream.
+     */
+    @Override
+    public void flush() throws IOException
+    {
+        if (flushed)
+        {
+            return;
+        }
+        if (count > 0)
+        {
+            // pad the partial group with zero bytes
+            for (int i = count; i < 4; i++)
+            {
+                indata[i] = 0;
+            }
+            transformASCII85();
+            if (outdata[0] == Z)
+            {
+                for (int i = 0; i < 5; i++) // expand 'z',
+                {
+                    outdata[i] = (byte) OFFSET;
+                }
+            }
+            // a partial group of count bytes is written as count + 1 characters
+            for (int i = 0; i < count + 1; i++)
+            {
+                out.write(outdata[i]);
+                if (--lineBreak == 0)
+                {
+                    out.write(NEWLINE);
+                    lineBreak = maxline;
+                }
+            }
+        }
+        if (--lineBreak == 0)
+        {
+            out.write(NEWLINE);
+        }
+        out.write(terminator);
+        out.write('>');
+        out.write(NEWLINE);
+        count = 0;
+        lineBreak = maxline;
+        flushed = true;
+        super.flush();
+    }
+
+    /**
+     * This will flush pending data and close the stream.
+     *
+     * The wrapped stream is closed even if flushing fails, so that it is not leaked.
+     *
+     * @throws IOException If there is an error flushing or closing the wrapped stream.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        try
+        {
+            flush();
+        }
+        finally
+        {
+            // super.close() calls flush() again; mark the stream as flushed so that a
+            // failed flush is not retried on the way out
+            flushed = true;
+            try
+            {
+                super.close();
+            }
+            finally
+            {
+                indata = null;
+                outdata = null;
+            }
+        }
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java
new file mode 100644
index 00000000000..b0c5fbe8681
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.util.Hex;
+
+/**
+ * Decodes data encoded in an ASCII hexadecimal form, reproducing the original binary data.
+ *
+ * @author Ben Litchfield
+ */
+final class ASCIIHexFilter extends Filter
+{
+    private static final Logger LOG = LogManager.getLogger(ASCIIHexFilter.class);
+
+    // maps a byte value (0-255) to the value of the hex digit it represents, or -1
+    private static final int[] REVERSE_HEX = {
+        /*   0 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  10 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  20 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  30 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  40 */  -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,
+        /*  50 */   2,  3,  4,  5,  6,  7,  8,  9, -1, -1,
+        /*  60 */  -1, -1, -1, -1, -1, 10, 11, 12, 13, 14,
+        /*  70 */  15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  80 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /*  90 */  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
+        /* 100 */  13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
+        /* 110 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 120 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 130 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 140 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 150 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 160 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 170 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 180 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 190 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 200 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 210 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 220 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 230 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 240 */  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        /* 250 */  -1, -1, -1, -1, -1, -1
+    };
+
+    /**
+     * Decodes pairs of hexadecimal digits into bytes until the end of the input or the
+     * end-of-data marker '&gt;' is reached.
+     *
+     * Whitespace is ignored wherever it occurs, including between the two digits of a
+     * pair. If the data ends after the first digit of a pair, the missing second digit is
+     * treated as 0. Characters that are not hex digits are logged as errors.
+     *
+     * @param encoded the hex encoded input.
+     * @param decoded the stream the decoded bytes are written to.
+     * @param parameters the stream dictionary, passed on to the result.
+     * @param index the index of this filter in the filter chain (not used here).
+     *
+     * @return the decode result wrapping {@code parameters}.
+     *
+     * @throws IOException if reading or writing fails.
+     */
+    @Override
+    public DecodeResult decode(InputStream encoded, OutputStream decoded,
+                                         COSDictionary parameters, int index) throws IOException
+    {
+        int value, firstByte, secondByte;
+        while ((firstByte = readNonWhitespace(encoded)) != -1 && !isEOD(firstByte))
+        {
+            if (REVERSE_HEX[firstByte] == -1)
+            {
+                LOG.error("Invalid hex, int: {} char: {}", firstByte, (char) firstByte);
+            }
+            value = REVERSE_HEX[firstByte] * 16;
+            // whitespace may also separate the two digits of a pair, e.g. at a line break
+            secondByte = readNonWhitespace(encoded);
+
+            if (secondByte == -1 || isEOD(secondByte))
+            {
+                // second value behaves like 0 in case of EOD
+                decoded.write(value);
+                break;
+            }
+            if (REVERSE_HEX[secondByte] == -1)
+            {
+                LOG.error("Invalid hex, int: {} char: {}", secondByte, (char) secondByte);
+            }
+            value += REVERSE_HEX[secondByte];
+            decoded.write(value);
+        }
+        decoded.flush();
+        return new DecodeResult(parameters);
+    }
+
+    /**
+     * Reads the next byte that is not whitespace.
+     *
+     * @param input the stream to read from.
+     *
+     * @return the next non-whitespace byte, or -1 at the end of the stream.
+     *
+     * @throws IOException if reading fails.
+     */
+    private static int readNonWhitespace(InputStream input) throws IOException
+    {
+        int c = input.read();
+        while (isWhitespace(c))
+        {
+            c = input.read();
+        }
+        return c;
+    }
+
+    // whitespace
+    //   0  0x00  Null (NUL)
+    //   9  0x09  Tab (HT)
+    //  10  0x0A  Line feed (LF)
+    //  12  0x0C  Form feed (FF)
+    //  13  0x0D  Carriage return (CR)
+    //  32  0x20  Space (SP)
+    private static boolean isWhitespace(int c)
+    {
+        switch (c)
+        {
+            case 0:
+            case 9:
+            case 10:
+            case 12:
+            case 13:
+            case 32:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    // the end-of-data marker of the ASCII hex encoding
+    private static boolean isEOD(int c)
+    {
+        return c == '>';
+    }
+
+    /**
+     * Writes every input byte as two hexadecimal digits.
+     *
+     * @param input the raw input.
+     * @param encoded the stream the hex digits are written to.
+     * @param parameters the stream dictionary (not used here).
+     *
+     * @throws IOException if reading or writing fails.
+     */
+    @Override
+    public void encode(InputStream input, OutputStream encoded, COSDictionary parameters)
+        throws IOException
+    {
+        int byteRead;
+        while ((byteRead = input.read()) != -1)
+        {
+            Hex.writeHexByte((byte)byteRead, encoded);
+        }
+        encoded.flush();
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java
new file mode 100644
index 00000000000..e07ab3d7f03
--- /dev/null
+++
b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java
@@ -0,0 +1,813 @@
+/*
+ * Copyright (c) 2012, Harald Kuhr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.apache.pdfbox.cos.filter;
+
+
+import java.io.EOFException;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+/**
+ * CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression.
+ *
+ * The stream decodes one row at a time into an internal buffer of packed 1-bit pixels
+ * (white = 0 bit, black = 1 bit) and serves reads from that buffer.
+ *
+ * @author Harald Kuhr
+ * @author Oliver Schmidtmer
+ * @author last modified by $Author: haraldk$
+ * @version $Id: CCITTFaxDecoderStream.java,v 1.0 23.05.12 15:55 haraldk Exp$
+ *
+ * Taken from commit 24c6682236e5a02151359486aa4075ddc5ab1534 of 18.08.2018 from twelvemonkeys/imageio/plugins/tiff/CCITTFaxDecoderStream.java
+ *
+ * Initial changes for PDFBox, discussed in PDFBOX-3338:
+ * - removed Validate() usages
+ * - catch VALUE_EOL in decode1D()
+ */
+final class CCITTFaxDecoderStream extends FilterInputStream {
+    // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43.
+
+    private final int columns;
+    // the current decoded row, one bit per column
+    private final byte[] decodedRow;
+
+    private final boolean optionG32D;
+    // Leading zeros for aligning EOL
+    private final boolean optionG3Fill;
+    private final boolean optionUncompressed;
+    private final boolean optionByteAligned;
+
+    private final int type;
+
+    // number of valid bytes in decodedRow; -1 once the end of the input was reached
+    private int decodedLength;
+    // read position inside decodedRow
+    private int decodedPos;
+
+    // column indexes at which the colour changes, for the previous and the current row
+    private int[] changesReferenceRow;
+    private int[] changesCurrentRow;
+    private int changesReferenceRowCount;
+    private int changesCurrentRowCount;
+
+    // search start hint for getNextChangingElement, reset for every row
+    private int lastChangingElement = 0;
+
+    /**
+     * Creates a CCITTFaxDecoderStream.
+     * This constructor may be used for CCITT streams embedded in PDF files,
+     * which use EncodedByteAlign.
+     *
+     * @param stream the compressed CCITT stream.
+     * @param columns the number of columns in the stream.
+     * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE},
+     *        {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}.
+     * @param options CCITT T.4 or T.6 options.
+     * @param byteAligned enable byte alignment used in PDF files (EncodedByteAlign).
+     *
+     * @throws IllegalArgumentException if {@code type} is none of the three supported values.
+     */
+    public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type,
+                                 final long options, final boolean byteAligned) {
+        super(stream);
+
+        this.columns = columns;
+        this.type = type;
+
+        // We know this is only used for b/w (1 bit)
+        decodedRow = new byte[(columns + 7) / 8];
+        changesReferenceRow = new int[columns + 2];
+        changesCurrentRow = new int[columns + 2];
+
+        switch (type) {
+            case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE:
+                optionByteAligned = byteAligned;
+                optionG32D = false;
+                optionG3Fill = false;
+                optionUncompressed = false;
+                break;
+            case TIFFExtension.COMPRESSION_CCITT_T4:
+                optionByteAligned = byteAligned;
+                optionG32D = (options & TIFFExtension.GROUP3OPT_2DENCODING) != 0;
+                optionG3Fill = (options & TIFFExtension.GROUP3OPT_FILLBITS) != 0;
+                optionUncompressed = (options & TIFFExtension.GROUP3OPT_UNCOMPRESSED) != 0;
+                break;
+            case TIFFExtension.COMPRESSION_CCITT_T6:
+                optionByteAligned = byteAligned;
+                optionG32D = false;
+                optionG3Fill = false;
+                optionUncompressed = (options & TIFFExtension.GROUP4OPT_UNCOMPRESSED) != 0;
+                break;
+            default:
+                throw new IllegalArgumentException("Illegal parameter: " + type);
+        }
+
+    }
+
+    /**
+     * Decodes the next row into {@code decodedRow} once the current row is used up.
+     *
+     * An ArrayIndexOutOfBoundsException raised while decoding is reported as an
+     * IOException. An EOFException raised before any data of the row was produced marks the
+     * end of the stream by setting {@code decodedLength} to -1; otherwise it is rethrown.
+     */
+    private void fetch() throws IOException {
+        if (decodedPos >= decodedLength) {
+            decodedLength = 0;
+
+            try {
+                decodeRow();
+            }
+            catch (ArrayIndexOutOfBoundsException e) {
+                // Mask the AIOOBE as an IOException
+                throw new IOException("Malformed CCITT stream", e);
+            }
+            catch (EOFException e) {
+                // TODO: Rewrite to avoid throw/catch for normal flow...
+                if (decodedLength != 0) {
+                    throw e;
+                }
+
+                // ..otherwise, just let client code try to read past the
+                // end of stream
+                decodedLength = -1;
+            }
+
+            decodedPos = 0;
+        }
+    }
+
+    /**
+     * Decodes one row as a sequence of alternating runs, starting with white, and records
+     * the column index reached after each run in {@code changesCurrentRow}.
+     */
+    private void decode1D() throws IOException {
+        int index = 0;
+        boolean white = true;
+        changesCurrentRowCount = 0;
+
+        do {
+            int completeRun;
+
+            if (white) {
+                completeRun = decodeRun(whiteRunTree);
+            }
+            else {
+                completeRun = decodeRun(blackRunTree);
+            }
+
+            index += completeRun;
+            changesCurrentRow[changesCurrentRowCount++] = index;
+
+            // Flip color for next run
+            white = !white;
+        } while (index < columns);
+    }
+
+    /**
+     * Decodes one row relative to the previous row: the former current row becomes the
+     * reference row, then mode codes from {@code codeTree} (horizontal, pass or vertical)
+     * are read until the row is complete.
+     */
+    private void decode2D() throws IOException {
+        changesReferenceRowCount = changesCurrentRowCount;
+        int[] tmp = changesCurrentRow;
+        changesCurrentRow = changesReferenceRow;
+        changesReferenceRow = tmp;
+
+        boolean white = true;
+        int index = 0;
+        changesCurrentRowCount = 0;
+
+        mode: while (index < columns) {
+            // read mode
+            Node n = codeTree.root;
+
+            while (true) {
+                n = n.walk(readBit());
+
+                if (n == null) {
+                    // unknown code: start over with the next bit
+                    continue mode;
+                }
+                else if (n.isLeaf) {
+                    switch (n.value) {
+                        case VALUE_HMODE:
+                            // horizontal mode: two explicit runs, colour is unchanged afterwards
+                            int runLength;
+                            runLength = decodeRun(white ? whiteRunTree : blackRunTree);
+                            index += runLength;
+                            changesCurrentRow[changesCurrentRowCount++] = index;
+
+                            runLength = decodeRun(white ? blackRunTree : whiteRunTree);
+                            index += runLength;
+                            changesCurrentRow[changesCurrentRowCount++] = index;
+                            break;
+
+                        case VALUE_PASSMODE:
+                            // pass mode: move the position forward without recording a change
+                            int pChangingElement = getNextChangingElement(index, white) + 1;
+
+                            if (pChangingElement >= changesReferenceRowCount) {
+                                index = columns;
+                            }
+                            else {
+                                index = changesReferenceRow[pChangingElement];
+                            }
+
+                            break;
+
+                        default:
+                            // Vertical mode (-3 to 3)
+                            int vChangingElement = getNextChangingElement(index, white);
+
+                            if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) {
+                                index = columns + n.value;
+                            }
+                            else {
+                                index = changesReferenceRow[vChangingElement] + n.value;
+                            }
+
+                            changesCurrentRow[changesCurrentRowCount] = index;
+                            changesCurrentRowCount++;
+                            white = !white;
+
+                            break;
+                    }
+
+                    continue mode;
+                }
+            }
+        }
+    }
+
+    /**
+     * Finds, in the reference row, the index of the first change of matching parity (even
+     * entries for white, odd entries for black) that lies to the right of {@code a0}.
+     *
+     * The search starts shortly before the last element found for this row.
+     *
+     * @param a0 the current column position.
+     * @param white whether the current colour is white.
+     * @return the index into {@code changesReferenceRow}, the start index if {@code a0} is 0,
+     *         or -1 if no such element exists.
+     */
+    private int getNextChangingElement(final int a0, final boolean white) {
+        int start = (lastChangingElement & 0xFFFF_FFFE) + (white ? 0 : 1);
+        if (start > 2) {
+            start -= 2;
+        }
+
+        if (a0 == 0) {
+            return start;
+        }
+
+        for (int i = start; i < changesReferenceRowCount; i += 2) {
+            if (a0 < changesReferenceRow[i]) {
+                lastChangingElement = i;
+                return i;
+            }
+        }
+
+        return -1;
+    }
+
+    // Modified Huffman RLE row: optionally byte aligned, always 1D
+    private void decodeRowType2() throws IOException {
+        if (optionByteAligned) {
+            resetBuffer();
+        }
+        decode1D();
+    }
+
+    // T.4 row: skips to the next EOL code, then decodes 1D or 2D; with 2D encoding
+    // enabled, the bit read after the EOL selects 1D (set) or 2D (not set)
+    private void decodeRowType4() throws IOException {
+        if (optionByteAligned) {
+            resetBuffer();
+        }
+        eof: while (true) {
+            // read till next EOL code
+            Node n = eolOnlyTree.root;
+
+            while (true) {
+                n = n.walk(readBit());
+
+                if (n == null) {
+                    continue eof;
+                }
+
+                if (n.isLeaf) {
+                    break eof;
+                }
+            }
+        }
+
+        if (!optionG32D || readBit()) {
+            decode1D();
+        }
+        else {
+            decode2D();
+        }
+    }
+
+    // T.6 row: optionally byte aligned, always 2D
+    private void decodeRowType6() throws IOException {
+        if (optionByteAligned) {
+            resetBuffer();
+        }
+        decode2D();
+    }
+
+    /**
+     * Decodes the next row according to the stream type and expands the recorded colour
+     * changes into packed bits in {@code decodedRow} (white = 0 bit, black = 1 bit).
+     *
+     * @throws IOException if the runs of the row do not add up to the row width.
+     */
+    private void decodeRow() throws IOException {
+        switch (type) {
+            case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE:
+                decodeRowType2();
+                break;
+            case TIFFExtension.COMPRESSION_CCITT_T4:
+                decodeRowType4();
+                break;
+            case TIFFExtension.COMPRESSION_CCITT_T6:
+                decodeRowType6();
+                break;
+            default:
+                throw new IllegalArgumentException("Illegal parameter: " + type);
+        }
+
+        int index = 0;
+        boolean white = true;
+
+        lastChangingElement = 0;
+        // one extra iteration fills the remainder of the row after the last change
+        for (int i = 0; i <= changesCurrentRowCount; i++) {
+            int nextChange = columns;
+
+            if (i != changesCurrentRowCount) {
+                nextChange = changesCurrentRow[i];
+            }
+
+            if (nextChange > columns) {
+                nextChange = columns;
+            }
+
+            int byteIndex = index / 8;
+
+            // finish the partially filled byte bit by bit
+            while (index % 8 != 0 && (nextChange - index) > 0) {
+                decodedRow[byteIndex] |= (white ? 0 : 1 << (7 - ((index) % 8)));
+                index++;
+            }
+
+            // write whole bytes at once
+            if (index % 8 == 0) {
+                byteIndex = index / 8;
+                final byte value = (byte) (white ? 0x00 : 0xff);
+
+                while ((nextChange - index) > 7) {
+                    decodedRow[byteIndex] = value;
+                    index += 8;
+                    ++byteIndex;
+                }
+            }
+
+            // remaining bits of the run; a byte is cleared when its first bit is reached
+            while ((nextChange - index) > 0) {
+                if (index % 8 == 0) {
+                    decodedRow[byteIndex] = 0;
+                }
+
+                decodedRow[byteIndex] |= (white ? 0 : 1 << (7 - ((index) % 8)));
+                index++;
+            }
+
+            white = !white;
+        }
+
+        if (index != columns) {
+            throw new IOException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns);
+        }
+
+        decodedLength = (index + 7) / 8;
+    }
+
+    /**
+     * Reads one run length from the given code tree.
+     *
+     * Leaf values of 64 or more are added to the total and decoding restarts at the root;
+     * a leaf value from 0 to 63 ends the run.
+     *
+     * @param tree the code tree of the current colour.
+     * @return the total run length, or {@code columns} if a leaf with a negative value is reached.
+     * @throws IOException if the bits read do not form a known code.
+     */
+    private int decodeRun(final Tree tree) throws IOException {
+        int total = 0;
+
+        Node n = tree.root;
+
+        while (true) {
+            boolean bit = readBit();
+            n = n.walk(bit);
+
+            if (n == null) {
+                throw new IOException("Unknown code in Huffman RLE stream");
+            }
+
+            if (n.isLeaf) {
+                total += n.value;
+                if (n.value >= 64) {
+                    n = tree.root;
+                }
+                else if (n.value >= 0) {
+                    return total;
+                }
+                else {
+                    return columns;
+                }
+            }
+        }
+    }
+
+    // discards the rest of the current input byte, so the next bit comes from a fresh byte
+    private void resetBuffer() {
+        bufferPos = -1;
+    }
+
+    // the current input byte, shifted left as bits are consumed, and the number of bits used
+    int buffer = -1;
+    int bufferPos = -1;
+
+    /**
+     * Reads the next input bit, most significant bit of each byte first.
+     *
+     * @return true if the bit is set.
+     * @throws EOFException if the underlying stream has no more bytes.
+     */
+    private boolean readBit() throws IOException {
+        if (bufferPos < 0 || bufferPos > 7) {
+            buffer = in.read();
+
+            if (buffer == -1) {
+                throw new EOFException("Unexpected end of Huffman RLE stream");
+            }
+
+            bufferPos = 0;
+        }
+
+        boolean isSet = (buffer & 0x80) != 0;
+        buffer <<= 1;
+        bufferPos++;
+
+        return isSet;
+    }
+
+    /**
+     * Reads the next decoded byte.
+     *
+     * @return the next byte of the decoded rows; once the input is exhausted, 0 is returned
+     *         for every call instead of -1.
+     */
+    @Override
+    public int read() throws IOException {
+        if (decodedLength < 0) {
+            return 0x0;
+        }
+
+        if (decodedPos >= decodedLength) {
+            fetch();
+
+            if (decodedLength < 0) {
+                return 0x0;
+            }
+        }
+
+        return decodedRow[decodedPos++] & 0xff;
+    }
+
+    /**
+     * Reads decoded bytes from the current row; at most the rest of one row per call.
+     *
+     * @return the number of bytes copied; once the input is exhausted, the requested range
+     *         is filled with zeros and {@code len} is returned instead of -1.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (decodedLength < 0) {
+            Arrays.fill(b, off, off + len, (byte) 0x0);
+            return len;
+        }
+
+        if (decodedPos >= decodedLength) {
+            fetch();
+
+            if (decodedLength < 0) {
+                Arrays.fill(b, off, off + len, (byte) 0x0);
+                return len;
+            }
+        }
+
+        int read = Math.min(decodedLength - decodedPos, len);
+        System.arraycopy(decodedRow, decodedPos, b, off, read);
+        decodedPos += read;
+
+        return read;
+    }
+
+    /**
+     * Skips decoded bytes of the current row; at most the rest of one row per call.
+     *
+     * @return the number of bytes skipped, or -1 once the input is exhausted.
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        if (decodedLength < 0) {
+            return -1;
+        }
+
+        if (decodedPos >= decodedLength) {
+            fetch();
+
+            if (decodedLength < 0) {
+                return -1;
+            }
+        }
+
+        int skipped = (int) Math.min(decodedLength - decodedPos, n);
+        decodedPos += skipped;
+
+        return skipped;
+    }
+
+    @Override
+    public boolean markSupported() {
+        return false;
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        throw new IOException("mark/reset not supported");
+    }
+
+    // a node of a binary code tree; left is followed for a 0 bit, right for a 1 bit
+    private static final class Node {
+        Node left;
+        Node right;
+
+        int value; // > 63 non term.
+
+        boolean canBeFill = false;
+        boolean isLeaf = false;
+
+        void set(final boolean next, final Node node) {
+            if (!next) {
+                left = node;
+            }
+            else {
+                right = node;
+            }
+        }
+
+        Node walk(final boolean next) {
+            return next ? right : left;
+        }
+
+        @Override
+        public String toString() {
+            return "[leaf=" + isLeaf + ", value=" + value + ", canBeFill=" + canBeFill + "]";
+        }
+    }
+
+    // a binary code tree built from (code length, code bits, value) entries
+    private static final class Tree {
+        final Node root = new Node();
+
+        /**
+         * Adds a leaf holding {@code value} at the position described by the lowest
+         * {@code depth} bits of {@code path}, most significant bit first.
+         *
+         * @throws IOException if the path runs through an existing leaf.
+         */
+        void fill(final int depth, final int path, final int value) throws IOException {
+            Node current = root;
+
+            for (int i = 0; i < depth; i++) {
+                int bitPos = depth - 1 - i;
+                boolean isSet = ((path >> bitPos) & 1) == 1;
+                Node next = current.walk(isSet);
+
+                if (next == null) {
+                    next = new Node();
+
+                    if (i == depth - 1) {
+                        next.value = value;
+                        next.isLeaf = true;
+                    }
+
+                    if (path == 0) {
+                        next.canBeFill = true;
+                    }
+
+                    current.set(isSet, next);
+                }
+                else {
+                    if (next.isLeaf) {
+                        throw new IOException("node is leaf, no other following");
+                    }
+                }
+
+                current = next;
+            }
+        }
+
+        /**
+         * Attaches the given node at the position described by the lowest {@code depth}
+         * bits of {@code path}, most significant bit first.
+         *
+         * @throws IOException if the path runs through an existing leaf.
+         */
+        void fill(final int depth, final int path, final Node node) throws IOException {
+            Node current = root;
+
+            for (int i = 0; i < depth; i++) {
+                int bitPos = depth - 1 - i;
+                boolean isSet = ((path >> bitPos) & 1) == 1;
+                Node next = current.walk(isSet);
+
+                if (next == null) {
+                    if (i == depth - 1) {
+                        next = node;
+                    }
+                    else {
+                        next = new Node();
+                    }
+
+                    if (path == 0) {
+                        next.canBeFill = true;
+                    }
+
+                    current.set(isSet, next);
+                }
+                else {
+                    if (next.isLeaf) {
+                        throw new IOException("node is leaf, no other following");
+                    }
+                }
+
+                current = next;
+            }
+        }
+    }
+
+    // black run codes, grouped by code length; entry [i][j] pairs with BLACK_RUN_LENGTHS[i][j]
+    static final short[][] BLACK_CODES = {
+            { // 2 bits
+                    0x2, 0x3,
+            },
+            { // 3 bits
+                    0x2, 0x3,
+            },
+            { // 4 bits
+                    0x2, 0x3,
+            },
+            { // 5 bits
+                    0x3,
+            },
+            { // 6 bits
+                    0x4, 0x5,
+            },
+            { // 7 bits
+                    0x4, 0x5, 0x7,
+            },
+            { // 8 bits
+                    0x4, 0x7,
+            },
+            { // 9 bits
+                    0x18,
+            },
+            { // 10 bits
+                    0x17, 0x18, 0x37, 0x8, 0xf,
+            },
+            { // 11 bits
+                    0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd,
+            },
+            { // 12 bits
+                    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33,
+                    0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65,
+                    0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3,
+                    0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb,
+            },
+            { // 13 bits
+                    0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73,
+                    0x74, 0x75, 0x76, 0x77,
+            }
+    };
+    static final short[][] BLACK_RUN_LENGTHS = {
+            { // 2 bits
+                    3, 2,
+            },
+            { // 3 bits
+                    1, 4,
+            },
+            { // 4 bits
+                    6, 5,
+            },
+            { // 5 bits
+                    7,
+            },
+            { // 6 bits
+                    9, 8,
+            },
+            { // 7 bits
+                    10, 11, 12,
+            },
+            { // 8 bits
+                    13, 14,
+            },
+            { // 9 bits
+                    15,
+            },
+            { // 10 bits
+                    16, 17, 0, 18, 64,
+            },
+            { // 11 bits
+                    24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920,
+            },
+            { // 12 bits
+                    1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53,
+                    54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26,
+                    27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43,
+            },
+            { // 13 bits
+                    640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088,
+                    1152, 1216,
+            }
+    };
+
+    // white run codes, grouped by code length; entry [i][j] pairs with WHITE_RUN_LENGTHS[i][j]
+    public static final short[][] WHITE_CODES = {
+            { // 4 bits
+                    0x7, 0x8, 0xb, 0xc, 0xe, 0xf,
+            },
+            { // 5 bits
+                    0x12, 0x13, 0x14, 0x1b, 0x7, 0x8,
+            },
+            { // 6 bits
+                    0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8,
+            },
+            { // 7 bits
+                    0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc,
+            },
+            { // 8 bits
+                    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
+                    0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59,
+                    0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb,
+            },
+            { // 9 bits
+                    0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+            },
+            { // 10 bits
+            },
+            { // 11 bits
+                    0x8, 0xc, 0xd,
+            },
+            { // 12 bits
+                    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f,
+            }
+    };
+
+    public static final short[][] WHITE_RUN_LENGTHS = {
+            { // 4 bits
+                    2, 3, 4, 5, 6, 7,
+            },
+            { // 5 bits
+                    128, 8, 9, 64, 10, 11,
+            },
+            { // 6 bits
+                    192, 1664, 16, 17, 13, 14, 15, 1, 12,
+            },
+            { // 7 bits
+                    26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19,
+            },
+            { // 8 bits
+                    33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45,
+                    59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48,
+            },
+            { // 9 bits
+                    1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408,
+            },
+            { // 10 bits
+            },
+            { // 11 bits
+                    1792, 1856, 1920,
+            },
+            { // 12 bits
+                    1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560,
+            }
+    };
+
+    static final Node EOL;
+    static final Node FILL;
+    static final Tree blackRunTree;
+    static final Tree whiteRunTree;
+    static final Tree eolOnlyTree;
+    static final Tree codeTree;
+
+    // special leaf values; all negative so they cannot be mistaken for run lengths
+    static final int VALUE_EOL = -2000;
+    static final int VALUE_FILL = -1000;
+    static final int VALUE_PASSMODE = -3000;
+    static final int VALUE_HMODE = -4000;
+
+    // builds the shared code trees; a failure here means the tables above are inconsistent
+    static {
+        EOL = new Node();
+        EOL.isLeaf = true;
+        EOL.value = VALUE_EOL;
+        // FILL loops onto itself for 0 bits and leads to EOL on a 1 bit
+        FILL = new Node();
+        FILL.value = VALUE_FILL;
+        FILL.left = FILL;
+        FILL.right = EOL;
+
+        eolOnlyTree = new Tree();
+        try {
+            eolOnlyTree.fill(12, 0, FILL);
+            eolOnlyTree.fill(12, 1, EOL);
+        }
+        catch (IOException e) {
+            throw new AssertionError(e);
+        }
+
+        blackRunTree = new Tree();
+        try {
+            for (int i = 0; i < BLACK_CODES.length; i++) {
+                for (int j = 0; j < BLACK_CODES[i].length; j++) {
+                    // row i of the table holds the codes that are i + 2 bits long
+                    blackRunTree.fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]);
+                }
+            }
+            blackRunTree.fill(12, 0, FILL);
+            blackRunTree.fill(12, 1, EOL);
+        }
+        catch (IOException e) {
+            throw new AssertionError(e);
+        }
+
+        whiteRunTree = new Tree();
+        try {
+            for (int i = 0; i < WHITE_CODES.length; i++) {
+                for (int j = 0; j < WHITE_CODES[i].length; j++) {
+                    // row i of the table holds the codes that are i + 4 bits long
+                    whiteRunTree.fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]);
+                }
+            }
+
+            whiteRunTree.fill(12, 0, FILL);
+            whiteRunTree.fill(12, 1, EOL);
+        }
+        catch (IOException e) {
+            throw new AssertionError(e);
+        }
+
+        codeTree = new Tree();
+        try {
+            codeTree.fill(4, 1, VALUE_PASSMODE); // pass mode
+            codeTree.fill(3, 1, VALUE_HMODE); // H mode
+            codeTree.fill(1, 1, 0); // V(0)
+            codeTree.fill(3, 3, 1); // V_R(1)
+            codeTree.fill(6, 3, 2); // V_R(2)
+            codeTree.fill(7, 3, 3); // V_R(3)
+            codeTree.fill(3, 2, -1); // V_L(1)
+            codeTree.fill(6, 2, -2); // V_L(2)
+            codeTree.fill(7, 2, -3); // V_L(3)
+        }
+        catch (IOException e) {
+            throw new AssertionError(e);
+        }
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java
new file mode 100644
index 00000000000..c0c3c75759f
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2013, Harald Kuhr
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name "TwelveMonkeys" nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * CCITT Modified Group 4 (T6) fax compression. 
+ *
+ * @author Oliver Schmidtmer
+ *
+ * Taken from commit 047884e3d9e1b30516c79b147ead763303dc9bcb of 21.4.2016 from
+ * twelvemonkeys/imageio/plugins/tiff/CCITTFaxEncoderStream.java
+ *
+ * Initial changes for PDFBox:
+ * - removed Validate
+ * - G4 compression only
+ * - removed options
+ */
+final class CCITTFaxEncoderStream extends OutputStream {
+
+    // number of bytes of the row in progress that are already stored in inputBuffer
+    private int currentBufferLength = 0;
+    // holds exactly one row of packed input bits; a row is encoded as soon as it is full
+    private final byte[] inputBuffer;
+    // bytes per row: (columns + 7) / 8
+    private final int inputBufferLength;
+    private final int columns;
+    private final int rows;
+
+    // column indexes at which the colour changes in the row being encoded
+    private int[] changesCurrentRow;
+    // colour-change column indexes of the previously encoded row (reference line)
+    private int[] changesReferenceRow;
+    // number of rows encoded so far
+    private int currentRow = 0;
+    private int changesCurrentRowLength = 0;
+    private int changesReferenceRowLength = 0;
+    // output byte being assembled, and the number of bits already placed into it
+    private byte outputBuffer = 0;
+    private byte outputBufferBitLength = 0;
+    private final int fillOrder;
+    private final OutputStream stream;
+
+    /**
+     * Creates an encoder that writes its compressed output to the given stream.
+     *
+     * @param stream the stream receiving the encoded bytes
+     * @param columns the number of pixels (bits) per row
+     * @param rows the number of rows after which the two trailing EOL codes and the last
+     * partial byte are written
+     * @param fillOrder bit order within an output byte; compared against
+     * {@code TIFFExtension.FILL_LEFT_TO_RIGHT} when bits are emitted
+     */
+    CCITTFaxEncoderStream(final OutputStream stream, final int columns, final int rows, final int fillOrder) {
+
+        this.stream = stream;
+        this.columns = columns;
+        this.rows = rows;
+        this.fillOrder = fillOrder;
+
+        this.changesReferenceRow = new int[columns];
+        this.changesCurrentRow = new int[columns];
+
+        inputBufferLength = (columns + 7) / 8;
+        inputBuffer = new byte[inputBufferLength];
+    }
+
+    /**
+     * Buffers one byte of packed pixel data and encodes the row as soon as a complete row
+     * of {@code (columns + 7) / 8} bytes has been collected.
+     *
+     * @param b the byte to add; only the low eight bits are stored
+     * @throws IOException if writing the encoded row to the underlying stream fails
+     */
+    @Override
+    public void write(int b) throws IOException {
+        inputBuffer[currentBufferLength] = (byte) b;
+        currentBufferLength++;
+
+        if (currentBufferLength == inputBufferLength) {
+            encodeRow();
+            currentBufferLength = 0;
+        }
+    }
+
+    /**
+     * Flushes the underlying stream. Bits of a partially assembled output byte are not
+     * written by this method.
+     */
+    @Override
+    public void flush() throws IOException {
+        stream.flush();
+    }
+
+    /**
+     * Closes the underlying stream.
+     * NOTE(review): nothing pending is written here; the trailing EOL codes and the last
+     * partial byte are only emitted by encodeRow() once {@code rows} rows were encoded.
+     */
+    @Override
+    public void close() throws IOException {
+        stream.close();
+    }
+
+    /**
+     * Encodes the row currently held in inputBuffer: the previous row's change list
+     * becomes the reference list, the change list of the new row is computed, and the
+     * row is written in 2D coding.
+     */
+    private void encodeRow() throws IOException {
+        currentRow++;
+        // swap the two arrays instead of allocating a new one per row
+        int[] tmp = changesReferenceRow;
+        changesReferenceRow = changesCurrentRow;
+        changesCurrentRow = tmp;
+        changesReferenceRowLength = changesCurrentRowLength;
+        changesCurrentRowLength = 0;
+
+        int index = 0;
+        boolean white = true;
+        while (index < columns) {
+            int byteIndex = index / 8;
+            int bit = index % 8;
+            // bits are read most significant first; a change is recorded when a set bit
+            // is met while tracking white, or a cleared bit while tracking black
+            if ((((inputBuffer[byteIndex] >> (7 - bit)) & 1) == 1) == (white)) {
+                changesCurrentRow[changesCurrentRowLength] = index;
+                changesCurrentRowLength++;
+                white = !white;
+            }
+            index++;
+        }
+
+        encodeRowType6();
+
+        if (currentRow == rows) {
+            // last row: two EOL codes (presumably the T.6 end-of-block marker - confirm),
+            // then flush whatever is left in the partial output byte
+            writeEOL();
+            writeEOL();
+            fill();
+        }
+    }
+
+
+    /** Encodes the current row; only the 2D scheme is implemented. */
+    private void encodeRowType6() throws IOException {
+        encode2D();
+    }
+
+    /**
+     * Looks up the next two colour changes of the current row.
+     *
+     * @param pos the position to search from
+     * @param white whether the colour at {@code pos} is white
+     * @return a two element array: [0] is the first change located after {@code pos} (or
+     * the very first change when {@code pos} is 0 and {@code white} is true), [1] the
+     * change following it; an entry without such a change is {@code columns}
+     */
+    private int[] getNextChanges(int pos, boolean white) {
+        int[] result = {columns, columns};
+        for (int i = 0; i < changesCurrentRowLength; i++) {
+            if (pos < changesCurrentRow[i] || (pos == 0 && white)) {
+                result[0] = changesCurrentRow[i];
+                if ((i + 1) < changesCurrentRowLength) {
+                    result[1] = changesCurrentRow[i + 1];
+                }
+                break;
+            }
+        }
+
+        return result;
+    }
+
+    /**
+     * Writes a run as zero or more codes for multiples of 64 pixels, followed by the
+     * terminating code for {@code runLength % 64}.
+     *
+     * @param runLength the length of the run in pixels
+     * @param white true to use the white code tables, false for the black ones
+     */
+    private void writeRun(int runLength, boolean white) throws IOException {
+        int nonterm = runLength / 64;
+        Code[] codes = white ? WHITE_NONTERMINATING_CODES : BLACK_NONTERMINATING_CODES;
+        while (nonterm > 0) {
+            if (nonterm >= codes.length) {
+                // longer than the largest table entry: emit that entry and continue
+                write(codes[codes.length - 1].code, codes[codes.length - 1].length);
+                nonterm -= codes.length;
+            }
+            else {
+                write(codes[nonterm - 1].code, codes[nonterm - 1].length);
+                nonterm = 0;
+            }
+        }
+
+        Code c = white ? WHITE_TERMINATING_CODES[runLength % 64] : BLACK_TERMINATING_CODES[runLength % 64];
+        write(c.code, c.length);
+    }
+
+    /**
+     * Writes the current row relative to the reference row, choosing pass, horizontal or
+     * vertical mode for every step. The emitted (code, length) pairs are the ones that
+     * CCITTFaxDecoderStream registers in its codeTree.
+     */
+    private void encode2D() throws IOException {
+        boolean white = true;
+        int index = 0; // a0
+        while (index < columns) {
+            int[] nextChanges = getNextChanges(index, white); // a1, a2
+
+            int[] nextRefs = getNextRefChanges(index, white); // b1, b2
+
+            int difference = nextChanges[0] - nextRefs[0];
+            if (nextChanges[0] > nextRefs[1]) {
+                // PMODE
+                write(1, 4);
+                index = nextRefs[1];
+            }
+            else if (difference > 3 || difference < -3) {
+                // HMODE
+                write(1, 3);
+                writeRun(nextChanges[0] - index, white);
+                writeRun(nextChanges[1] - nextChanges[0], !white);
+                index = nextChanges[1];
+
+            }
+            else {
+                // VMODE
+                switch (difference) {
+                    case 0:
+                        write(1, 1);
+                        break;
+                    case 1:
+                        write(3, 3);
+                        break;
+                    case 2:
+                        write(3, 6);
+                        break;
+                    case 3:
+                        write(3, 7);
+                        break;
+                    case -1:
+                        write(2, 3);
+                        break;
+                    case -2:
+                        write(2, 6);
+                        break;
+                    case -3:
+                        write(2, 7);
+                        break;
+                    default:
+                        break;
+                }
+                white = !white;
+                index = nextRefs[0] + difference;
+            }
+        }
+    }
+
+    /**
+     * Looks up the next two changes of the reference row.
+     *
+     * @param a0 the current position in the row being encoded
+     * @param white the current colour; selects whether the even (true) or the odd (false)
+     * entries of the reference change list are inspected
+     * @return a two element array: [0] is the first inspected change greater than
+     * {@code a0} (or the very first entry when {@code a0} is 0), [1] the entry following
+     * it; an entry without such a change is {@code columns}
+     */
+    private int[] getNextRefChanges(int a0, boolean white) {
+        int[] result = {columns, columns};
+        for (int i = (white ? 0 : 1); i < changesReferenceRowLength; i += 2) {
+            if (changesReferenceRow[i] > a0 || (a0 == 0 && i == 0)) {
+                result[0] = changesReferenceRow[i];
+                if ((i + 1) < changesReferenceRowLength) {
+                    result[1] = changesReferenceRow[i + 1];
+                }
+                break;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Appends the {@code codeLength} low-order bits of {@code code}, most significant bit
+     * first, and writes every completed byte to the underlying stream.
+     */
+    private void write(int code, int codeLength) throws IOException {
+
+        for (int i = 0; i < codeLength; i++) {
+            boolean codeBit = ((code >> (codeLength - i - 1)) & 1) == 1;
+            if (fillOrder == TIFFExtension.FILL_LEFT_TO_RIGHT) {
+                outputBuffer |= (codeBit ? 1 << (7 - ((outputBufferBitLength) % 8)) : 0);
+            }
+            else {
+                outputBuffer |= (codeBit ? 1 << (((outputBufferBitLength) % 8)) : 0);
+            }
+            outputBufferBitLength++;
+
+            if (outputBufferBitLength == 8) {
+                stream.write(outputBuffer);
+                clearOutputBuffer();
+            }
+        }
+    }
+
+    /** Writes the 12 bit EOL code (eleven zero bits followed by a one). */
+    private void writeEOL() throws IOException {
+        write(1, 12);
+    }
+
+    /** Writes the partially filled output byte, if there is one, and resets the bit buffer. */
+    private void fill() throws IOException {
+        if (outputBufferBitLength != 0) {
+            stream.write(outputBuffer);
+        }
+        clearOutputBuffer();
+    }
+
+    private void clearOutputBuffer() {
+        outputBuffer = 0;
+        outputBufferBitLength = 0;
+    }
+
+    /** A code word together with its length in bits. */
+    private static class Code {
+        private Code(int code, int length) {
+            this.code = code;
+            this.length = length;
+        }
+
+        final int code;
+        final int length;
+    }
+
+    // indexed by run length (0..63)
+    private static final Code[] WHITE_TERMINATING_CODES;
+
+    // indexed by (run length / 64) - 1
+    private static final Code[] WHITE_NONTERMINATING_CODES;
+
+    // indexed by run length (0..63)
+    private static final Code[] BLACK_TERMINATING_CODES;
+
+    // indexed by (run length / 64) - 1
+    private static final Code[] BLACK_NONTERMINATING_CODES;
+
+    static {
+        // Setup HUFFMAN Codes
+        // the tables are derived from the decoder's code / run length arrays, whose outer
+        // index is the code length minus 4 (white) or minus 2 (black)
+        WHITE_TERMINATING_CODES = new Code[64];
+        WHITE_NONTERMINATING_CODES = new Code[40];
+        for (int i = 0; i < CCITTFaxDecoderStream.WHITE_CODES.length; i++) {
+            int bitLength = i + 4;
+            for (int j = 0; j < CCITTFaxDecoderStream.WHITE_CODES[i].length; j++) {
+                int value = CCITTFaxDecoderStream.WHITE_RUN_LENGTHS[i][j];
+                int code = CCITTFaxDecoderStream.WHITE_CODES[i][j];
+
+                if (value < 64) {
+                    WHITE_TERMINATING_CODES[value] = new Code(code, bitLength);
+                }
+                else {
+                    WHITE_NONTERMINATING_CODES[(value / 64) - 1] = new Code(code, bitLength);
+                }
+            }
+        }
+
+        BLACK_TERMINATING_CODES = new Code[64];
+        BLACK_NONTERMINATING_CODES = new Code[40];
+        for (int i = 0; i < CCITTFaxDecoderStream.BLACK_CODES.length; i++) {
+            int bitLength = i + 2;
+            for (int j = 0; j < CCITTFaxDecoderStream.BLACK_CODES[i].length; j++) {
+                int value = CCITTFaxDecoderStream.BLACK_RUN_LENGTHS[i][j];
+                int code = CCITTFaxDecoderStream.BLACK_CODES[i][j];
+
+                if (value < 64) {
+                    BLACK_TERMINATING_CODES[value] = new Code(code, bitLength);
+                }
+                else {
+                    BLACK_NONTERMINATING_CODES[(value / 64) - 1] = new Code(code, bitLength);
+                }
+            }
+        }
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java
new file mode 100644
index 00000000000..bd5341e58bd
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PushbackInputStream;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+
+/**
+ * Decodes image data that has been encoded using either Group 3 or Group 4
+ * CCITT facsimile (fax) encoding, and encodes image data to Group 4.
+ *
+ * @author Ben Litchfield
+ * @author Marcel Kammer
+ * @author Paul King
+ */
+final class CCITTFaxFilter extends Filter
+{
+    /**
+     * Decodes a CCITT fax encoded stream into an uncompressed 1 bit per pixel bitmap.
+     *
+     * @param encoded the encoded input data
+     * @param decoded the stream receiving the decoded bitmap
+     * @param parameters the stream dictionary; its Height (or H) entry is preferred over
+     * the Rows entry of the decode parameters
+     * @param index the index of this filter, used to select the decode parameters
+     * @return a decode result wrapping {@code parameters}
+     * @throws IOException if the input is empty or cannot be decoded
+     */
+    @Override
+    public DecodeResult decode(InputStream encoded, OutputStream decoded,
+                                         COSDictionary parameters, int index) throws IOException
+    {
+        // get decode parameters
+        COSDictionary decodeParms = getDecodeParams(parameters, index);
+
+        // parse dimensions
+        int cols = decodeParms.getInt(COSName.COLUMNS, 1728);
+        int rows = decodeParms.getInt(COSName.ROWS, 0);
+        int height = parameters.getInt(COSName.HEIGHT, COSName.H, 0);
+        if (rows > 0 && height > 0)
+        {
+            // PDFBOX-771, PDFBOX-3727: rows in DecodeParms sometimes contains an incorrect value
+            rows = height;
+        }
+        else
+        {
+            // at least one of the values has to have a valid value
+            rows = Math.max(rows, height);
+        }
+
+        // decompress data
+        int k = decodeParms.getInt(COSName.K, 0);
+        boolean encodedByteAlign = decodeParms.getBoolean(COSName.ENCODED_BYTE_ALIGN, false);
+        int arraySize = (cols + 7) / 8 * rows;
+        // TODO possible options??
+        byte[] decompressed = new byte[arraySize];
+        CCITTFaxDecoderStream s;
+        int type;
+        long tiffOptions = 0;
+        if (k == 0)
+        {
+            type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
+            byte[] streamData = new byte[20];
+            // A plain read() may return fewer bytes than are available, which would make
+            // the EOL search below inspect only part of the header. readNBytes() keeps
+            // reading until the buffer is full or the end of the stream is reached.
+            int bytesRead = encoded.readNBytes(streamData, 0, streamData.length);
+            if (bytesRead == 0)
+            {
+                throw new IOException("EOF while reading CCITT header");
+            }
+            // give the sniffed bytes back so that the decoder sees the complete stream
+            PushbackInputStream pushbackInputStream = new PushbackInputStream(encoded, streamData.length);
+            pushbackInputStream.unread(streamData, 0, bytesRead);
+            encoded = pushbackInputStream;
+            if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1))
+            {
+                // leading EOL (0b000000000001) not found, search further and try RLE if not
+                // found
+                type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
+                short b = (short) (((streamData[0] << 8) + (streamData[1] & 0xff)) >> 4);
+                for (int i = 12; i < bytesRead * 8; i++)
+                {
+                    b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01));
+                    if ((b & 0xFFF) == 1)
+                    {
+                        type = TIFFExtension.COMPRESSION_CCITT_T4;
+                        break;
+                    }
+                }
+            }
+        }
+        else if (k > 0)
+        {
+            // Group 3 2D
+            type = TIFFExtension.COMPRESSION_CCITT_T4;
+            tiffOptions = TIFFExtension.GROUP3OPT_2DENCODING;
+        }
+        else
+        {
+            // Group 4
+            type = TIFFExtension.COMPRESSION_CCITT_T6;
+        }
+        s = new CCITTFaxDecoderStream(encoded, cols, type, tiffOptions, encodedByteAlign);
+        readFromDecoderStream(s, decompressed);
+
+        // invert bitmap
+        boolean blackIsOne = decodeParms.getBoolean(COSName.BLACK_IS_1, false);
+        if (!blackIsOne)
+        {
+            // Inverting the bitmap
+            // Note the previous approach with starting from an IndexColorModel didn't work
+            // reliably. In some cases the image wouldn't be painted for some reason.
+            // So a safe but slower approach was taken.
+            invertBitmap(decompressed);
+        }
+
+        decoded.write(decompressed);
+        return new DecodeResult(parameters);
+    }
+
+    /**
+     * Reads from the decoder until the result array is full or the decoder reports the
+     * end of its data; bytes that could not be read stay zero.
+     *
+     * @param decoderStream the decoder to read from
+     * @param result the array receiving the decoded bytes
+     * @throws IOException if reading from the decoder fails
+     */
+    void readFromDecoderStream(CCITTFaxDecoderStream decoderStream, byte[] result)
+            throws IOException
+    {
+        int pos = 0;
+        int read;
+        while ((read = decoderStream.read(result, pos, result.length - pos)) > -1)
+        {
+            pos += read;
+            if (pos >= result.length)
+            {
+                break;
+            }
+        }
+    }
+
+    /**
+     * Flips every bit of the given bitmap in place.
+     *
+     * @param bufferData the bitmap to invert
+     */
+    private void invertBitmap(byte[] bufferData)
+    {
+        for (int i = 0, c = bufferData.length; i < c; i++)
+        {
+            bufferData[i] = (byte) (~bufferData[i] & 0xFF);
+        }
+    }
+
+    /**
+     * Encodes a bitmap with Group 4 compression, using the Columns and Rows entries of
+     * the given dictionary as dimensions.
+     *
+     * @param input the uncompressed bitmap
+     * @param encoded the stream receiving the compressed data
+     * @param parameters the dictionary holding the Columns and Rows entries
+     * @throws IOException if reading or writing fails
+     */
+    @Override
+    protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters)
+            throws IOException
+    {
+        int cols = parameters.getInt(COSName.COLUMNS);
+        int rows = parameters.getInt(COSName.ROWS);
+        CCITTFaxEncoderStream ccittFaxEncoderStream =
+                new CCITTFaxEncoderStream(encoded, cols, rows, TIFFExtension.FILL_LEFT_TO_RIGHT);
+        input.transferTo(ccittFaxEncoderStream);
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java
new file mode 100644
index 00000000000..cb359bc84ba
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos.filter;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSBoolean;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObjectGetter;
+
+/**
+ * A typed view of the crypt filter dictionary of a PDF document. It wraps a low
+ * level dictionary (COSDictionary) and offers accessors for its entries.
+ */
+public class COSCryptFilterDictionary implements COSObjectGetter
+{
+
+    /**
+     * The wrapped low level crypt filter dictionary.
+     */
+    protected COSDictionary cryptFilterDictionary;
+
+    /**
+     * Creates a crypt filter dictionary backed by a new, empty low level dictionary.
+     */
+    public COSCryptFilterDictionary()
+    {
+        this(new COSDictionary());
+    }
+
+    /**
+     * Creates a crypt filter dictionary backed by the given low level dictionary.
+     *
+     * @param d the low level dictionary to be managed by the new object
+     */
+    public COSCryptFilterDictionary(COSDictionary d)
+    {
+        cryptFilterDictionary = d;
+    }
+
+    /**
+     * Returns the low level dictionary behind this crypt filter dictionary.
+     *
+     * @return the wrapped COS dictionary
+     */
+    @Override
+    public COSDictionary getCOSObject()
+    {
+        return cryptFilterDictionary;
+    }
+
+    /**
+     * Stores the number of bits to use for the crypt filter algorithm in the Length entry.
+     *
+     * @param length the new key length
+     */
+    public void setLength(int length)
+    {
+        cryptFilterDictionary.setInt(COSName.LENGTH, length);
+    }
+
+    /**
+     * Reads the Length entry of the crypt filter dictionary, i.e. the length in bits for
+     * the crypt filter algorithm. 40 is passed as the fallback value for the lookup.
+     *
+     * @return the length in bits for the encryption algorithm
+     */
+    public int getLength()
+    {
+        return cryptFilterDictionary.getInt(COSName.LENGTH, 40);
+    }
+
+    /**
+     * Stores the crypt filter method in the CFM entry.
+     * Allowed values are: NONE, V2, AESV2, AESV3
+     *
+     * @param cfm the name of the crypt filter method
+     */
+    public void setCryptFilterMethod(COSName cfm)
+    {
+        cryptFilterDictionary.setItem(COSName.CFM, cfm);
+    }
+
+    /**
+     * Reads the crypt filter method from the CFM entry.
+     * Allowed values are: NONE, V2, AESV2, AESV3
+     *
+     * @return the name of the crypt filter method
+     */
+    public COSName getCryptFilterMethod()
+    {
+        return cryptFilterDictionary.getCOSName(COSName.CFM);
+    }
+
+    /**
+     * Reads the EncryptMetaData entry.
+     *
+     * @return the value of the entry when it is a boolean, otherwise true
+     */
+    public boolean isEncryptMetaData()
+    {
+        COSBase entry = cryptFilterDictionary.getObjectFromDictionary(COSName.ENCRYPT_META_DATA);
+        // default is true (see 7.6.3.2 Standard Encryption Dictionary PDF 32000-1:2008)
+        return !(entry instanceof COSBoolean) || ((COSBoolean) entry).getValue();
+    }
+
+    /**
+     * Writes the EncryptMetaData entry.
+     *
+     * @param encryptMetaData the value to store
+     */
+    public void setEncryptMetaData(boolean encryptMetaData)
+    {
+        getCOSObject().setBoolean(COSName.ENCRYPT_META_DATA, encryptMetaData);
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java
new file mode 100644
index 00000000000..2434863d609
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos.filter;
+
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSBoolean;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSObjectGetter;
+
+/**
+ * A typed view of the encryption filter dictionary of a PDF document. It wraps a
+ * low level dictionary (COSDictionary) and offers accessors for its entries.
+ */
+public class COSEncryptFilterDictionary implements COSObjectGetter
+{
+    /**
+     * The wrapped low level crypt filter dictionary.
+     */
+    protected COSDictionary cryptFilterDictionary;
+
+    /**
+     * Creates a crypt filter dictionary backed by a new, empty low level dictionary.
+     */
+    public COSEncryptFilterDictionary()
+    {
+        this(new COSDictionary());
+    }
+
+    /**
+     * Creates a crypt filter dictionary backed by the given low level dictionary.
+     *
+     * @param d the low level dictionary to be managed by the new object
+     */
+    public COSEncryptFilterDictionary(COSDictionary d)
+    {
+        cryptFilterDictionary = d;
+    }
+
+    /**
+     * Returns the low level dictionary behind this crypt filter dictionary.
+     *
+     * @return the wrapped COS dictionary
+     */
+    @Override
+    public COSDictionary getCOSObject()
+    {
+        return cryptFilterDictionary;
+    }
+
+    /**
+     * Stores the number of bits to use for the crypt filter algorithm in the Length entry.
+     *
+     * @param length the new key length
+     */
+    public void setLength(int length)
+    {
+        cryptFilterDictionary.setInt(COSName.LENGTH, length);
+    }
+
+    /**
+     * Reads the Length entry of the crypt filter dictionary, i.e. the length in bits for
+     * the crypt filter algorithm. 40 is passed as the fallback value for the lookup.
+     *
+     * @return the length in bits for the encryption algorithm
+     */
+    public int getLength()
+    {
+        return cryptFilterDictionary.getInt(COSName.LENGTH, 40);
+    }
+
+    /**
+     * Stores the crypt filter method in the CFM entry.
+     * Allowed values are: NONE, V2, AESV2, AESV3
+     *
+     * @param cfm the name of the crypt filter method
+     */
+    public void setCryptFilterMethod(COSName cfm)
+    {
+        cryptFilterDictionary.setItem(COSName.CFM, cfm);
+    }
+
+    /**
+     * Reads the crypt filter method from the CFM entry.
+     * Allowed values are: NONE, V2, AESV2, AESV3
+     *
+     * @return the name of the crypt filter method
+     */
+    public COSName getCryptFilterMethod()
+    {
+        return cryptFilterDictionary.getCOSName(COSName.CFM);
+    }
+
+    /**
+     * Reads the EncryptMetaData entry.
+     *
+     * @return the value of the entry when it is a boolean, otherwise true
+     */
+    public boolean isEncryptMetaData()
+    {
+        COSBase entry = getCOSObject().getObjectFromDictionary(COSName.ENCRYPT_META_DATA);
+        // default is true (see 7.6.3.2 Standard Encryption Dictionary PDF 32000-1:2008)
+        return !(entry instanceof COSBoolean) || ((COSBoolean) entry).getValue();
+    }
+
+    /**
+     * Writes the EncryptMetaData entry.
+     *
+     * @param encryptMetaData the value to store
+     */
+    public void setEncryptMetaData(boolean encryptMetaData)
+    {
+        getCOSObject().setBoolean(COSName.ENCRYPT_META_DATA, encryptMetaData);
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java
new file mode 100644
index 00000000000..b86ca6847d1
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos.filter;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.io.RandomAccessInputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
+
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * An InputStream which reads from an encoded COS stream.
+ *
+ * @author John Hewson
+ */
+public final class COSInputStream extends FilterInputStream
+{
+    /**
+     * Creates a new COSInputStream for the given COS stream, using the stream's own
+     * filter list, the stream itself as filter parameters and its raw input stream.
+     *
+     * @param cosStreamObject the COS stream to read from
+     * @return Decoded stream.
+     * @throws IOException If the stream could not be read.
+     */
+    public static COSInputStream create( COSStream cosStreamObject ) throws IOException
+    {
+        return create( cosStreamObject.getFilterList(), cosStreamObject, cosStreamObject.createRawInputStream());
+    }
+
+    /**
+     * Creates a new COSInputStream from an encoded input stream, using the default
+     * decode options.
+     *
+     * @param filters Filters to be applied.
+     * @param parameters Filter parameters.
+     * @param in Encoded input stream.
+     * @return Decoded stream.
+     * @throws IOException If the stream could not be read.
+     */
+    public static COSInputStream create(List<Filter> filters, COSDictionary parameters, InputStream in)
+            throws IOException
+    {
+        return create(filters, parameters, in, DecodeOptions.DEFAULT);
+    }
+
+    /**
+     * Creates a new COSInputStream from an encoded input stream.
+     *
+     * @param filters Filters to be applied.
+     * @param parameters Filter parameters.
+     * @param in Encoded input stream.
+     * @param options decode options for the encoded stream
+     * @return Decoded stream.
+     * @throws IOException If the stream could not be read.
+     */
+    public static COSInputStream create( List<Filter> filters, COSDictionary parameters, InputStream in,
+                                 DecodeOptions options) throws IOException
+    {
+        if (filters.isEmpty())
+        {
+            // nothing to decode: hand the input through without any decode results
+            return new COSInputStream(in, Collections.emptyList());
+        }
+        List<DecodeResult> results = new ArrayList<>(filters.size());
+        RandomAccessRead decoded = Filter.decode(in, filters, parameters, options, results);
+        return new COSInputStream(new RandomAccessInputStream(decoded), results);
+    }
+
+    // results of the applied filters; typed so that getDecodeResult() needs no cast
+    private final List<DecodeResult> decodeResults;
+
+    /**
+     * Constructor.
+     *
+     * @param input decoded stream
+     * @param decodeResults results of decoding
+     */
+    private COSInputStream(InputStream input, List<DecodeResult> decodeResults)
+    {
+        super(input);
+        this.decodeResults = decodeResults;
+    }
+
+    /**
+     * Returns the result of the last filter, for use by repair mechanisms.
+     *
+     * @return the result of the last filter, or a default result if no filter was applied
+     */
+    public DecodeResult getDecodeResult()
+    {
+        if (decodeResults.isEmpty())
+        {
+            return DecodeResult.createDefault();
+        }
+        else
+        {
+            return decodeResults.get(decodeResults.size() - 1);
+        }
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java
new file mode 100644
index 00000000000..08a7ffe6728
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos.filter;
+
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.io.RandomAccessInputStream;
+import org.apache.pdfbox.io.RandomAccessOutputStream;
+import org.apache.pdfbox.io.RandomAccessStreamCache;
+
+import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+
+/**
+ * An OutputStream which writes to an encoded COS stream.
+ *
+ * @author John Hewson
+ */
+public final class COSOutputStream extends FilterOutputStream
+{
+    // filters to apply on close(); typed so that encode() can be called on the elements
+    private final List<Filter> filters;
+    private final COSDictionary parameters;
+    private final RandomAccessStreamCache streamCache;
+    // collects the unencoded data while filters are pending; null when there are no filters
+    private RandomAccess buffer;
+
+    /*
+     * TODO: fix so that this is in a filter class, using this object as input.
+     * Returns a new OutputStream for writing stream data, using the current filters.
+     *
+     * @return OutputStream for un-encoded stream data.
+     * @throws IOException If the output stream could not be created.
+     */
+//    public OutputStream createOutputStream() throws IOException
+//    {
+//        return createOutputStream(null);
+//    }
+//
+//    /**
+//     * Returns a new OutputStream for writing stream data, using and the given filters.
+//     *
+//     * @param filters COSArray or COSName of filters to be used.
+//     * @return OutputStream for un-encoded stream data.
+//     * @throws IOException If the output stream could not be created.
+//     */
+//    public OutputStream createOutputStream( COSBase filters) throws IOException
+//    {
+//        checkClosed();
+//        if (isWriting)
+//        {
+//            throw new IllegalStateException("Cannot have more than one open stream writer.");
+//        }
+//        // apply filters, if any
+//        if (filters != null)
+//        {
+//            setItem(COSName.FILTER, filters);
+//        }
+//        if (randomAccess != null)
+//            randomAccess.clear();
+//        else
+//            randomAccess = getStreamCache().createBuffer();
+//        OutputStream randomOut = new RandomAccessOutputStream( randomAccess);
+//        OutputStream cosOut = new COSOutputStream( getFilterList(), this, randomOut,
+//                getStreamCache());
+//        isWriting = true;
+//        return new FilterOutputStream( cosOut)
+//        {
+//            @Override
+//            public void write(byte[] b, int off, int len) throws IOException
+//            {
+//                this.out.write(b, off, len);
+//            }
+//
+//            @Override
+//            public void close() throws IOException
+//            {
+//                super.close();
+//                setInt(COSName.LENGTH, (int)randomAccess.length());
+//                isWriting = false;
+//            }
+//        };
+//    }
+
+    /**
+     * Creates a new COSOutputStream which writes to an encoded COS stream.
+     *
+     * @param filters Filters to apply.
+     * @param parameters Filter parameters.
+     * @param output Encoded stream.
+     * @param streamCache Stream cache to use.
+     *
+     * @throws IOException If there was an error creating a temporary buffer
+     */
+    public COSOutputStream( List<Filter> filters, COSDictionary parameters, OutputStream output,
+                    RandomAccessStreamCache streamCache ) throws IOException
+    {
+        super(output);
+        this.filters = filters;
+        this.parameters = parameters;
+        this.streamCache = streamCache;
+        // without filters the data is passed straight through and no buffer is needed
+        buffer = filters.isEmpty() ? null : streamCache.createBuffer();
+    }
+
+    @Override
+    public void write(byte[] b) throws IOException
+    {
+        write(b, 0, b.length);
+    }
+
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException
+    {
+        if (buffer != null)
+        {
+            buffer.write(b, off, len);
+        }
+        else
+        {
+            super.write(b, off, len);
+        }
+    }
+
+    @Override
+    public void write(int b) throws IOException
+    {
+        if (buffer != null)
+        {
+            buffer.write(b);
+        }
+        else
+        {
+            super.write(b);
+        }
+    }
+
+    /**
+     * Flushes the enclosed stream when no filters are pending. With filters nothing has
+     * reached the enclosed stream before close(), so this is a no-op then.
+     */
+    @Override
+    public void flush() throws IOException
+    {
+        if (buffer == null)
+        {
+            super.flush();
+        }
+    }
+
+    /**
+     * Runs the buffered data through the filters, last filter first, writes the result to
+     * the enclosed stream and closes it. The temporary buffers are closed even if a
+     * filter fails.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        try
+        {
+            if (buffer != null)
+            {
+                try
+                {
+                    // apply filters in reverse order
+                    for (int i = filters.size() - 1; i >= 0; i--)
+                    {
+                        try (InputStream unfilteredIn = new RandomAccessInputStream(buffer))
+                        {
+                            if (i == 0)
+                            {
+                                /*
+                                 * The last filter to run can encode directly to the enclosed output
+                                 * stream.
+                                 */
+                                filters.get(i).encode(unfilteredIn, out, parameters, i);
+                            }
+                            else
+                            {
+                                RandomAccess filteredBuffer = streamCache.createBuffer();
+                                try (OutputStream filteredOut = new RandomAccessOutputStream(filteredBuffer))
+                                {
+                                    filters.get(i).encode(unfilteredIn, filteredOut, parameters, i);
+                                }
+                                finally
+                                {
+                                    // the output of this filter is the input of the next one
+                                    buffer.close();
+                                    buffer = filteredBuffer;
+                                }
+                            }
+                        }
+                    }
+                }
+                finally
+                {
+                    buffer.close();
+                    buffer = null;
+                }
+            }
+        }
+        finally
+        {
+            super.close();
+        }
+    }
+}
diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java
new file mode 100644
index 00000000000..5c1e2e2a11f
--- /dev/null
+++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+
+/**
+ * Decrypts data encrypted by a security handler, reproducing the data as it was before encryption.
+ * @author Adam Nichols + */ +final class CryptFilter extends Filter +{ + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + COSName encryptionName = parameters.getCOSName(COSName.NAME); + if(encryptionName == null || encryptionName.equals(COSName.IDENTITY)) + { + // currently the only supported implementation is the Identity crypt filter + Filter identityFilter = new IdentityFilter(); + identityFilter.decode(encoded, decoded, parameters, index); + return new DecodeResult(parameters); + } + throw new IOException("Unsupported crypt filter " + encryptionName.getName()); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + COSName encryptionName = parameters.getCOSName(COSName.NAME); + if(encryptionName == null || encryptionName.equals(COSName.IDENTITY)) + { + // currently the only supported implementation is the Identity crypt filter + Filter identityFilter = new IdentityFilter(); + identityFilter.encode(input, encoded, parameters); + } + else + { + throw new IOException("Unsupported crypt filter " + encryptionName.getName()); + } + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java new file mode 100644 index 00000000000..c5f3ac54ac1 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.awt.color.CMMException; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferByte; +import java.awt.image.Raster; +import java.awt.image.WritableRaster; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import javax.imageio.IIOException; +import javax.imageio.ImageIO; +import javax.imageio.ImageReadParam; +import javax.imageio.ImageReader; +import javax.imageio.metadata.IIOMetadata; +import javax.imageio.metadata.IIOMetadataNode; +import javax.imageio.stream.ImageInputStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.COSDictionary; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; + +/** + * Decompresses data encoded using a DCT (discrete cosine transform) + * technique based on the JPEG standard. 
/**
 * Decompresses data encoded using a DCT (discrete cosine transform)
 * technique based on the JPEG standard.
 *
 * <p>Decoding goes through an ImageIO raster-capable JPEG reader. Three-band rasters have their
 * first and third samples swapped, four-band rasters are converted from YCCK when the Adobe
 * APP14 transform flag asks for it. Encoding is not supported by this class.
 *
 * @author John Hewson
 */
final class DCTFilter extends Filter
{
    private static final Logger LOG = LogManager.getLogger(DCTFilter.class);

    // index of the transform byte inside the APP14 payload, counted from the 'A' of "Adobe"
    private static final int POS_TRANSFORM = 11;
    // identifier searched for by getAdobeTransformByBruteForce()
    private static final String ADOBE = "Adobe";

    /**
     * Decodes a JPEG stream and writes the raw sample bytes of the resulting raster.
     *
     * @param encoded the JPEG data
     * @param decoded receives the raster's byte data
     * @param parameters stream dictionary, returned unchanged inside the result
     * @param index not used by this method
     * @param options subsampling and source region to apply; its "filter subsampled" flag is
     * set to true by this method
     * @return a result wrapping {@code parameters}
     * @throws IOException if no suitable reader exists, reading fails, or a CMMException occurs
     */
    @Override
    public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary
            parameters, int index, DecodeOptions options) throws IOException
    {
        ImageReader reader = findRasterReader("JPEG", "a suitable JAI I/O image filter is not installed");
        try (ImageInputStream iis = ImageIO.createImageInputStream(encoded))
        {

            // skip one LF if there: the first byte is consumed, and the stream is only
            // rewound when that byte was not a line feed
            if (iis.read() != 0x0A)
            {
                iis.seek(0);
            }

            reader.setInput(iis);
            ImageReadParam irp = reader.getDefaultReadParam();
            irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(),
                    options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY());
            irp.setSourceRegion(options.getSourceRegion());
            // tell the caller that the options above were handed to the reader
            options.setFilterSubsampled(true);

            Raster raster = readImageRaster(reader, irp);

            // special handling for 4-component images
            if (raster.getNumBands() == 4)
            {
                // get APP14 marker
                Integer transform;
                try
                {
                    transform = getAdobeTransform(reader.getImageMetadata(0));
                }
                catch (IIOException | NegativeArraySizeException e)
                {
                    // we really tried asking nicely, now we're using brute force.
                    LOG.debug("Couldn't read usíng getAdobeTransform() - using getAdobeTransformByBruteForce() as fallback", e);
                    transform = getAdobeTransformByBruteForce(iis);
                }
                int colorTransform = transform != null ? transform : 0;

                // 0 = Unknown (RGB or CMYK), 1 = YCbCr, 2 = YCCK
                // https://exiftool.org/TagNames/JPEG.html#Adobe
                switch (colorTransform)
                {
                    case 0:
                        // already CMYK
                        break;
                    case 1:
                        LOG.warn("There is no 4 channel YCbCr, using YCCK");
                        // fallthrough
                    case 2:
                        raster = fromYCCKtoCMYK(raster);
                        break;
                    default:
                        throw new IllegalArgumentException("Unknown colorTransform");
                }
            }
            else if (raster.getNumBands() == 3)
            {
                // BGR to RGB
                raster = fromBGRtoRGB(raster);
            }

            // NOTE(review): the cast assumes the raster is backed by a byte buffer - confirm
            // for all readers that can be returned by findRasterReader()
            DataBufferByte dataBuffer = (DataBufferByte)raster.getDataBuffer();
            decoded.write(dataBuffer.getData());
        }
        catch (CMMException ex)
        {
            // PDFBOX-5732
            throw new IOException(ex);
        }
        finally
        {
            reader.dispose();
        }
        return new DecodeResult(parameters);
    }

    /**
     * Reads image 0 as a raster, choosing between ImageReader#read and ImageReader#readRaster
     * based on the channel count reported by the metadata.
     *
     * @param reader reader whose input has already been set
     * @param irp read parameters (subsampling, region)
     * @return the raster of the first image
     * @throws IOException if reading fails
     */
    private Raster readImageRaster(ImageReader reader, ImageReadParam irp) throws IOException
    {
        String numChannels = getNumChannels(reader);
        // get the raster using horrible JAI workarounds
        // (note: setUseCache is a static, JVM-wide ImageIO setting)
        ImageIO.setUseCache(false);
        Raster raster;
        // Strategy: use read() for RGB or "can't get metadata"
        // use readRaster() for CMYK and gray and as fallback if read() fails
        // after "can't get metadata" because "no meta" file was CMYK
        if ("3".equals(numChannels) || numChannels.isEmpty())
        {
            try
            {
                // I'd like to use ImageReader#readRaster but it is buggy and can't read RGB correctly
                BufferedImage image = reader.read(0, irp);
                if (image.getColorModel().getNumColorComponents() == 4)
                {
                    // thrown on purpose to land in the readRaster() fallback below
                    throw new IIOException("CMYK image");
                }
                raster = image.getRaster();
            }
            catch (IIOException e)
            {
                // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but
                // fortunately ImageReader#readRaster isn't buggy when reading 4-channel files
                LOG.debug("Couldn't read use read() for RGB image - using readRaster() as fallback", e);
                raster = reader.readRaster(0, irp);
            }
        }
        else
        {
            // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but
            // fortunately ImageReader#readRaster isn't buggy when reading 4-channel files
            raster = reader.readRaster(0, irp);
        }
        return raster;
    }

    /**
     * Decodes with {@link DecodeOptions#DEFAULT}.
     */
    @Override
    public DecodeResult decode(InputStream encoded, OutputStream decoded,
                               COSDictionary parameters, int index) throws IOException
    {
        return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT);
    }

    // reads the APP14 Adobe transform tag and returns its value, or 0 if unknown
    // (0 is returned when the metadata tree has no app14Adobe node; if there are several,
    // the last one wins)
    // NOTE(review): Integer.valueOf throws NumberFormatException for a non-numeric attribute
    // value, which the caller's catch clause does not cover - confirm this cannot happen
    private Integer getAdobeTransform(IIOMetadata metadata)
    {
        Element tree = (Element)metadata.getAsTree("javax_imageio_jpeg_image_1.0");
        Element markerSequence = (Element)tree.getElementsByTagName("markerSequence").item(0);
        NodeList app14AdobeNodeList = markerSequence.getElementsByTagName("app14Adobe");
        if (app14AdobeNodeList != null)
        {
            int app14AdobeNodeListLength = app14AdobeNodeList.getLength();
            if (app14AdobeNodeListLength > 0)
            {
                if (app14AdobeNodeListLength > 1)
                {
                    LOG.warn("app14Adobe entry appears several times, using the last one");
                }
                Element adobe = (Element) app14AdobeNodeList.item(app14AdobeNodeListLength - 1);
                return Integer.valueOf(adobe.getAttribute("transform"));
            }
        }
        return 0;
    }

    // See in https://github.com/haraldk/TwelveMonkeys
    // com.twelvemonkeys.imageio.plugins.jpeg.AdobeDCT class for structure of APP14 segment
    //
    // Scans the whole stream from position 0 for the bytes "Adobe". For each hit the four
    // bytes in front of it are read as marker tag and segment length; only a 0xFFEE tag is
    // accepted. The payload is then read starting at the 'A' of "Adobe" and the byte at
    // POS_TRANSFORM is returned. Returns 0 when no usable segment is found.
    private int getAdobeTransformByBruteForce(ImageInputStream iis) throws IOException
    {
        // number of characters of ADOBE matched so far
        int a = 0;
        iis.seek(0);
        int by;
        while ((by = iis.read()) != -1)
        {
            if (ADOBE.charAt(a) == by)
            {
                ++a;
                if (a != ADOBE.length())
                {
                    continue;
                }
                // match
                a = 0;
                long afterAdobePos = iis.getStreamPosition();
                // 9 = 5 bytes of "Adobe" + 2 bytes length + 2 bytes marker tag
                iis.seek(afterAdobePos - 9);
                int tag = iis.readUnsignedShort();
                if (tag != 0xFFEE)
                {
                    // not an APP14 segment, resume scanning right after the hit
                    iis.seek(afterAdobePos);
                    continue;
                }
                int len = iis.readUnsignedShort();
                if (len >= POS_TRANSFORM + 1)
                {
                    byte[] app14 = new byte[Math.max(len, POS_TRANSFORM + 1)];
                    if (iis.read(app14) >= POS_TRANSFORM + 1)
                    {
                        return app14[POS_TRANSFORM];
                    }
                }
            }
            else
            {
                a = 0;
            }
        }
        return 0;
    }

    // converts YCCK image to CMYK. YCCK is an equivalent encoding for
    // CMYK data, so no color management code is needed here, nor does the
    // PDF color space have to be consulted
    // The input raster is not modified; a new compatible raster is filled and returned.
    private WritableRaster fromYCCKtoCMYK(Raster raster)
    {
        WritableRaster writableRaster = raster.createCompatibleWritableRaster();

        // reused for every pixel, both as read buffer and as write buffer
        int[] value = new int[4];
        for (int y = 0, height = raster.getHeight(); y < height; y++)
        {
            for (int x = 0, width = raster.getWidth(); x < width; x++)
            {
                raster.getPixel(x, y, value);

                // 4-channels 0..255
                float Y = value[0];
                float Cb = value[1];
                float Cr = value[2];
                float K = value[3];

                // YCCK to RGB, see http://software.intel.com/en-us/node/442744
                int r = clamp(Y + 1.402f * Cr - 179.456f);
                int g = clamp(Y - 0.34414f * Cb - 0.71414f * Cr + 135.45984f);
                int b = clamp(Y + 1.772f * Cb - 226.816f);

                // naive RGB to CMYK
                int cyan = 255 - r;
                int magenta = 255 - g;
                int yellow = 255 - b;

                // update new raster (K is carried over unchanged)
                value[0] = cyan;
                value[1] = magenta;
                value[2] = yellow;
                value[3] = (int)K;
                writableRaster.setPixel(x, y, value);
            }
        }
        return writableRaster;
    }

    // converts from BGR to RGB by swapping the first and third sample of every pixel;
    // the input raster is not modified
    private WritableRaster fromBGRtoRGB(Raster raster)
    {
        WritableRaster writableRaster = raster.createCompatibleWritableRaster();

        int width = raster.getWidth();
        int height = raster.getHeight();
        // number of samples in one row (3 per pixel)
        int w3 = width * 3;
        int[] tab = new int[w3];
        //BEWARE: handling the full image at a time is slower than one line at a time
        for (int y = 0; y < height; y++)
        {
            raster.getPixels(0, y, width, 1, tab);
            for (int off = 0; off < w3; off += 3)
            {
                int tmp = tab[off];
                tab[off] = tab[off + 2];
                tab[off + 2] = tmp;
            }
            writableRaster.setPixels(0, y, width, 1, tab);
        }
        return writableRaster;
    }

    // returns the number of channels as a string, or an empty string if there is an error getting the meta data
    // (also empty when the reader has no metadata or the tree has no NumChannels element)
    private String getNumChannels(ImageReader reader)
    {
        try
        {
            IIOMetadata imageMetadata = reader.getImageMetadata(0);
            if (imageMetadata == null)
            {
                return "";
            }
            IIOMetadataNode metaTree = (IIOMetadataNode) imageMetadata.getAsTree("javax_imageio_1.0");
            Element numChannelsItem = (Element) metaTree.getElementsByTagName("NumChannels").item(0);
            if (numChannelsItem == null)
            {
                return "";
            }
            return numChannelsItem.getAttribute("value");
        }
        catch (IOException | NegativeArraySizeException e)
        {
            LOG.debug("Couldn't read metadata - returning empty string", e);
            return "";
        }
    }

    // clamps value to 0-255 range; the fractional part is dropped by the int cast
    private int clamp(float value)
    {
        return (int)((value < 0) ? 0 : ((value > 255) ? 255 : value));
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters)
            throws IOException
    {
        throw new UnsupportedOperationException("DCTFilter encoding not implemented, use the JPEGFactory methods instead");
    }
}
/**
 * Optional hints handed to a Filter when a stream is decoded: a source region and subsampling
 * factors/offsets. A filter is free to ignore them, so callers that depend on the hints must
 * check {@link #isFilterSubsampled()} after decoding.
 */
public class DecodeOptions
{
    /**
     * Shared, unmodifiable instance describing the default behavior. Its "filter subsampled"
     * flag is always true.
     */
    public static final DecodeOptions DEFAULT = new FinalDecodeOptions(true);

    private Rectangle sourceRegion;
    private int subsamplingX = 1;
    private int subsamplingY = 1;
    private int subsamplingOffsetX;
    private int subsamplingOffsetY;
    private boolean filterSubsampled;

    /**
     * Creates options with no source region, a subsampling of 1 and no offsets.
     */
    public DecodeOptions()
    {
        // nothing to do, the field defaults are the desired values
    }

    /**
     * Creates options that restrict decoding to a region of the image. The actual region will
     * be clipped to the dimensions of the image.
     *
     * @param sourceRegion Region of the source image that should be decoded
     */
    public DecodeOptions(Rectangle sourceRegion)
    {
        this.sourceRegion = sourceRegion;
    }

    /**
     * Creates options that restrict decoding to a region of the image. The actual region will
     * be clipped to the dimensions of the image.
     *
     * @param x x-coordinate of the top-left corner of the region to be decoded
     * @param y y-coordinate of the top-left corner of the region to be decoded
     * @param width Width of the region to be decoded
     * @param height Height of the region to be decoded
     */
    public DecodeOptions(int x, int y, int width, int height)
    {
        this.sourceRegion = new Rectangle(x, y, width, height);
    }

    /**
     * Creates options that request subsampling, using the same factor on both axes.
     *
     * @param subsampling The number of rows and columns to advance in the source for each
     * pixel in the decoded image.
     */
    public DecodeOptions(int subsampling)
    {
        this.subsamplingX = subsampling;
        this.subsamplingY = subsampling;
    }

    /**
     * The part of the image that should be decoded.
     *
     * @return The source region to decode, or null if the entire image should be decoded
     */
    public Rectangle getSourceRegion()
    {
        return this.sourceRegion;
    }

    /**
     * Sets the region of the source image that should be decoded. The region will be clipped
     * to the dimensions of the source image; null selects the entire image.
     *
     * @param sourceRegion The source region to decode, or null if the entire image should be
     * decoded.
     */
    public void setSourceRegion(Rectangle sourceRegion)
    {
        this.sourceRegion = sourceRegion;
    }

    /**
     * The number of columns to advance in the source for every pixel decoded.
     *
     * @return The x-axis subsampling value
     */
    public int getSubsamplingX()
    {
        return this.subsamplingX;
    }

    /**
     * Sets the number of columns to advance in the source for every pixel decoded.
     *
     * @param ssX The x-axis subsampling value
     */
    public void setSubsamplingX(int ssX)
    {
        this.subsamplingX = ssX;
    }

    /**
     * The number of rows to advance in the source for every pixel decoded.
     *
     * @return The y-axis subsampling value
     */
    public int getSubsamplingY()
    {
        return this.subsamplingY;
    }

    /**
     * Sets the number of rows to advance in the source for every pixel decoded.
     *
     * @param ssY The y-axis subsampling value
     */
    public void setSubsamplingY(int ssY)
    {
        this.subsamplingY = ssY;
    }

    /**
     * The horizontal offset for subsampling.
     *
     * @return The x-axis subsampling offset
     */
    public int getSubsamplingOffsetX()
    {
        return this.subsamplingOffsetX;
    }

    /**
     * Sets the horizontal subsampling offset for decoding images.
     *
     * @param ssOffsetX The x-axis subsampling offset
     */
    public void setSubsamplingOffsetX(int ssOffsetX)
    {
        this.subsamplingOffsetX = ssOffsetX;
    }

    /**
     * The vertical offset for subsampling.
     *
     * @return The y-axis subsampling offset
     */
    public int getSubsamplingOffsetY()
    {
        return this.subsamplingOffsetY;
    }

    /**
     * Sets the vertical subsampling offset for decoding images.
     *
     * @param ssOffsetY The y-axis subsampling offset
     */
    public void setSubsamplingOffsetY(int ssOffsetY)
    {
        this.subsamplingOffsetY = ssOffsetY;
    }

    /**
     * Tells whether the filter reported that it applied these options. Some filters may be
     * unable or unwilling to apply subsampling, so the caller must check this flag after
     * decoding.
     *
     * @return True if the filter applied the options specified by this instance, false
     * otherwise.
     */
    public boolean isFilterSubsampled()
    {
        return this.filterSubsampled;
    }

    /**
     * Called by filters to report whether they applied the options of this instance. The flag
     * starts out as false, so a filter usually only calls this with true.
     *
     * @param filterSubsampled Value specifying if the filter could meet the requested options.
     */
    void setFilterSubsampled(boolean filterSubsampled)
    {
        this.filterSubsampled = filterSubsampled;
    }

    /**
     * Variant used for shared instances: every public setter fails, and the "filter
     * subsampled" flag keeps the value given at construction time.
     */
    private static class FinalDecodeOptions extends DecodeOptions
    {
        private static final String READ_ONLY = "This instance may not be modified.";

        FinalDecodeOptions(boolean filterSubsampled)
        {
            // go through the superclass on purpose, the override below ignores the call
            super.setFilterSubsampled(filterSubsampled);
        }

        @Override
        public void setSourceRegion(Rectangle sourceRegion)
        {
            throw new UnsupportedOperationException(READ_ONLY);
        }

        @Override
        public void setSubsamplingX(int ssX)
        {
            throw new UnsupportedOperationException(READ_ONLY);
        }

        @Override
        public void setSubsamplingY(int ssY)
        {
            throw new UnsupportedOperationException(READ_ONLY);
        }

        @Override
        public void setSubsamplingOffsetX(int ssOffsetX)
        {
            throw new UnsupportedOperationException(READ_ONLY);
        }

        @Override
        public void setSubsamplingOffsetY(int ssOffsetY)
        {
            throw new UnsupportedOperationException(READ_ONLY);
        }

        @Override
        void setFilterSubsampled(boolean filterSubsampled)
        {
            // Silently ignore the request.
        }
    }
}
+ } + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java new file mode 100644 index 00000000000..b8eb65242a6 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.awt.image.BufferedImage; +import org.apache.pdfbox.cos.COSDictionary; +// import org.apache.pdfbox.pdmodel.graphics.color.PDJPXColorSpace; + +/** + * The result of a filter decode operation. Allows information such as color space to be + * extracted from image streams, and for stream parameters to be repaired during reading. + * + * @author John Hewson + */ +public final class DecodeResult +{ + private final COSDictionary parameters; +// private PDJPXColorSpace colorSpace; + private BufferedImage smask; + + DecodeResult(COSDictionary parameters) + { + this.parameters = parameters; + } + +// DecodeResult(COSDictionary parameters, PDJPXColorSpace colorSpace) +// { +// this.parameters = parameters; +// this.colorSpace = colorSpace; +// } + + /** + * Return a default DecodeResult. 
+ * + * @return a default instance of DecodeResult + */ + public static DecodeResult createDefault() + { + return new DecodeResult(new COSDictionary()); + } + + /** + * Returns the stream parameters, repaired using the embedded stream data. + * @return the repaired stream parameters, or an empty dictionary + */ + public COSDictionary getParameters() + { + return parameters; + } + + /** + * Returns the embedded JPX color space, if any. + * @return the embedded JPX color space, or null if there is none. + */ +// public PDJPXColorSpace getJPXColorSpace() +// { +// return colorSpace; +// } + + // Sets the JPX color space +// void setColorSpace(PDJPXColorSpace colorSpace) +// { +// this.colorSpace = colorSpace; +// } + + void setJPXSMask(BufferedImage smask) + { + this.smask = smask; + } + + public BufferedImage getJPXSMask() + { + return smask; + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java new file mode 100644 index 00000000000..42419c1c68a --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.zip.Deflater; +import javax.imageio.ImageIO; +import javax.imageio.ImageReader; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.io.RandomAccessInputStream; +import org.apache.pdfbox.io.RandomAccessOutputStream; +import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.RandomAccessReadBuffer; +import org.apache.pdfbox.io.RandomAccessReadWriteBuffer; + +/** + * A filter for stream data. + * + * @author Ben Litchfield + * @author John Hewson + */ +public abstract class Filter +{ + private static final Logger LOG = LogManager.getLogger(Filter.class); + + /** + * Compression Level System Property. Set this to a value from 0 to 9 to change the zlib deflate + * compression level used to compress /Flate streams. The default value is -1 which is + * {@link Deflater#DEFAULT_COMPRESSION}. To set maximum compression, use + * {@code System.setProperty(Filter.SYSPROP_DEFLATELEVEL, "9");} + */ + public static final String SYSPROP_DEFLATELEVEL = "org.apache.pdfbox.filter.deflatelevel"; + + /** + * Constructor. + */ + protected Filter() + { + } + + /** + * Decodes data, producing the original non-encoded data. 
+ * @param encoded the encoded byte stream + * @param decoded the stream where decoded data will be written + * @param parameters the parameters used for decoding + * @param index the index to the filter being decoded + * @return repaired parameters dictionary, or the original parameters dictionary + * @throws IOException if the stream cannot be decoded + */ + public abstract DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, + int index) throws IOException; + + /** + * Decodes data, with optional DecodeOptions. Not all filters support all options, and so + * callers should check the options' honored flag to test if they were applied. + * + * @param encoded the encoded byte stream + * @param decoded the stream where decoded data will be written + * @param parameters the parameters used for decoding + * @param index the index to the filter being decoded + * @param options additional options for decoding + * @return repaired parameters dictionary, or the original parameters dictionary + * @throws IOException if the stream cannot be decoded + */ + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, + int index, DecodeOptions options) throws IOException + { + return decode(encoded, decoded, parameters, index); + } + + /** + * Encodes data. 
+ * @param input the byte stream to encode + * @param encoded the stream where encoded data will be written + * @param parameters the parameters used for encoding + * @param index the index to the filter being encoded + * @throws IOException if the stream cannot be encoded + */ + public final void encode(InputStream input, OutputStream encoded, COSDictionary parameters, + int index) throws IOException + { + encode(input, encoded, parameters.asUnmodifiableDictionary()); + } + + // implemented in subclasses + protected abstract void encode(InputStream input, OutputStream encoded, + COSDictionary parameters) throws IOException; + + // gets the decode params for a specific filter index, this is used to + // normalise the DecodeParams entry so that it is always a dictionary + protected COSDictionary getDecodeParams(COSDictionary dictionary, int index) + { + COSBase filter = dictionary.getAlternateObjectFromDictionary( COSName.F, COSName.FILTER ); + COSBase obj = dictionary.getAlternateObjectFromDictionary( COSName.DP, COSName.DECODE_PARMS ); + if (filter instanceof COSName && obj instanceof COSDictionary) + { + // PDFBOX-3932: The PDF specification requires "If there is only one filter and that + // filter has parameters, DecodeParms shall be set to the filter’s parameter dictionary" + // but tests show that Adobe means "one filter name object". + return (COSDictionary)obj; + } + else if (filter instanceof COSArray && obj instanceof COSArray) + { + COSArray array = (COSArray)obj; + if (index < array.size()) + { + COSBase objAtIndex = array.getObject(index); + if (objAtIndex instanceof COSDictionary) + { + return (COSDictionary) objAtIndex; + } + } + } + else if (obj != null && !(filter instanceof COSArray || obj instanceof COSArray)) + { + LOG.error("Expected DecodeParams to be an Array or Dictionary but found {}", + obj.getClass().getName()); + } + return new COSDictionary(); + } + + /** + * Finds a suitable image reader for an image format. 
+ * + * @param formatName The image format to search for. + * @param errorCause The probably cause if something goes wrong. + * @return The image reader for the format. + * @throws MissingImageReaderException if no image reader is found. + */ + public static final ImageReader findImageReader(String formatName, String errorCause) + throws MissingImageReaderException + { + Iterator readers = ImageIO.getImageReadersByFormatName(formatName); + while (readers.hasNext()) + { + ImageReader reader = readers.next(); + if (reader != null) + { + return reader; + } + } + throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); + } + + /** + * Finds a suitable image raster reader for an image format. + * + * @param formatName The image format to search for. + * @param errorCause The probably cause if something goes wrong. + * @return The image reader for the format. + * @throws MissingImageReaderException if no image reader is found. + */ + public static final ImageReader findRasterReader(String formatName, String errorCause) + throws MissingImageReaderException + { + Iterator readers = ImageIO.getImageReadersByFormatName(formatName); + while (readers.hasNext()) + { + ImageReader reader = readers.next(); + if (reader != null) + { + if (reader.canReadRaster()) + { + return reader; + } + reader.dispose(); + } + } + throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); + } + + /** + * @return the ZIP compression level configured for PDFBox + */ + public static int getCompressionLevel() + { + int compressionLevel = Deflater.DEFAULT_COMPRESSION; + try + { + compressionLevel = Integer.parseInt(System.getProperty(Filter.SYSPROP_DEFLATELEVEL, "-1")); + } + catch (NumberFormatException ex) + { + LOG.warn(ex.getMessage(), ex); + } + return Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); + } + + /** + * Decodes data, with optional DecodeOptions. 
Not all filters support all options, and so callers should check the + * options' honored flag to test if they were applied. + * + * @param encoded the input stream holding the encoded data + * @param filterList list of filters to be used for decoding + * @param parameters the parameters used for decoding + * @param options additional options for decoding + * @param results list of optional decoding results for each filter + * @return the decoded stream data + * @throws IOException if the stream cannot be decoded + * @throws IllegalArgumentException if filterList is empty + */ + public static RandomAccessRead decode(InputStream encoded, List filterList, + COSDictionary parameters, DecodeOptions options, List results) + throws IOException + { + long length = parameters.getLong(COSName.LENGTH, + RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB); + if (filterList.isEmpty()) + { + throw new IllegalArgumentException("Empty filterList"); + } + if (filterList.size() > 1) + { + Set filterSet = new HashSet<>(filterList); + if (filterSet.size() != filterList.size()) + { + List reducedFilterList = new ArrayList<>(); + for (Filter filter : filterList) + { + if (!reducedFilterList.contains(filter)) + { + reducedFilterList.add(filter); + } + } + // replace origin list with the reduced one + filterList = reducedFilterList; + LOG.warn("Removed duplicated filter entries"); + } + } + InputStream input = encoded; + RandomAccessReadWriteBuffer randomAccessWriteBuffer = null; + OutputStream output = null; + // apply filters + for (int i = 0; i < filterList.size(); i++) + { + if (i > 0) + { + randomAccessWriteBuffer.seek(0); + input = new RandomAccessInputStream(randomAccessWriteBuffer); + length = randomAccessWriteBuffer.length(); + } + // we don't know the size of the decoded stream, just estimate a 4 times bigger size than the encoded stream + // use the estimated stream size as chunk size, use the default chunk size as limit to avoid to big values + if (length <= 0 || length >= 
RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB / 4) + { + length = RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB; + } + else + { + length = length * 4; + } + randomAccessWriteBuffer = new RandomAccessReadWriteBuffer((int) length); + output = new RandomAccessOutputStream(randomAccessWriteBuffer); + try + { + DecodeResult result = filterList.get(i).decode(input, output, parameters, i, + options); + if (results != null) + { + results.add(result); + } + } + finally + { + IOUtils.closeQuietly(input); + } + } + randomAccessWriteBuffer.seek(0); + return randomAccessWriteBuffer; + } + +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java new file mode 100644 index 00000000000..38fd0b32518 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import org.apache.pdfbox.cos.COSName; + +/** + * Factory for Filter classes. 
+ * + * @author Ben Litchfield + */ +public final class FilterFactory +{ + /** + * Singleton instance. + */ + public static final FilterFactory INSTANCE = new FilterFactory(); + + private final Map filters = new HashMap<>(); + + private FilterFactory() + { + Filter flate = new FlateFilter(); + Filter dct = new DCTFilter(); + Filter ccittFax = new CCITTFaxFilter(); + Filter lzw = new LZWFilter(); + Filter asciiHex = new ASCIIHexFilter(); + Filter ascii85 = new ASCII85Filter(); + Filter runLength = new RunLengthDecodeFilter(); + Filter crypt = new CryptFilter(); + Filter jpx = new JPXFilter(); + Filter jbig2 = new JBIG2Filter(); + + filters.put(COSName.FLATE_DECODE, flate); + filters.put(COSName.FLATE_DECODE_ABBREVIATION, flate); + filters.put(COSName.DCT_DECODE, dct); + filters.put(COSName.DCT_DECODE_ABBREVIATION, dct); + filters.put(COSName.CCITTFAX_DECODE, ccittFax); + filters.put(COSName.CCITTFAX_DECODE_ABBREVIATION, ccittFax); + filters.put(COSName.LZW_DECODE, lzw); + filters.put(COSName.LZW_DECODE_ABBREVIATION, lzw); + filters.put(COSName.ASCII_HEX_DECODE, asciiHex); + filters.put(COSName.ASCII_HEX_DECODE_ABBREVIATION, asciiHex); + filters.put(COSName.ASCII85_DECODE, ascii85); + filters.put(COSName.ASCII85_DECODE_ABBREVIATION, ascii85); + filters.put(COSName.RUN_LENGTH_DECODE, runLength); + filters.put(COSName.RUN_LENGTH_DECODE_ABBREVIATION, runLength); + filters.put(COSName.CRYPT, crypt); + filters.put(COSName.JPX_DECODE, jpx); + filters.put(COSName.JBIG2_DECODE, jbig2); + } + + /** + * Returns a filter instance given its name as a string. + * @param filterName the name of the filter to retrieve + * @return the filter that matches the name + * @throws IOException if the filter name was invalid + */ + public Filter getFilter(String filterName) throws IOException + { + return getFilter(COSName.getPDFName(filterName)); + } + + /** + * Returns a filter instance given its COSName. 
+ * @param filterName the name of the filter to retrieve + * @return the filter that matches the name + * @throws IOException if the filter name was invalid + */ + public Filter getFilter(COSName filterName) throws IOException + { + Filter filter = filters.get(filterName); + if (filter == null) + { + throw new IOException("Invalid filter: " + filterName); + } + return filter; + } + + // returns all available filters, for testing + Collection getAllFilters() + { + return filters.values(); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java new file mode 100644 index 00000000000..d3cbde5ec46 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import org.apache.pdfbox.cos.COSDictionary; + +/** + * Decompresses data encoded using the zlib/deflate compression method, + * reproducing the original text or binary data. 
+ * + * @author Ben Litchfield + * @author Marcel Kammer + */ +final class FlateFilter extends Filter +{ + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + final COSDictionary decodeParams = getDecodeParams(parameters, index); + + try (FlateFilterDecoderStream decoderStream = new FlateFilterDecoderStream(encoded)) + { + OutputStream wrapPredictor = Predictor.wrapPredictor(decoded, decodeParams); + decoderStream.transferTo(wrapPredictor); + wrapPredictor.flush(); + } + return new DecodeResult(parameters); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + int compressionLevel = getCompressionLevel(); + Deflater deflater = new Deflater(compressionLevel); + try (DeflaterOutputStream out = new DeflaterOutputStream(encoded,deflater)) + { + input.transferTo(out); + } + encoded.flush(); + deflater.end(); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java new file mode 100644 index 00000000000..1ca4dfb9114 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +/** + * Stream based decoder for the flate filter which uses zlib/deflate compression. + * + * Use Inflater instead of InflateInputStream to avoid an EOFException due to a probably missing Z_STREAM_END, see + * PDFBOX-1232 for details. + * + */ +public final class FlateFilterDecoderStream extends FilterInputStream +{ + private static final Logger LOG = LogManager.getLogger(FlateFilterDecoderStream.class); + + private boolean isEOF = false; + private int currentDataIndex = 0; + private int bytesDecoded = 0; + + private byte[] buffer = new byte[2048]; + private byte[] decodedData = new byte[4096]; + // use nowrap mode to bypass zlib-header and checksum to avoid a DataFormatException + private final Inflater inflater = new Inflater(true); + + /** + * Constructor. + * + * @param inputStream The input stream to actually read from. 
+ */ + public FlateFilterDecoderStream(InputStream inputStream) throws IOException + { + super(inputStream); + // skip zlib header + in.read(); + in.read(); + } + + private boolean fetch() throws IOException + { + currentDataIndex = 0; + if (isEOF || inflater.finished()) + { + isEOF = true; + bytesDecoded = 0; + return false; + } + if (inflater.needsInput()) + { + int bytesRead = in.read(buffer); + if (bytesRead > -1) + { + inflater.setInput(buffer, 0, bytesRead); + } + else + { + isEOF = true; + return false; + } + } + try + { + // overwrite formerly read bytes + if (bytesDecoded > 0) + { + Arrays.fill(decodedData, 0, bytesDecoded, (byte) 0); + } + bytesDecoded = inflater.inflate(decodedData); + } + catch (DataFormatException exception) + { + isEOF = true; + // check if some bytes could be read at all + int countZeros = 0; + for (int i = 0; i < decodedData.length; i++) + { + if (decodedData[i] == 0) + { + countZeros++; + } + else + { + countZeros = 0; + } + } + bytesDecoded = decodedData.length - countZeros; + // don't throw an exception, use the already read data or an empty stream + LOG.warn("FlateFilter: premature end of stream due to a DataFormatException = {}", + exception.getMessage()); + return bytesDecoded > 0; + } + return true; + } + + /** + * This will read the next byte from the stream. + * + * @return The next byte read from the stream. + * + * @throws IOException If there is an error reading from the wrapped stream. + */ + @Override + public int read() throws IOException + { + if (isEOF) + { + return -1; + } + if (currentDataIndex == bytesDecoded && !fetch()) + { + return -1; + } + return decodedData[currentDataIndex++] & 0xFF; + } + + /** + * This will read a chunk of data. + * + * @param data The buffer to write data to. + * @param offset The offset into the data stream. + * @param length The number of byte to attempt to read. + * + * @return The number of bytes actually read. 
+ * + * @throws IOException If there is an error reading data from the underlying stream. + */ + @Override + public int read(byte[] data, int offset, int length) throws IOException + { + if (isEOF) + { + return -1; + } + int numberOfBytesRead = 0; + while (numberOfBytesRead < length) + { + int available = bytesDecoded - currentDataIndex; + if (available > 0) + { + int bytes2Copy = Math.min(length - numberOfBytesRead, available); + System.arraycopy(decodedData, currentDataIndex, data, numberOfBytesRead + offset, + bytes2Copy); + currentDataIndex += bytes2Copy; + numberOfBytesRead += bytes2Copy; + } + else if (!fetch()) + { + break; + } + } + return numberOfBytesRead; + } + + /** + * This will close the underlying stream and release any resources. + * + * @throws IOException If there is an error closing the underlying stream. + */ + @Override + public void close() throws IOException + { + inflater.end(); + super.close(); + } + + /** + * mark/reset isn't supported. + * + * @return always false. + */ + @Override + public boolean markSupported() + { + return false; + } + + /** + * Unsupported. + * + * @param n ignored. + * + * @return always zero. + */ + @Override + public long skip(long n) + { + return 0; + } + + /** + * Unsupported. + * + * @return always zero. + */ + @Override + public int available() + { + return 0; + } + + /** + * Unsupported. + * + * @param readlimit ignored. + */ + @Override + public synchronized void mark(int readlimit) + { + } + + /** + * Unsupported. + * + * @throws IOException always throw as reset is an unsupported feature. 
+ */ + @Override + public synchronized void reset() throws IOException + { + throw new IOException("reset is not supported"); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java new file mode 100644 index 00000000000..11ad49e1498 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.pdfbox.cos.COSDictionary; + +/** + * The IdentityFilter filter passes the data through without any modifications. + * It is defined in section 7.6.5 of the PDF 1.7 spec and also stated in table 26. 
+ * + * @author Adam Nichols + */ +final class IdentityFilter extends Filter +{ + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) + throws IOException + { + encoded.transferTo(decoded); + decoded.flush(); + return new DecodeResult(parameters); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + input.transferTo(encoded); + encoded.flush(); + } +} \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java new file mode 100644 index 00000000000..be685c311c5 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos.filter; + +import java.awt.image.BufferedImage; +import java.awt.image.DataBuffer; +import java.awt.image.DataBufferByte; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SequenceInputStream; +import javax.imageio.ImageIO; +import javax.imageio.ImageReadParam; +import javax.imageio.ImageReader; +import javax.imageio.stream.ImageInputStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSStream; + +/** + * Decompresses data encoded using the JBIG2 standard, reproducing the original + * monochrome (1 bit per pixel) image data (or an approximation of that data). + * + * Requires a JBIG2 plugin for Java Image I/O to be installed. A known working + * plug-in is the Apache PDFBox JBIG2 plugin. + * + * @author Timo Boehme + */ +final class JBIG2Filter extends Filter +{ + private static final Logger LOG = LogManager.getLogger(JBIG2Filter.class); + + private static boolean levigoLogged = false; + + private static synchronized void logLevigoDonated() + { + if (!levigoLogged) + { + LOG.info("The Levigo JBIG2 plugin has been donated to the Apache Foundation"); + LOG.info("and an improved version is available for download at " + + "https://pdfbox.apache.org/download.cgi"); + levigoLogged = true; + } + } + + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index, DecodeOptions options) throws IOException + { + ImageReader reader = findImageReader("JBIG2", "jbig2-imageio is not installed"); + if (reader.getClass().getName().contains("levigo")) + { + logLevigoDonated(); + } + + int bits = parameters.getInt(COSName.BITS_PER_COMPONENT, 1); + COSDictionary params = getDecodeParams(parameters, index); + + ImageReadParam irp = reader.getDefaultReadParam(); + 
irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), + options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); + irp.setSourceRegion(options.getSourceRegion()); + options.setFilterSubsampled(true); + + InputStream source = encoded; + if (params != null) + { + COSStream globals = params.getCOSStream(COSName.JBIG2_GLOBALS); + if (globals != null) + { + source = new SequenceInputStream( COSInputStream.create( globals ), encoded); + } + } + + try (ImageInputStream iis = ImageIO.createImageInputStream(source)) + { + reader.setInput(iis); + + BufferedImage image; + try + { + image = reader.read(0, irp); + } + catch (Exception e) + { + // wrap and rethrow any exceptions + throw new IOException("Could not read JBIG2 image", e); + } + + // I am assuming since JBIG2 is always black and white + // depending on your renderer this might or might be needed + if (image.getColorModel().getPixelSize() != bits) + { + if (bits != 1) + { + LOG.warn("Attempting to handle a JBIG2 with more than 1-bit depth"); + } + BufferedImage packedImage = new BufferedImage(image.getWidth(), image.getHeight(), + BufferedImage.TYPE_BYTE_BINARY); +// Graphics graphics = packedImage.getGraphics(); +// graphics.drawImage(image, 0, 0, null); +// graphics.dispose(); + image = packedImage; + } + + DataBuffer dBuf = image.getData().getDataBuffer(); + if (dBuf.getDataType() == DataBuffer.TYPE_BYTE) + { + decoded.write(((DataBufferByte) dBuf).getData()); + } + else + { + throw new IOException("Unexpected image buffer type"); + } + } + finally + { + reader.dispose(); + } + + return new DecodeResult(parameters); + } + + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + throw 
new UnsupportedOperationException("JBIG2 encoding not implemented"); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java new file mode 100644 index 00000000000..8f41f9e97e4 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos.filter; + +// import java.awt.color.ColorSpace; +import java.awt.image.BufferedImage; +import java.awt.image.DataBuffer; +import java.awt.image.DataBufferByte; +import java.awt.image.DataBufferUShort; +import java.awt.image.IndexColorModel; +import java.awt.image.MultiPixelPackedSampleModel; +import java.awt.image.Raster; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import javax.imageio.ImageReadParam; +import javax.imageio.ImageReader; +import javax.imageio.stream.ImageInputStream; +import javax.imageio.stream.MemoryCacheImageInputStream; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +// import org.apache.pdfbox.pdmodel.graphics.color.PDJPXColorSpace; + +/** + * Decompress data encoded using the wavelet-based JPEG 2000 standard, + * reproducing the original data. + * + * Requires the Java Advanced Imaging (JAI) Image I/O Tools to be installed from java.net, see + * jai-imageio. + * Alternatively you can build from the source available in the + * jai-imageio-core svn repo. + * + * Mac OS X users should download the tar.gz file for linux and unpack it to obtain the + * required jar files. The .so file can be safely ignored. 
+ * + * @author John Hewson + * @author Timo Boehme + */ +public final class JPXFilter extends Filter +{ + /** + * {@inheritDoc} + */ + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index, DecodeOptions options) throws IOException + { + DecodeResult result = new DecodeResult(new COSDictionary()); + result.getParameters().addAll(parameters); + BufferedImage image = readJPX(encoded, options, result); + + Raster raster = image.getRaster(); + switch (raster.getDataBuffer().getDataType()) + { + case DataBuffer.TYPE_BYTE: + DataBufferByte byteBuffer = (DataBufferByte) raster.getDataBuffer(); + decoded.write(byteBuffer.getData()); + return result; + + case DataBuffer.TYPE_USHORT: + DataBufferUShort wordBuffer = (DataBufferUShort) raster.getDataBuffer(); + for (short w : wordBuffer.getData()) + { + decoded.write(w >> 8); + decoded.write(w); + } + return result; + + case DataBuffer.TYPE_INT: + // not yet used (as of October 2018) but works as fallback + // if we decide to convert to BufferedImage.TYPE_INT_RGB + int[] ar = new int[raster.getNumBands()]; + for (int y = 0; y < image.getHeight(); ++y) + { + for (int x = 0; x < image.getWidth(); ++x) + { + raster.getPixel(x, y, ar); + for (int i = 0; i < ar.length; ++i) + { + decoded.write(ar[i]); + } + } + } + return result; + + default: + throw new IOException("Data type " + raster.getDataBuffer().getDataType() + " not implemented"); + } + } + + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); + } + + // try to read using JAI Image I/O + private BufferedImage readJPX(InputStream input, DecodeOptions options, DecodeResult result) throws IOException + { + ImageReader reader = findImageReader("JPEG2000", "Java Advanced Imaging (JAI) Image I/O Tools are not installed"); + // PDFBOX-4121: 
ImageIO.createImageInputStream() is much slower + try (ImageInputStream iis = new MemoryCacheImageInputStream(input)) + { + reader.setInput(iis, true, true); + ImageReadParam irp = reader.getDefaultReadParam(); + irp.setSourceRegion(options.getSourceRegion()); + irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), + options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); + options.setFilterSubsampled(true); + + BufferedImage image; + try + { + image = reader.read(0, irp); + } + catch (Exception e) + { + // wrap and rethrow any exceptions + throw new IOException("Could not read JPEG 2000 (JPX) image", e); + } + + COSDictionary parameters = result.getParameters(); + + // "If the image stream uses the JPXDecode filter, this entry is optional + // and shall be ignored if present" + // + // note that indexed color spaces make the BPC logic tricky, see PDFBOX-2204 + int bpc = image.getColorModel().getPixelSize() / image.getRaster().getNumBands(); + parameters.setInt(COSName.BITS_PER_COMPONENT, bpc); + + // "Decode shall be ignored, except in the case where the image is treated as a mask" + if (!parameters.getBoolean(COSName.IMAGE_MASK, false)) + { + parameters.setItem(COSName.DECODE, null); + } + + // override dimensions, see PDFBOX-1735 + parameters.setInt(COSName.WIDTH, reader.getWidth(0)); + parameters.setInt(COSName.HEIGHT, reader.getHeight(0)); + + // extract embedded color space + if (!parameters.containsKey(COSName.COLORSPACE)) + { + if (image.getSampleModel() instanceof MultiPixelPackedSampleModel && + image.getColorModel().getPixelSize() == 1 && + image.getRaster().getNumBands() == 1 && + image.getColorModel() instanceof IndexColorModel) + { + // PDFBOX-4326: + // force CS_GRAY colorspace because colorspace in IndexColorModel + // has 3 colors despite that there is only 1 color per pixel + // in raster +// result.setColorSpace(new PDJPXColorSpace(ColorSpace.getInstance(ColorSpace.CS_GRAY))); + } +// else if 
(image.getTransparency() == Transparency.TRANSLUCENT && +// parameters.getInt(COSName.SMASK_IN_DATA) > 0) +// { +// // PDFBOX-5657: save the soft mask in DecodeResult and use it later +// // we never had SMaskInData = 2, maybe more work is needed +// BufferedImage smask = new BufferedImage( +// image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); +// smask.setData(image.getAlphaRaster()); +// result.setJPXSMask(smask); +// // create opaque image +// BufferedImage bim = new BufferedImage( +// image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB); +// Graphics2D g2d = (Graphics2D) bim.getGraphics(); +// g2d.drawImage(image, 0, 0, null); +// g2d.dispose(); +// image = bim; +// result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); +// } +// else +// { +// result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); +// } + } + + return image; + } + finally + { + reader.dispose(); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + throw new UnsupportedOperationException("JPX encoding not implemented"); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java new file mode 100644 index 00000000000..926ea8c236b --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java @@ -0,0 +1,296 @@ +/* + * Copyright 2014 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import javax.imageio.stream.MemoryCacheImageInputStream; +import javax.imageio.stream.MemoryCacheImageOutputStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; + +/** + * + * This is the filter used for the LZWDecode filter. + * + * @author Ben Litchfield + * @author Tilman Hausherr + */ +public class LZWFilter extends Filter +{ + /** + * Log instance. + */ + private static final Logger LOG = LogManager.getLogger(LZWFilter.class); + + /** + * The LZW clear table code. + */ + public static final long CLEAR_TABLE = 256; + + /** + * The LZW end of data code. 
+ */ + public static final long EOD = 257; + + //BEWARE: codeTable must be local to each method, because there is only + // one instance of each filter + + /** + * {@inheritDoc} + */ + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + COSDictionary decodeParams = getDecodeParams(parameters, index); + boolean earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1) != 0; + doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange); + return new DecodeResult(parameters); + } + + private static void doLZWDecode(InputStream encoded, OutputStream decoded, boolean earlyChange) throws IOException + { + List codeTable = new ArrayList<>(); + int chunk = 9; + final MemoryCacheImageInputStream in = new MemoryCacheImageInputStream(encoded); + long nextCommand; + long prevCommand = -1; + + try + { + while ((nextCommand = in.readBits(chunk)) != EOD) + { + if (nextCommand == CLEAR_TABLE) + { + chunk = 9; + codeTable = createCodeTable(); + prevCommand = -1; + } + else + { + if (nextCommand < codeTable.size()) + { + byte[] data = codeTable.get((int) nextCommand); + byte firstByte = data[0]; + decoded.write(data); + if (prevCommand != -1) + { + checkIndexBounds(codeTable, prevCommand, in); + data = codeTable.get((int) prevCommand); + byte[] newData = Arrays.copyOf(data, data.length + 1); + newData[data.length] = firstByte; + codeTable.add(newData); + } + } + else + { + checkIndexBounds(codeTable, prevCommand, in); + byte[] data = codeTable.get((int) prevCommand); + byte[] newData = Arrays.copyOf(data, data.length + 1); + newData[data.length] = data[0]; + decoded.write(newData); + codeTable.add(newData); + } + + chunk = calculateChunk(codeTable.size(), earlyChange); + prevCommand = nextCommand; + } + } + } + catch (EOFException ex) + { + LOG.warn("Premature EOF in LZW stream, EOD code missing", ex); + } + decoded.flush(); + } + + private static void 
checkIndexBounds(List codeTable, long index, MemoryCacheImageInputStream in) + throws IOException + { + if (index < 0) + { + throw new IOException("negative array index: " + index + " near offset " + + in.getStreamPosition()); + } + if (index >= codeTable.size()) + { + throw new IOException("array index overflow: " + index + + " >= " + codeTable.size() + " near offset " + + in.getStreamPosition()); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected void encode(InputStream rawData, OutputStream encoded, COSDictionary parameters) + throws IOException + { + List codeTable = createCodeTable(); + int chunk = 9; + + byte[] inputPattern = null; + try (MemoryCacheImageOutputStream out = new MemoryCacheImageOutputStream(encoded)) + { + out.writeBits(CLEAR_TABLE, chunk); + int foundCode = -1; + int r; + while ((r = rawData.read()) != -1) + { + byte by = (byte) r; + if (inputPattern == null) + { + inputPattern = new byte[] { by }; + foundCode = by & 0xff; + } + else + { + inputPattern = Arrays.copyOf(inputPattern, inputPattern.length + 1); + inputPattern[inputPattern.length - 1] = by; + int newFoundCode = findPatternCode(codeTable, inputPattern); + if (newFoundCode == -1) + { + // use previous + chunk = calculateChunk(codeTable.size() - 1, true); + out.writeBits(foundCode, chunk); + // create new table entry + codeTable.add(inputPattern); + + if (codeTable.size() == 4096) + { + // code table is full + out.writeBits(CLEAR_TABLE, chunk); + codeTable = createCodeTable(); + } + + inputPattern = new byte[] { by }; + foundCode = by & 0xff; + } + else + { + foundCode = newFoundCode; + } + } + } + if (foundCode != -1) + { + chunk = calculateChunk(codeTable.size() - 1, true); + out.writeBits(foundCode, chunk); + } + + // PPDFBOX-1977: the decoder wouldn't know that the encoder would output + // an EOD as code, so he would have increased his own code table and + // possibly adjusted the chunk. 
Therefore, the encoder must behave as + // if the code table had just grown and thus it must be checked it is + // needed to adjust the chunk, based on an increased table size parameter + chunk = calculateChunk(codeTable.size(), true); + + out.writeBits(EOD, chunk); + + // pad with 0 + out.writeBits(0, 7); + + // must do or file will be empty :-( + out.flush(); + } + } + + /** + * Find a matching pattern in the code table. + * + * @param codeTable The LZW code table. + * @param pattern The pattern to be searched for. + * @return The index of the matching pattern (0-255 for a single byte pattern) or -1 if nothing is found. + */ + private static int findPatternCode(List codeTable, byte[] pattern) + { + // for the first 256 entries, index matches the unsigned byte value + if (pattern.length == 1) + { + // mask to avoid sign extension: bytes >= 0x80 must not give a negative code (0xFF would read as -1, i.e. "not found") + return pattern[0] & 0xff; + } + + // no need to test the first 256 + 2 entries (single bytes and the two null marker slots) against longer patterns + for (int i = 258; i < codeTable.size(); i++) + { + if (Arrays.equals(codeTable.get(i), pattern)) + { + return i; + } + } + + return -1; + } + + /** + * Init the code table with 1 byte entries and the CLEAR_TABLE and EOD markers. + */ + private static List createCodeTable() + { + List codeTable = new ArrayList<>(4096); + codeTable.addAll(INITIAL_CODE_TABLE); + return codeTable; + } + + private static final List INITIAL_CODE_TABLE = createInitialCodeTable(); + + private static List createInitialCodeTable() + { + List codeTable = new ArrayList<>(258); + for (int i = 0; i < 256; ++i) + { + codeTable.add(new byte[] { (byte) (i & 0xFF) }); + } + codeTable.add(null); // 256 CLEAR_TABLE + codeTable.add(null); // 257 EOD + return codeTable; + } + + /** + * Calculate the appropriate chunk size + * + * @param tabSize the size of the code table + * @param earlyChange true for early chunk increase + * + * @return a value between 9 and 12 + */ + private static int calculateChunk(int tabSize, boolean earlyChange) + { + int i = tabSize + (earlyChange ? 
1 : 0); + if (i >= 2048) + { + return 12; + } + if (i >= 1024) + { + return 11; + } + if (i >= 512) + { + return 10; + } + return 9; + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java new file mode 100644 index 00000000000..0ff744829a0 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; + +/** + * Thrown when a required JAI ImageReader is missing. 
+ * + * @author John Hewson + */ +public class MissingImageReaderException extends IOException +{ + /** + * + */ + private static final long serialVersionUID = 1L; + + public MissingImageReaderException(String message) + { + super(message); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java new file mode 100644 index 00000000000..b3f6f922129 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java @@ -0,0 +1,366 @@ +/* + * Copyright 2014 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.filter; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; + +/** + * Helper class to contain predictor decoding used by Flate and LZW filter. + * To see the history, look at the FlateFilter class. + */ +public final class Predictor +{ + + private Predictor() + { + } + + /** + * Decodes a single line of data in-place. + * @param predictor Predictor value for the current line + * @param colors Number of color components, from decode parameters. + * @param bitsPerComponent Number of bits per components, from decode parameters. + * @param columns Number samples in a row, from decode parameters. 
+ * @param actline Current (active) line to decode. Data will be decoded in-place, + * i.e. - the contents of this buffer will be modified. + * @param lastline The previous decoded line. When decoding the first line, this + * parameter should be an empty byte array of the same length as + * actline. + */ + static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, byte[] actline, byte[] lastline) + { + if (predictor == 1) + { + // no prediction + return; + } + final int bitsPerPixel = colors * bitsPerComponent; + final int bytesPerPixel = (bitsPerPixel + 7) / 8; + final int rowlength = actline.length; + switch (predictor) + { + case 2: + // PRED TIFF SUB + if (bitsPerComponent == 8) + { + // for 8 bits per component it is the same algorithm as PRED SUB of PNG format + for (int p = bytesPerPixel; p < rowlength; p++) + { + int sub = actline[p] & 0xff; + int left = actline[p - bytesPerPixel] & 0xff; + actline[p] = (byte) (sub + left); + } + break; + } + if (bitsPerComponent == 16) + { + for (int p = bytesPerPixel; p < rowlength - 1; p += 2) + { + int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); + int left = (((actline[p - bytesPerPixel] & 0xff) << 8) + + (actline[p - bytesPerPixel + 1] & 0xff)); + actline[p] = (byte) (((sub + left) >> 8) & 0xff); + actline[p + 1] = (byte) ((sub + left) & 0xff); + } + break; + } + if (bitsPerComponent == 1 && colors == 1) + { + // bytesPerPixel cannot be used: + // "A row shall occupy a whole number of bytes, rounded up if necessary. + // Samples and their components shall be packed into bytes + // from high-order to low-order bits." 
+ for (int p = 0; p < rowlength; p++) + { + for (int bit = 7; bit >= 0; --bit) + { + int sub = (actline[p] >> bit) & 1; + if (p == 0 && bit == 7) + { + continue; + } + int left; + if (bit == 7) + { + // use bit #0 from previous byte + left = actline[p - 1] & 1; + } + else + { + // use "previous" bit + left = (actline[p] >> (bit + 1)) & 1; + } + if (((sub + left) & 1) == 0) + { + // reset bit + actline[p] &= ~(1 << bit); + } + else + { + // set bit + actline[p] |= 1 << bit; + } + } + } + break; + } + // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too + int elements = columns * colors; + for (int p = colors; p < elements; ++p) + { + int bytePosSub = p * bitsPerComponent / 8; + int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; + int bytePosLeft = (p - colors) * bitsPerComponent / 8; + int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; + + int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); + int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); + actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left); + } + break; + case 10: + // PRED NONE + // do nothing + break; + case 11: + // PRED SUB + for (int p = bytesPerPixel; p < rowlength; p++) + { + int sub = actline[p]; + int left = actline[p - bytesPerPixel]; + actline[p] = (byte) (sub + left); + } + break; + case 12: + // PRED UP + for (int p = 0; p < rowlength; p++) + { + int up = actline[p] & 0xff; + int prior = lastline[p] & 0xff; + actline[p] = (byte) ((up + prior) & 0xff); + } + break; + case 13: + // PRED AVG + for (int p = 0; p < rowlength; p++) + { + int avg = actline[p] & 0xff; + int left = p - bytesPerPixel >= 0 ? 
actline[p - bytesPerPixel] & 0xff : 0; + int up = lastline[p] & 0xff; + actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); + } + break; + case 14: + // PRED PAETH + for (int p = 0; p < rowlength; p++) + { + int paeth = actline[p] & 0xff; + int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left + int b = lastline[p] & 0xff;// upper + int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft + int value = a + b - c; + int absa = Math.abs(value - a); + int absb = Math.abs(value - b); + int absc = Math.abs(value - c); + + if (absa <= absb && absa <= absc) + { + actline[p] = (byte) ((paeth + a) & 0xff); + } + else if (absb <= absc) + { + actline[p] = (byte) ((paeth + b) & 0xff); + } + else + { + actline[p] = (byte) ((paeth + c) & 0xff); + } + } + break; + default: + break; + } + } + + static int calculateRowLength(int colors, int bitsPerComponent, int columns) + { + final int bitsPerPixel = colors * bitsPerComponent; + return (columns * bitsPerPixel + 7) / 8; + } + + // extract bitSize bits of 'by', starting at bit position startBit (counted from the least significant bit) + static int getBitSeq(int by, int startBit, int bitSize) + { + int mask = ((1 << bitSize) - 1); + return (by >>> startBit) & mask; + } + + // overwrite bitSize bits of 'by' at bit position startBit with val (truncated to bitSize bits) and return the result + static int calcSetBitSeq(int by, int startBit, int bitSize, int val) + { + int mask = ((1 << bitSize) - 1); + int truncatedVal = val & mask; + mask = ~(mask << startBit); + return (by & mask) | (truncatedVal << startBit); + } + + /** + * Wraps an OutputStream in a predictor decoding stream as necessary. + * If no predictor is specified by the parameters, the original stream is returned as is. + * + * @param out The stream to which decoded data should be written + * @param decodeParams Decode parameters for the stream + * @return An OutputStream is returned, which will write decoded data + * into the given stream. If no predictor is specified, the original stream is returned. 
+ */ + static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams) + { + int predictor = decodeParams.getInt(COSName.PREDICTOR); + if (predictor > 1) + { + int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); + int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); + int columns = decodeParams.getInt(COSName.COLUMNS, 1); + + return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns); + } + else + { + return out; + } + } + + /** + * Output stream that implements predictor decoding. Data is buffered until a complete + * row is available, which is then decoded and written to the underlying stream. + * The previous row is retained for decoding the next row. + */ + private static final class PredictorOutputStream extends FilterOutputStream + { + // current predictor type + private int predictor; + // image decode parameters + private final int colors; + private final int bitsPerComponent; + private final int columns; + private final int rowLength; + // PNG predictor (predictor>=10) means every row has a (potentially different) + // predictor value + private final boolean predictorPerRow; + + // data buffers + private byte[] currentRow; + private byte[] lastRow; + // amount of data in the current row + private int currentRowData = 0; + // was the per-row predictor value read for the current row being processed + private boolean predictorRead = false; + + PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, int columns) + { + super(out); + this.predictor = predictor; + this.colors = colors; + this.bitsPerComponent = bitsPerComponent; + this.columns = columns; + this.rowLength = calculateRowLength(colors, bitsPerComponent, columns); + this.predictorPerRow = predictor >= 10; + currentRow = new byte[rowLength]; + lastRow = new byte[rowLength]; + } + + @Override + public void write(byte[] bytes) throws IOException + { + write(bytes, 0, bytes.length); + } + + @Override + 
public void write(byte[] bytes, int off, int len) throws IOException + { + int currentOffset = off; + int maxOffset = currentOffset + len; + while (currentOffset < maxOffset) + { + if (predictorPerRow && currentRowData == 0 && !predictorRead) + { + // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) + // read per line predictor, add 10 to treat value 0 as 10, 1 as 11, ... + predictor = bytes[currentOffset] + 10; + currentOffset++; + predictorRead = true; + } + else + { + int toRead = Math.min(rowLength - currentRowData, maxOffset - currentOffset); + System.arraycopy(bytes, currentOffset, currentRow, currentRowData, toRead); + currentRowData += toRead; + currentOffset += toRead; + + // current row is filled, decode it, write it to underlying stream, + // and reset the state. + if (currentRowData == currentRow.length) + { + decodeAndWriteRow(); + } + } + } + } + + private void decodeAndWriteRow() throws IOException + { + decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow); + out.write(currentRow); + flipRows(); + } + + /** + * Flips the row buffers (to avoid copying), and resets the current-row index + * and predictorRead flag + */ + private void flipRows() + { + byte[] temp = lastRow; + lastRow = currentRow; + currentRow = temp; + currentRowData = 0; + predictorRead = false; + } + + @Override + public void flush() throws IOException + { + // The last row is allowed to be incomplete, and should be completed with zeros. 
+ if (currentRowData > 0) + { + Arrays.fill(currentRow, currentRowData, rowLength, (byte)0); + decodeAndWriteRow(); + } + super.flush(); + } + + @Override + public void write(int i) throws IOException + { + throw new UnsupportedOperationException("Not supported"); + } + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java new file mode 100644 index 00000000000..cb6665f7f46 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pdfbox.cos.filter; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.pdfbox.cos.COSDictionary; + +/** + * Decompresses data encoded using a byte-oriented run-length encoding algorithm, + * reproducing the original text or binary data + * + * @author Ben Litchfield + * @author Tilman Hausherr + */ +final class RunLengthDecodeFilter extends Filter +{ + private static final int RUN_LENGTH_EOD = 128; + + @Override + public DecodeResult decode(InputStream encoded, OutputStream decoded, + COSDictionary parameters, int index) throws IOException + { + int dupAmount; + byte[] buffer = new byte[128]; + while ((dupAmount = encoded.read()) != -1 && dupAmount != RUN_LENGTH_EOD) + { + if (dupAmount <= 127) + { + int amountToCopy = dupAmount + 1; + int compressedRead; + while (amountToCopy > 0) + { + compressedRead = encoded.read(buffer, 0, amountToCopy); + // EOF reached? + if (compressedRead == -1) + { + break; + } + decoded.write(buffer, 0, compressedRead); + amountToCopy -= compressedRead; + } + } + else + { + int dupByte = encoded.read(); + // EOF reached? + if (dupByte == -1) + { + break; + } + for (int i = 0; i < 257 - dupAmount; i++) + { + decoded.write(dupByte); + } + } + } + return new DecodeResult(parameters); + } + + @Override + protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) + throws IOException + { + // Not used in PDFBox except for testing the decoder. 
+ int lastVal = -1; + int byt; + int count = 0; + boolean equality = false; + + // buffer for "unequal" runs, size between 2 and 128 + byte[] buf = new byte[128]; + + while ((byt = input.read()) != -1) + { + if (lastVal == -1) + { + // first time + lastVal = byt; + count = 1; + } + else + { + if (count == 128) + { + if (equality) + { + // max length of equals + encoded.write(129); // = 257 - 128 + encoded.write(lastVal); + } + else + { + // max length of unequals + encoded.write(127); + encoded.write(buf, 0, 128); + } + equality = false; + lastVal = byt; + count = 1; + } + else if (count == 1) + { + if (byt == lastVal) + { + equality = true; + } + else + { + buf[0] = (byte) lastVal; + buf[1] = (byte) byt; + lastVal = byt; + } + count = 2; + } + else + { + // 1 < count < 128 + if (byt == lastVal) + { + if (equality) + { + ++count; + } + else + { + // write all we got except the last + encoded.write(count - 2); + encoded.write(buf, 0, count - 1); + count = 2; + equality = true; + } + } + else + { + if (equality) + { + // equality ends here + encoded.write(257 - count); + encoded.write(lastVal); + equality = false; + count = 1; + } + else + { + buf[count] = (byte) byt; + ++count; + } + lastVal = byt; + } + } + } + } + if (count > 0) + { + if (count == 1) + { + encoded.write(0); + encoded.write(lastVal); + } + else if (equality) + { + encoded.write(257 - count); + encoded.write(lastVal); + } + else + { + encoded.write(count - 1); + encoded.write(buf, 0, count); + } + } + encoded.write(RUN_LENGTH_EOD); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java new file mode 100644 index 00000000000..f5625f59996 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012, Harald Kuhr + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name "TwelveMonkeys" nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.apache.pdfbox.cos.filter; + +/** + * TIFFExtension + * + * @author Harald Kuhr + * @author last modified by $Author: haraldk$ + * @version $Id: TIFFExtension.java,v 1.0 08.05.12 16:45 haraldk Exp$ + */ +interface TIFFExtension { + /** CCITT T.4/Group 3 Fax compression. */ + int COMPRESSION_CCITT_T4 = 3; + /** CCITT T.6/Group 4 Fax compression. */ + int COMPRESSION_CCITT_T6 = 4; + /** LZW Compression. 
Was baseline, but moved to extension due to license issues in the LZW algorithm. */ + int COMPRESSION_LZW = 5; + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int COMPRESSION_OLD_JPEG = 6; + /** JPEG Compression (lossy). */ + int COMPRESSION_JPEG = 7; + /** Custom: PKZIP-style Deflate. */ + int COMPRESSION_DEFLATE = 32946; + /** Adobe-style Deflate. */ + int COMPRESSION_ZLIB = 8; + + int PHOTOMETRIC_SEPARATED = 5; + int PHOTOMETRIC_YCBCR = 6; + int PHOTOMETRIC_CIELAB = 8; + int PHOTOMETRIC_ICCLAB = 9; + int PHOTOMETRIC_ITULAB = 10; + + int PLANARCONFIG_PLANAR = 2; + + int PREDICTOR_HORIZONTAL_DIFFERENCING = 2; + int PREDICTOR_HORIZONTAL_FLOATINGPOINT = 3; + + int FILL_RIGHT_TO_LEFT = 2; + + int SAMPLEFORMAT_INT = 2; + int SAMPLEFORMAT_FP = 3; + int SAMPLEFORMAT_UNDEFINED = 4; + + int YCBCR_POSITIONING_CENTERED = 1; + int YCBCR_POSITIONING_COSITED = 2; + + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int JPEG_PROC_BASELINE = 1; + /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ + int JPEG_PROC_LOSSLESS = 14; + + /** For use with Photometric: 5 (Separated), when image data is in CMYK color space. */ + int INKSET_CMYK = 1; + + /** + * For use with Photometric: 5 (Separated), when image data is in a color space other than CMYK. + * See {@link com.twelvemonkeys.imageio.metadata.exif.TIFF#TAG_INK_NAMES InkNames} field for a + * description of the inks to be used. 
+ */ + int INKSET_NOT_CMYK = 2; + + int ORIENTATION_TOPRIGHT = 2; + int ORIENTATION_BOTRIGHT = 3; + int ORIENTATION_BOTLEFT = 4; + int ORIENTATION_LEFTTOP = 5; + int ORIENTATION_RIGHTTOP = 6; + int ORIENTATION_RIGHTBOT = 7; + int ORIENTATION_LEFTBOT = 8; + + int GROUP3OPT_2DENCODING = 1; + int GROUP3OPT_UNCOMPRESSED = 2; + int GROUP3OPT_FILLBITS = 4; + int GROUP3OPT_BYTEALIGNED = 8; + int GROUP4OPT_UNCOMPRESSED = 2; + int GROUP4OPT_BYTEALIGNED = 4; + int COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE = 2; + int FILL_LEFT_TO_RIGHT = 1; // Default +} + diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html new file mode 100644 index 00000000000..1d1f771a2b2 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html @@ -0,0 +1,25 @@ + + + + + + + +This package will hold the PDFBox implementations of the filters that are used in PDF documents. + + diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html b/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html new file mode 100644 index 00000000000..beecb1289c1 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html @@ -0,0 +1,72 @@ + + + + + + + +

COS stands for Carousel Object Syntax, which is the syntax used to structure + PDF files. Although Carousel was only a code name for what later became Acrobat, + the name is still used to refer to the way a PDF file is composed. COS objects + are the building blocks of PDF files, and represent document components like + pages, bookmarks, fonts, and annotations. +

+

+ The official PDF documentation claims that PDF is composed of 8 types + of COS objects; however, it goes on to state that there are two types + of numeric objects, integer and real numbers, which brings the actual + number of object types to 9. Virtually all COS objects have a label + so they can be referenced indirectly. +

+

+ The following classes encapsulate COS objects: COSArray, COSBoolean, + COSDictionary, COSName, COSNumber, COSString, COSStream, and COSNull. + All of these objects are derived from COSBase, which holds the object label + ("key") and mandates the implementation of certain abstract methods. + The object label is encapsulated in the COSObjectKey class. This package + also includes the classes COSFloat and COSInteger, which extend COSNumber. +

+

Also defined are COSDocument, which represents the collection of + all the COS objects in a PDF document, and COSObject, which is a + proxy object for all other COS objects. Both the COSDocument and + COSObject classes extend COSBase despite the fact that they are not + true COS objects. +

+

+ The proxy object, COSObject, is not derived from COSBase, but implements + COSObjectGetter and has the same label as whichever concrete class it + represents. Typically, the associated concrete class is not instantiated + until the COSObject's getCOSObject() method is called, after which it + will contain a reference to the decoded concrete class. The abstract class + COSObjectGetter requires the implementation of the getCOSObject() method, + which will return whichever COSBase derived class is associated with the + object. The use of COSObjectGetter is not limited to objects in this + package but is used throughout pdfbox. +

+

+ Other classes are defined in this package to directly support the + base classes, such as COSObjectKey and COSInputStream. These classes + do not derive from COSBase, and most do not implement COSObjectGetter. +

+

+ A brief summary of the PDF file structure can be found at + Medium.com +

+See also the PDF Reference 1.7. + + diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java new file mode 100644 index 00000000000..1baf1bb5b32 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java @@ -0,0 +1,737 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos.util; + +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Locale; +import java.util.SimpleTimeZone; +import java.util.TimeZone; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.cos.COSString; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; + +/* + * Date format is described in PDF Reference 1.7 section 3.8.2 + * (www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf) + * and also in PDF 32000-1:2008 + * (http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf)) + * although the latter inexplicably omits the trailing apostrophe. + * + * The interpretation of dates without timezones is unclear. 
+ * The code below assumes that such dates are in UTC+00 (aka GMT). + * This is in keeping with the PDF Reference's assertion that: + * numerical fields default to zero values. + * However, the Reference does go on to make the cryptic remark: + * If no UT information is specified, the relationship of the specified + * time to UT is considered to be unknown. Whether or not the time + * zone is known, the rest of the date should be specified in local time. + * I understand this to refer to _creating_ a pdf date value. That is, + * code that can get the wall clock time and cannot get the timezone + * should write the wall clock time with a time zone of zero. + * When _parsing_ a PDF date, the statement talks about "the rest of the date" + * being local time, thus explicitly excluding the use of the local time + * for the time zone. +*/ + +/** + * Converts dates to strings and back using the PDF date standard + * in section 3.8.2 of PDF Reference 1.7. + * + * @author Ben Litchfield + * @author Fred Hansen + * + * TODO Move members of this class elsewhere for shared use in pdfbox and xmpbox. + */ +public final class DateConverter +{ + private static final Logger LOG = LogManager.getLogger(DateConverter.class); + + private DateConverter() + { + } + + // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours + private static final int MINUTES_PER_HOUR = 60; + private static final int SECONDS_PER_MINUTE = 60; + private static final int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE*1000; + private static final int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE; + private static final int HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE, DAY = 2*HALF_DAY; + + /* + * The Date format is supposed to be the PDF_DATE_FORMAT, but other + * forms appear. These lists offer alternatives to be tried + * if parseBigEndianDate fails. + * + * The time zone offset generally trails the date string, so it is processed + * separately with parseTZoffset. 
    /*
     * Dates are supposed to be in the PDF date format, but other forms appear
     * in practice. The two tables below list alternative SimpleDateFormat
     * patterns that are tried if parseBigEndianDate fails.
     *
     * The time zone offset generally trails the date string, so it is processed
     * separately with parseTZoffset. (This does not preclude having time
     * zones in the elements below; one does.)
     *
     * SimpleDateFormat is badly non-reentrant -- it modifies its calendar
     * field (PDFBox-402) -- so the tables hold pattern strings and a new
     * SimpleDateFormat is created from them whenever one is needed.
     *
     * Some past entries have been elided because they duplicate existing
     * entries. See the API for SimpleDateFormat, which says
     *      "For parsing, the number of pattern letters is ignored
     *      unless it's needed to separate two adjacent fields."
     *
     * toCalendar(String) only accepts a result if the entire input text has
     * been consumed, therefore the ordering of formats is important:
     * if one format begins with the entirety of another, the longer
     * must precede the other in the list.
     *
     * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh".
     * Where year is yy, four digit years are accepted
     * and two digit years are converted to four digits in the range
     *      [thisyear-79...thisyear+20]
     */

    // Patterns tried when the text starts with a non-digit (a day name).
    private static final String[] ALPHA_START_FORMATS =
    {
        "EEEE, dd MMM yy hh:mm:ss a",
        "EEEE, MMM dd, yy hh:mm:ss a",
        "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows
        "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465
        "EEEE MMM dd, yy HH:mm:ss", // ECMP5
        "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7
        "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant
    };

    // Patterns tried when the text starts with a digit.
    private static final String[] DIGIT_START_FORMATS =
    {
        "dd MMM yy HH:mm:ss",  // for 26 May 2000 11:25:00
        "dd MMM yy HH:mm",  // for 26 May 2000 11:25
        "yyyy MMM d",   // ambiguity resolved only by omitting time
        // NOTE(review): in SimpleDateFormat lowercase "mm" is minutes, not month;
        // "MM" looks intended here -- confirm before changing, the test case below
        // is also accepted by the big-endian parser.
        "yyyymmddhh:mm:ss", // test case "200712172:2:3"
        "H:m M/d/yy", // test case "9:47 5/12/2008"
        "M/d/yy HH:mm:ss",
        "M/d/yy HH:mm",
        "M/d/yy",

        // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss"
        //     "yyyy MMM d HH:mm:ss",

        // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm" "M/d/yy",
        // (incoming digit strings do not mark themselves as y, m, or d!)
            // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170
            // "M/dd/yyyy hh:mm:ss",
            // "MM/d/yyyy hh:mm:ss",
            // "M/d/yyyy HH:mm:ss",
            // "M/dd/yyyy",
            // "MM/d/yyyy",
            // "M/d/yyyy",
            // "M/d/yyyy HH:mm:ss",
            // "M/d/yy HH:mm:ss",
        // subsumed by big-endian parse
            // "yyyy-MM-dd'T'HH:mm:ss",
            // "yyyy-MM-dd'T'HH:mm:ss",
            // "yyyymmdd hh:mm:ss",
            // "yyyymmdd",
            // "yyyymmddX''00''",  // covers 24 cases
            //    (originally the above ended with '+00''00''';
            //      the first apostrophe quoted the plus,
            //      '' mapped to a single ', and the ''' was invalid)
    };

    /**
     * Converts a Calendar to a string formatted as:
     *     D:yyyyMMddHHmmss#hh'mm'  where # is + or -.
     * The offset is produced by formatTZoffset with an apostrophe as the
     * hour/minute separator, and a trailing apostrophe is appended.
     *
     * @param cal The date to convert to a string. May be null.
     * The DST_OFFSET is included when computing the output time zone.
     *
     * @return The date as a String to be used in a PDF document,
     *      or null if the cal value is null
     */
    public static String toString( Calendar cal)
    {
        if (cal == null)
        {
            return null;
        }
        // total offset from UTC = raw zone offset + daylight saving offset
        String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) +
                cal.get(Calendar.DST_OFFSET), "'");
        return String.format( Locale.US,
                "D:"
                + "%1$4tY%1$2tm%1$2td"   // yyyyMMdd
                + "%1$2tH%1$2tM%1$2tS"   // HHmmss
                + "%2$s"                 // time zone
                + "'",                   // trailing apostrophe
                cal, offset);
    }
+ */ + public static String toISO8601(Calendar cal) + { + String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + + cal.get(Calendar.DST_OFFSET), ":"); + return String.format(Locale.US, + "%1$4tY" // yyyy + + "-%1$2tm" // -mm (%tm adds one to cal month value) + + "-%1$2td" // -dd (%tm adds one to cal month value) + + "T" // T + + "%1$2tH:%1$2tM:%1$2tS" // HHmmss + + "%2$s", // time zone + cal, offset); + } + + /* + * Constrain a timezone offset to the range [-14:00 thru +14:00]. + * by adding or subtracting multiples of a full day. + */ + private static int restrainTZoffset(long proposedOffset) + { + if (proposedOffset <= 14 * MILLIS_PER_HOUR && proposedOffset >= -14 * MILLIS_PER_HOUR) + { + // https://www.w3.org/TR/xmlschema-2/#dateTime-timezones + // Timezones between 14:00 and -14:00 are valid + return (int) proposedOffset; + } + // Constrain a timezone offset to the range [-11:59 thru +12:00]. + proposedOffset = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY; + if (proposedOffset == 0) + { + return HALF_DAY; + } + // 0 <= proposedOffset < DAY + proposedOffset = (proposedOffset - HALF_DAY) % HALF_DAY; + // -HALF_DAY < proposedOffset < HALF_DAY + return (int)proposedOffset; + } + + /* + * Formats a time zone offset as #hh^mm + * where # is + or -, hh is hours, ^ is a separator, and mm is minutes. + * Any separator may be specified by the second argument; + * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF). + * The returned value is constrained to the range -11:59 ... 11:59. + * For offset of 0 millis, the String returned is "+00^00", never "Z". + * To get a "general" offset in form GMT#hh:mm, write + * "GMT"+DateConverter.formatTZoffset(offset, ":"); + * + * Take thought in choosing the source for the millis value. + * It can come from calendarValue.getTimeZone() or from + * calendarValue.get(Calendar.ZONE_OFFSET). If a TimeZone was created + * from a valid time zone ID, then it may have a daylight savings rule. 
+ * (As of July 4, 2013, the data base at http://www.iana.org/time-zones + * recognized 629 time zone regions. But a TimeZone created as + * new SimpleTimeZone(millisOffset, "ID"), + * will not have a daylight savings rule. (Not even if there is a + * known time zone with the given ID. To get the TimeZone named "xDT" + * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT. + * + * When parsing PDF dates, the incoming values DOES NOT have a TIMEZONE value. + * At most it has an OFFSET value like -04'00'. It is generally impossible to + * determine what TIMEZONE corresponds to a given OFFSET. If the date is + * in the summer when daylight savings is in effect, an offset of -0400 + * might correspond to any one of the 38 regions (of 53) with standard time + * offset -0400 and no daylight saving. Or it might correspond to + * any one of the 31 regions (out of 43) that observe daylight savings + * and have standard time offset of -0500. + * + * If a Calendar has not been assigned a TimeZone with setTimeZone(), + * it will have by default the local TIMEZONE, not just the OFFSET. In the + * USA, this TimeZone will have a daylight savings rule. + * + * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs + * from the offset in the TimeZone set by Calendar.setTimeZone(). Example: + * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'. + * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00' + * calVal = new GregorianCalendar(); // TimeZone is the local default + * calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR); + * Four different offsets can be computed from calVal: + * calVal.get(Calendar.ZONE_OFFSET) => -07:00 + * calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00 + * calVal.getTimeZone().getRawOffset() => -05:00 + * calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00 + * + * Which is correct??? 
I dunno, though setTimeZone() does seem to affect + * ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone + * or ZONE_OFFSET has been set; both have been set by initialization code. + * TimeZone is initialized to the local default time zone + * and ZONE_OFFSET is set from it. + * + * My choice in this DateConverter class has been to set the + * initial TimeZone of a GregorianCalendar to GMT. Thereafter + * the TimeZone is modified with {@link #adjustTimeZoneNicely}. + * + * package-private for testing + */ + static String formatTZoffset(long millis, String sep) + { + SimpleDateFormat sdf = new SimpleDateFormat("Z", Locale.ENGLISH); // #hhmm + sdf.setTimeZone(new SimpleTimeZone(restrainTZoffset(millis),"unknown")); + String tz = sdf.format(new Date()); + return tz.substring(0,3) + sep + tz.substring(3); + } + + /* + * Parses an integer from a string, starting at and advancing a ParsePosition. + * Returns The integer that was at the given parse position, or the remedy value + * if no digits were found. + * + * The ParsePosition will be incremented by the number of digits found, but no + * more than maxlen. That is, the ParsePosition will advance across at most + * maxlen initial digits in text. The error index is ignored and unchanged. + * + * maxlen is the maximum length of the integer to parse, usually 2, but 4 for + * year fields. If the field of length maxlen begins with a digit, but contains + * a non-digit, no error is signaled and the integer value is returned. 
+ */ + private static int parseTimeField(String text, ParsePosition where, int maxlen, int remedy) + { + if (text == null) + { + return remedy; + } + // it would seem that DecimalFormat.parse() would be simpler; + // but that class blithely ignores setMaximumIntegerDigits + int retval = 0; + int index = where.getIndex(); + int limit = index + Math.min(maxlen, text.length()-index); + for (; index < limit; index++) + { + // convert digit to integer + int cval = text.charAt(index) - '0'; + // test to see if we got a digit + if (cval < 0 || cval > 9) + { + // no digit at index + break; + } + // append the digit to the return value + retval = retval * 10 + cval; + } + if (index == where.getIndex()) + { + return remedy; + } + where.setIndex(index); + return retval; + } + + /* + * Advances the ParsePosition past any and all the characters that match + * those in the optionals list. In particular, a space will skip all spaces. + * + * The start value is incremented by the number of optionals found. The error + * index is ignored and unchanged. + * + * Returns the last non-space character passed over (even if space is not in + * the optionals list.) + */ + private static char skipOptionals(String text, ParsePosition where, String optionals) + { + char retval = ' '; + char currch; + while (where.getIndex() < text.length() && + optionals.indexOf((currch = text.charAt(where.getIndex()))) >= 0) + { + retval = (currch != ' ') ? currch : retval; + where.setIndex(where.getIndex() + 1); + } + return retval; + } + + /* + * If the victim string is at the given position in the text, this method + * advances the position past that string. + * + * `where` is the initial position to look at. After return, this will have + * been incremented by the length of the victim if it was found. The error + * index is ignored and unchanged. 
+ */ + private static boolean skipString(String text, String victim, ParsePosition where) + { + if (text.startsWith(victim, where.getIndex())) + { + where.setIndex(where.getIndex()+victim.length()); + return true; + } + return false; + } + + /* + * Construct a new GregorianCalendar and set defaults. + * Locale is ENGLISH. + * TimeZone is "UTC" (zero offset and no DST). + * Parsing is NOT lenient. Milliseconds are zero. + * + * package-private for testing + */ + static GregorianCalendar newGreg() + { + GregorianCalendar retCal = new GregorianCalendar(new SimpleTimeZone(0, "UTC"), Locale.ENGLISH); + retCal.setLenient(false); + retCal.set(Calendar.MILLISECOND, 0); + return retCal; + } + + /* + * Install a TimeZone on a GregorianCalendar without changing the + * hours value. A plain GregorianCalendat.setTimeZone() + * adjusts the Calendar.HOUR value to compensate. This is *BAD* + * (not to say *EVIL*) when we have already set the time. + */ + private static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz) + { + cal.setTimeZone(tz); + int offset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) / + MILLIS_PER_MINUTE; + cal.add(Calendar.MINUTE, -offset); + } + + /* + * Parses the end of a date string for a time zone and, if one is found, + * sets the time zone of the GregorianCalendar. Otherwise the calendar + * time zone is unchanged. + * + * The text is parsed as + * (Z|GMT|UTC)? [+- ]* h [': ]? m '? + * where the leading String is optional, h is two digits by default, + * but may be a single digit if followed by one of space, apostrophe, + * colon, or the end of string. Similarly, m is one or two digits. + * This scheme accepts the format of PDF, RFC 822, and ISO8601. + * If none of these applies (as for a time zone name), we try + * TimeZone.getTimeZone(). + * + * Scanning begins at where.index. After success, the returned index + * is that of the next character after the recognized string. 
    /*
     * Parses the end of a date string for a time zone and, if one is found,
     * sets the time zone of the GregorianCalendar. Otherwise the calendar
     * time zone is unchanged.
     *
     * The text is parsed as
     *      (Z|GMT|UTC)? [+- ]* h [': ]? m '?
     * where the leading String is optional, h is two digits by default,
     * but may be a single digit if followed by one of space, apostrophe,
     * colon, or the end of string. Similarly, m is one or two digits.
     * This scheme accepts the format of PDF, RFC 822, and ISO8601.
     * If none of these applies (as for a time zone name), we try
     * TimeZone.getTimeZone().
     *
     * Scanning begins at initialWhere's index. After success, that index
     * is set to the next character after the recognized string; on failure
     * it is left untouched.
     *
     * Returns true if a time zone was recognized and installed on cal,
     * false if the text holds no time zone.
     *
     * package-private for testing
     */
    static boolean parseTZoffset(String text, GregorianCalendar cal,
            ParsePosition initialWhere)
    {
        // work on a copy so initialWhere only moves on success
        ParsePosition where = new ParsePosition(initialWhere.getIndex());
        TimeZone tz = new SimpleTimeZone(0, "GMT");
        int tzHours, tzMin;
        char sign = skipOptionals(text, where, "Z+- ");
        // short-circuit order matters: skipString advances 'where' as a side effect
        boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where) ||
                skipString(text, "UTC", where));
        // after a Z/GMT/UTC prefix the sign (if any) follows the prefix
        sign = (!hadGMT) ? sign : skipOptionals(text, where, "+- ");

        // -999 is the sentinel for "no hour digits present"
        tzHours = parseTimeField(text, where, 2, -999);
        skipOptionals(text, where, "': ");
        tzMin = parseTimeField(text, where, 2, 0);
        skipOptionals(text, where, "' ");

        if (tzHours != -999)
        {
            // we parsed a time zone in default format
            int hrSign = (sign == '-' ? -1 : 1);
            tz.setRawOffset(restrainTZoffset(hrSign * (tzHours * (long) MILLIS_PER_HOUR +
                    tzMin * (long) MILLIS_PER_MINUTE)));
            updateZoneId(tz);
        }
        else if ( ! hadGMT)
        {
            // try to process as a name; "GMT" or "UTC" has already been processed
            String tzText = text.substring(initialWhere.getIndex()).trim();
            tz = TimeZone.getTimeZone(tzText);
            // getTimeZone returns "GMT" for unknown ids
            if ("GMT".equals(tz.getID()))
            {
                // no timezone in text, cal and initialWhere are unchanged
                return false;
            }
            else
            {
                // we got a tz by name; use it
                where.setIndex(text.length());
            }
        }
        // reached with a numeric offset, a bare Z/GMT/UTC (zero offset), or a named zone
        adjustTimeZoneNicely(cal, tz);
        initialWhere.setIndex(where.getIndex());
        return true;
    }
+ */ + private static void updateZoneId(TimeZone tz) + { + int offset = tz.getRawOffset(); + char pm = '+'; + if (offset < 0) + { + pm = '-'; + offset = -offset; + } + int hh = offset / 3600000; + int mm = offset % 3600000 / 60000; + if (offset == 0) + { + tz.setID("GMT"); + } + else if (pm == '+' && hh <= 12) + { + tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm)); + } + else if (pm == '-' && hh <= 14) + { + tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm)); + } + else + { + tz.setID("unknown"); + } + } + + /* + * Parses a big-endian date: year month day hour min sec. + * The year must be four digits. Other fields may be adjacent + * and delimited by length or they may follow appropriate delimiters. + * year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction] + * If any numeric field is omitted, all following fields must also be omitted. + * No time zone is processed. + * + * Ambiguous dates can produce unexpected results. For example: + * 1970 12 23:08 will parse as 1970 December 23 00:08:00 + * + * The parse begins at `where, on return the index + * is advanced to just beyond the last character processed. + * The error index is ignored and unchanged. 
    /*
     * Parses a big-endian date: year month day hour min sec.
     * The year must be four digits. Other fields may be adjacent
     * and delimited by length or they may follow appropriate delimiters.
     *     year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction]
     * If any numeric field is omitted, all following fields must also be omitted.
     * No time zone is processed.
     *
     * Ambiguous dates can produce unexpected results. For example:
     *      1970 12 23:08 will parse as 1970 December 23 00:08:00
     *
     * The parse begins at initialWhere; on success its index
     * is advanced to just beyond the last character processed (and past any
     * following spaces). The error index is ignored and unchanged.
     *
     * Returns the parsed calendar (created by newGreg), or null if the text
     * does not start with a four digit year or a field value is out of range.
     */
    private static GregorianCalendar parseBigEndianDate(String text,
            ParsePosition initialWhere)
    {
        // work on a copy so initialWhere only moves on success
        ParsePosition where = new ParsePosition(initialWhere.getIndex());
        int year = parseTimeField(text, where, 4, 0);
        if (where.getIndex() != 4 + initialWhere.getIndex())
        {
            // fewer than four year digits were consumed
            return null;
        }
        skipOptionals(text, where, "/- ");
        int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11
        skipOptionals(text, where, "/- ");
        int day = parseTimeField(text, where, 2, 1);
        skipOptionals(text, where, " T");
        int hour = parseTimeField(text, where, 2, 0);
        skipOptionals(text, where, ": ");
        int minute = parseTimeField(text, where, 2, 0);
        skipOptionals(text, where, ": ");
        int second = parseTimeField(text, where, 2, 0);
        char nextC = skipOptionals(text, where, ".");
        if (nextC == '.')
        {
            // fractions of a second: skip up to 19 digits (the value is discarded)
            parseTimeField(text, where, 19, 0);
        }

        GregorianCalendar dest = newGreg();
        try
        {
            dest.set(year, month, day, hour, minute, second);
            // trigger limit tests
            dest.getTimeInMillis();
        }
        catch (IllegalArgumentException ill)
        {
            LOG.debug("Couldn't parse arguments text:{} initialWhere:{}", text, initialWhere, ill);
            return null;
        }
        initialWhere.setIndex(where.getIndex());
        skipOptionals(text, initialWhere, " ");
        // dest has at least a year value
        return dest;
    }
Unless a time zone was + * part of the format, the time zone will be GMT+0 + */ + private static GregorianCalendar parseSimpleDate(String text, String[] fmts, + ParsePosition initialWhere) + { + for(String fmt : fmts) + { + ParsePosition where = new ParsePosition(initialWhere.getIndex()); + SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH); + GregorianCalendar retCal = newGreg(); + sdf.setCalendar(retCal); + if (sdf.parse(text, where) != null) + { + initialWhere.setIndex(where.getIndex()); + skipOptionals(text, initialWhere, " "); + return retCal; + } + } + return null; + } + + /* + * Parses a String to see if it begins with a date, and if so, + * returns that date. The date must be strictly correct--no + * field may exceed the appropriate limit. + * (That is, the Calendar has setLenient(false).) + * Skips initial spaces, but does NOT check for "D:" + * + * The scan first tries parseBigEndianDate and parseTZoffset + * and then tries parseSimpleDate with appropriate formats, + * again followed by parseTZoffset. If at any stage the entire + * text is consumed, that date value is returned immediately. + * Otherwise the date that consumes the longest initial part + * of the text is returned. + * + * - PDF format dates are among those recognized by parseBigEndianDate. + * - The formats tried are alphaStartFormats or digitStartFormat and + * any listed in the value of moreFmts. 
    /*
     * Parses a String to see if it begins with a date, and if so,
     * returns that date. The date must be strictly correct--no
     * field may exceed the appropriate limit.
     * (That is, the Calendar has setLenient(false).)
     * Skips initial spaces, but does NOT check for "D:"
     *
     * The scan first tries parseBigEndianDate and parseTZoffset
     * and then tries parseSimpleDate with appropriate formats,
     * again followed by parseTZoffset. If at any stage the entire
     * text is consumed, that date value is returned immediately.
     * Otherwise the date that consumes the longest initial part
     * of the text is returned.
     *
     * - PDF format dates are among those recognized by parseBigEndianDate.
     * - The formats tried are ALPHA_START_FORMATS or DIGIT_START_FORMATS,
     *   chosen by whether the first non-space character is a digit.
     *
     * On return initialWhere is advanced past the consumed text when a date
     * with full consumption or a trailing time zone was found; otherwise it
     * is left unchanged.
     */
    private static Calendar parseDate(String text, ParsePosition initialWhere)
    {
        if (text == null || text.isEmpty() || "D:".equals(text.trim()))
        {
            return null;
        }

        // remember longest date string
        int longestLen = -999999;
        // sentinel only: it is smaller than any real index, so the first
        // candidate always wins the "whereLen > longestLen" comparison below

        GregorianCalendar longestDate = null; // null says no date found yet
        int whereLen; // tempcopy of where.getIndex()

        ParsePosition where = new ParsePosition(initialWhere.getIndex());
        // skip leading spaces
        skipOptionals(text, where, " ");
        int startPosition = where.getIndex();

        // try big-endian parse
        GregorianCalendar retCal = parseBigEndianDate(text, where);
        // check for success and a timezone
        if (retCal != null && (where.getIndex() == text.length() ||
                parseTZoffset(text, retCal, where)))
        {
            // if text is fully consumed, return the date else remember it and its length
            whereLen = where.getIndex();
            if (whereLen == text.length())
            {
                initialWhere.setIndex(whereLen);
                return retCal;
            }
            longestLen = whereLen;
            longestDate = retCal;
        }

        // try one of the sets of standard formats
        where.setIndex(startPosition);
        String [] formats
                = Character.isDigit(text.charAt(startPosition))
                ? DIGIT_START_FORMATS
                : ALPHA_START_FORMATS;
        retCal = parseSimpleDate(text, formats, where);
        // check for success and a timezone
        if (retCal != null &&
                (where.getIndex() == text.length() ||
                parseTZoffset(text, retCal, where)))
        {
            // if text is fully consumed, return the date else remember it and its length
            whereLen = where.getIndex();
            if (whereLen == text.length())
            {
                initialWhere.setIndex(whereLen);
                return retCal;
            }
            if (whereLen > longestLen)
            {
                longestLen = whereLen;
                longestDate = retCal;
            }
        }

        if (longestDate != null)
        {
            initialWhere.setIndex(longestLen);
            return longestDate;
        }
        // may be null, or a simple-format date that was neither fully consumed
        // nor followed by a time zone (initialWhere is then left unchanged)
        return retCal;
    }

    /**
     * Returns the Calendar for a given COS string containing a date,
     * or {@code null} if it cannot be parsed.
     *
     * The returned value will have 0 for DST_OFFSET.
     *
     * @param text A COS string containing a date. May be null.
     * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed
     * or if text is null.
     */
    public static Calendar toCalendar(COSString text)
    {
        if (text == null)
        {
            return null;
        }
        return toCalendar(text.getString());
    }
+ */ + public static Calendar toCalendar(String text) + { + if (text == null || text.trim().isEmpty()) + { + return null; + } + + ParsePosition where = new ParsePosition(0); + skipOptionals(text, where, " "); + skipString(text, "D:", where); + Calendar calendar = parseDate(text, where); + + if (calendar == null || where.getIndex() != text.length()) + { + // the date string is invalid + return null; + } + return calendar; + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java new file mode 100644 index 00000000000..538f8cb2f13 --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos.util; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Base64; + +/** + * Utility functions for hex encoding. + * + * @author John Hewson + */ +public final class Hex +{ + private static final Logger LOG = LogManager.getLogger(Hex.class); + + /** + * for hex conversion. 
+ * + * https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal + * + */ + private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + + private Hex() {} + + /** + * Returns a hex string of the given byte. + * + * @param b the byte to be converted + * @return the hex string representing the given byte + */ + public static String getString(byte b) + { + char[] chars = {HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]}; + return new String(chars); + } + + /** + * Returns a hex string of the given byte array. + * + * @param bytes the bytes to be converted + * @return the hex string representing the given bytes + */ + public static String getString(byte[] bytes) + { + StringBuilder string = new StringBuilder(bytes.length * 2); + for (byte b : bytes) + { + string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]); + } + return string.toString(); + } + + /** + * Returns the bytes corresponding to the ASCII hex encoding of the given byte. + * + * @param b the byte to be converted + * @return the ASCII hex encoding of the given byte + */ + public static byte[] getBytes(byte b) + { + return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]}; + } + + /** + * Returns the bytes corresponding to the ASCII hex encoding of the given bytes. + * + * @param bytes the bytey to be converted + * @return the ASCII hex encoding of the given bytes + */ + public static byte[] getBytes(byte[] bytes) + { + byte[] asciiBytes = new byte[bytes.length*2]; + for(int i=0; i< bytes.length; i++) + { + asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])]; + asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])]; + } + return asciiBytes; + } + + /** + * Returns the characters corresponding to the ASCII hex encoding of the given short. 
+ * + * @param num the short value to be converted + * @return the ASCII hex encoding of the given short value + */ + public static char[] getChars(short num) + { + char[] hex = new char[4]; + hex[0] = HEX_CHARS[(num >> 12) & 0x0F]; + hex[1] = HEX_CHARS[(num >> 8) & 0x0F]; + hex[2] = HEX_CHARS[(num >> 4) & 0x0F]; + hex[3] = HEX_CHARS[num & 0x0F]; + return hex; + } + + /** + * Takes the characters in the given string, convert it to bytes in UTF16-BE format + * and build a char array that corresponds to the ASCII hex encoding of the resulting + * bytes. + * + * Example: + *
+     *   getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
+     * 
+ * + * @param text The string to convert + * @return The string converted to hex + */ + public static char[] getCharsUTF16BE(String text) + { + // Note that the internal representation of string in Java is already UTF-16. Therefore + // we do not need to use an encoder to convert the string to its byte representation. + char[] hex = new char[text.length()*4]; + + for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++) + { + char c = text.charAt(stringIdx); + hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F]; + hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F]; + hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F]; + hex[charIdx++] = HEX_CHARS[c & 0x0F]; + } + + return hex; + } + + /** + * Writes the given byte as hex value to the given output stream. + * @param b the byte to be written + * @param output the output stream to be written to + * @throws IOException exception if anything went wrong + */ + public static void writeHexByte(byte b, OutputStream output) throws IOException + { + output.write(HEX_BYTES[getHighNibble(b)]); + output.write(HEX_BYTES[getLowNibble(b)]); + } + + /** + * Writes the given byte array as hex value to the given output stream. + * @param bytes the byte array to be written + * @param output the output stream to be written to + * @throws IOException exception if anything went wrong + */ + public static void writeHexBytes(byte[] bytes, OutputStream output) throws IOException + { + for (byte b : bytes) + { + writeHexByte(b, output); + } + } + + /** + * Get the high nibble of the given byte. + * + * @param b the given byte + * @return the high nibble + */ + private static int getHighNibble(byte b) + { + return (b & 0xF0) >> 4; + } + + /** + * Get the low nibble of the given byte. + * + * @param b the given byte + * @return the low nibble + */ + private static int getLowNibble(byte b) + { + return b & 0x0F; + } + + /** + * Decode a base64 String. + * + * @param base64Value a base64 encoded String. 
+ * + * @return the decoded String as a byte array. + * + * @throws IllegalArgumentException if this isn't a base64 encoded string. + */ + public static byte[] decodeBase64(String base64Value) + { + return Base64.getDecoder(). + decode(StringUtil.PATTERN_SPACE.matcher(base64Value).replaceAll("")); + } + + /** + * Decodes a hex String into a byte array. + * + * @param s A String with ASCII hex. + * @return decoded byte array. + */ + public static byte[] decodeHex(String s) + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int i = 0; + while (i < s.length() - 1) + { + if (s.charAt(i) == '\n' || s.charAt(i) == '\r') + { + ++i; + } + else + { + String hexByte = s.substring(i, i + 2); + try + { + baos.write(Integer.parseInt(hexByte, 16)); // Byte.parseByte won't work with "9C" + } + catch (NumberFormatException ex) + { + LOG.error(() -> "Can't parse " + hexByte + ", aborting decode", ex); + break; + } + i += 2; + } + } + return baos.toByteArray(); + } +} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java new file mode 100644 index 00000000000..743c3bf9a8c --- /dev/null +++ b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Helpers for splitting strings at whitespace.
 */
public final class StringUtil
{
    /** Matches a single whitespace character. */
    public static final Pattern PATTERN_SPACE = Pattern.compile("\\s");

    /**
     * Matches the empty position directly after or directly before a
     * whitespace character. Compiled once instead of on every call.
     */
    private static final Pattern PATTERN_SPACE_BOUNDARY =
            Pattern.compile("(?<=" + PATTERN_SPACE + ")|(?=" + PATTERN_SPACE + ")");

    /**
     * Splits the string at every whitespace character; the whitespace
     * itself is dropped.
     *
     * @param s the string to split, must not be null
     * @return the parts between the whitespace characters
     */
    public static String[] splitOnSpace(String s)
    {
        return PATTERN_SPACE.split(s);
    }

    /**
     * Split at spaces but keep them: every whitespace character becomes
     * a token of its own between the non-whitespace tokens.
     *
     * @param s the string to tokenize, must not be null
     * @return the tokens in their original order, including the whitespace tokens
     */
    public static String[] tokenizeOnSpace(String s)
    {
        return PATTERN_SPACE_BOUNDARY.split(s);
    }
}
+ */ +package org.apache.pdfbox.cos; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +class COSDictionaryTest +{ + @Test + void testCOSDictionaryNotEqualsCOSStream() + { + COSDictionary cosDictionary = new COSDictionary(); + COSStream cosStream = new COSStream(); + cosDictionary.setItem( COSName.BE, COSName.BE); + cosDictionary.setInt(COSName.LENGTH, 0); + cosStream.setItem(COSName.BE, COSName.BE); + assertNotEquals(cosDictionary, cosStream, + "a COSDictionary shall not be equal to a COSStream with the same dictionary entries"); + assertNotEquals(cosStream, cosDictionary, + "a COSStream shall not be equal to a COSDictionary with the same dictionary entries"); + } +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java new file mode 100644 index 00000000000..2d70bc9436d --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Test for PDFDocEncoding. The encoding is exercised indirectly by round-tripping
+ * text through {@link COSString}.
+ */
+class PDFDocEncodingTest
+{
+
+    /**
+     * One single-character string per code point listed below. Declared with its element
+     * type so that the elements can be handed to {@code new COSString(String)} directly.
+     */
+    static final List<String> deviations = new ArrayList<>();
+
+    static
+    {
+        // all deviations (based on the table in ISO 32000-1:2008)
+        // block 1
+        deviations.add(String.valueOf('\u02D8')); // BREVE
+        deviations.add(String.valueOf('\u02C7')); // CARON
+        deviations.add(String.valueOf('\u02C6')); // MODIFIER LETTER CIRCUMFLEX ACCENT
+        deviations.add(String.valueOf('\u02D9')); // DOT ABOVE
+        deviations.add(String.valueOf('\u02DD')); // DOUBLE ACUTE ACCENT
+        deviations.add(String.valueOf('\u02DB')); // OGONEK
+        deviations.add(String.valueOf('\u02DA')); // RING ABOVE
+        deviations.add(String.valueOf('\u02DC')); // SMALL TILDE
+        // block 2
+        deviations.add(String.valueOf('\u2022')); // BULLET
+        deviations.add(String.valueOf('\u2020')); // DAGGER
+        deviations.add(String.valueOf('\u2021')); // DOUBLE DAGGER
+        deviations.add(String.valueOf('\u2026')); // HORIZONTAL ELLIPSIS
+        deviations.add(String.valueOf('\u2014')); // EM DASH
+        deviations.add(String.valueOf('\u2013')); // EN DASH
+        deviations.add(String.valueOf('\u0192')); // LATIN SMALL LETTER SCRIPT F
+        deviations.add(String.valueOf('\u2044')); // FRACTION SLASH (solidus)
+        deviations.add(String.valueOf('\u2039')); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+        deviations.add(String.valueOf('\u203A')); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+        deviations.add(String.valueOf('\u2212')); // MINUS SIGN
+        deviations.add(String.valueOf('\u2030')); // PER MILLE SIGN
+        deviations.add(String.valueOf('\u201E')); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
+        deviations.add(String.valueOf('\u201C')); // LEFT DOUBLE QUOTATION MARK (quotedblleft)
+        deviations.add(String.valueOf('\u201D')); // RIGHT DOUBLE QUOTATION MARK (quotedblright)
+        deviations.add(String.valueOf('\u2018')); // LEFT SINGLE QUOTATION MARK (quoteleft)
+        deviations.add(String.valueOf('\u2019')); // RIGHT SINGLE QUOTATION MARK (quoteright)
+        deviations.add(String.valueOf('\u201A')); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
+        deviations.add(String.valueOf('\u2122')); // TRADE MARK SIGN
+        deviations.add(String.valueOf('\uFB01')); // LATIN SMALL LIGATURE FI
+        deviations.add(String.valueOf('\uFB02')); // LATIN SMALL LIGATURE FL
+        deviations.add(String.valueOf('\u0141')); // LATIN CAPITAL LETTER L WITH STROKE
+        deviations.add(String.valueOf('\u0152')); // LATIN CAPITAL LIGATURE OE
+        deviations.add(String.valueOf('\u0160')); // LATIN CAPITAL LETTER S WITH CARON
+        deviations.add(String.valueOf('\u0178')); // LATIN CAPITAL LETTER Y WITH DIAERESIS
+        deviations.add(String.valueOf('\u017D')); // LATIN CAPITAL LETTER Z WITH CARON
+        deviations.add(String.valueOf('\u0131')); // LATIN SMALL LETTER DOTLESS I
+        deviations.add(String.valueOf('\u0142')); // LATIN SMALL LETTER L WITH STROKE
+        deviations.add(String.valueOf('\u0153')); // LATIN SMALL LIGATURE OE
+        deviations.add(String.valueOf('\u0161')); // LATIN SMALL LETTER S WITH CARON
+        deviations.add(String.valueOf('\u017E')); // LATIN SMALL LETTER Z WITH CARON
+        deviations.add(String.valueOf('\u20AC')); // EURO SIGN
+        // end of deviations
+    }
+
+    /**
+     * Every deviation character must survive a round trip through {@link COSString}
+     * unchanged.
+     */
+    @Test
+    void testDeviations()
+    {
+        deviations.forEach(deviation ->
+        {
+            COSString cosString = new COSString(deviation);
+            // expected value first, so that a failure message reads the right way round
+            assertEquals(deviation, cosString.getString());
+        });
+    }
+
+    /**
+     * PDFBOX-3864: Test that chars smaller than 256 which are NOT part of PDFDocEncoding are
+     * handled correctly. Each code point is parsed from a hex string that starts with FEFF,
+     * and a COSString rebuilt from the decoded text must equal the parsed one.
+     *
+     * @throws IOException if the hex string cannot be parsed
+     */
+    @Test
+    void testPDFBox3864() throws IOException
+    {
+        for (int i = 0; i < 256; i++)
+        {
+            String hex = String.format("FEFF%04X", i);
+            COSString cs1 = COSString.parseHex(hex);
+            COSString cs2 = new COSString(cs1.getString());
+            assertEquals(cs1, cs2);
+        }
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java
new file mode 100644
index 00000000000..44dd846556f
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2018 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.util.Arrays;
+import java.util.List;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unittests for {@link COSArray}
+ */
+class TestCOSArray
+{
+    /**
+     * Creation of an empty array, rejection of a null list and creation from a list.
+     */
+    @Test
+    void testCreate()
+    {
+        COSArray cosArray = new COSArray();
+        assertEquals(0, cosArray.size());
+        Assertions.assertThrows(NullPointerException.class, () -> new COSArray(
+                (List<COSBase>) null),
+                "Constructor should have thrown an exception");
+
+        cosArray = new COSArray(Arrays.asList(COSName.A, COSName.B, COSName.C));
+        assertEquals(3, cosArray.size());
+        assertEquals(COSName.A, cosArray.get(0));
+        assertEquals(COSName.B, cosArray.get(1));
+        assertEquals(COSName.C, cosArray.get(2));
+    }
+
+    /**
+     * Strings to COSName elements and back to strings.
+     */
+    @Test
+    void testConvertString2COSNameAndBack()
+    {
+        COSArray cosArray = COSArray.ofCOSNames(
+                Arrays.asList(COSName.A.getName(), COSName.B.getName(), COSName.C.getName()));
+        assertEquals(3, cosArray.size());
+        assertEquals(COSName.A, cosArray.get(0));
+        assertEquals(COSName.B, cosArray.get(1));
+        assertEquals(COSName.C, cosArray.get(2));
+
+        List<String> cosNameStringList = cosArray.toCOSNameStringList();
+        assertEquals(3, cosNameStringList.size());
+        assertEquals(COSName.A.getName(), cosNameStringList.get(0));
+        assertEquals(COSName.B.getName(), cosNameStringList.get(1));
+        assertEquals(COSName.C.getName(), cosNameStringList.get(2));
+    }
+
+    /**
+     * Strings to COSString elements and back to strings.
+     */
+    @Test
+    void testConvertString2COSStringAndBack()
+    {
+        COSArray cosArray = COSArray
+                .ofCOSStrings(Arrays.asList("A", "B", "C"));
+        assertEquals(3, cosArray.size());
+        assertEquals("A", cosArray.getString(0));
+        assertEquals("B", cosArray.getString(1));
+        assertEquals("C", cosArray.getString(2));
+
+        List<String> cosStringStringList = cosArray.toCOSStringStringList();
+        assertEquals(3, cosStringStringList.size());
+        assertEquals("A", cosStringStringList.get(0));
+        assertEquals("B", cosStringStringList.get(1));
+        assertEquals("C", cosStringStringList.get(2));
+    }
+
+    /**
+     * Integers to COSInteger elements and back, including an array that holds a null
+     * element. (The method name says "COSString" for historical reasons only.)
+     */
+    @Test
+    void testConvertInteger2COSStringAndBack()
+    {
+        COSArray cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3));
+        assertEquals(3, cosArray.size());
+        assertEquals(1, cosArray.getInt(0));
+        assertEquals(2, cosArray.getInt(1));
+        assertEquals(3, cosArray.getInt(2));
+
+        List<Integer> cosNumberIntegerList = cosArray.toCOSNumberIntegerList();
+        assertEquals(3, cosNumberIntegerList.size());
+        assertEquals(1, (int) cosNumberIntegerList.get(0));
+        assertEquals(2, (int) cosNumberIntegerList.get(1));
+        assertEquals(3, (int) cosNumberIntegerList.get(2));
+
+        // check arrays with null values
+        cosArray = new COSArray(Arrays.asList(COSInteger.get(1), null, COSInteger.get(3)));
+        assertEquals(3, cosArray.size());
+        assertEquals(1, cosArray.getInt(0));
+        assertNull(cosArray.get(1));
+        assertEquals(3, cosArray.getInt(2));
+        cosNumberIntegerList = cosArray.toCOSNumberIntegerList();
+        assertEquals(3, cosNumberIntegerList.size());
+        assertEquals(1, (int) cosNumberIntegerList.get(0));
+        assertNull(cosNumberIntegerList.get(1));
+        assertEquals(3, (int) cosNumberIntegerList.get(2));
+    }
+
+    /**
+     * Floats to COSFloat elements and back, both as list and as primitive array, including
+     * an array that holds a null element. (The method name says "COSString" for historical
+     * reasons only.)
+     */
+    @Test
+    void testConvertFloat2COSStringAndBack()
+    {
+        float[] floatArrayStart = { 1.0f, 0.1f, 0.02f };
+        COSArray cosArray = new COSArray();
+        cosArray.setFloatArray(floatArrayStart);
+
+        assertEquals(3, cosArray.size());
+        assertEquals(COSFloat.ONE, cosArray.get(0));
+        assertEquals(new COSFloat(0.1f), cosArray.get(1));
+        assertEquals(new COSFloat(0.02f), cosArray.get(2));
+
+        List<Float> cosNumberFloatList = cosArray.toCOSNumberFloatList();
+        assertEquals(3, cosNumberFloatList.size());
+        assertEquals(1.0f, (float) cosNumberFloatList.get(0), 0);
+        assertEquals(0.1f, (float) cosNumberFloatList.get(1), 0);
+        assertEquals(0.02f, (float) cosNumberFloatList.get(2), 0);
+
+        // verify the primitive array itself; re-checking cosNumberFloatList here would
+        // only repeat the assertions made above
+        float[] floatArrayEnd = cosArray.toFloatArray();
+        assertEquals(3, floatArrayEnd.length);
+        assertEquals(1.0f, floatArrayEnd[0], 0);
+        assertEquals(0.1f, floatArrayEnd[1], 0);
+        assertEquals(0.02f, floatArrayEnd[2], 0);
+        assertArrayEquals(floatArrayStart, floatArrayEnd, 0);
+
+        // check arrays with null values
+        cosArray = new COSArray(Arrays.asList(COSFloat.ONE, null, new COSFloat(0.02f)));
+        assertEquals(3, cosArray.size());
+        assertEquals(COSFloat.ONE, cosArray.get(0));
+        assertNull(cosArray.get(1));
+        assertEquals(new COSFloat(0.02f), cosArray.get(2));
+
+        cosNumberFloatList = cosArray.toCOSNumberFloatList();
+        assertEquals(3, cosNumberFloatList.size());
+        assertEquals(1.0f, (float) cosNumberFloatList.get(0), 0);
+        assertNull(cosNumberFloatList.get(1));
+        assertEquals(0.02f, (float) cosNumberFloatList.get(2), 0);
+
+        floatArrayEnd = cosArray.toFloatArray();
+        // due to the null value the second value of the array is set to 0
+        assertArrayEquals(new float[] { 1.0f, 0f, 0.02f }, floatArrayEnd, 0);
+
+    }
+
+    /**
+     * setName()/getName() including the default value for an index outside the array,
+     * indexOf() and overwriting an element.
+     */
+    @Test
+    void testGetSetName()
+    {
+        COSArray cosArray = new COSArray();
+        cosArray.growToSize(3);
+        cosArray.setName(0, "A");
+        cosArray.setName(1, "B");
+        cosArray.setName(2, "C");
+        assertEquals(3, cosArray.size());
+        assertEquals("A", cosArray.getName(0));
+        assertEquals("B", cosArray.getName(1));
+        assertEquals("C", cosArray.getName(2));
+        assertEquals("NULL", cosArray.getName(3, "NULL"));
+        assertEquals(0, cosArray.indexOf(COSName.A));
+        assertEquals(1, cosArray.indexOf(COSName.B));
+        assertEquals(2, cosArray.indexOf(COSName.C));
+        assertEquals(-1, cosArray.indexOf(COSName.D));
+        cosArray.setName(1, "D");
+        assertEquals(3, cosArray.size());
+        assertEquals("D", cosArray.getName(1));
+    }
+
+    /**
+     * setInt()/getInt() including the default value for an index outside the array,
+     * indexOf() and overwriting an element.
+     */
+    @Test
+    void testGetSetInt()
+    {
+        COSArray cosArray = new COSArray();
+        cosArray.growToSize(3);
+        cosArray.setInt(0, 0);
+        cosArray.setInt(1, 1);
+        cosArray.setInt(2, 2);
+        assertEquals(3, cosArray.size());
+        assertEquals(0, cosArray.getInt(0));
+        assertEquals(1, cosArray.getInt(1));
+        assertEquals(2, cosArray.getInt(2));
+        assertEquals(0, cosArray.getInt(3, 0));
+        assertEquals(0, cosArray.indexOf(COSInteger.get(0)));
+        assertEquals(1, cosArray.indexOf(COSInteger.get(1)));
+        assertEquals(2, cosArray.indexOf(COSInteger.get(2)));
+        assertEquals(-1, cosArray.indexOf(COSInteger.get(3)));
+        cosArray.setInt(1, 3);
+        assertEquals(3, cosArray.size());
+        assertEquals(3, cosArray.getInt(1));
+    }
+
+    /**
+     * setString()/getString() including the default value for an index outside the array,
+     * indexOf() and overwriting an element.
+     */
+    @Test
+    void testGetSetString()
+    {
+        COSArray cosArray = new COSArray();
+        cosArray.growToSize(3);
+        cosArray.setString(0, "Test1");
+        cosArray.setString(1, "Test2");
+        cosArray.setString(2, "Test3");
+        assertEquals(3, cosArray.size());
+        assertEquals("Test1", cosArray.getString(0));
+        assertEquals("Test2", cosArray.getString(1));
+        assertEquals("Test3", cosArray.getString(2));
+        assertEquals("NULL", cosArray.getString(3, "NULL"));
+        assertEquals(0, cosArray.indexOf(new COSString("Test1")));
+        assertEquals(1, cosArray.indexOf(new COSString("Test2")));
+        assertEquals(2, cosArray.indexOf(new COSString("Test3")));
+        assertEquals(-1, cosArray.indexOf(new COSString("Test4")));
+        cosArray.setString(1, "Test4");
+        assertEquals(3, cosArray.size());
+        assertEquals("Test4", cosArray.getString(1));
+    }
+
+    /**
+     * clear(), remove(int), removeObject(), removeAll() and retainAll().
+     */
+    @Test
+    void testRemove()
+    {
+        COSArray cosArray = COSArray
+                .ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6));
+        cosArray.clear();
+        assertEquals(0, cosArray.size());
+
+        cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6));
+        assertEquals(COSInteger.get(3), cosArray.remove(2));
+        // 1,2,4,5,6 should be left
+        assertEquals(5, cosArray.size());
+        assertEquals(1, cosArray.getInt(0));
+        assertEquals(4, cosArray.getInt(2));
+
+        // 1,2,4,6 should be left
+        assertTrue(cosArray.removeObject(COSInteger.get(5)));
+        assertEquals(4, cosArray.size());
+        assertEquals(1, cosArray.getInt(0));
+        assertEquals(4, cosArray.getInt(2));
+        assertEquals(6, cosArray.getInt(3));
+
+        cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6));
+        cosArray.removeAll(Arrays.asList(COSInteger.get(3), COSInteger.get(4)));
+        // 1,2,5,6 should be left
+        assertEquals(4, cosArray.size());
+        assertEquals(2, cosArray.getInt(1));
+        assertEquals(5, cosArray.getInt(2));
+
+        cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6));
+        cosArray.retainAll(Arrays.asList(COSInteger.get(3), COSInteger.get(4)));
+        // 3,4 should be left
+        assertEquals(2, cosArray.size());
+        assertEquals(3, cosArray.getInt(0));
+        assertEquals(4, cosArray.getInt(1));
+
+    }
+
+    /**
+     * growToSize() with and without a fill value.
+     */
+    @Test
+    void testGrowToSize()
+    {
+        COSArray cosArray = new COSArray();
+        assertEquals(0, cosArray.size());
+        cosArray.growToSize(2);
+        // COSArray has 2 empty elements
+        assertEquals(2, cosArray.size());
+        // size is already 2 -> nothing happens
+        cosArray.growToSize(2, COSInteger.get(0));
+        assertEquals(2, cosArray.size());
+        // increase size, fill the new elements with the given value
+        cosArray.growToSize(4, COSInteger.get(1));
+        assertEquals(4, cosArray.size());
+        List<Integer> cosNumberIntegerList = cosArray.toCOSNumberIntegerList();
+        assertEquals(4, cosNumberIntegerList.size());
+        assertNull(cosNumberIntegerList.get(0));
+        assertEquals(1, (int) cosNumberIntegerList.get(2));
+        assertEquals(1, (int) cosNumberIntegerList.get(3));
+    }
+
+    /**
+     * toList() returns the elements in array order.
+     */
+    @Test
+    void testToList()
+    {
+        COSArray cosArray = COSArray
+                .ofCOSIntegers(Arrays.asList(0, 1, 2, 3, 4, 5));
+        List<? extends COSBase> list = cosArray.toList();
+        assertEquals(6, list.size());
+        assertEquals(COSInteger.get(0), list.get(0));
+        assertEquals(COSInteger.get(5), list.get(5));
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java
new file mode 100644
index 00000000000..f21356ddda9
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.io.IOException;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Test class for {@link COSBase}.
+ */
+abstract class TestCOSBase
+{
+    /**
+     * The COSBase abstraction of the object being tested. It is not assigned in this
+     * class, so the tests below rely on it having been set before they run.
+     */
+    protected static COSBase testCOSBase;
+
+    /**
+     * Tests getCOSObject() - tests that the underlying object is returned.
+     * In the case of objects derived from COSBase this is always an identity
+     * function.
+     */
+    @Test
+    void testGetCOSObject()
+    {
+        assertEquals(testCOSBase, testCOSBase.getCOSObject());
+    }
+
+    /**
+     * Test accept() - tests the interface for visiting a document at the COS level.
+     *
+     * @throws IOException if visiting the object fails
+     */
+    abstract void testAccept() throws IOException;
+
+    /**
+     * Tests isDirect() and setDirect() - tests the getter/setter methods.
+     */
+    @Test
+    void testIsSetDirect()
+    {
+        testCOSBase.setDirect(true);
+        assertTrue(testCOSBase.isDirect());
+        testCOSBase.setDirect(false);
+        assertFalse(testCOSBase.isDirect());
+    }
+
+    /**
+     * A simple utility function to compare two byte arrays: both must have the same
+     * length and the same byte at every index.
+     *
+     * @param byteArr1 the expected byte array
+     * @param byteArr2 the byte array being compared
+     */
+    protected void testByteArrays(byte[] byteArr1, byte[] byteArr2)
+    {
+        // compare against the second array; checking byteArr1 against itself let an
+        // actual array with surplus trailing bytes pass unnoticed
+        assertEquals(byteArr1.length, byteArr2.length);
+        for (int i = 0; i < byteArr1.length; i++)
+        {
+            assertEquals(byteArr1[i], byteArr2[i]);
+        }
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java
new file mode 100644
index 00000000000..07c8b52bda4
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.io.ByteArrayOutputStream;
+//import java.io.IOException;
+//import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Unittests for {@link COSBoolean}
+ */
+class TestCOSBoolean extends TestCOSBase
+{
+    final COSBoolean cosBooleanTrue = COSBoolean.TRUE;
+    final COSBoolean cosBooleanFalse = COSBoolean.FALSE;
+
+    /**
+     * Provides the object under test for the tests inherited from {@link TestCOSBase}.
+     */
+    @BeforeAll
+    static void setUp()
+    {
+        testCOSBase = COSBoolean.TRUE;
+    }
+
+    /**
+     * getValue() returns the primitive value of both constants.
+     */
+    @Test
+    void testGetValue()
+    {
+        assertTrue(cosBooleanTrue.getValue());
+        assertFalse(cosBooleanFalse.getValue());
+    }
+
+    /**
+     * getValueAsObject() returns the matching {@link Boolean} instance.
+     */
+    @Test
+    void testGetValueAsObject()
+    {
+        assertTrue(cosBooleanTrue.getValueAsObject() instanceof Boolean);
+        assertEquals(Boolean.TRUE, cosBooleanTrue.getValueAsObject());
+        assertTrue(cosBooleanFalse.getValueAsObject() instanceof Boolean);
+        assertEquals(Boolean.FALSE, cosBooleanFalse.getValueAsObject());
+    }
+
+    /**
+     * getBoolean() maps a Boolean onto the corresponding COSBoolean constant.
+     */
+    @Test
+    void testGetBoolean()
+    {
+        assertEquals(cosBooleanTrue, COSBoolean.getBoolean(Boolean.TRUE));
+        assertEquals(cosBooleanFalse, COSBoolean.getBoolean(Boolean.FALSE));
+    }
+
+    /**
+     * Tests equals() - reflexivity, symmetry and transitivity, plus inequality with the
+     * other constant and with plain Boolean values.
+     */
+    @Test
+    void testEquals()
+    {
+        COSBoolean test1 = COSBoolean.TRUE;
+        COSBoolean test2 = COSBoolean.TRUE;
+        COSBoolean test3 = COSBoolean.TRUE;
+        // Reflexive (x == x)
+        assertEquals(test1, test1);
+        // Symmetric is preserved (if x==y then y==x)
+        assertEquals(test2, test1);
+        assertEquals(test1, test2);
+        // Transitive (if x==y && y==z then x==z)
+        assertEquals(test1, test2);
+        assertEquals(test2, test3);
+        assertEquals(test1, test3);
+
+        assertNotEquals(COSBoolean.TRUE, COSBoolean.FALSE);
+        // same 'value' but different type
+        assertNotEquals(Boolean.TRUE, COSBoolean.TRUE);
+        assertNotEquals(Boolean.FALSE, COSBoolean.FALSE);
+        assertNotEquals(true, COSBoolean.TRUE);
+        assertNotEquals(true, COSBoolean.FALSE);
+    }
+
+    /**
+     * Tests accept() - both constants are handed to a {@code TestVisitor} and the bytes
+     * that arrive in the stream are compared with {@code String.valueOf()} of the constant.
+     */
+    @Override
+    @Test
+    void testAccept()
+    {
+        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+        TestVisitor visitor = new TestVisitor(outStream);
+        try
+        {
+            cosBooleanTrue.accept(visitor);
+            testByteArrays(String.valueOf(cosBooleanTrue)
+                    .getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray());
+            outStream.reset();
+            cosBooleanFalse.accept(visitor);
+            testByteArrays(String.valueOf(cosBooleanFalse)
+                    .getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray());
+            outStream.reset();
+        }
+        catch (Exception e)
+        {
+            // keep the cause so that the report shows where the visit actually failed
+            fail("Failed to visit a COSBoolean, exception: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java
new file mode 100644
index 00000000000..6c91f1040c9
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java
@@ -0,0 +1,435 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+import java.util.Random;
+// import org.apache.pdfbox.pdfwriter.COSWriter;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Tests {@link COSFloat}.
+ */
+class TestCOSFloat extends TestCOSNumber
+{
+    /**
+     * Provides the object under test for the inherited tests.
+     */
+    @BeforeAll
+    static void setUp()
+    {
+        try
+        {
+            testCOSBase = COSNumber.get("1.1");
+        }
+        catch (IOException e)
+        {
+            fail("Failed to create a COSNumber in setUp()", e);
+        }
+    }
+
+    /**
+     * Base class to run looped tests with float numbers.
+     *
+     * To use it, derive a class and just implement runTest(). Then either call
+     * runTests for a series of random and pseudorandom tests, or runTest to
+     * test with corner values.
+     */
+    abstract class BaseTester
+    {
+        private int low = -100000;
+        private int high = 300000;
+        private int step = 20000;
+
+        /**
+         * Changes the range the looped tests iterate over.
+         *
+         * @param low first loop value (inclusive)
+         * @param high end of the loop (exclusive)
+         * @param step increment between two loop values
+         */
+        public void setLoop(int low, int high, int step)
+        {
+            this.low = low;
+            this.high = high;
+            this.step = step;
+        }
+
+        // deterministic and non-deterministic test
+        public void runTests()
+        {
+            // deterministic test
+            loop(123456);
+
+            // non-deterministic test
+            loop(System.currentTimeMillis());
+        }
+
+        // loop through a series of pseudorandom tests influenced by a seed
+        private void loop(long seed)
+        {
+            // the generator has to be created from the given seed: only then is the run
+            // started with a fixed seed repeatable, and only then can a failure be
+            // reproduced from the seed printed in the failure message
+            Random rnd = new Random(seed);
+            for (int i = low; i < high; i += step)
+            {
+                float num = i * rnd.nextFloat();
+                try
+                {
+                    runTest(num);
+                }
+                catch (AssertionError a)
+                {
+                    fail("num = " + num + ", seed = " + seed + ", message: " + a.getMessage(), a);
+                }
+            }
+        }
+
+        /**
+         * Runs the actual check for a single value.
+         *
+         * @param num the value to test with
+         */
+        abstract void runTest(float num);
+
+    }
+
+    /**
+     * Tests equals() - ensures that the Object.equals() contract is obeyed.
+     * These are tested over a range of arbitrary values to ensure Consistency,
+     * Reflexivity, Symmetry, Transitivity and non-nullity.
+     */
+    @Test
+    void testEquals()
+    {
+        new BaseTester()
+        {
+            @Override
+            @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity
+            void runTest(float num)
+            {
+
+                COSFloat test1 = new COSFloat(num);
+                COSFloat test2 = new COSFloat(num);
+                COSFloat test3 = new COSFloat(num);
+                // Reflexive (x == x)
+                assertEquals(test1, test1);
+                // Symmetric is preserved (if x==y then y==x)
+                assertEquals(test2, test3);
+                assertEquals(test3, test2);
+                // Transitive (if x==y && y==z then x==z)
+                assertEquals(test1, test2);
+                assertEquals(test2, test3);
+                assertEquals(test1, test3);
+
+                // the float with the next bit pattern must not be equal
+                float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1);
+                COSFloat test4 = new COSFloat(nf);
+                assertNotEquals(test4, test1);
+            }
+        }.runTests();
+    }
+
+    class HashCodeTester extends BaseTester
+    {
+
+        @Override
+        void runTest(float num)
+        {
+            COSFloat test1 = new COSFloat(num);
+            COSFloat test2 = new COSFloat(num);
+            assertEquals(test1.hashCode(), test2.hashCode());
+
+            float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1);
+            COSFloat test3 = new COSFloat(nf);
+            // compare the values: assertNotSame() would compare the identity of the two
+            // boxed Integer objects, which says nothing about the hash codes
+            assertNotEquals(test3.hashCode(), test1.hashCode());
+        }
+    }
+
+    /**
+     * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed
+     * over a range of arbitrary values.
+     */
+    @Test
+    void testHashCode()
+    {
+        new HashCodeTester().runTests();
+    }
+
+    class FloatValueTester extends BaseTester
+    {
+
+        @Override
+        void runTest(float num)
+        {
+            COSFloat testFloat = new COSFloat(num);
+            assertEquals(num, testFloat.floatValue());
+        }
+
+    }
+
+    @Override
+    @Test
+    void testFloatValue()
+    {
+        new FloatValueTester().runTests();
+    }
+
+    class IntValueTester extends BaseTester
+    {
+
+        @Override
+        void runTest(float num)
+        {
+            COSFloat testFloat = new COSFloat(num);
+            assertEquals((int) num, testFloat.intValue());
+        }
+
+    }
+
+    @Override
+    @Test
+    void testIntValue()
+    {
+        new IntValueTester().runTests();
+    }
+
+    class LongValueTester extends BaseTester
+    {
+
+        @Override
+        void runTest(float num)
+        {
+            COSFloat testFloat = new COSFloat(num);
+            assertEquals((long) num, testFloat.longValue());
+        }
+
+    }
+
+    @Override
+    @Test
+    void testLongValue()
+    {
+        new LongValueTester().runTests();
+    }
+
+    class AcceptTester extends BaseTester
+    {
+        final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+        final TestVisitor visitor = new TestVisitor(outStream); // writing gets tested elsewhere
+
+        @Override
+        void runTest(float num)
+        {
+            COSFloat cosFloat = new COSFloat(num);
+            try
+            {
+                cosFloat.accept(visitor);
+            }
+            catch (IOException e)
+            {
+                throw new RuntimeException(e);
+            }
+            String expected = "COSFloat{" + floatToString(cosFloat.floatValue()) + "}";
+            assertEquals(expected, outStream.toString(StandardCharsets.ISO_8859_1));
+            testByteArrays(expected.getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray());
+            // the stream is shared between all runs of this tester
+            outStream.reset();
+        }
+
+    }
+
+    @Override
+    @Test
+    void testAccept()
+    {
+        new AcceptTester().runTests();
+    }
+
+    // The writePDF() tests (WritePDFTester / testWritePDF, including the PDFBOX-1778 corner
+    // case 0.000000000000000000000000000000001f) are not part of this class: writing gets
+    // tested elsewhere.
+
+    /**
+     * PDFBOX-4289: a doubled leading minus sign is read as a single one.
+     *
+     * @throws IOException if the value cannot be parsed
+     */
+    @Test
+    void testDoubleNegative() throws IOException
+    {
+        // PDFBOX-4289
+        COSFloat cosFloat = new COSFloat("--16.33");
+        assertEquals(-16.33f, cosFloat.floatValue());
+    }
+
+    /**
+     * Values whose magnitude is below Float.MIN_VALUE are expected to become 0.
+     *
+     * @throws IOException if a value cannot be parsed
+     */
+    @Test
+    void testVerySmallValues() throws IOException
+    {
+        double smallValue = Float.MIN_VALUE / 10d;
+
+        assertEquals(-1, Double.compare(smallValue, Float.MIN_VALUE),
+                "Test must be performed with a value smaller than Float.MIN_VALUE.");
+
+        // 1.4012984643248171E-46
+        String asString = String.valueOf(smallValue);
+        COSFloat cosFloat = new COSFloat(asString);
+        assertEquals(0.0f, cosFloat.floatValue());
+
+        // 0.00000000000000000000000000000000000000000000014012984643248171
+        asString = new BigDecimal(asString).toPlainString();
+        cosFloat = new COSFloat(asString);
+        assertEquals(0.0f, cosFloat.floatValue());
+
+        smallValue *= -1;
+
+        // -1.4012984643248171E-46
+        asString = String.valueOf(smallValue);
+        cosFloat = new COSFloat(asString);
+        assertEquals(0.0f, cosFloat.floatValue());
+
+        // -0.00000000000000000000000000000000000000000000014012984643248171
+        asString = new BigDecimal(asString).toPlainString();
+        cosFloat = new COSFloat(asString);
+        assertEquals(0.0f, cosFloat.floatValue());
+    }
+
+    /**
+     * Values whose magnitude is above Float.MAX_VALUE are expected to be limited to
+     * +/-Float.MAX_VALUE.
+     *
+     * @throws IOException if a value cannot be parsed
+     */
+    @Test
+    void testVeryLargeValues() throws IOException
+    {
+        double largeValue = Float.MAX_VALUE * 10d;
+
+        assertEquals(1, Double.compare(largeValue, Float.MAX_VALUE),
+                "Test must be performed with a value larger than Float.MAX_VALUE.");
+
+        // positive value in scientific notation (about 3.4E39)
+        String asString = String.valueOf(largeValue);
+        COSFloat cosFloat = new COSFloat(asString);
+        assertEquals(Float.MAX_VALUE, cosFloat.floatValue());
+
+        // the same value in plain notation, i.e. without an exponent
+        asString = new BigDecimal(asString).toPlainString();
+        cosFloat = new COSFloat(asString);
+        assertEquals(Float.MAX_VALUE, cosFloat.floatValue());
+
+        largeValue *= -1;
+
+        // negative value in scientific notation (about -3.4E39)
+        asString = String.valueOf(largeValue);
+        cosFloat = new COSFloat(asString);
+        assertEquals(-Float.MAX_VALUE, cosFloat.floatValue());
+
+        // the same value in plain notation, i.e. without an exponent
+        asString = new BigDecimal(asString).toPlainString();
+        cosFloat = new COSFloat(asString);
+        assertEquals(-Float.MAX_VALUE, cosFloat.floatValue());
+    }
+
+    /**
+     * A single minus sign inside the number is treated like a leading one.
+     *
+     * @throws IOException if a value cannot be parsed
+     */
+    @Test
+    void testMisplacedNegative() throws IOException
+    {
+        // PDFBOX-2990, PDFBOX-3369 have 0.00000-33917698
+        // PDFBOX-3500 has 0.-262
+
+        COSFloat cosFloat = new COSFloat("0.00000-33917698");
+        assertEquals(new COSFloat("-0.0000033917698"), cosFloat);
+
+        cosFloat = new COSFloat("0.-262");
+        assertEquals(new COSFloat("-0.262"), cosFloat);
+
+        cosFloat = new COSFloat("-0.-262");
+        assertEquals(new COSFloat("-0.262"), cosFloat);
+
+        cosFloat = new COSFloat("-12.-1");
+        assertEquals(new COSFloat("-12.1"), cosFloat);
+    }
+
+    /**
+     * More than one misplaced minus sign is rejected with an IOException.
+     */
+    @Test
+    void testDuplicateMisplacedNegative()
+    {
+        assertThrows(IOException.class, () -> new COSFloat("0.-26-2"));
+        assertThrows(IOException.class, () -> new COSFloat("---0.262"));
+        assertThrows(IOException.class, () -> new COSFloat("--0.2-62"));
+    }
+
+    /**
+     * The values 32768 and -32768 are kept unchanged.
+     */
+    @Test
+    void testStubOperatorMinMaxValues()
+    {
+        float largeValue = 32768f;
+        float largeNegativeValue = -32768f;
+
+        assertEquals(largeValue, new COSFloat(largeValue).floatValue());
+        assertEquals(largeNegativeValue, new COSFloat(largeNegativeValue).floatValue());
+    }
+
+    /**
+     * Formats a float in plain decimal notation, without exponent.
+     *
+     * @param value the value to format
+     * @return the plain decimal representation with surplus trailing zeros removed
+     */
+    private String floatToString(float value)
+    {
+        // use a BigDecimal as intermediate state to avoid
+        // a floating point string representation of the float value
+        return removeTrailingNull(new BigDecimal(String.valueOf(value)).toPlainString());
+    }
+
+    /**
+     * Removes trailing zeros from the fraction part, but leaves a lone ".0" in place.
+     *
+     * @param value the decimal string to shorten
+     * @return the shortened string
+     */
+    private String removeTrailingNull(String value)
+    {
+        // remove fraction digit "0" only
+        if (value.indexOf('.') > -1 && !value.endsWith(".0"))
+        {
+            while (value.endsWith("0") && !value.endsWith(".0"))
+            {
+                value = value.substring(0, value.length() - 1);
+            }
+        }
+        return value;
+    }
+
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java
new file mode 100644
index 00000000000..94a4f734ed1
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +//import org.apache.pdfbox.pdfwriter.COSWriter; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * A test case for COSInteger + * + * @author Koch + */ +class TestCOSInteger extends TestCOSNumber +{ + @BeforeAll + static void setUp() + { + try + { + testCOSBase = COSNumber.get( "0"); + } + catch (IOException e) + { + fail("Failed to create a COSNumber in setUp()"); + } + } + + /** + * Tests equals() - ensures that the Object.equals() contract is obeyed. These are tested over + * a range of arbitrary values to ensure Consistency, Reflexivity, Symmetry, Transitivity and + * non-nullity. 
+ */ + @Test + void testEquals() + { + // Consistency + for (int i = -1000; i < 3000; i += 200) + { + COSInteger test1 = COSInteger.get( i); + COSInteger test2 = COSInteger.get(i); + COSInteger test3 = COSInteger.get(i); + // Reflexive (x == x) + assertEquals(test1, test1); + // Symmetric is preserved ( x==y then y===x) + assertEquals(test2, test1); + assertEquals(test1, test2); + // Transitive (if x==y && y==z then x===z) + assertEquals(test1, test2); + assertEquals(test2, test3); + assertEquals(test1, test3); + + COSInteger test4 = COSInteger.get(i + 1); + assertNotEquals(test4, test1); + } + } + + /** + * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed over a range of + * arbitrary values. + */ + @Test + void testHashCode() + { + for (int i = -1000; i < 3000; i += 200) + { + COSInteger test1 = COSInteger.get(i); + COSInteger test2 = COSInteger.get(i); + assertEquals(test1.hashCode(), test2.hashCode()); + + COSInteger test3 = COSInteger.get(i + 1); + assertNotEquals(test3.hashCode(), test1.hashCode()); // compare values: assertNotSame only checked boxed-Integer identity + } + } + + @Override + @Test + void testFloatValue() + { + for (int i = -1000; i < 3000; i += 200) + { + assertEquals((float) i, COSInteger.get(i).floatValue()); + } + } + + @Override + @Test + void testIntValue() + { + for (int i = -1000; i < 3000; i += 200) + { + assertEquals(i, COSInteger.get(i).intValue()); + } + } + + @Override + @Test + void testLongValue() + { + for (int i = -1000; i < 3000; i += 200) + { + assertEquals((long) i, COSInteger.get(i).longValue()); + } + } + + @Override + @Test + void testAccept() + { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + TestVisitor visitor = new TestVisitor(outStream); + int index = 0; + try + { + // 197 is a prime number, used just to get some variation in the digits.
+ for (int i = -1000; i < 3000; i += 197) + { + index = i; + COSInteger cosInt = COSInteger.get(i); + String expected = Integer.toString( i ); + cosInt.accept(visitor); + testByteArrays(expected.getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); + outStream.reset(); + } + } + catch (Exception e) + { + fail("Failed to write " + index + " exception: " + e.getMessage()); + } + } + + /** + * Tests writePDF() - this method takes an {@link java.io.OutputStream} and writes this object to it. + */ +// @Test +// void testWritePDF() +// { +// ByteArrayOutputStream outStream = new ByteArrayOutputStream(); +// int index = 0; +// try +// { +// for (int i = -1000; i < 3000; i += 200) +// { +// index = i; +// COSInteger cosInt = COSInteger.get(i); +// testByteArrays(String.valueOf(i).getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); +// outStream.reset(); +// } +// } +// catch (Exception e) +// { +// fail("Failed to write " + index + " exception: " + e.getMessage()); +// } +// } +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java new file mode 100644 index 00000000000..69b1d5814e3 --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java @@ -0,0 +1,60 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.cos; + +//import java.io.ByteArrayOutputStream; +import java.io.IOException; +//import org.apache.pdfbox.Loader; +//import org.apache.pdfbox.pdmodel.PDDocument; +//import org.apache.pdfbox.pdmodel.PDPage; +import org.junit.jupiter.api.Test; + +//import static org.junit.jupiter.api.Assertions.assertEquals; +//import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestCOSName +{ + /** + * PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?". + * + * @throws IOException + */ + @Test + void PDFBox4076() throws IOException + { + /* + String special = "中国你好!"; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + try (PDDocument document = new PDDocument()) + { + PDPage page = new PDPage(); + document.addPage(page); + document.getDocumentCatalog().getCOSObject().setString( COSName.getPDFName( special), special); + + document.save(baos); + } + try (PDDocument document = Loader.loadPDF(baos.toByteArray())) + { + COSDictionary catalogDict = document.getDocumentCatalog().getCOSObject(); + assertTrue(catalogDict.containsKey(special)); + assertEquals(special, catalogDict.getString(special)); + } + */ + } + +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java new file mode 100644 index 00000000000..58015601cba --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos; + +import java.io.IOException; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Test class for {@link COSNumber} + */ +abstract class TestCOSNumber extends TestCOSBase +{ + /** + * Test floatValue() - test that the correct float value is returned. + */ + abstract void testFloatValue(); + + /** + * Test intValue() - test that the correct int value is returned. + */ + abstract void testIntValue(); + + /** + * Test longValue() - test that the correct long value is returned. + */ + abstract void testLongValue(); + + /** + * Tests get() - tests a static constructor for COSNumber classes. 
+ */ + @Test + void testGet() + { + try + { + + // Ensure the basic static numbers are recognized + assertEquals( COSInteger.ZERO, COSNumber.get( "0")); + assertEquals(COSInteger.ZERO, COSNumber.get("-")); + assertEquals(COSInteger.ZERO, COSNumber.get(".")); + assertEquals(COSInteger.ONE, COSNumber.get("1")); + assertEquals(COSInteger.TWO, COSNumber.get("2")); + assertEquals(COSInteger.THREE, COSNumber.get("3")); + // Test some arbitrary ints + assertEquals(COSInteger.get(100), COSNumber.get("100")); + assertEquals(COSInteger.get(256), COSNumber.get("256")); + assertEquals(COSInteger.get(-1000), COSNumber.get("-1000")); + assertEquals(COSInteger.get(2000), COSNumber.get("+2000")); + // Some arbitrary floats + assertEquals( new COSFloat( 1.1f), COSNumber.get( "1.1")); + assertEquals(new COSFloat(100f), COSNumber.get("100.0")); + assertEquals(new COSFloat(-100.001f), COSNumber.get("-100.001")); + // according to the specs the exponential shall not be used + // but obviously there some + assertNotNull(COSNumber.get("-2e-006")); + assertNotNull(COSNumber.get("-8e+05")); + + assertThrows(NullPointerException.class, () -> COSNumber.get(null)); + assertThrows(IOException.class, () -> COSNumber.get("a")); + } + catch (IOException e) + { + fail("Failed to convert a number " + e.getMessage()); + } + } + + /** + * PDFBOX-5176: large number, too big for a long leads to an COSInteger value which is marked as invalid. 
+ * + * @throws IOException + */ + @Test public void testLargeNumber() throws IOException + { + // max value + COSNumber cosNumber = COSNumber.get(Long.toString(Long.MAX_VALUE)); + assertTrue(cosNumber instanceof COSInteger); + COSInteger cosInteger = (COSInteger) cosNumber; + assertTrue(cosInteger.isValid()); + // min value + cosNumber = COSNumber.get(Long.toString(Long.MIN_VALUE)); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertTrue(cosInteger.isValid()); + + // out of range, max value + cosNumber = COSNumber.get("18446744073307448448"); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertFalse(cosInteger.isValid()); + // out of range, min value + cosNumber = COSNumber.get("-18446744073307448448"); + assertTrue(cosNumber instanceof COSInteger); + cosInteger = (COSInteger) cosNumber; + assertFalse(cosInteger.isValid()); + } + + @Test + void testInvalidNumber() + { + try + { + COSNumber.get("18446744073307F448448"); + fail("Was expecting an IOException"); + } + catch (IOException e) + { + } + } + +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java new file mode 100644 index 00000000000..d0678bc6a3a --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.apache.pdfbox.io.RandomAccessReadView; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.apache.pdfbox.cos.TestCOSString.ESC_CHAR_STRING_PDF_FORMAT; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test class for {@link COSObject}. + */ +class TestCOSObject extends TestCOSBase implements ICOSParser +{ + static COSObjectKey key = new COSObjectKey( 121L, 0 ); + static COSString cosString; + + @BeforeAll + static void setUp() + { + cosString = new COSString( "test string" ); + cosString.setKey( key ); // same key as the proxy object, because + // this will be the dereferenced object. + cosString.setDirect( true ); // If we were writing this as the value + // in a COSDictionary (which we are not) we would use this object + // directly and not create a reference for it. 
+ testCOSBase = new COSObject( cosString ); + } + + @Test + void testGetCOSObject() + { + assertTrue( testCOSBase.getCOSObject() instanceof COSObject ); + } + + @Test + @Override + void testIsSetDirect() + { + testCOSBase.setDirect(true); + assertFalse(testCOSBase.isDirect()); + testCOSBase.setDirect(false); + assertFalse(testCOSBase.isDirect()); + } + + @Test + void testGetObject() + { + // if I'm not mistaken, a proxy object will /never/ be direct + assertFalse( testCOSBase.isDirect() ); + COSBase base = ((COSObject) testCOSBase).getObject(); + // testCOSBase has no parser, so the object returned should be the + // string object we initialized it with. + assertEquals( cosString, base ); + assertTrue( ((COSObject) testCOSBase).isDereferenced() ); + + + final COSObject testCOSObject = new COSObject( key, this ); + + // start by making sure that the test object is indirect and is not dereferenced. + assertFalse( testCOSObject.isDereferenced() ); + + // getObject should cause the referenced object to be dereferenced + base = testCOSObject.getObject(); + assertTrue( testCOSObject.isDereferenced() ); + assertEquals( cosString, base ); + } + + /** + * Test accept() - tests the interface for visiting a document at the COS level. + * In the case of proxy {@link COSObject} the visitor is passed either to the + * encapsulated object, if it is present or can be dereferenced, or to the + * {@link COSNull#NULL} global object. + */ + @Test + void testAccept() throws IOException + { + String expected = "(" + ESC_CHAR_STRING_PDF_FORMAT + ")"; + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + TestVisitor visitor = new TestVisitor(outStream); + testCOSBase.accept( visitor ); + // the base test object encapsulates a string. + assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); + outStream.reset(); + // this new COSObject will dereference to the same string object as above.
+ COSObject testCOSObject = new COSObject( key, this ); + testCOSObject.accept( visitor ); + assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); + outStream.reset(); + testCOSObject.setToNull(); + testCOSObject.accept( visitor ); + assertEquals( "COSNull.NULL", outStream.toString( StandardCharsets.ISO_8859_1 )); + } + + @Test + void isCOSObjectNull() + { + COSObject testCOSObject = new COSObject( key, this ); + // The object has not been dereferenced, so it should still be null + assertTrue( testCOSObject.isObjectNull()); + testCOSObject.getObject(); // This should dereference the object + assertFalse( testCOSObject.isObjectNull()); + // this should set the encapsulated object to COSNull.NULL + testCOSObject.setToNull(); + assertTrue( testCOSObject.isObjectNull()); + // set to null should have zeroed out the parser, so no further + // dereferencing should be possible. + COSBase base = testCOSObject.getObject(); + assertEquals( COSNull.NULL, base ); + } + + /** + * A simple utility function to compare two byte arrays. 
+ * @param byteArr1 the expected byte array + * @param byteArr2 the byte array being compared + */ + @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity + protected void testByteArrays(byte[] byteArr1, byte[] byteArr2) + { + assertEquals(byteArr1.length, byteArr2.length); + for (int i = 0; i < byteArr1.length; i++) + { + assertEquals(byteArr1[i], byteArr2[i]); + } + } + + @Override + public COSBase dereferenceCOSObject( COSObject obj ) throws IOException + { + return cosString; + } + + @Override + public RandomAccessReadView createRandomAccessReadView( long startPosition, long streamLength ) throws IOException + { + return null; + } +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java new file mode 100644 index 00000000000..2adb0aa3bd5 --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.pdfbox.cos; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import org.apache.pdfbox.cos.filter.Filter; +import org.apache.pdfbox.cos.filter.FilterFactory; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestCOSStream +{ + /** + * Tests encoding of a stream without any filter applied. + * + * @throws IOException + */ + @Test + void testUncompressedStreamEncode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + COSStream stream = createStream( testString, null); + validateEncoded(stream, testString); + } + + /** + * Tests decoding of a stream without any filter applied. + * + * @throws IOException + */ + @Test + void testUncompressedStreamDecode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + COSStream stream = createStream(testString, null); + validateDecoded(stream, testString); + } + + /** + * Tests encoding of a stream with one filter applied. + * + * @throws IOException + */ + @Test + void testCompressedStream1Encode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + byte[] testStringEncoded = encodeData( testString, COSName.FLATE_DECODE); + COSStream stream = createStream(testString, COSName.FLATE_DECODE); + validateEncoded(stream, testStringEncoded); + } + + /** + * Tests decoding of a stream with one filter applied. 
+ * + * @throws IOException + */ + @Test + void testCompressedStream1Decode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); + COSStream stream = new COSStream(); + + try (OutputStream output = stream.createRawOutputStream()) + { + output.write(testStringEncoded); + } + + stream.setItem(COSName.FILTER, COSName.FLATE_DECODE); + validateDecoded(stream, testString); + } + + /** + * Tests encoding of a stream with 2 filters applied. + * + * @throws IOException + */ + @Test + void testCompressedStream2Encode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); + testStringEncoded = encodeData(testStringEncoded, COSName.ASCII85_DECODE); + + COSArray filters = new COSArray(); + filters.add(COSName.ASCII85_DECODE); + filters.add(COSName.FLATE_DECODE); + + COSStream stream = createStream(testString, filters); + validateEncoded(stream, testStringEncoded); + } + + /** + * Tests decoding of a stream with 2 filters applied. 
+ * + * @throws IOException + */ + @Test + void testCompressedStream2Decode() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); + testStringEncoded = encodeData(testStringEncoded, COSName.ASCII85_DECODE); + COSStream stream = new COSStream(); + + COSArray filters = new COSArray(); + filters.add(COSName.ASCII85_DECODE); + filters.add(COSName.FLATE_DECODE); + stream.setItem(COSName.FILTER, filters); + + try (OutputStream output = stream.createRawOutputStream()) + { + output.write(testStringEncoded); + } + + validateDecoded(stream, testString); + } + + /** + * Tests that encoding is done correctly even if the stream is closed twice. + * Closeable.close() allows streams to be closed multiple times. The second and subsequent + * close() calls should have no effect. + * + * @throws IOException + */ + @Test + void testCompressedStreamDoubleClose() throws IOException + { + byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); + byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); + COSStream stream = new COSStream(); + OutputStream output = stream.createOutputStream(COSName.FLATE_DECODE); + output.write(testString); + output.close(); + output.close(); + validateEncoded(stream, testStringEncoded); + } + + @Test + void testHasStreamData() throws IOException + { + try (COSStream stream = new COSStream()) + { + assertFalse(stream.hasData()); + Assertions.assertThrows(IOException.class, () -> stream.createInputStream(), + "createInputStream should have thrown an IOException"); + + byte[] testString = "This is a test string to be used as input for TestCOSStream" + .getBytes(StandardCharsets.US_ASCII); + try (OutputStream output = stream.createOutputStream()) + { + output.write(testString); + } +
assertTrue(stream.hasData()); + } + } + + private byte[] encodeData(byte[] original, COSName filter) throws IOException + { + Filter encodingFilter = FilterFactory.INSTANCE.getFilter(filter); + ByteArrayOutputStream encoded = new ByteArrayOutputStream(); + encodingFilter.encode( new ByteArrayInputStream(original), encoded, new COSDictionary(), 0); + return encoded.toByteArray(); + } + + private COSStream createStream(byte[] testString, COSBase filters) throws IOException + { + COSStream stream = new COSStream(); + try (OutputStream output = stream.createOutputStream(filters)) + { + output.write(testString); + } + return stream; + } + + private void validateEncoded(COSStream stream, byte[] expected) throws IOException + { + InputStream in = stream.createRawInputStream(); + byte[] decoded = in.readAllBytes(); + stream.close(); + assertTrue(Arrays.equals(expected, decoded), "Encoded data doesn't match input"); + } + + private void validateDecoded(COSStream stream, byte[] expected) throws IOException + { + InputStream in = stream.createInputStream(); + byte[] encoded = in.readAllBytes(); + stream.close(); + assertTrue(Arrays.equals(expected, encoded), "Decoded data doesn't match input"); + } +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java new file mode 100644 index 00000000000..2f93a0d54db --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +// import org.apache.pdfbox.pdfwriter.COSWriter; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * This will test all of the filters in the PDFBox system. + * + * Ben Litchfield + */ +class TestCOSString extends TestCOSBase +{ + static final String ESC_CHAR_STRING = + "( test#some) escaped< \\chars>!~1239857 "; + static final String ESC_CHAR_STRING_PDF_FORMAT = + "\\( test#some\\) escaped< \\\\chars>!~1239857 "; + + @BeforeAll + static void setUp() + { + testCOSBase = new COSString( "test cos string"); + } + + /** + * TODO: writing gets tested elsewhere + * + * Test setForceHexForm() and setForceLiteralForm() - tests these two + * constructors do enforce the different String output forms within PDF. 
+ */ +// @Test +// void testSetForceHexLiteralForm() +// { +// String inputString = "Test with a text and a few numbers 1, 2 and 3"; +// String pdfHex = "<" + createHex(inputString) + ">"; +// COSString cosStr = new COSString(inputString, true); +// writePDFTests(pdfHex, cosStr); +// +// COSString escStr = new COSString(ESC_CHAR_STRING); +// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", escStr); +// COSString escStrHex = new COSString(ESC_CHAR_STRING, true); +// // Escape characters not escaped in hex version +// writePDFTests("<" + createHex(ESC_CHAR_STRING) + ">", escStrHex); +// } +// +// /** +// * TODO: writing gets tested elsewhere +// * +// * Helper method for testing writePDF(). +// * +// * @param expected the String expected when writePDF() is invoked +// * @param testSubj the test subject +// */ +// private void writePDFTests(String expected, COSString testSubj) +// { +// ByteArrayOutputStream outStream = new ByteArrayOutputStream(); +// try +// { +// COSWriter visitor = new COSWriter( outStream ); +// visitor.writeString(testSubj.getBytes(), testSubj.getForceHexForm(), outStream); +// } +// catch (IOException e) +// { +// fail("IOException: " + e.getMessage()); +// } +// assertEquals(expected, outStream.toString()); +// } + + /** + * Test parseHex() - tests that the proper String is created from a hex string input. 
+ */ + @Test + void testFromHex() + { + String expected = "Quick and simple test"; + String hexForm = createHex(expected); + try + { + COSString test1 = COSString.parseHex(hexForm); +// writePDFTests("(" + expected + ")", test1); + COSString test2 = COSString.parseHex(createHex(ESC_CHAR_STRING)); +// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", test2); + } + catch (IOException e) + { + fail("IOException thrown: " + e.getMessage()); + } + assertThrows(IOException.class, () -> COSString.parseHex(hexForm + "xx"), + "Should have thrown an IOException here"); + } + + private String createHex(String str) + { + StringBuilder sb = new StringBuilder(); + for (char c : str.toCharArray()) + { + sb.append(String.format("%02X", (int) c)); // zero-pad so chars below 0x10 still yield two hex digits + } + return sb.toString().toUpperCase(); + } + + /** + * Tests getHex() - ensure the hex String returned is properly formatted. + */ + @Test + void testGetHex() + { + String expected = "Test subject for testing getHex"; + COSString test1 = new COSString(expected); + String hexForm = createHex(expected); + assertEquals(hexForm, test1.toHexString()); + COSString escCS = new COSString(ESC_CHAR_STRING); + // Not sure whether the escaped characters should be escaped or not, presumably since + // writePDF() gives you the proper formatted text, getHex() should ONLY convert to hex. + assertEquals(createHex(ESC_CHAR_STRING), escCS.toHexString()); + } + + /** + * Test testGetString() - ensure getString() are returned in the correct format.
+ */ + @Test + void testGetString() + { + try + { + String testStr = "Test subject for getString()"; + COSString test1 = new COSString(testStr); + assertEquals(testStr, test1.getString()); + + COSString hexStr = COSString.parseHex(createHex(testStr)); + assertEquals(testStr, hexStr.getString()); + + COSString escapedString = new COSString(ESC_CHAR_STRING); + assertEquals(ESC_CHAR_STRING, escapedString.getString()); + + testStr = "Line1\nLine2\nLine3\n"; + COSString lineFeedString = new COSString(testStr); + assertEquals(testStr, lineFeedString.getString()); + } + catch (IOException e) + { + fail("IOException thrown: " + e.getMessage()); + } + } + + /** + * Test getBytes() - again not much to test, just ensure the proper byte array is returned. + */ + @Test + void testGetBytes() + { + COSString str = new COSString(ESC_CHAR_STRING); + testByteArrays(ESC_CHAR_STRING.getBytes(), str.getBytes()); + } + + /** + * TODO: writing output is tested elsewhere. + * Tests writePDF() - tests that the string is in PDF format. + */ +// @Test +// void testWritePDF() +// { +// // This has been tested quite thorougly above but do a couple tests anyway +// COSString testSubj = new COSString(ESC_CHAR_STRING); +// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", testSubj); +// String textString = "This is just an arbitrary piece of text for testing"; +// COSString testSubj2 = new COSString(textString); +// writePDFTests("(" + textString + ")", testSubj2); +// } + + /** + * This will test all of the filters in the system. + * + * @throws IOException If there is an exception while encoding. + */ + @Test + void testUnicode() throws IOException + { + String theString = "\u4e16"; + COSString string = new COSString(theString); + assertEquals( theString, string.getString() ); + + String textAscii = "This is some regular text. It should all be expressible in ASCII"; + /** En français où les choses sont accentués. 
En español, así */ + String text8Bit = "En fran\u00e7ais o\u00f9 les choses sont accentu\u00e9s. En espa\u00f1ol, as\u00ed"; + /** をクリックしてく */ + String textHighBits = "\u3092\u30af\u30ea\u30c3\u30af\u3057\u3066\u304f"; + + // Testing the getString method + COSString stringAscii = new COSString( textAscii ); + assertEquals( stringAscii.getString(), textAscii ); + + COSString string8Bit = new COSString( text8Bit ); + assertEquals( string8Bit.getString(), text8Bit ); + + COSString stringHighBits = new COSString( textHighBits ); + assertEquals( stringHighBits.getString(), textHighBits ); + + + // Testing the getBytes method + // The first two strings should be stored as ISO-8859-1 because they only contain chars in the range 0..255 + assertEquals(textAscii, new String(stringAscii.getBytes(), StandardCharsets.ISO_8859_1)); + // likewise for the 8bit characters. + assertEquals(text8Bit, new String(string8Bit.getBytes(), StandardCharsets.ISO_8859_1)); + + // The japanese text contains high bits so must be stored as big endian UTF-16 + assertEquals(textHighBits, new String(stringHighBits.getBytes(), "UnicodeBig")); + + + // Test the writePDF method to ensure that the Strings are correct when written into PDF. 
+// ByteArrayOutputStream out = new ByteArrayOutputStream(); +// COSWriter.writeString(stringAscii, out); +// assertEquals("(" + textAscii + ")", out.toString("ASCII")); + +// out.reset(); +// COSWriter.writeString(string8Bit, out); +// StringBuffer hex = new StringBuffer(); +// for(char c : text8Bit.toCharArray()) +// { +// hex.append( Integer.toHexString(c).toUpperCase() ); +// } +// assertEquals("<"+ hex +">", out.toString("ASCII")); + +// out.reset(); +// COSWriter.writeString(stringHighBits, out); +// hex = new StringBuffer(); +// hex.append("FEFF"); // Byte Order Mark +// for(char c : textHighBits.toCharArray()) +// { +// hex.append( Integer.toHexString(c).toUpperCase() ); +// } +// assertEquals("<"+ hex +">", out.toString("ASCII")); + } + + @Override + @Test + void testAccept() throws IOException + { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + TestVisitor visitor = new TestVisitor( outStream ); + COSString testSubj = new COSString(ESC_CHAR_STRING); + testSubj.accept(visitor); + String expected = "(" + ESC_CHAR_STRING_PDF_FORMAT + ")"; + assertEquals(expected, outStream.toString()); + outStream.reset(); + COSString testSubjHex = new COSString(ESC_CHAR_STRING, true); + testSubjHex.accept(visitor); + expected = "<" + createHex(ESC_CHAR_STRING) + ">"; + assertEquals(expected, outStream.toString()); + } + + /** + * Tests equals(Object) - ensure that the Object.equals() contract is obeyed. + */ + @Test + void testEquals() + { + // Check all these several times for consistency + for (int i = 0; i < 10; i++) + { + // Reflexive + COSString x1 = new COSString("Test"); + assertEquals(x1, x1); + + // Symmetry i.e. 
if x == y then y == x + COSString y1 = new COSString("Test"); + assertEquals(x1, y1); + assertEquals(y1, x1); + COSString x2 = new COSString("Test", true); + // also if x != y then y != x + assertNotEquals(x1, x2); + assertNotEquals(x2, x1); + + // Transitive if x == y && y == z then x == z + COSString z1 = new COSString("Test"); + assertEquals(x1, y1); + assertEquals(y1, z1); + assertEquals(x1, z1); + // Test the negative as well if x1 == y1 && y1 != x2 then x1 != x2 + assertEquals(x1, y1); + assertNotEquals(y1, x2); + assertNotEquals(x1, x2); + } + } + + /** + * Test hashCode() - tests that the Object.hashCode() contract is obeyed. + */ + @Test + void testHashCode() + { + COSString str1 = new COSString("Test1"); + COSString str2 = new COSString("Test2"); + assertNotEquals(str1.hashCode(), str2.hashCode()); + COSString str3 = new COSString("Test1"); + assertEquals(str1.hashCode(), str3.hashCode()); + COSString str3Hex = new COSString("Test1", true); + assertNotEquals(str1.hashCode(), str3Hex.hashCode()); + } + + /** + * Test testCompareFromHexString() - tests that Strings created from hex + * compare correctly (PDFBOX-2401) + * + * @throws IOException + */ + @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity + @Test + void testCompareFromHexString() throws IOException + { + COSString test1 = COSString.parseHex("000000FF000000"); + COSString test2 = COSString.parseHex("000000FF00FFFF"); + assertEquals(test1, test1); + assertEquals(test2, test2); + assertNotEquals(test1.toHexString(), test2.toHexString()); + assertFalse(Arrays.equals(test1.getBytes(), test2.getBytes())); + assertNotEquals(test1, test2); + assertNotEquals(test2, test1); + assertNotEquals(test1.getString(), test2.getString()); + } + + /** + * PDFBOX-3881: Test that if String has only the BOM, that it be an empty string. 
+ * + * @throws IOException + */ + @Test + void testEmptyStringWithBOM() throws IOException + { + assertTrue(COSString.parseHex("FEFF").getString().isEmpty()); + assertTrue(COSString.parseHex("FFFE").getString().isEmpty()); + } +} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java new file mode 100644 index 00000000000..eb02974e244 --- /dev/null +++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.cos; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test class for {@link COSUpdateInfo}. + */ +class TestCOSUpdateInfo +{ + + /** + * Tests isNeedToBeUpdate() and setNeedToBeUpdate() - tests the getter/setter methods. 
+     */
+    @Test
+    void testIsSetNeedToBeUpdate()
+    {
+        COSDocumentState origin = new COSDocumentState();
+        origin.setParsing(false);
+        // COSDictionary
+        COSUpdateInfo testCOSDictionary = new COSDictionary();
+        testCOSDictionary.setNeedToBeUpdated(true);
+        assertFalse(testCOSDictionary.isNeedToBeUpdated());
+        testCOSDictionary.getUpdateState().setOriginDocumentState(origin);
+        testCOSDictionary.setNeedToBeUpdated(true);
+        assertTrue(testCOSDictionary.isNeedToBeUpdated());
+        testCOSDictionary.setNeedToBeUpdated(false);
+        assertFalse(testCOSDictionary.isNeedToBeUpdated());
+
+        // COSObject
+        COSUpdateInfo testCOSObject;
+        testCOSObject = new COSObject( null);
+        testCOSObject.setNeedToBeUpdated(true);
+        assertFalse(testCOSObject.isNeedToBeUpdated());
+        testCOSObject.getUpdateState().setOriginDocumentState(origin);
+        testCOSObject.setNeedToBeUpdated(true);
+        assertTrue(testCOSObject.isNeedToBeUpdated());
+        testCOSObject.setNeedToBeUpdated(false);
+        assertFalse(testCOSObject.isNeedToBeUpdated());
+    }
+
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java
new file mode 100644
index 00000000000..2821207c6f3
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+import static org.apache.pdfbox.cos.TestCOSString.ESC_CHAR_STRING_PDF_FORMAT;
+
+/**
+ * Minimal {@link ICOSVisitor} used by the COS tests: some visit methods write a short marker into the buffer
+ * handed to the constructor, the others deliberately write nothing.
+ */
+public class TestVisitor implements ICOSVisitor
+{
+    private final ByteArrayOutputStream output;
+
+    /**
+     * @param outStream buffer that receives whatever the visit methods write
+     */
+    public TestVisitor( ByteArrayOutputStream outStream )
+    {
+        output = outStream;
+    }
+
+    @Override
+    public void visitFromArray( COSArray obj ) throws IOException
+    {
+        // intentionally empty: nothing is written for arrays
+    }
+
+    @Override
+    public void visitFromBoolean( COSBoolean cosBoolean ) throws IOException
+    {
+        if( cosBoolean.getValue() )
+        {
+            output.write( COSBoolean.TRUE_BYTES );
+        }
+        else
+        {
+            output.write( COSBoolean.FALSE_BYTES );
+        }
+    }
+
+    @Override
+    public void visitFromDictionary( COSDictionary obj ) throws IOException
+    {
+        // intentionally empty: nothing is written for dictionaries
+    }
+
+    @Override
+    public void visitFromDocument( COSDocument obj ) throws IOException
+    {
+        // intentionally empty: nothing is written for documents
+    }
+
+    @Override
+    public void visitFromFloat( COSFloat cosFloat ) throws IOException
+    {
+        output.write( cosFloat.toString().getBytes( StandardCharsets.ISO_8859_1 ) );
+    }
+
+    @Override
+    public void visitFromInt( COSInteger cosInteger ) throws IOException
+    {
+        output.write( Integer.toString( cosInteger.intValue() ).getBytes( StandardCharsets.ISO_8859_1 ) );
+    }
+
+    @Override
+    public void visitFromName( COSName obj ) throws IOException
+    {
+        // NOTE(review): always throws; presumably names are not expected to reach this visitor - confirm
+        throw new IOException();
+    }
+
+    @Override
+    public void visitFromNull( COSNull obj ) throws IOException
+    {
+        output.write( "COSNull.NULL".getBytes( StandardCharsets.ISO_8859_1 ));
+    }
+
+    @Override
+    public void visitFromStream( COSStream obj ) throws IOException
+    {
+        // intentionally empty: nothing is written for streams
+    }
+
+    @Override
+    public void visitFromString( COSString cosString ) throws IOException
+    {
+        if (cosString.getForceHexForm())
+        {
+            output.write( ("<" + cosString.toHexString() + ">").getBytes( StandardCharsets.ISO_8859_1 ));
+        }
+        else
+        {
+            // NOTE(review): writes the fixed ESC_CHAR_STRING_PDF_FORMAT fixture from TestCOSString, not the
+            // content of the visited string, so the output is the same for every non-hex COSString.
+            output.write( ("(" + ESC_CHAR_STRING_PDF_FORMAT + ")").getBytes( StandardCharsets.ISO_8859_1 ));
+        }
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java
new file mode 100644
index 00000000000..0045d7ec796
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java
@@ -0,0 +1,352 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos;
+
+import java.util.Calendar;
+//import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.function.Executable;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Checks that the dictionary returned by {@code COSDictionary.asUnmodifiableDictionary()} rejects every
+ * mutating call exercised below with an {@link UnsupportedOperationException}.
+ */
+class UnmodifiableCOSDictionaryTest
+{
+    /**
+     * Asserts that the given mutation is rejected with an {@link UnsupportedOperationException}.
+     *
+     * @param mutation a call on an unmodifiable dictionary that is expected to throw
+     */
+    private static void assertRejected(Executable mutation)
+    {
+        assertThrows(UnsupportedOperationException.class, mutation,
+                "An UnsupportedOperationException should have been thrown");
+    }
+
+    @Test
+    void testUnmodifiableCOSDictionary()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+        COSDictionary cosDictionary = new COSDictionary();
+
+        assertRejected(dict::clear);
+        assertRejected(() -> dict.removeItem(COSName.A));
+        assertRejected(() -> dict.addAll(cosDictionary));
+        assertRejected(() -> dict.setFlag(COSName.A, 0, true));
+        assertRejected(() -> dict.setNeedToBeUpdated(true));
+    }
+
+    @Test
+    void testSetItem()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setItem(COSName.A, COSName.A));
+        assertRejected(() -> dict.setItem("A", COSName.A));
+
+        // TODO(review): the two Encoding-based setItem checks stay disabled, as in the original; Encoding comes
+        // from org.apache.pdfbox.pdmodel.font.encoding, whose import is commented out above.
+        // Encoding standardEncoding = Encoding.getInstance(COSName.STANDARD_ENCODING);
+        // assertRejected(() -> dict.setItem(COSName.A, standardEncoding));
+        // assertRejected(() -> dict.setItem("A", standardEncoding));
+    }
+
+    @Test
+    void testSetBoolean()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setBoolean(COSName.A, true));
+        assertRejected(() -> dict.setBoolean("A", true));
+    }
+
+    @Test
+    void testSetName()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setName(COSName.A, "A"));
+        assertRejected(() -> dict.setName("A", "A"));
+    }
+
+    @Test
+    void testSetDate()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+        Calendar calendar = Calendar.getInstance();
+
+        assertRejected(() -> dict.setDate(COSName.A, calendar));
+        assertRejected(() -> dict.setDate("A", calendar));
+    }
+
+    @Test
+    void testSetEmbeddedDate()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+        Calendar calendar = Calendar.getInstance();
+
+        assertRejected(() -> dict.setEmbeddedDate(COSName.PARAMS, COSName.A, calendar));
+    }
+
+    @Test
+    void testSetString()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setString(COSName.A, "A"));
+        assertRejected(() -> dict.setString("A", "A"));
+    }
+
+    @Test
+    void testSetEmbeddedString()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setEmbeddedString(COSName.PARAMS, COSName.A, "A"));
+    }
+
+    @Test
+    void testSetInt()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setInt(COSName.A, 0));
+        assertRejected(() -> dict.setInt("A", 0));
+    }
+
+    @Test
+    void testSetEmbeddedInt()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setEmbeddedInt(COSName.PARAMS, COSName.A, 0));
+    }
+
+    @Test
+    void testSetLong()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setLong(COSName.A, 0));
+        assertRejected(() -> dict.setLong("A", 0));
+    }
+
+    @Test
+    void testSetFloat()
+    {
+        COSDictionary dict = new COSDictionary().asUnmodifiableDictionary();
+
+        assertRejected(() -> dict.setFloat(COSName.A, 0));
+        assertRejected(() -> dict.setFloat("A", 0));
+    }
+
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java
new file mode 100644
index 00000000000..a9f5c994a43
--- /dev/null
+++
b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import org.junit.jupiter.api.Test;
+
+import static org.apache.pdfbox.cos.filter.Predictor.calcSetBitSeq;
+import static org.apache.pdfbox.cos.filter.Predictor.getBitSeq;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Table-style checks of the bit-field helpers getBitSeq and calcSetBitSeq of Predictor, written with binary literals.
+ * @author Tilman Hausherr
+ */
+class PredictorTest
+{
+    /**
+     * getBitSeq(value, start, width): per the cases below, start counts from the least significant bit.
+     */
+    @Test
+    void testGetBitSeq()
+    {
+        assertEquals(0b11111111, getBitSeq(0b11111111, 0, 8));
+        assertEquals(0b00000000, getBitSeq(0b00000000, 0, 8));
+        assertEquals(0b1, getBitSeq(0b11111111, 0, 1));
+        assertEquals(0b0, getBitSeq(0b00000000, 0, 1));
+        assertEquals(0b001, getBitSeq(0b00110001, 0, 3));
+        assertEquals(0b10101010, getBitSeq(0b10101010, 0, 8));
+        assertEquals(0b10, getBitSeq(0b10101010, 0, 2));
+        assertEquals(0b01, getBitSeq(0b10101010, 1, 2));
+        assertEquals(0b10, getBitSeq(0b10101010, 2, 2));
+        assertEquals(0b101, getBitSeq(0b10101010, 3, 3));
+        assertEquals(0b1010101, getBitSeq(0b10101010, 1, 7));
+        assertEquals(0b01, getBitSeq(0b10101010, 3, 2));
+        assertEquals(0b00110001, getBitSeq(0b00110001, 0, 8));
+        assertEquals(0b10001, getBitSeq(0b00110001, 0, 5));
+        assertEquals(0b0011, getBitSeq(0b00110001, 4, 4));
+        assertEquals(0b110, getBitSeq(0b00110001, 3, 3));
+        assertEquals(0b00, getBitSeq(0b00110001, 6, 2));
+        assertEquals(0b1111, getBitSeq(0b11110000, 4, 4));
+        assertEquals(0b11, getBitSeq(0b11110000, 6, 2));
+        assertEquals(0b0000, getBitSeq(0b11110000, 0, 4));
+    }
+
+    /**
+     * calcSetBitSeq(value, start, width, field): per the cases below, only the low width bits of field are stored.
+     */
+    @Test
+    void testCalcSetBitSeq()
+    {
+        assertEquals(0b00000000, calcSetBitSeq(0b11111111, 0, 8, 0));
+        assertEquals(0b00000001, calcSetBitSeq(0b11111111, 0, 8, 1));
+        assertEquals(0b11111111, calcSetBitSeq(0b11111111, 0, 1, 1));
+        assertEquals(0b11111101, calcSetBitSeq(0b11111111, 0, 2, 1));
+        assertEquals(0b11111001, calcSetBitSeq(0b11111111, 0, 3, 1));
+        assertEquals(0b00000001, calcSetBitSeq(0b00000000, 0, 2, 1));
+        assertEquals(0b11110001, calcSetBitSeq(0b11111111, 0, 4, 1));
+        assertEquals(0b11100011, calcSetBitSeq(0b11111111, 1, 4, 1));
+        assertEquals(0b00000010, calcSetBitSeq(0b00000000, 1, 1, 1));
+        assertEquals(0b11111111, calcSetBitSeq(0b11111111, 7, 1, 1));
+        assertEquals(0b01111111, calcSetBitSeq(0b11111111, 7, 1, 0));
+        assertEquals(0b10000000, calcSetBitSeq(0b00000000, 7, 1, 1));
+        assertEquals(0b00000000, calcSetBitSeq(0b00000000, 7, 1, 0));
+        assertEquals(0b01000000, calcSetBitSeq(0b00000000, 6, 1, 1));
+        assertEquals(0b00000000, calcSetBitSeq(0b00000000, 6, 1, 0));
+        assertEquals(0b00110000, calcSetBitSeq(0b00000000, 3, 3, 6));
+        assertEquals(0b01100000, calcSetBitSeq(0b00000000, 4, 3, 6));
+        assertEquals(0b11000000, calcSetBitSeq(0b00000000, 5, 3, 6));
+        assertEquals(0b11111111, calcSetBitSeq(0b00000000, 0, 8, 0xFF));
+        assertEquals(0b11111111, calcSetBitSeq(0b11111111, 0, 8, 0xFF));
+        assertEquals(0x7E, calcSetBitSeq(0xA5, 0, 8, 0xD9 + 0xA5));
+
+        // check truncation
+        assertEquals(0b00000010, calcSetBitSeq(0b00000000, 1, 1, 3));
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java
new file mode 100644
index 00000000000..36b6c757be0
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.filter;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * This will test all of the filters in the PDFBox system.
+ */
+class TestFilters
+{
+    /**
+     * Round-trips generated data through every filter that supports encoding. There will be COUNT
+     * deterministic tests and COUNT non-deterministic tests, see also the discussion in PDFBOX-1977.
+     *
+     * Every filter/seed combination is tried even after a failure; the test then fails once at the end,
+     * listing each failing filter together with the full seed needed to reproduce it.
+     */
+    @Test
+    void testFilters()
+    {
+        final int COUNT = 10;
+        Random rd = new Random(123456);
+        List<String> failures = new ArrayList<>();
+        // 20 tests; 10 with deterministic seeding, 10 with relatively random.
+        for (int iter = 0; iter < COUNT * 2; iter++)
+        {
+            long seed;
+            if (iter < COUNT)
+            {
+                // deterministic seed
+                seed = rd.nextLong();
+            }
+            else
+            {
+                // non-deterministic seed
+                Date currentDate = new Date();
+                seed = new Random(currentDate.getTime() + iter).nextLong();
+            }
+            byte[] original = createTestData(seed);
+
+            for (Filter filter : FilterFactory.INSTANCE.getAllFilters())
+            {
+                // Skip filters that don't currently support roundtripping
+                if (filter instanceof DCTFilter ||
+                    filter instanceof CCITTFaxFilter ||
+                    filter instanceof JPXFilter ||
+                    filter instanceof JBIG2Filter)
+                {
+                    continue;
+                }
+
+                try
+                {
+                    checkEncodeDecode(filter, original);
+                }
+                catch (IOException | AssertionError e)
+                {
+                    // keep going so one run reports every failing combination, but remember the failure
+                    failures.add(filter.getClass().getName() + " with seed=" + seed + ": " + e);
+                }
+            }
+        }
+        if (!failures.isEmpty())
+        {
+            fail("Filter round trip failed for: " + failures);
+        }
+    }
+
+    /**
+     * Builds the test payload for one iteration: 10000 to 29999 bytes made of alternating runs of
+     * pseudo-random bytes and runs of one repeated small value, all derived from the given seed.
+     *
+     * @param seed seed of the generator; the same seed always produces the same payload
+     * @return the generated bytes
+     */
+    private static byte[] createTestData(long seed)
+    {
+        final Random random = new Random(seed);
+        final int numBytes = 10000 + random.nextInt(20000);
+        byte[] original = new byte[numBytes];
+
+        int upto = 0;
+        while (upto < numBytes)
+        {
+            final int left = numBytes - upto;
+            if (random.nextBoolean() || left < 2)
+            {
+                // Fill w/ pseudo-random bytes:
+                final int end = upto + Math.min(left, 10 + random.nextInt(100));
+                while (upto < end)
+                {
+                    original[upto++] = (byte) random.nextInt();
+                }
+            }
+            else
+            {
+                // Fill w/ very predictable bytes:
+                final int end = upto + Math.min(left, 2 + random.nextInt(10));
+                final byte value = (byte) random.nextInt(4);
+                while (upto < end)
+                {
+                    original[upto++] = value;
+                }
+            }
+        }
+        return original;
+    }
+
+    /**
+     * TODO: move to pdmodel package
+     *
+     * This will test the use of identity filter to decode stream and string.
+     * This test threw an IOException before the correction.
+     *
+     * @throws IOException
+     */
+//    @Test
+//    void testPDFBOX4517() throws IOException
+//    {
+//        Loader.loadPDF(new File("target/pdfs/PDFBOX-4517-cryptfilter.pdf"),
+//                "userpassword1234");
+//    }
+
+    /**
+     * This will test the LZW filter with the sequence that failed in PDFBOX-1977.
+     * To check that the test itself is legit, revert LZWFilter.java to rev 1571801,
+     * which should fail this test.
+     *
+     * @throws IOException if the resource cannot be read or the filter fails
+     */
+    @Test
+    void testPDFBOX1977() throws IOException
+    {
+        Filter lzwFilter = FilterFactory.INSTANCE.getFilter(COSName.LZW_DECODE);
+        // try-with-resources closes the stream; a missing resource is reported by name instead of as an NPE
+        try (InputStream in = this.getClass().getResourceAsStream("PDFBOX-1977.bin"))
+        {
+            assertNotNull(in, "test resource PDFBOX-1977.bin not found");
+            byte[] byteArray = in.readAllBytes();
+            checkEncodeDecode(lzwFilter, byteArray);
+        }
+    }
+
+    /**
+     * Test simple and corner cases (128 identical, 128 identical at the end) of RLE implementation.
+     * 128 non identical bytes likely to be caught in random testing.
+     *
+     * @throws IOException
+     */
+    @Test
+    void testRLE() throws IOException
+    {
+        Filter rleFilter = FilterFactory.INSTANCE.getFilter(COSName.RUN_LENGTH_DECODE);
+        byte[] input0 = new byte[0];
+        checkEncodeDecode(rleFilter, input0);
+        byte[] input1 = { 1, 2, 3, 4, 5, (byte) 128, (byte) 140, (byte) 180, (byte) 0xFF};
+        checkEncodeDecode(rleFilter, input1);
+        byte[] input2 = new byte[10];
+        checkEncodeDecode(rleFilter, input2);
+        byte[] input3 = new byte[128];
+        checkEncodeDecode(rleFilter, input3);
+        byte[] input4 = new byte[129];
+        checkEncodeDecode(rleFilter, input4);
+        byte[] input5 = new byte[128 + 128];
+        checkEncodeDecode(rleFilter, input5);
+        byte[] input6 = new byte[1];
+        checkEncodeDecode(rleFilter, input6);
+        byte[] input7 = {1, 2};
+        checkEncodeDecode(rleFilter, input7);
+        byte[] input8 = new byte[2];
+        checkEncodeDecode(rleFilter, input8);
+    }
+
+    @Test
+    void testEmptyFilterList() throws Exception
+    {
+        assertThrows(IllegalArgumentException.class, () ->
+        {
+            Filter.decode(null, new ArrayList<>(), new COSDictionary(), null, null);
+        });
+    }
+
+    /**
+     * Encodes the data with the filter, decodes the result with the same filter and asserts that the
+     * decoded bytes equal the input.
+     *
+     * @param filter the filter under test
+     * @param original the bytes to round-trip
+     * @throws IOException if encoding or decoding fails
+     */
+    private void checkEncodeDecode(Filter filter, byte[] original) throws IOException
+    {
+        ByteArrayOutputStream encoded = new ByteArrayOutputStream();
+        filter.encode(new ByteArrayInputStream(original), encoded, new COSDictionary());
+        ByteArrayOutputStream decoded = new ByteArrayOutputStream();
+        filter.decode(new ByteArrayInputStream(encoded.toByteArray()),
+                decoded, new COSDictionary(), 0);
+        assertArrayEquals(original, decoded.toByteArray(),
+                "Data that is encoded and then decoded through " + filter.getClass()
+                + " does not match the original data");
+    }
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html
new file mode 100644
index 00000000000..42247d09ace
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html
@@ -0,0 +1,25 @@
+ + + + + + + +These classes will be used to test the various filters that are available with PDFBox. + +
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html b/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html
new file mode 100644
index 00000000000..5e42f5a153b
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html
@@ -0,0 +1,25 @@
+ + + + + + + +These classes will be used to test the various COS objects that make up the core of PDFBox. + +
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java
new file mode 100644
index 00000000000..09279fbe5d7
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.util;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+/**
+ * Tests for {@code StringUtil.splitOnSpace} (separators dropped) and {@code StringUtil.tokenizeOnSpace}
+ * (each space kept as its own token), as pinned by the expected arrays below.
+ */
+class StringUtilTest
+{
+    @Test
+    void testSplitOnSpace_happyPath()
+    {
+        String[] result = StringUtil.splitOnSpace( "a b c");
+        assertArrayEquals(new String[] {"a", "b", "c"}, result);
+    }
+
+    @Test
+    void testSplitOnSpace_emptyString()
+    {
+        String[] result = StringUtil.splitOnSpace("");
+        assertArrayEquals(new String[] {""}, result);
+    }
+
+    @Test
+    void testSplitOnSpace_onlySpaces()
+    {
+        String[] result = StringUtil.splitOnSpace(" ");
+        assertArrayEquals(new String[] {}, result);
+    }
+
+    @Test
+    void testTokenizeOnSpace_happyPath()
+    {
+        String[] result = StringUtil.tokenizeOnSpace("a b c");
+        assertArrayEquals(new String[] {"a", " ", "b", " ", "c"}, result);
+    }
+
+    @Test
+    void testTokenizeOnSpace_emptyString()
+    {
+        String[] result = StringUtil.tokenizeOnSpace("");
+        assertArrayEquals(new String[] {""}, result);
+    }
+
+    @Test
+    void testTokenizeOnSpace_onlySpaces()
+    {
+        // three spaces in, one single-space token per space out (a one-space input cannot yield three tokens)
+        String[] result = StringUtil.tokenizeOnSpace("   ");
+        assertArrayEquals(new String[] {" ", " ", " "}, result);
+    }
+
+    @Test
+    void testTokenizeOnSpace_onlySpacesWithText()
+    {
+        // two spaces on each side of the letter, matching the five expected tokens
+        String[] result = StringUtil.tokenizeOnSpace("  a  ");
+        assertArrayEquals(new String[] {" ", " ", "a", " ", " "}, result);
+    }
+}
\ No newline at end of file
diff --git
a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java
new file mode 100644
index 00000000000..f6bf719741e
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java
@@ -0,0 +1,457 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.cos.util;
+
+import java.io.IOException;
+import java.text.ParsePosition;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+import java.util.SimpleTimeZone;
+import java.util.TimeZone;
+import org.apache.pdfbox.cos.COSString;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+/**
+ * Test the date conversion utility.
+ *
+ * @author Ben Litchfield
+ * @author Fred Hansen
+ *
+ */
+class TestDateUtil
+{
+    // milliseconds per minute and per hour, used to build the expected zone offsets
+    private static final int MINS = 60*1000, HRS = 60*MINS;
+    // sentinel "year" passed to checkParse when the input is expected to fail to parse
+    private static final int BAD = -666;
+
+    /**
+     * Test common date formats.
+     *
+     * @throws Exception when there is an exception
+     */
+    @Test
+    void testExtract() throws Exception
+    {
+        TimeZone timezone = TimeZone.getDefault();
+        TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+        try
+        {
+            assertCalendarEquals( new GregorianCalendar( 2005, 4, 12 ),
+                    DateConverter.toCalendar( "D:05/12/2005" ) );
+            assertCalendarEquals( new GregorianCalendar( 2005, 4,12,15,57,16 ),
+                    DateConverter.toCalendar( "5/12/2005 15:57:16" ) );
+        }
+        finally
+        {
+            // restore the JVM-wide default even when an assertion above fails,
+            // so that a failure here cannot leak UTC into the tests that run afterwards
+            TimeZone.setDefault(timezone);
+        }
+        // check that toCalendar gives null for a null arg
+        assertNull(DateConverter.toCalendar((String)null));
+    }
+
+    /**
+     * Calendar.equals test case.
+     *
+     * @param expect the expected calendar value
+     * @param was the calendar value to be checked
+     */
+    private void assertCalendarEquals(Calendar expect, Calendar was)
+    {
+        assertEquals( expect.getTimeInMillis(), was.getTimeInMillis() );
+        assertEquals( expect.getTimeZone().getRawOffset(),
+                was.getTimeZone().getRawOffset() );
+    }
+
+    /**
+     * Test case for
+     * PDFBOX-598.
+     *
+     * @throws IOException if something went wrong.
+     */
+    @Test
+    void testDateConversion() throws IOException
+    {
+        Calendar c = DateConverter.toCalendar("D:20050526205258+01'00'");
+        assertEquals(2005, c.get(Calendar.YEAR));
+        assertEquals(Calendar.MAY, c.get(Calendar.MONTH));
+        assertEquals(26, c.get(Calendar.DAY_OF_MONTH));
+        assertEquals(20, c.get(Calendar.HOUR_OF_DAY));
+        assertEquals(52, c.get(Calendar.MINUTE));
+        assertEquals(58, c.get(Calendar.SECOND));
+        assertEquals(0, c.get(Calendar.MILLISECOND));
+    }
+
+    /**
+     * Check that a date string is parsed to the expected calendar value, or is rejected.
+     * @param yr expected year value
+     * If a parse failure is the expected result, yr should be {@link #BAD}
+     * @param mon expected month value
+     * @param day expected dayofmonth value
+     * @param hr expected hour value
+     * @param min expected minute value
+     * @param sec expected second value
+     * @param offsetHours expected timezone offset in hours (-11..11)
+     * @param offsetMinutes expected timezone offset in minutes (0..59)
+     * @param orig A date to be parsed.
+     * @throws Exception If an unexpected error occurs.
+     */
+    private static void checkParse(int yr, int mon, int day,
+            int hr, int min, int sec, int offsetHours, int offsetMinutes,
+            String orig) throws Exception
+    {
+        String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'",
+                yr,mon,day,hr,min,sec,offsetHours,offsetMinutes);
+        String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" +
+                "T%02d:%02d:%02d%+03d:%02d",
+                yr,mon,day,hr,min,sec,offsetHours,offsetMinutes);
+        Calendar cal = DateConverter.toCalendar(orig);
+        if (yr == BAD)
+        {
+            // a BAD year means the input must be rejected, i.e. no calendar is returned
+            assertNull(cal, "expected \"" + orig + "\" to be rejected");
+        }
+        else
+        {
+            assertNotNull(cal, "failed to parse \"" + orig + "\"");
+            assertEquals(iso8601Date, DateConverter.toISO8601(cal));
+            assertEquals(pdfDate, DateConverter.toString(cal));
+        }
+    }
+
+    /**
+     * Test dates in various formats.
+     * Years differ to make it easier to find failures.
+     * @throws Exception none expected
+     */
+    @Test
+    void testDateConverter() throws Exception
+    {
+        int year = Calendar.getInstance().get(Calendar.YEAR);
+        checkParse(2010, 4,23, 0, 0, 0, 0, 0, "D:20100423");
+        checkParse(2011, 4,23, 0, 0, 0, 0, 0, "20110423");
+        checkParse(2012, 1, 1, 0, 0, 0, 0, 0, "D:2012");
+        checkParse(2013, 1, 1, 0, 0, 0, 0, 0, "2013");
+
+        // PDFBOX-1219
+        checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33+01:00 ");
+
+        // Same with milliseconds
+        checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33.123+01:00");
+
+        // PDFBOX-465
+        checkParse(2002, 5,12, 9,47, 0, 0, 0, "9:47 5/12/2002");
+        // PDFBOX-465
+        checkParse(2003,12,17, 2, 2, 3, 0, 0, "200312172:2:3");
+        // PDFBOX-465
+        checkParse(2009, 3,19,20, 1,22, 0, 0, " 20090319 200122");
+
+        checkParse(2014, 4, 1, 0, 0, 0, +2, 0, "20140401+0200");
+        // "EEEE, MMM dd, yy",
+        checkParse(2115, 1,11, 0, 0, 0, 0, 0, "Friday, January 11, 2115");
+        // "EEEE, MMM dd, yy",
+        checkParse(1915, 1,11, 0, 0, 0, 0, 0, "Monday, Jan 11, 1915");
+        // "EEEE, MMM dd, yy",
+        checkParse(2215, 1,11, 0, 0, 0, 0, 0, "Wed, January 11, 2215");
+        // "EEEE, MMM dd, yy",
+        checkParse(2015, 1,11, 0, 0, 0, 0, 0, " Sun, January 11, 2015 ");
+        checkParse(2016, 4, 1, 0, 0, 0, +4, 0, "20160401+04'00'");
+        checkParse(2017, 4, 1, 0, 0, 0, +9, 0, "20170401+09'00'");
+        checkParse(2017, 4, 1, 0, 0, 0, +9, 30, "20170401+09'30'");
+        checkParse(2018, 4, 1, 0, 0, 0, -2, 0, "20180401-02'00'");
+        checkParse(2019, 4, 1, 6, 1, 1, -11, 0, "20190401 6:1:1 -1100");
+        checkParse(2020, 5,26,11,25,10, 0, 0, "26 May 2020 11:25:10");
+        checkParse(2021, 5,26,11,23, 0, 0, 0, "26 May 2021 11:23");
+
+        // half hour timezones
+        checkParse(2016, 4, 1, 0, 0, 0, +4, 30, "20160401+04'30'");
+        checkParse(2017, 4, 1, 0, 0, 0, +9, 30, "20170401+09'30'");
+        checkParse(2018, 4, 1, 0, 0, 0, -2, 30, "20180401-02'30'");
+        checkParse(2019, 4, 1, 6, 1, 1, -11, 30, "20190401 6:1:1 -1130");
+        checkParse(2000, 2,29, 0, 0, 0, +11, 30, " 2000 Feb 29 GMT + 11:30");
+
+        // try dates invalid due to out of limit values
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Tuesday, May 32 2000 11:27 UCT");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "32 May 2000 11:25");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Tuesday, May 32 2000 11:25");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921301 11:25");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921232 11:25");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921001 11:60");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19920401 24:25");
+
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0,
+                "20070430193647+713'00' illegal tz hr"); // PDFBOX-465
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "nodigits");
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Unknown"); // PDFBOX-465
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "333three digit year");
+
+        checkParse(2000, 2,29, 0, 0, 0, 0, 0, "2000 Feb 29"); // valid date
+        checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 GMT + 11:00"); // valid date
+        checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 UTC + 11:00"); // valid date
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2100 Feb 29 GMT+11"); // invalid date
+        checkParse(2012, 2,29, 0, 0, 0,+11, 0, "2012 Feb 29 GMT+11"); // valid date
+        checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2012 Feb 30 GMT+11"); // invalid date
+
+        checkParse(1970,12,23, 0, 8, 0, 0, 0, "1970 12 23:08"); // test ambiguous date
+
+        // cannot have P for PM
+        // cannot have Sat. instead of Sat
+        // EST works, but EDT does not; EST is a special kludge in Java
+
+        // test cases for all entries on old formats list
+        // "E, dd MMM yyyy hh:mm:ss a"
+        checkParse(1971, 7, 6, 17, 22, 1, 0, 0, "Tuesday, 6 Jul 1971 5:22:1 PM");
+        // "EE, MMM dd, yyyy hh:mm:ss a"
+        checkParse(1972, 7, 6, 17, 22, 1, 0, 0, "Thu, July 6, 1972 5:22:1 pm");
+        // "MM/dd/yyyy hh:mm:ss"
+        checkParse(1973, 7, 6, 17, 22, 1, 0, 0, "7/6/1973 17:22:1");
+        // "MM/dd/yyyy"
+        checkParse(1974, 7, 6, 0, 0, 0, 0, 0, "7/6/1974");
+        // "yyyy-MM-dd'T'HH:mm:ss'Z'"
+        checkParse(1975, 7, 6, 17, 22, 1, -10, 0, "1975-7-6T17:22:1-1000");
+        // "yyyy-MM-dd'T'HH:mm:ssz"
+        checkParse(1976, 7, 6, 17, 22, 1, -4, 0, "1976-7-6T17:22:1GMT-4");
+        // "yyyy-MM-dd'T'HH:mm:ssz"
+        checkParse(BAD, 7, 6, 17, 22, 1, -4, 0, "2076-7-6T17:22:1EDT"); // "EDT" is not a known tz ID
+        // "yyyy-MM-dd'T'HH:mm:ssz"
+        checkParse(1960, 7, 6, 17, 22, 1, -5, 0, "1960-7-6T17:22:1EST"); // "EST" does not have a DST rule
+        // "EEEE, MMM dd, yyyy"
+        checkParse(1977, 7, 6, 0, 0, 0, 0, 0, "Wednesday, Jul 6, 1977");
+        // "EEEE MMM dd, yyyy HH:mm:ss"
+        checkParse(1978, 7, 6, 17, 22, 1, 0, 0, "Thu Jul 6, 1978 17:22:1");
+        // "EEEE MMM dd HH:mm:ss z yyyy"
+        checkParse(1979, 7, 6, 17, 22, 1, +8, 0, "Friday July 6 17:22:1 GMT+08:00 1979");
+        // "EEEE, MMM dd, yyyy 'at' hh:mma"
+        checkParse(1980, 7, 6, 16, 23, 0, 0, 0, "Sun, Jul 6, 1980 at 4:23pm");
+        // "EEEEEEEEEE, MMMMMMMMMMMM dd, yyyy"
+        checkParse(1981, 7, 6, 0, 0, 0, 0, 0, "Monday, July 6, 1981");
+        // "dd MMM yyyy hh:mm:ss"
+        checkParse(1982, 7, 6, 17, 22, 1, 0, 0, "6 Jul 1982 17:22:1");
+        // "M/dd/yyyy hh:mm:ss"
+        checkParse(1983, 7, 6, 17, 22, 1, 0, 0, "7/6/1983 17:22:1");
+        // "MM/d/yyyy hh:mm:ss"
+        checkParse(1984, 7, 6, 17, 22, 1, 0, 0, "7/6/1984 17:22:01");
+        // "M/dd/yyyy"
+        checkParse(1985, 7, 6, 0, 0, 0, 0, 0, "7/6/1985");
+        // "MM/d/yyyy"
+        checkParse(1986, 7, 6, 0, 0, 0, 0, 0, "07/06/1986");
+        // "M/d/yyyy hh:mm:ss"
+        checkParse(1987, 7, 6, 17, 22, 1, 0, 0, "7/6/1987 17:22:1");
+        // "M/d/yyyy"
+        checkParse(1988, 7, 6, 0, 0, 0, 0, 0, "7/6/1988");
+
+        // test ends of range of two digit years
+        checkParse(year-79, 1, 1, 0, 0, 0, 0, 0, "1/1/" + ((year-79)%100) +
+                " 00:00:00"); // "M/d/yy hh:mm:ss"
+        // "M/d/yy"
+        checkParse(year+19, 1, 1, 0, 0, 0, 0, 0, "1/1/" + ((year+19)%100));
+
+        // "yyyyMMdd hh:mm:ss Z"
+        checkParse(1991, 7, 6, 17, 7, 1, +6, 0, "19910706 17:7:1 Z+0600");
+        // "yyyyMMdd hh:mm:ss"
+        checkParse(1992, 7, 6, 17, 7, 1, 0, 0, "19920706 17:07:01");
+        // "yyyyMMdd'+00''00'''"
+        checkParse(1993, 7, 6, 0, 0, 0, 0, 0, "19930706+00'00'");
+        // "yyyyMMdd'+01''00'''"
+        checkParse(1994, 7, 6, 0, 0, 0, 1, 0, "19940706+01'00'");
+        // "yyyyMMdd'+02''00'''"
+        checkParse(1995, 7, 6, 0, 0, 0, 2, 0, "19950706+02'00'");
+        // "yyyyMMdd'+03''00'''"
+        checkParse(1996, 7, 6, 0, 0, 0, 3, 0, "19960706+03'00'");
+        // . . .
+        // "yyyyMMdd'-10''00'''"
+        checkParse(1997, 7, 6, 0, 0, 0, -10, 0, "19970706-10'00'");
+        // "yyyyMMdd'-11''00'''"
+        checkParse(1998, 7, 6, 0, 0, 0, -11, 0, "19980706-11'00'");
+        // "yyyyMMdd"
+        checkParse(1999, 7, 6, 0, 0, 0, 0, 0, "19990706");
+        // ambiguous big-endian date
+        checkParse(2073,12,25, 0, 8, 0, 0, 0, "2073 12 25:08");
+
+        // PDFBOX-3315 GMT+12
+        checkParse(2016, 4,11,16,01,15, 12, 0, "D:20160411160115+12'00'");
+    }
+
+    /**
+     * Check that toString() and toISO8601() format the given local time as expected.
+     *
+     * @param yr year of the local time
+     * @param mon month of the local time (1..12)
+     * @param day day of the month
+     * @param hr hour of the day
+     * @param min minute
+     * @param sec second
+     * @param tz timezone in which the calendar is created
+     * @param offsetHours expected timezone offset in hours
+     * @param offsetMinutes expected timezone offset in minutes
+     * @throws Exception If an unexpected error occurs.
+     */
+    private static void checkToString(int yr, int mon, int day,
+            int hr, int min, int sec,
+            TimeZone tz, int offsetHours, int offsetMinutes) throws Exception
+    {
+        // construct a GregorianCalendar from args
+        GregorianCalendar cal = new GregorianCalendar(tz, Locale.ENGLISH);
+        cal.set(yr, mon-1, day, hr, min, sec);
+        // create expected strings
+        String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'",
+                yr,mon,day,hr,min,sec,offsetHours, offsetMinutes);
+        String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" +
+                "T%02d:%02d:%02d%+03d:%02d",
+                yr,mon,day,hr,min,sec,offsetHours, offsetMinutes);
+        // compare outputs from toString and toISO8601 with expected values
+        assertEquals(pdfDate, DateConverter.toString(cal));
+        assertEquals(iso8601Date, DateConverter.toISO8601(cal));
+    }
+
+    /**
+     * Test toString() and toISO8601() for various dates.
+     *
+     * @throws Exception if something went wrong.
+     */
+    @Test
+    void testToString() throws Exception
+    {                                                                  // std DST
+        TimeZone tzPgh = TimeZone.getTimeZone("America/New_York"); // -5 -4
+        TimeZone tzBerlin = TimeZone.getTimeZone("Europe/Berlin"); // +1 +2
+        TimeZone tzMaputo = TimeZone.getTimeZone("Africa/Maputo"); // +2 +2
+        TimeZone tzAruba = TimeZone.getTimeZone("America/Aruba"); // -4 -4
+        TimeZone tzJamaica = TimeZone.getTimeZone("America/Jamaica");// -5 -5
+        TimeZone tzMcMurdo = TimeZone.getTimeZone("Antartica/McMurdo");// +12 +13
+        TimeZone tzAdelaide = TimeZone.getTimeZone("Australia/Adelaide");// +9:30 +10:30
+
+        assertNull(DateConverter.toCalendar((COSString) null));
+        assertNull(DateConverter.toCalendar((String) null));
+        assertNull(DateConverter.toCalendar("D: "));
+        assertNull(DateConverter.toCalendar("D:"));
+
+        checkToString(2013, 8, 28, 3, 14, 15, tzPgh, -4, 0);
+        checkToString(2014, 2, 28, 3, 14, 15, tzPgh, -5, 0);
+        checkToString(2015, 8, 28, 3, 14, 15, tzBerlin, +2, 0);
+        checkToString(2016, 2, 28, 3, 14, 15, tzBerlin, +1, 0);
+        checkToString(2017, 8, 28, 3, 14, 15, tzAruba, -4, 0);
+        checkToString(2018, 1, 1, 1, 14, 15, tzJamaica, -5, 0);
+        checkToString(2019, 12, 31, 12, 59, 59, tzJamaica, -5, 0);
+        checkToString(2020, 2, 29, 0, 0, 0, tzMaputo, +2, 0);
+        checkToString(2015, 8, 28, 3, 14, 15, tzAdelaide, +9, 30);
+        checkToString(2016, 2, 28, 3, 14, 15, tzAdelaide, +10, 30);
+        // NOTE(review): the ID "Antartica/McMurdo" is misspelled ("Antarctica/McMurdo").
+        // TimeZone.getTimeZone() returns GMT for an ID it cannot understand, which is
+        // presumably why an offset of +00'00' is expected below for every month. Using the
+        // correct ID would require the real McMurdo offsets here - confirm before changing.
+        for (int m = 1; m <= 12; ++m)
+        {
+            checkToString(1980 + m, m, 1, 1, 14, 15, tzMcMurdo, +0, 0);
+        }
+    }
+
+    /**
+     * Parse a timezone offset and check the resulting ZONE_OFFSET field of the calendar.
+     *
+     * @param expect expected zone offset in milliseconds
+     * @param src timezone string to be parsed
+     */
+    private static void checkParseTZ(int expect, String src)
+    {
+        GregorianCalendar dest = DateConverter.newGreg();
+        DateConverter.parseTZoffset(src, dest, new ParsePosition(0));
+        assertEquals(expect, dest.get(Calendar.ZONE_OFFSET));
+    }
+
+    /**
+     * Timezone testcase.
+     */
+    @Test
+    void testParseTZ()
+    {
+        // 1st parameter is what to expect
+        checkParseTZ(0*HRS+0*MINS, "+00:00");
+        checkParseTZ(0*HRS+0*MINS, "-0000");
+        checkParseTZ(1*HRS+0*MINS, "+1:00");
+        checkParseTZ(-(1*HRS+0*MINS), "-1:00");
+        checkParseTZ(-(1*HRS+30*MINS), "-0130");
+        checkParseTZ(11*HRS+59*MINS, "1159");
+        checkParseTZ(12*HRS+30*MINS, "1230");
+        checkParseTZ(-(12*HRS+30*MINS), "-12:30");
+        checkParseTZ(0*HRS+0*MINS, "Z");
+        checkParseTZ(-(8*HRS+0*MINS), "PST");
+        checkParseTZ(0*HRS+0*MINS, "EDT"); // EDT does not parse
+        checkParseTZ(-(3*HRS+0*MINS), "GMT-0300");
+        checkParseTZ(+(11*HRS+0*MINS), "GMT+11:00");
+        checkParseTZ(-(6*HRS+0*MINS), "America/Chicago");
+        checkParseTZ(+(3*HRS+0*MINS), "Europe/Moscow");
+        checkParseTZ(+(9*HRS+30*MINS), "Australia/Adelaide");
+        checkParseTZ((5*HRS+0*MINS), "0500");
+        checkParseTZ((5*HRS+0*MINS), "+0500");
+        checkParseTZ((11*HRS+0*MINS), "+11'00'");
+        checkParseTZ(0, "Z");
+        // PDFBOX-3315, PDFBOX-2420
+        checkParseTZ(12*HRS+0*MINS, "+12:00");
+        checkParseTZ(-(12*HRS+0*MINS), "-12:00");
+        checkParseTZ(14*HRS+0*MINS, "1400");
+        checkParseTZ(-(14*HRS+0*MINS), "-1400");
+    }
+
+    /**
+     * Format a raw timezone offset with ":" as separator and compare it with the expected text.
+     *
+     * @param off timezone offset in (fractional) hours
+     * @param expect expected formatted offset
+     */
+    private static void checkFormatOffset(double off, String expect)
+    {
+        TimeZone tz = new SimpleTimeZone((int)(off*60*60*1000), "junkID");
+        String got = DateConverter.formatTZoffset(tz.getRawOffset(), ":");
+        assertEquals(expect, got);
+    }
+
+    /**
+     * Timezone offset testcase.
+     */
+    @Test
+    void testFormatTZoffset()
+    {
+        // 2nd parameter is what to expect
+        checkFormatOffset(-12.1, "-12:06");
+        checkFormatOffset(12.1, "+12:06");
+        checkFormatOffset(0, "+00:00");
+        checkFormatOffset(-1, "-01:00");
+        checkFormatOffset(.5, "+00:30");
+        checkFormatOffset(-0.5, "-00:30");
+        checkFormatOffset(.1, "+00:06");
+        checkFormatOffset(-0.1, "-00:06");
+        checkFormatOffset(-12, "-12:00");
+        checkFormatOffset(12, "+12:00");
+        checkFormatOffset(-11.5, "-11:30");
+        checkFormatOffset(11.5, "+11:30");
+        checkFormatOffset(11.9, "+11:54");
+        checkFormatOffset(11.1, "+11:06");
+        checkFormatOffset(-11.9, "-11:54");
+        checkFormatOffset(-11.1, "-11:06");
+        // PDFBOX-2420
+        checkFormatOffset(14, "+14:00");
+        checkFormatOffset(-14, "-14:00");
+    }
+
+}
diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java
new file mode 100644
index 00000000000..000fa7b3107
--- /dev/null
+++ b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.pdfbox.cos.util; + +import java.nio.charset.StandardCharsets; +import java.util.Locale; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * + * @author Michael Doswald + */ +class TestHexUtil +{ + + /** + * Test conversion from short to char[] + */ + @Test + void testGetCharsFromShortWithoutPassingInABuffer() + { + assertArrayEquals( new char[]{'0','0','0','0'}, Hex.getChars( (short)0x0000)); + assertArrayEquals (new char[]{'0','0','0','F'}, Hex.getChars((short)0x000F)); + assertArrayEquals( new char[]{'A','B','C','D'}, Hex.getChars((short)0xABCD)); + assertArrayEquals( new char[]{'B','A','B','E'}, Hex.getChars((short)0xCAFEBABE)); + } + + /** + * Check conversion from String to a char[] which contains the UTF16-BE encoded + * bytes of the string as hex digits + * + */ + @Test + void testGetCharsUTF16BE() + { + assertArrayEquals(new char[]{'0','0','6','1','0','0','6','2'}, Hex.getCharsUTF16BE("ab")); + assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助")); + } + + /** + * Test getBytes() and getString() and decodeHex() + */ + @Test + void testMisc() + { + byte[] byteSrcArray = new byte[256]; + for (int i = 0; i < 256; ++i) + { + byteSrcArray[i] = (byte) i; + + byte[] bytes = Hex.getBytes((byte) i); + assertEquals(2, bytes.length); + String s2 = String.format(Locale.US, "%02X", i); + assertArrayEquals(s2.getBytes(StandardCharsets.US_ASCII), bytes); + s2 = Hex.getString((byte) i); + assertArrayEquals(s2.getBytes(StandardCharsets.US_ASCII), bytes); + + assertArrayEquals(new byte[]{(byte) i}, Hex.decodeHex(s2)); + } + byte[] byteDstArray = Hex.getBytes(byteSrcArray); + assertEquals(byteDstArray.length, byteSrcArray.length * 2); + + String dstString = Hex.getString(byteSrcArray); + assertEquals(dstString.length(), byteSrcArray.length * 2); + + 
assertArrayEquals(dstString.getBytes(StandardCharsets.US_ASCII), byteDstArray); + + assertArrayEquals(byteSrcArray, Hex.decodeHex(dstString)); + } + +} diff --git a/pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin b/pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee9403ee5a0d413be441f7d677da63013a0ee50d GIT binary patch literal 19321 zcmZ5nV_+U#(~gbCwr$%s8ryE{G`5{IZ0t0)jmBzh+x+T2?{|LeoZY?m%*-{{%sGo2 z2j~G#oG=SCK3l)sPN11| zS|UF?K>xiK0Pt^XkQ7S2IGa~`0kNtpi;$JJiPOQJ)Zu&-+e*Wtc7NFeLxQEjK0R&U zdK_`=-8Ed~^XJ#VghG`Q;R{IT_;v=v*OOny;wAb%c1<_30P=k(X+l)o9&~riL23#J zjJ3?7`yJ*1PJRCs88K>Z7m92X71V9Schi5he+GT;CFip36kohDE|Tu!YET&ZVB%B8 zu#PQVS2FcJ>iB>VkdRKTnM$H%G#LniEuw4tt~zNxRddxGw8Sz*uI)g(l)(OWYNBQ9 z6KX%|5jGtI$j7_~&ajupNh=~8jA41bb`RHLomj9hT1g`%R~SrMk!DRXDw&_Eq1UD= zc}UkPu{*Sid*%+mMrrHvBE9K8B!!%wboqf^A;TP^)>TY`rg6W?^NX+P7a~GO1 zVx+NViquh+F87PY#}m(`ao4k`Esp~Pd@caTsOY*6j9L3rd1ui`o9Vft>a50xsb6DT zO53_3Xcf^!r5VHR0vb_BOVl3t!i-tE*9J`pGt zrv+WGN+9y1~#|%B^R?ZCawa$Pp{n26K%0ENOnPNIv zT*SIdqXazTbhgM_!;lmex_gL!l!^{U{HqPtn7j{-qW$813!)a6hUzcu00DnSkk^Hl zi9et{+BQN|t(QEyZ6VY)Z!}nX&FWz_BTkH+aFF}w&ku`0DGn&~2W4$nI-%Aq!;7ll-!80 zR6T}HgJaui>6&m8acN+?WSI}0^I<-#!PT=0Ct|{PqH1QX-LicH9WT*k6IC+>>^KmD zSBg^AcF+4fh;C`K+4@8XO6uZN0_8XSgmf`mK>5RmA1wG;6wj)>F%(oaoHLzeky~B` zTnfuWC0e0oESQs}q)x@IJ?*LA`!PBiogya1mrxGKBn{VuDYlz8YfmNZ>!jS^ben+! 
z1r@#wsz|>`&t?ZY@d#6{Q@RFq)TG(HHXm6P1g07X{bT+A_5@W|xf0kJp)8aYW#al7 zlrhT3?2n&q&bs301lfYye7FX6;3{@lGK495-d7%0t2_~nDs%D47aOygeQ;mBfjRVu z8evHp*qg01;SY9#rEtm9~y9&vfauI+)qZz(9sDW zB5+kTIC3h&-Z{Y!rM<_`kiY2$;OtgZT!5sC5f!oNPpbs!jzQaoGQW$3hwt6Z@eD@E z*DgB-yTsHuVDo1r7JsHa1N_Reybi)=!|4|_JV1g2JD_(_-x9cAP0omSmIfWqcvK8U z-Z#nAVzduam|fs6O$nvfhZ3pJo|0Bzi^wfkD1%{G1eSS)Qs&_eYNnVxsa?`XM^91W zGYn*6tX0aB{CV2>&N@D5OLJ>aP7k!`plD^rcjycpX}96etOx|;JNnYOa1+%qpufG- zzbB_;8&^NvI&_-oR4E^}H5w>2bo_ZJLCgzsa+8&El^LyI8#;FZq-w7X<@MQHiAGs> zLhd{eJ+08W*r!Xj|L`m=Y``a&Z*YX`-1yRun|Qd~3KOM%u(0N*0?}B{mI#K;K-q9b*S&+x-lk!cPg1tKs^4z= zv;zeIRtpWvWC}n40z&}-6YF>31lQi2@6drC{H!+!g^Y?CROO}U?)LKZ2@zYo-9f|z zETbHa;;MF4H0@s~j;=K(1$uMCl5Ks{U^On-Lw|&h?>&aT9ka$90f4_-;GF%ts}56; zL$h3FWfVy;xY^i_qB~?{^VVR%M48xyi?i!bL5}Cq`PT=KRBd8mV8I!PcSkT)si+6) zIZYW)CPHHqd5X<`BR@LXkXLPSsgh*kATv#z~(Si>0yPkazaT&p}ue!D z9N5@b#tBr-+&x!APtscI;7?GN8no++vA}Sv;P`=R28s_M0IzU*uFK4G&wZ94z)F7y zocT#-DrQ^ghy(<(JUx0t+5@hA8<+Ltq`j63&kMQM17T=b%;){dRMRYP8BaBx9Of&x zwK~RQV3f4zERK9aa5TkpYeUPOXqGGTopx+rlJq`bAp z&PC)7@b1vi81$k7_|3=E`st>%!}HJd>`w1M!X8A`5`WX-f5Z^&k|D#Xi}#(VB*lWyG*9|C9uW(LkJI7PfI#p9|bd=^&hlU@ap zB*O&2X7$>IC4L1{1+{U6Vk4ydJe@Cpo;BDeOZM7^O0tWrLO~|f?c@)6OhHQhC5zAX z9(@WTIeDb#KQ>v(#dcMZ_py3{o+7d7)HWSV83iF&ZFJX>Ew?w-2y!~Q-GYBy={%Y2 z4j6|}tcQYpg(Vs8buP52loE=~>*dL`Q+fV+EES! 
z3%o-`-!A@n=1=ZgLrrgVJDjWSk#k3LF(pgSl7?^Ba%BQ?we979E8Ma{2n)fAuSu8f zi{Op77aYxQEIL>f)R-Y}C3&6nFIYeRL=V}&f92eepiY80S96>?ixO16?{#Vf%z0*P z9sbNvAIM$0&-qj3_sw^g{$%x^LqI@p_5QhJUD_Ez;0Vw$ZLwOf`Le>31B;cR`x^1D_34Y6_GB1iP6PYx8+2kM&sxcqae_0tTATI8wjiw2Lq=`fH8AQ#B`Z58w) z#mE4hs7WVsSnS;A6@B$y_JD%xOhK4-DNgk9H%>`N-`04wr0y2&)+yv+f=_Qr3#nr;(`b0U?C zZfUr~bPv4o-a|EVHGft9)Aj?i&j;*%w1aiirZ<<%R&Oae=49|g@pJN40Iw-bB|~gJ3^U%kN#c!89p{eg`8Ls zMKQskU*R1k5d~jrm{f5mkd#Wk&+M1?MjOq7;v~ww&m#|iA_t1q)!9gMtI?YBYp_3& z{VsnlXSwQYEyG!aFcA)#>7y4uf2g*Uo|iSq&p!sYX$dWerB2}$_8IdPEh$9;lL!OhBTeDY@S=l#jB?iPQ_^;G(Av4H6-e7@!&-ur{Br%)rccDIllOtorB)jB+`{?qkf=NQaW1y ztPi{`d@9CW(vB?AV9rKx(0NlMFU`Y;`hdY2Lm7_}EJVXg4K| znzcDavNp(tlm{RBh?#2f)(+1S0BhrVk@nXO6oeWnBA(ISDWI;mNL{b7>@Y$vGgU)> zLcDs?jV!V4^$Tx+3otR#0rr3dxQN93;cR`QbMCgw_PV=lq3=*b4)`ci!&Ts$%d%neY&eO+eUjI_2vNMcE}bJj6fQaJuz7r z%nAza03EQqf_$%^o+Q3zX6z@g-)p4Dy>e^8#klS1JYV1iyE^a=5H<8gZO{I8Iq*AdlnJDpBT7 zikR7r<%ZoQoHb~myG_Sdd*!DM%ndC!Y_avVf2sGoi2T_9-Tf#2{^)x#_|vZVQT3Bn zysWNIjA^OCT%`nT{04Bo!Ywb5%ENJZcphN5s9+DOvG{;Rj!YTejFFVs+=YdeDnYwS zwyaK*e3}!-aoKFcpTj2{o^6P`64L*Kc`n7J7)PVwew>zCO`WO$k>D`vsy+UMGXJ>b zKNB`nEJiw)N0znvmMYwaFXMmB)m7tEnsEfjDOdAdN{g zbelsQ??jenA6awiR{Wolmu;K@u^&Wu){NZEC$1gzvTx!yCIMJJO!%(&NH|mtlDu$i zzT}qW*|Fc06lG>2+SlZnG4e-|qoZ{BB5J-BWTcnA%;Z1LEfKCFApl1=T=aSjVcmFm zm*0pc%>z(^Rao86@ze21@8`8iZbl(q6KFLg>Q>P7IDb#MQ;5 z_g+Ny>8_DfLO5zi{`3>hyfL0G@Fc!(nQK^ksOa3uhg<{t1;`bUgm2tI*8;zz{0ASh z8E|MswzaN=&r!;7+9eAi9JOFjhLJ@Sn&h0@0w2e#6i4YNzFbNiTCxw$*9p8fyRbqk z!3i=aN(*N=b$6nJ2%;!;WZiI**1yXLt40i)fcG13vtIg~yGslcg#cp#iHXSewgT*gUfG`=Ln;{`Eh0M4igsv|iFBenOl)+w3c%;w9LuEY4Z>0wiBpTo65>3u)qI z3}dko2RBtBu4Fr?9hM2o?1oyI;@1gG}D&Gwi~kv>3dVcsK^(*cb) zR~#H=1?a4!Szj{o*Sw@F`=;w2z>$+iIB)#*@*P#*wFFt&go~#>%~tV>RY@SB2}-Dn_A2Df&|MKfF~@;=)va;sQC}gF6=#w@ z>JAsOg#=9&1R2y{NInZFK74bD8uj=A%HPtJeqeyWk9x60T-W&}tU(~ILWt`TD!&+T zHfXbaRQn--iNwSZSAHk*_$d<%*&vXRC=pl<)N)SKuYuVTx`}nKVcwA}IN~_uBAaaB zQ)*_M=?8VK%8&)S$z?Y26XyuL6SDZeKF7%Fr*R`Ki&4QhVH?FfPKr?MCT>5JOrFtv 
zybAfN=~6A4emScKPm|F*7E@m<`)qU2hMJtJcz#m)m92IrK7I7h_c)B)D-(}8*{kdqn{ZdTI+hFG!((FX{{L6t3#AVd4|tYDKRh6<=g|KJwu z?8xLkN)aan(_Un$Y{w+;5;R()UMO_=yzwE~kzTV9d^XR+w(K29VB&ZDCfJ?kAP-q^ z)yJE zjdaX3i(eqX@Uh=3gtucmCoj||QL2^G=eSaPj#>b4oPNIZ**UwEe+%#9D%v6TDMTgh z?YPT7(EmcCTqFg*l?AO*8OLI%zY?z>Jx;V|>|%KLv2UCPXELAJ-uco~lW>TF<6=@4 z?G9+Aa_XRBfFJtuy=C;X;z0Gs1X^n|;t7+zCNqdJpQ>{lOw_RDqpByWSS!Jl>hRR6 zM>&E8!^<|Uh9#asMd%J^>JSY15k%o5X*no_4t?KNV8cO|f_h%}lQv{hdU`rNT7)9C4Fr);x zEHX4t4Fu2f)+mFiAn{rhO?=!E;vS}96gFWJ+epvCno+G(hy#@)@a#84R_ro_Ihdpg z#x_SG2B;G-*8G*7h2?f8S#fo>ErfLY;5`Q&Mxh#Fsp6!`}cSn%TQE znBl$&U1-q3y{)STn{#yZeNS04f1s#d$o1&{8D--r#+S^#I`lChbmw5h6s!^Xs-V zocn-G9y8j!#!2IbbD)h+SWtBBq*LEoycMF;_zv#5LG{DB2?-Y06?2iEo6`i7OA3C5XG<@zZ@-hrV|wI!@kFarq30YE&UK*qA=7%1wX!lIUsEMCvAEycjhzmG4#m z{ElJ((5~-xBBCUcz~8by@0f(&lNa;zslBg&%5L(hdceaMJtCGdA?l1rk&n9f{HaqS z_-BaJiJ!)SKm_$l+o^F39coUS_c!s@_qCk~UA%*9WjaNx{q$CJ|Kt`B5c~__R^dYM zUVU;>MPcx?A>v%FuyqikQ*4M`Fo|r$f6fdcXz!_9AcE3BTOfM=2xlC)_yP4>=3U* z@XPelgyr>+A|PpUmTn~^38E1BBaCEON-#HY`+!XY0{!O&4GlCaGW&qLjG}PkHqz2G z?o3#iR?CGzuG?O+3C)wLc&UT86(;QY>P*Q^w|VQk>&d|^8!4_?M!H2<9uz(~- zqaqeh8~shQR9915!QiG!pk|vyu|+Fi{J_nk_g$Xcy|q&#>t*=cDJI-+h7rA)N#047 zw<>|mQN-dTJs;~5@&)M-gTu5HCc&|xke%zzlt90VRcc$M!LBm~TL##q-Y!Y?b~cpd zSE6yjHu80M$`CZUApzqtEa*pO_aX)ZkCmX{+u_FrXH@oJTmn#zQHOl0UZO*wQ)Fz8 zJCINx(1sp0O^Q9Zjx~s|d0R~G({G~OFJ2{D$Rh@%lcbRiRMXc$!Dgk%imtARE}~-B zK2@V)lMB3BrN?I->+=%N%9-D)j!8fR%NzBvjD)WHMym==Y-MFJBra1yN%wZO856B( zSE&ks-snXRjn>`pV;S?a{iy}t<@drFCwKXDI}4+@uePpT8#d`up0t~%7kzwNgmLWD zqyZ)5)cgqedQkHrg2YhEr!Ds%)Be1I`R_vZ?=tCc=a16w^2epWk56PaTIjInp=D;0 zA7HYFtF&l+$|u~Y{H%Vl8(TtDt@(8*8D1)oB?6T&INQWcHb&tSd9E!wo97HWWM+BM8C71j!y#kQ(`kFp<2gZl}mE-FfCv9(;qAC)cG%QMd{Gz+o!lmbXYf#m! 
zY34h^Sp6%1v9cyXAP#8E3D%O5WNi8Ogv_2=J8`~Hj55`+JK$oU)A2)5zp3gU%~;Ze zi-S%p{y^~`txyY+5o7@gzs1NXu+m8FU$(Ba*}$AkqpC&}P=C=L92sRUqnLl_!tR{v zUMuvKMhW|6B$Q}vb{s38D1(?MzD0RVD9s}F{u%k3azXnl@56CN8BrP4RgD9Q;n1YyAtgvdui7VXM1eBgk&x- zY+UkxP5fc+uW(zxU_D&0g^GD*%Pyzd`!;zp9Gek74LUUt596=_8HlKY2$bhRV*kDJ zN6P$YpvTBJG_Ip(BedN2U=_7jvNn7Kk!x3npT@#g?x7j3>twUBzo=W>{Lu7U6(ArS zPoi&Ukq&1Rba=dxQ3>p!P1|3QmQK)--cK{dI708W-wguA&kv_ z^@vH3Cc>iUtAG<%LIc^QEu)Z_%77cZ93pC{0yC8!OTtP#B=*1B)H@k;9KW&bPk>;HN7; zHhleou+Bb9S39Ya(ycN?uDw zO=vfWh1|$tE!7k6Njbty-%cBX&!M;TTJ4WlKheVA;C$2;6lcrI1Qup1tdDfxrw63s z5TySaaW;>-#B`%hKCIps4a-RhjqqAA48_lwR=Nb-$<_ zBo95*0ZhUsT~j^{lme zmK=+sG#rvQk@f8gW zw!5o1w3=0OB!0p#9oz{y=-^h*C2GxF+e` zJ7R)lea-@LGxQT%;jyo4T+mXWXBPSz{m8Ag4oxEP-*3QrbpgO0_9F2$w^baWfJUZg z)rkoU$2Ro%8_M>`dd)6Yar|}O4@TA$%D@>ve2Q1=&EYqfi$@U$dr;vl%_@`HMVs>_ zYLX8zI8uy}%6W1QAR8wT!_}gt?GgH_dLHMFlg|3+^Qlp~&2-TvE9* ziSuOKwzf^(YyqqZZ4duE^9S3%zyHO<=EcslL$`a;Lqpl%+i+BVT#aE3&M3?rOdU~w zp!F&=mC7>x=)trZ*IG8Tq+>ehP1jr4j)i$wX=^g?n7N1~hr-orE0f5k>l2G&1RFIf z`bI%Nwnn+)B?0ADTKKSJ4~UPgDt$_BCkj}SdGtjd8@bj9MC7eU>S9bpAHtoRXvcBv zd7IB)!owBRCNUm9*rDvb^NUNkq93r}6hLwTz2b+1j+9~9uGRFua6}PNdk@Of1~57R z`8h1ey566$VE?t;Pti^(_joDsqkzp&jfpF#J-KI)YCsx=p(dxYxbZgdj)|=-T(SgqB74}Hw8dTh)x1UjfD zTXCggl70L}>jT+8P?KpDal|;QJBYnp3Yx|sc)E2pICE?)iGIb-tZ=chc)J_bUzHB9=9G-m~yHd+Fh`s%-5a=44kXhgCXhvnEbEnT2b!) 
zVH@#^mZps9;crw_f#SPGuw%KbTY}s{GEY?>$X`3W()*Bl10yS8+=B;JfePIJtRenm zNJivN^LVz+76R&dT;_`jI`O1Q+2X5A%+%+W4+$xPNQYn4Y3@amVq*@lqkDW6(p%hS zXa;2~_8S%ZUoEQBrt&MOS4kK-=I(;4V8QhC(XX*@{+jgX#jl(q1T`G%2&f7#!^f5i zHq`_fgU%#&kPq$sj-A3>C(Nb<6JYX@*p!g*;y41erG9b=xTh{>Yj0L#4K=7Yy0?j0 zc%A`Pjg!7<0p%wS-y+Vz8oWC&(zpn;E8J?~IYq1PlUHkkwOO>H{EaZt2 zU7~V7>3hvoY;<@%;eJiJa3-P}m#X>PWp2njb_Dab(O5fABL51MG`LkcgOt0=>335= z7a1VaGM&k-rAyk3Nj-wTc<6z83WO1OwO~cFDyMlaAN!t;5+2RcgpQ~eiU#m#tg!Mb z`(0Pv>}7wxCt_;$)a2dX?Tn)3=H9j>SZX>?mtnSg{$*gtqO>2913+dgB%z^CNc@b2 zP+-=j98Z)kES8Wd_&hPj8%+GO$fJR)L~xLUT`(jsCAY{IVSL#n_l+tc`7(_#f{KM% z*NBYupeBcCic?>X8WWiifAe#1xSPGE@ifwVxBUOUPKZ#uzgh;M6+%Fn?wHkAYY7Tq z7NeNFt;uxCP(@iK3-44neI90RvyOpx$!EEg60? zux%+;4o>uC7cn#RKcWdrUl62JsER$PSiVraU>Rvxcmfc`g@#fUQP2lYbjN!(Jl#!8 zdI4lHRU4`??UvB*2O|NUq-tPl-F08fkt<~yoTtepcPrzer3d|&HUIzk#;0S|b;#)73?-8(1X={|_OATWw- zF1zHc5SWg|E0yaNA*XN4i977-bpqzK)z(<1zfO~0)H+$!GQQp2cWDDawr+&=exXAOql>pw}aXIr0|#_OT#e-V*)1M<%aF`#RTqr}i_<`PTQ8`dhGF zkjf=+1|Fa0R2nZ@5IXrp0ASozAWvT3yzw$y=479sO_nZKt`=39LiE=_!@%^7A#?b* z)szUCSBLiCXgg*CD4 z`KCUz@i}riRKT#@gT|k!u?f*QZuHp;bVhwI6M+PWZdKCu%zOoP34=(Zw&O9Zc_M~S zwkiooFFNVqLfje)e@oTx*=R_z1APcBzDE+vdYixU(=}6IATGjT94ViidC)ni{$jHj zgQtntp*?s9X^2d+Zxbez__XSxoyHr;P^o{kgONf=3~;#^*J#gO!}*)a(NTH>9F3cN z<}>4$q!gR@wJfJ;uo|%673{C>|8`h5le-f;32u}z)TNN%FPjn;$*i-N!~m(z98c>Y z3V1IcZUf8WFc2v8#6e4^Dx=4;C_&DkU|)^qw0hkhtjX%4N1_c7wB-L*NmrQYJ4>H& zsjwLZ$*tJtIF<4l>=2i%UhIZ42x7$2ce7svPuxlHNJ@j`ZQGXqPSOr&oTcEJfM$in zjg*q`C2;qSg=@m`RE8KZ9p8=05(D1(ly-e9bYg48SHLg-q^$!OerIPndKKq71gzGt zw~ClfqO5=tm|K!h3*OXTk!&`oA^SnY&J@4c=Ttl4U`?}oe|`Kc(*=&z%XgSi*CBlr zJ`A56R@5Rr4n6(rr25c^k2e`lVwef0a*vu>Xwkd|BEfb>_C4ZxTL|&vyPBK#DOYoW z9qC{tdK2{T64wO9duRzvU+0Ue%iPc7&15I%$lZz&b8`P-`!iKd$@+!nxf<9oTREB< zKNqKBudlJI&c~x*LqDT8*+}eOi@;#@J}ga6<5q_9#DP6c_9iyqqc z`qaI==__*jbkfC}fiVm9VR1RCH7%6k0;e0{1C5^~4*CHhjg!e4-+%BnZA?90AZufr zIj;Lllt0ty51jrT{6Prwvx-AAX%*W=GN^+6WSB?gUIooy{T7p5_t0(&Tli61zGA0Q zs1r+~2|Q;v?*n28Dyd?n%!xM-Bp{VZUc7?R80bQe#?T#{f5G^ppdLVPjouT-QK$ns 
z_U3FSh};uQ8&7_pPC)p54bChnS_K*u(%L~kr-FIN_ZNflrV4pDnh;iQ-Klx5)W*!P zg6bS}vkpAKOaa9lZEP-e^$Jw>+MK|jxm&C-yCE7+-~r)O;cNdD>HoTB-xwv~$!i61 zCfcVn9`<$bm=~hRg6Dmp9j()jC*R}sJ!Fc=a=6z5z~0BH76v1`u}~cjZ_616FcH&v zItiv=_5c;WXS(u{9|);b$x^4NwyBHSB|Z?Jd${TwVJv)hSDn!u7!cR%G5Xqq?p@xA zb$+IumM~er@SuA>tEeMAS!L;pu(s*t#?!4lthNfR5aY33hfRxUfsO0i)(f<-%@yN|^PdD~QWfiO z;*wo~3MGn|)^mS&^s7}Xsgj<^EEq8;B~VE#%PORPdbQRzn+|bk^FX0=%E7fsZRjMq zyAM{`+VUSVYpzLLpF8Q|>`X+ndN`mN~o0X=7CuMFw?#h}g}X zuD19E;@ScVav;_2%5S4IZDAd_9IJOgwUdJZUE~1sX~Ug)fa%z$eXYp+Dxj<9;%QQPmPun%S09@r98egR+}@D=#92x zv$B~proJAiJA?zom9sAwSzLJu-b&k2O*mRTN*@`MaI7qmNrXH_EFJM_m&`4doIm4w zO%n37^+2vwsrv~7!>&cj=vsDK<^LWOR=HA^miSU$> zWHLAM3C~oJXda@0*jikC8>}tbzoPz&B;U7dbhnB-tp2MwmuvK--i=zP{mR2i8gt|@ zfgL%+nGLdO8Ii}4=bm3vqpQ%mWWn(rKVb1P27WA&tRHWg6qn&|2qgXet-#MAe~d1w z(HD#h6o##aBW~VNSKs{(NixjMb?T6jD>kpZUQVDeK8|y=N;x^&;%0&O zrz`VzfbEy5_#xu6WRo&*5g*3+RMV>wFL5@8MR)xTz=o`E#eC)s6fTv>8yg>!%UAZu zV|NZHhu=BmF0IX5mCx7YGX#`MQ+KM7N-+CuIp76)8>39Gu+Tz`S{u3bF<88*>K)mlKqU~j?PCl^UtY-s>sB$ zwKM#j4>YFmS6Nr5N1WSc;B@QE=Xvf}HG}o6$K<101pGgI{!516oxj2S)!CE+4H;V0 z#z1tEds?NeJ=gy-s4}Z3P+GfT!oQxSar%A7ayt8|Zv z6=Gm?@T@CZJz(IB!+5v92hzv9JW+s zHc@CP;)H=zFKt&hZG1xXXbV%P0=FLiT#9S-F&?Y;Z80xV^?i@1zIVi{uSvmZ7a1fG zhY3VL17n<5`n%dmAZ7h~k}pm}z{qz_yyck<|1jLI>TktPea#i-n+(rjJZcu37BwZY zFM9w_adU*u&Zp;#02&EL>Czhq=DZi}IO1m}?egr%N@_v3U6o?B>!pp7|6tX}Pzb)W zQy{<>FlAq*6^}(q;>l^?TDkV>=l%BOS|xajXQ3AJNj#^bf|5Y9f^wWtms|fX7g)G2 z%GtPH_gC0{vibYwKUHK`I1LI5I!8H1R|d$=$WXbZc4U)-vw*BbFJDN|D&<*;OaxTA z!^3~$)MPYVgXjYjUy?V4qz8b;U`v1Z8-Nwwa(LqIxUh@+IbwRcwe=1*9{tpUTMgiD zA;vZI)ef4S#WX;|oxe>>lQ=P%Tb4Ee*_j;cl`|L_a^F|_rNHPHOp!5SyFqIXaIULu zK-(T<#gnY>|JV54zC-xk)*nCyzU025XT{rj$BxO$nADFly^}MnUFX4c|k#OZI$@L$b^To0T!1@e$p!UuEDK+n|v3)`m+3 zGHDiCQ3t(_1ETTk6}f*2nQOl0%s$!ehlj>z6{#u&bw`EE|A6&xlKG(+0ATZTBOCdS zQ?(V**vrB{lYgE3yB_EvMgG2#3a-TyUj>W1qw^L8}hC_wd?+@l*Ag1SgBQQuzrkIuvX@3hNM;9B6!4m%P=E^^C#rTZ(ZO5J{4+=mPWBWW|W^YRP7- z|K-R)kzDAMHAh<0OIOS1`dc{h`#$~8l#&y}g9%1pElG{-vXE5I`qeXv9@}4@NTkX6 
zoQ?{y_jSt%olH+MSM$do#q%)@;c!lW==_b`58)G73GSeCZJTpa^2o|SGMi1_%SxS3 zBYa`9qIzt+re|gIt+?t!v1<&2?8_{+o)gOqO`&cyrDqZ0#+AKroKz$_OWhK5ha|&# z9$x@NhN>`YX)-XA2EWFsBXT)@VZEWf2SRZx0X(rs}@vA13AUs>|5(80!)86E6L zTx(>u)<9$~7OQpGqLNxn)>VI`7MT8>6h z9h);^@JN02vUBmjxGs)@hta+5@Jh|60@o1KG;5gFDtpp^HdQ!N9nnW}SpMSRKV;QI zMjk$;#D3&Yo(4IqA!Up*4LHbzS3KBh&lr)BE2M{R%H;trY7w;%S05}lEXas0m$3-c ztOk@r1c-QA#;-ghakeX^UAzN-b}i_Lf3jjQ+S4=w7Il%v4w{fE%$a)9@LrZ>C<|JNH3imhP7Hww$Q=g&D!zD@*9>0nA-OWqk9!h|KS`0qpo9hC z0p+s#oo0V>=ez9mzy2U|8!lD{U^hJqkFu=!e*RhQLD4`}|9pz~tmm9>A+a4;mfMhU zje+(Xrr{!>iv$9rD=Ui}^tlchqhFTOj(j8xs}-PpJvsJ1+VS@e3 z0!jZaFSb6=d=HMt7!wYi9l5gW%K!&pazTb-t|~#zXue?cSo7@JW|WQUWEz+Z<i@t1 zf8y}RJ3zo7YK{dafH2qURnTd9dK+1ISG$NG`;$sJoIZ~K$yGpn1o~^Xys&K;70l?) z$+0oqk4xxqj@T$Ebpv}F#%&MjBzTYQ4P+E9+bmQ1*<@&!mM2$i51O7m$O5L@U5%6I zx$WE?X`u56_27UClG^ZsnhG*Y+|@!X9-}Y9PrIhM>#`j`Vzt}6xbWIIe|C!ZCCN6E zr~`knv;4WXDvkymyj9R%yjux?^3kj!iw2V)A<|v(bdRyj1o?2uq3+}v0~dPfEUuGl zr8@EVtIXf97+7$tv-LK;U3~v;`Fs1#h`*i!0tW2SON+}vbZ*>8iQ9>?x*4BCTr&C2 zqzTEUQc)&GU*qib{Cffd2*hEfDi*M$sAK6%I=4f z|5P7{mfMj_&70+}RAhf{2o%XKtMoaB3{e~E>bG`p^@RC_@F2HteS(Au{xKLN?!4Bi z7U6bxwVnMb$nq3>f5BR(<(y+{ds;#05jmc+`o4Fikryq!Aqg^BZ92~o4HcH@D1c== zfGiLxaj0+I_Uw^-onRFdY*@tQ(GxCBL?}F0(*fznR5j3vy+gVehf1I@K5F4q#=`#I zr&ScZWw^|l3&THesQ!@X?*#M1k00#^V2WhO6LD|79c&bUoYMZXt0e&eT(h3t4IubA zUwlQ&j+l<&30%D+U)bL_(6WVS%oZ=Q6GOv13DBFF*rqy-*+Kyi5UYP6a0;P9aYl7) z8ICn>7qtB~Jg{ki1}z#(8>{j9Cw#K1cUFR!3X$l7Qs=yx7TorWjo(Hd7<*Wy-f0(4 zOR&^$m=P#oV-B_nuJajL1Kj=|;@>3mpRoSt%V(NMBjxf0ZwvE{DWVbD9@xB+s2V-| z3=?-X41R{TZeve=pa@P6-nMddTP+HQ3u6ecwEOjKIP^O^#mc*?%-NM$X9c#EC*7zk z?;3m*mA0w7OFAP``j%b0a{>$hZL`Oj63fwU?8n`UMQk;l(o9cWqzlW`TGTv)f0MIS-hh^n-HL5z{|g#0Ftvwh{U~k= zaBQQzQm1Fd>eWNEIrfY$(NF_^b#Z89ut+9`0z z=<#Uo_1@+xQ~L1icI3r*I4vNp43`s8j)AR^IOU+8JCd2*_pjj+hWk)dFqjMIy|K@g zja!C_C6Q^h4<1^f`T@U63S(#Ce;Z0mq)td#QCU}N4BAD-nt7>#m&R(EmrCDNgW+GQ z^K*maFxv%oPsD6*PRO;7aTN6#_!gBOs2?m_i8b&!!m%8?X!j9QN=AIS;KOhn8z-zc 
zW(cM&T;;r~D`6Q%xwUzrotcSLQ}K~VBj;2SdbkBvNO9JRbJhDo9By`IJeDg$()FaO1UpfWhbr^cnsbU&+oi3X5XU@^U0T^nipc$peutdGiUYv z3zf^=BXOc>BVoBK1!ucKl@+6`J}lK`}Ur{WIAtF$wXS z;WR>>p|s9LNUW?e<3=9?1H3oNcs+!O*5l;LjoiD97=;V0S|A#^;>1%BaFl5&xHG~o zWp@(IU|dB`Q97aLWjZTQQU*$pm%pkZ2sC3eiBDNe8Peqt5<|3Rt|jlf&1jsT^dUo< zn0?eao%6?O83|l?2D&sty7+9vt!|dK<%J5H4m~cWSwQyspfs))%~1D`d@&CH-va{! z0|NsJ&X4F@nzG}c;=mu>arFC=5xNPmKvCf7BdaegyO$MjRw1yjM>zB}*pa;%f6FEd zP}Cbxvx+PWRs#bA0|NsC0|NsA0RaI40RaI40|NsC0|NsB0s;a80s=NNRRVHOE*)ci zqHdrNF@L^5RW#6IpB^z-9}$2`3G?fgl-IBXB}w5vtm|ifPC!VBM8_=K0!thtn|Y1# zW3rtho6ENm%{+X0Y$xc~^Vs@EnD_B&tomQ}#I1?KqhHk}^F`Gf`Wd*dqD~odb~G{T=I_ zQjt_vobmBEfbnZ;lwQyq*-p9Fh?uX2$}M2Ynk8YZOHRg&1G5K*j`ON>Jy^sN3MapV z0Q%X=`?Khf8Y4Lm8*%n4NhA#Z92^bnyvA4a!-{rBM~KE97KB~KLmnpp;YLx24({vo zGXTiGiIJ$I-dC6{m{^&P`s9GHrNtbHVziyJ9t|@)HOBS0^PL2rR*>-48IoCs?R(iZ z2@~aFVUt0t?LBK39m2v8bWnBP11FGP4{SN`@jW)RJq5A)CJ;aFAz>by$HTr8{~dzq zTMEbH!l@+fP_Xk)dB+eUbQ9nJ))1BYe50?gi3p(_!xb63N_urNrTdZ><1LF z7u>^2U1^A{hSvHdk-I{~`~d+00dQQT#mGo3Eaj~3=p~_$43<6(2Tv=tULJIJn`571 zggXFfkl;#YmQHD5$12|6jiQwbW4Kxu8nZ(zK1CDYjGuC2Z(32fAzA&V2yFk2D6zF- z*)|>b^nc@*|50F&r|jJi%R8Ps(iyTv{uXG=2zvp8=!>QcoH#24wK&qp33Wg+=2!MG z1ab0({pCc<`vu^)vOcz;kum>5Ga}l)851e~ Mg4Cy&J9oI8cArN$#{d8T literal 0 HcmV?d00001 From 2896ce8841931b664fe242bba28437d60dc38c1a Mon Sep 17 00:00:00 2001 From: Lee Passey Date: Sun, 23 Feb 2025 13:41:41 -0800 Subject: [PATCH 2/3] Add unit test for COSObject class, to increase code covereage and to validate assumptions. 
--- .../org/apache/pdfbox/cos/TestCOSObject.java | 176 ++++++++++++++++++ .../org/apache/pdfbox/cos/TestVisitor.java | 108 +++++++++++ 2 files changed, 284 insertions(+) create mode 100644 pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java create mode 100644 pdfbox/src/test/java/org/apache/pdfbox/cos/TestVisitor.java diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java new file mode 100644 index 00000000000..25beb468e16 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.apache.pdfbox.io.RandomAccessReadView; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.apache.pdfbox.cos.TestVisitor.ESC_CHAR_STRING_PDF_FORMAT; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test class for {@link COSObject}. + */ +class TestCOSObject extends TestCOSBase implements ICOSParser +{ + static COSObjectKey key = new COSObjectKey( 121L, 0 ); + static COSString cosString; + + @BeforeAll + static void setUp() + { + cosString = new COSString( "test string" ); + cosString.setKey( key ); // same key as the proxy object, because + // this will be the dereferenced object. + cosString.setDirect( true ); // If we were writing this as the value + // in a COSDictionary (which we are not) we would use this object + // directly and not create a reference for it. + testCOSBase = new COSObject( cosString ); + } + + @Test + void testGetCOSObject() + { + assert( testCOSBase.getCOSObject() instanceof COSObject ); + } + + /** + * TODO: It seems that the "direct" flag indicates that this object can be + * added as a COSDictionary value directly, otherwise a new referenced object + * should be created. If so, {@link COSObject}s (which simply proxy other + * {@link COSBase}-derived objects) should never be "direct," and {@link COSObject} + * should be modified to enforce this rule. 
+ */ + @Test + @Override + void testIsSetDirect() + { + testCOSBase.setDirect(true); + assertFalse(testCOSBase.isDirect()); + testCOSBase.setDirect(false); + assertFalse(testCOSBase.isDirect()); + } + + @Test + void testGetObject() + { + COSBase base = ((COSObject) testCOSBase).getObject(); + // testCOSBase has no parser, so the object returned should be the + // string object we initialized it with. + assertEquals( cosString, base ); + assertTrue( ((COSObject) testCOSBase).isDereferenced() ); + + final COSObject testCOSObject = new COSObject( key, this ); + + // start by making sure that the test object is indirect and is not dereferenced. + assertFalse( testCOSObject.isDereferenced() ); + + // getObject should cause the referenced object to be dereferenced + base = testCOSObject.getObject(); + assertTrue( testCOSObject.isDereferenced() ); + assertEquals( cosString, base ); + } + + /** + * Test accept() - tests the interface for visiting a document at the COS level. + * In the case of proxy {@link COSObject} the visitor is passed either to the + * encapsulated object, if it is present or can be dereferenced, or to the + * {@link COSNull#NULL} global object. + */ + @Test + void testAccept() throws IOException + { + String expected = "(" + ESC_CHAR_STRING_PDF_FORMAT + ")"; + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + TestVisitor visitor = new TestVisitor(outStream); + testCOSBase.accept( visitor ); + // the base test object encapsulates a string. + assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); + outStream.reset(); + // this new COSObject will dereference to the same string object as above. 
+ COSObject testCOSObject = new COSObject( key, this ); + testCOSObject.accept( visitor ); + assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); + outStream.reset(); + testCOSObject.setToNull(); + testCOSObject.accept( visitor ); + assertEquals( "COSNull.NULL", outStream.toString( StandardCharsets.ISO_8859_1 )); + } + + @Test + void testIsCOSObjectNull() + { + COSObject testCOSObject = new COSObject( key, this ); + // The object has not been dereferenced, so this method should + // return true. + assertTrue( testCOSObject.isObjectNull()); + testCOSObject.getObject(); // This should dereference the object + assertFalse( testCOSObject.isObjectNull()); + + // this should set the encapsulated object to COSNull.NULL + // TODO: should COSObject.setToNull() also clear the COSObjectKey? + testCOSObject.setToNull(); + + // isObjectNull() does not test for a COSNull object but only tests + // if the encapsulated reference actually is null. Thus, if the + // encapsulated object is COSNull it will still return false. + // TODO: is this the intention? + assertFalse( testCOSObject.isObjectNull()); + + // set to null should have zeroed out the parser, so no further + // dereferencing should be possible. + COSBase base = testCOSObject.getObject(); + assertEquals( COSNull.NULL, base ); + } + + @Test + public void testNullObjectConstructor() + { + COSObject testCOSObject = new COSObject( COSNull.NULL, this ); + assertTrue( testCOSObject.isDereferenced()); + assertFalse( testCOSObject.isObjectNull()); // TODO: Is this really the intention? 
+ COSBase base = testCOSObject.getObject(); + assertEquals( COSNull.NULL, base ); + } + + @Override + public COSBase dereferenceCOSObject( COSObject obj ) throws IOException + { + return cosString; + } + + /** + * Unused but required by ICOSParser interface + * + * @param startPosition start position within the underlying random access read + * @param streamLength stream length + * @return null + * @throws IOException + */ + @Override + public RandomAccessReadView createRandomAccessReadView( long startPosition, long streamLength ) throws IOException + { + return null; + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/cos/TestVisitor.java b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestVisitor.java new file mode 100644 index 00000000000..8eeea419fd5 --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/cos/TestVisitor.java @@ -0,0 +1,108 @@ +package org.apache.pdfbox.cos; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +public class TestVisitor implements ICOSVisitor +{ + // TODO: make these statics package private in the other classes so we can + // consume them but remain in sync. + /** + * The true boolean token. + */ + private static final byte[] TRUE_BYTES = { 116, 114, 117, 101 }; // "true".getBytes("ISO-8859-1") + /** + * The false boolean token. + */ + private static final byte[] FALSE_BYTES = { 102, 97, 108, 115, 101 }; // "false".getBytes("ISO-8859-1") + + static final String ESC_CHAR_STRING_PDF_FORMAT = // We can probably change this to an arbitrary string + "\\( test#some\\) escaped< \\\\chars>!~1239857 "; + + private final ByteArrayOutputStream output; + + public TestVisitor( ByteArrayOutputStream outStream ) + { + output = outStream; + } + + @Override + public void visitFromArray( COSArray obj ) throws IOException + { + // TODO: Write something to the output buffer just so we know that the visitor got called. 
+ } + + @Override + public void visitFromBoolean( COSBoolean cosBoolean ) throws IOException + { + if( cosBoolean.getValue() ) + { + output.write( TRUE_BYTES ); + } + else + { + output.write( FALSE_BYTES ); + } + + } + + @Override + public void visitFromDictionary( COSDictionary obj ) throws IOException + { + // Write something to the output buffer just so we know that the visitor got called. + + } + + @Override + public void visitFromDocument( COSDocument obj ) throws IOException + { + // Write something to the output buffer just so we know that the visitor got called. + + } + + @Override + public void visitFromFloat( COSFloat cosFloat ) throws IOException + { + // Write something to the output buffer just so we know that the visitor got called. + output.write( cosFloat.toString().getBytes( StandardCharsets.ISO_8859_1 ) ); + } + + @Override + public void visitFromInt( COSInteger cosInteger ) throws IOException + { + // Write something to the output buffer just so we know that the visitor got called. 
+ output.write( Integer.toString( cosInteger.intValue() ).getBytes( StandardCharsets.ISO_8859_1 ) ); + } + + @Override + public void visitFromName( COSName obj ) throws IOException + { + throw new IOException(); + } + + @Override + public void visitFromNull( COSNull obj ) throws IOException + { + output.write( "COSNull.NULL".getBytes( StandardCharsets.ISO_8859_1 )); + } + + @Override + public void visitFromStream( COSStream obj ) throws IOException + { + + } + + @Override + public void visitFromString( COSString cosString ) throws IOException + { + if (cosString.getForceHexForm()) + { + output.write( ("<" + cosString.toHexString() + ">").getBytes( StandardCharsets.ISO_8859_1 )); + } + else + { + output.write( ("(" + ESC_CHAR_STRING_PDF_FORMAT + ")").getBytes( StandardCharsets.ISO_8859_1 )); + } + } +} From 6a6b4c31399f7431104ff0c9005d568aff2a7ec9 Mon Sep 17 00:00:00 2001 From: Lee Passey Date: Sun, 23 Feb 2025 14:55:45 -0800 Subject: [PATCH 3/3] Revert "Refine COS package to isolate COSObject code into it's own jar" This reverts commit a717f7da03fcda5d6a0c3a771fa86d253daabc87. 
--- pdfcos/pom.xml | 93 - .../java/org/apache/pdfbox/cos/COSArray.java | 859 --------- .../org/apache/pdfbox/cos/COSArrayList.java | 587 ------- .../java/org/apache/pdfbox/cos/COSBase.java | 98 -- .../org/apache/pdfbox/cos/COSBoolean.java | 142 -- .../org/apache/pdfbox/cos/COSDictionary.java | 1561 ----------------- .../apache/pdfbox/cos/COSDictionaryMap.java | 263 --- .../org/apache/pdfbox/cos/COSDocument.java | 633 ------- .../apache/pdfbox/cos/COSDocumentState.java | 58 - .../java/org/apache/pdfbox/cos/COSFloat.java | 225 --- .../org/apache/pdfbox/cos/COSIncrement.java | 358 ---- .../org/apache/pdfbox/cos/COSInteger.java | 205 --- .../java/org/apache/pdfbox/cos/COSName.java | 766 -------- .../java/org/apache/pdfbox/cos/COSNull.java | 66 - .../java/org/apache/pdfbox/cos/COSNumber.java | 110 -- .../java/org/apache/pdfbox/cos/COSObject.java | 210 --- .../apache/pdfbox/cos/COSObjectGetter.java | 32 - .../org/apache/pdfbox/cos/COSObjectKey.java | 152 -- .../java/org/apache/pdfbox/cos/COSStream.java | 449 ----- .../java/org/apache/pdfbox/cos/COSString.java | 274 --- .../org/apache/pdfbox/cos/COSUpdateInfo.java | 63 - .../org/apache/pdfbox/cos/COSUpdateState.java | 341 ---- .../org/apache/pdfbox/cos/ICOSParser.java | 51 - .../org/apache/pdfbox/cos/ICOSVisitor.java | 107 -- .../org/apache/pdfbox/cos/PDFDocEncoding.java | 163 -- .../pdfbox/cos/UnmodifiableCOSDictionary.java | 45 - .../pdfbox/cos/filter/ASCII85Filter.java | 52 - .../pdfbox/cos/filter/ASCII85InputStream.java | 274 --- .../cos/filter/ASCII85OutputStream.java | 246 --- .../pdfbox/cos/filter/ASCIIHexFilter.java | 145 -- .../cos/filter/CCITTFaxDecoderStream.java | 813 --------- .../cos/filter/CCITTFaxEncoderStream.java | 325 ---- .../pdfbox/cos/filter/CCITTFaxFilter.java | 158 -- .../cos/filter/COSCryptFilterDictionary.java | 137 -- .../filter/COSEncryptFilterDictionary.java | 136 -- .../pdfbox/cos/filter/COSInputStream.java | 111 -- .../pdfbox/cos/filter/COSOutputStream.java | 212 --- 
.../apache/pdfbox/cos/filter/CryptFilter.java | 62 - .../apache/pdfbox/cos/filter/DCTFilter.java | 344 ---- .../pdfbox/cos/filter/DecodeOptions.java | 264 --- .../pdfbox/cos/filter/DecodeResult.java | 89 - .../org/apache/pdfbox/cos/filter/Filter.java | 299 ---- .../pdfbox/cos/filter/FilterFactory.java | 103 -- .../apache/pdfbox/cos/filter/FlateFilter.java | 63 - .../cos/filter/FlateFilterDecoderStream.java | 243 --- .../pdfbox/cos/filter/IdentityFilter.java | 49 - .../apache/pdfbox/cos/filter/JBIG2Filter.java | 153 -- .../apache/pdfbox/cos/filter/JPXFilter.java | 211 --- .../apache/pdfbox/cos/filter/LZWFilter.java | 296 ---- .../filter/MissingImageReaderException.java | 37 - .../apache/pdfbox/cos/filter/Predictor.java | 366 ---- .../cos/filter/RunLengthDecodeFilter.java | 189 -- .../pdfbox/cos/filter/TIFFExtension.java | 106 -- .../org/apache/pdfbox/cos/filter/package.html | 25 - .../java/org/apache/pdfbox/cos/package.html | 72 - .../apache/pdfbox/cos/util/DateConverter.java | 737 -------- .../java/org/apache/pdfbox/cos/util/Hex.java | 247 --- .../apache/pdfbox/cos/util/StringUtil.java | 42 - ...rg.apache.pdfbox.cos.encryption.properties | 0 .../apache/pdfbox/cos/COSDictionaryTest.java | 38 - .../apache/pdfbox/cos/PDFDocEncodingTest.java | 111 -- .../org/apache/pdfbox/cos/TestCOSArray.java | 290 --- .../org/apache/pdfbox/cos/TestCOSBase.java | 77 - .../org/apache/pdfbox/cos/TestCOSBoolean.java | 116 -- .../org/apache/pdfbox/cos/TestCOSFloat.java | 435 ----- .../org/apache/pdfbox/cos/TestCOSInteger.java | 177 -- .../org/apache/pdfbox/cos/TestCOSName.java | 60 - .../org/apache/pdfbox/cos/TestCOSNumber.java | 132 -- .../org/apache/pdfbox/cos/TestCOSObject.java | 160 -- .../org/apache/pdfbox/cos/TestCOSStream.java | 215 --- .../org/apache/pdfbox/cos/TestCOSString.java | 360 ---- .../apache/pdfbox/cos/TestCOSUpdateInfo.java | 61 - .../org/apache/pdfbox/cos/TestVisitor.java | 96 - .../cos/UnmodifiableCOSDictionaryTest.java | 352 ---- 
.../pdfbox/cos/filter/PredictorTest.java | 89 - .../apache/pdfbox/cos/filter/TestFilters.java | 219 --- .../org/apache/pdfbox/cos/filter/package.html | 25 - .../java/org/apache/pdfbox/cos/package.html | 25 - .../pdfbox/cos/util/StringUtilTest.java | 73 - .../apache/pdfbox/cos/util/TestDateUtil.java | 424 ----- .../apache/pdfbox/cos/util/TestHexUtil.java | 87 - .../apache/pdfbox/cos/filter/PDFBOX-1977.bin | Bin 19321 -> 0 bytes 82 files changed, 18139 deletions(-) delete mode 100644 pdfcos/pom.xml delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java 
delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeOptions.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java delete mode 100644 
pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/package.html delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java delete mode 100644 pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java delete mode 100644 pdfcos/src/test/java/org.apache.pdfbox.cos.encryption.properties delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java delete mode 100644 
pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/package.html delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java delete mode 100644 pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java delete mode 100644 pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin diff --git a/pdfcos/pom.xml b/pdfcos/pom.xml deleted file mode 100644 index f5572923c63..00000000000 --- a/pdfcos/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - 4.0.0 - - - org.apache.pdfbox - pdfbox-parent - 4.0.0-SNAPSHOT - ../parent/pom.xml - - - pdfcos - 4.0.0-SNAPSHOT - bundle - Apache PDFBOX COS system - - - 11 - 11 - UTF-8 - - - - - org.apache.pdfbox - pdfbox-io - ${project.version} - - - org.apache.pdfbox - encryption - ${project.version} - - - org.apache.logging.log4j - log4j-api - - - org.bouncycastle - bcprov-jdk18on - ${bouncycastle.version} - compile - true - - - 
org.bouncycastle - bcpkix-jdk18on - ${bouncycastle.version} - compile - true - - - - org.apache.logging.log4j - log4j-core - test - - - org.junit.jupiter - junit-jupiter - ${junit.version} - test - - - - - - - org.apache.felix - maven-bundle-plugin - true - - - org.apache.pdfbox.pdfcos - org.apache.pdfbox.pdfcos - - - - - org.apache.rat - apache-rat-plugin - - - src/test/resources/org/apache/pdfbox/pdfcos/*.txt - - - - - - - \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java deleted file mode 100644 index 3fc23a27949..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArray.java +++ /dev/null @@ -1,859 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.stream.Collectors; - -/** - * An array of PDFBase objects as part of the PDF document. 
- * - * @author Ben Litchfield - */ -public class COSArray extends COSBase implements Iterable, COSUpdateInfo -{ - private final ArrayList objects; - private final COSUpdateState updateState; - - public static COSArray of(float... floats) - { - ArrayList objects = new ArrayList<>(floats.length); - for (float f : floats) - { - objects.add(new COSFloat(f)); - } - return new COSArray(objects, true); - } - - /** - * Constructor. - */ - public COSArray() - { - this(new ArrayList<>(), true); - } - - /** - * Use the given list to initialize the COSArray. - * - * @param COSObjectGetters the initial list of COSObjectGetters - */ - public COSArray(List COSObjectGetters) - { - this( - COSObjectGetters.stream() - .map(co -> co == null ? null : co.getCOSObject()) - .collect(Collectors.toCollection(ArrayList::new)), - true - ); - } - - private COSArray(ArrayList cosObjects, boolean direct) - { - objects = cosObjects; - updateState = new COSUpdateState(this); - setDirect(direct); - } - - /** - * This will add an object to the array. - * - * @param object The object to add to the array. - */ - public void add( COSBase object ) - { - if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() - && object.getKey() != null) - { - COSObject cosObject = new COSObject(object, object.getKey()); - objects.add(cosObject); - getUpdateState().update(cosObject); - } - else - { - objects.add(object); - getUpdateState().update(object); - } - } - - /** - * This will add an object to the array. - * - * @param object The object to add to the array. - */ - public void add( COSObjectGetter object ) - { - COSBase base = null; - if (object != null) - { - base = object.getCOSObject(); - } - add(base); - } - - /** - * Add the specified object at the ith location and push the rest to the - * right. - * - * @param i The index to add at. - * @param object The object to add at that index. 
- */ - public void add( int i, COSBase object) - { - if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() - && object.getKey() != null) - { - COSObject cosObject = new COSObject(object, object.getKey()); - objects.add(i, cosObject); - getUpdateState().update(cosObject); - } - else - { - objects.add(i, object); - getUpdateState().update(object); - } - } - - /** - * This will remove all of the objects in the collection. - */ - public void clear() - { - objects.clear(); - getUpdateState().update(); - } - - /** - * This will remove all of the objects in the collection. - * - * @param objectsList The list of objects to remove from the collection. - */ - public void removeAll( Collection objectsList ) - { - objects.removeAll( objectsList ); - getUpdateState().update(); - } - - /** - * This will retain all of the objects in the collection. - * - * @param objectsList The list of objects to retain from the collection. - */ - public void retainAll( Collection objectsList ) - { - if (objects.retainAll(objectsList)) - { - getUpdateState().update(); - } - } - - /** - * This will add an object to the array. - * - * @param objectsList The object to add to the array. - */ - public void addAll( Collection objectsList ) - { - if (objects.addAll(objectsList)) - { - getUpdateState().update(objectsList); - } - } - - /** - * This will add all objects to this array. - * - * @param objectList The list of objects to add. - */ - public void addAll( COSArray objectList ) - { - if( objectList != null ) - { - if (objects.addAll(objectList.objects)) - { - getUpdateState().update(objectList); - } - } - } - - /** - * Add the specified object at the ith location and push the rest to the - * right. - * - * @param i The index to add at. - * @param objectList The object to add at that index. 
- */ - public void addAll( int i, Collection objectList ) - { - if (objects.addAll(i, objectList)) - { - getUpdateState().update(objectList); - } - } - - /** - * This will set an object at a specific index. - * - * @param index zero based index into array. - * @param object The object to set. - */ - public void set( int index, COSBase object ) - { - if ((object instanceof COSDictionary || object instanceof COSArray) && !object.isDirect() - && object.getKey() != null) - { - COSObject cosObject = new COSObject(object, object.getKey()); - objects.set(index, cosObject); - getUpdateState().update(cosObject); - } - else - { - objects.set(index, object); - getUpdateState().update(object); - } - } - - /** - * This will set an object at a specific index. - * - * @param index zero based index into array. - * @param intVal The object to set. - */ - public void set( int index, int intVal ) - { - objects.set( index, COSInteger.get(intVal)); - getUpdateState().update(); - } - - /** - * This will set an object at a specific index. - * - * @param index zero based index into array. - * @param object The object to set. - */ - public void set( int index, COSObjectGetter object ) - { - COSBase base = null; - if( object != null ) - { - base = object.getCOSObject(); - } - set(index, base); - } - - /** - * This will get an object from the array. This will dereference the object. - * If the object is COSNull then null will be returned. - * - * @param index The index into the array to get the object. - * - * @return The object at the requested index. - */ - public COSBase getObject( int index ) - { - COSBase obj = objects.get( index ); - if( obj instanceof COSObject ) - { - obj = ((COSObject)obj).getObject(); - } - if (obj instanceof COSNull) - { - obj = null; - } - return obj; - } - - /** - * This will get an object from the array. This will NOT dereference - * the COS object. - * - * @param index The index into the array to get the object. 
- * - * @return The object at the requested index. - */ - public COSBase get( int index ) - { - return objects.get( index ); - } - - /** - * Get the value of the array as an integer. - * - * @param index The index into the list. - * - * @return The value at that index or -1 if does not exist. - */ - public int getInt( int index ) - { - return getInt( index, -1 ); - } - - /** - * Get the value of the array as an integer, return the default if it does not exist. - * - * @param index The value of the array. - * @param defaultValue The value to return if the value is null. - * @return The value at the index or the defaultValue. - */ - public int getInt( int index, int defaultValue ) - { - int retval = defaultValue; - if ( index < size() ) - { - Object obj = objects.get( index ); - if( obj instanceof COSNumber ) - { - retval = ((COSNumber)obj).intValue(); - } - } - return retval; - } - - /** - * Set the value in the array as an integer. - * - * @param index The index into the array. - * @param value The value to set. - */ - public void setInt( int index, int value ) - { - set( index, COSInteger.get( value ) ); - } - - /** - * Set the value in the array as a name. - * @param index The index into the array. - * @param name The name to set in the array. - */ - public void setName( int index, String name ) - { - set( index, COSName.getPDFName( name ) ); - } - - /** - * Get the value of the array as a string. - * - * @param index The index into the array. - * @return The name converted to a string or null if it does not exist. - */ - public String getName( int index ) - { - return getName( index, null ); - } - - /** - * Get an entry in the array that is expected to be a COSName. - * @param index The index into the array. - * @param defaultValue The value to return if it is null. - * @return The value at the index or defaultValue if none is found. 
- */ - public String getName( int index, String defaultValue ) - { - String retval = defaultValue; - if( index < size() ) - { - Object obj = objects.get( index ); - if( obj instanceof COSName ) - { - retval = ((COSName)obj).getName(); - } - } - return retval; - } - - /** - * Set the value in the array as a string. - * @param index The index into the array. - * @param string The string to set in the array. - */ - public void setString( int index, String string ) - { - if ( string != null ) - { - set( index, new COSString( string ) ); - } - else - { - set( index, null ); - } - } - - /** - * Get the value of the array as a string. - * - * @param index The index into the array. - * @return The string or null if it does not exist. - */ - public String getString( int index ) - { - return getString( index, null ); - } - - /** - * Get an entry in the array that is expected to be a COSName. - * @param index The index into the array. - * @param defaultValue The value to return if it is null. - * @return The value at the index or defaultValue if none is found. - */ - public String getString( int index, String defaultValue ) - { - String retval = defaultValue; - if( index < size() ) - { - Object obj = objects.get( index ); - if( obj instanceof COSString ) - { - retval = ((COSString)obj).getString(); - } - } - return retval; - } - - /** - * This will get the size of this array. - * - * @return The number of elements in the array. - */ - public int size() - { - return objects.size(); - } - - /** - * Returns true if the container is empty, false otherwise. - * - * @return true if the container is empty, false otherwise - */ - public boolean isEmpty() - { - return objects.isEmpty(); - } - - /** - * This will remove an element from the array. - * - * @param i The index of the object to remove. - * - * @return The object that was removed. 
- */ - public COSBase remove( int i ) - { - COSBase removedEntry = objects.remove( i ); - getUpdateState().update(); - return removedEntry; - } - - /** - * This will remove an element from the array. - * - * @param o The object to remove. - * - * @return true if the object was removed, false - * otherwise - */ - public boolean remove( COSBase o ) - { - boolean removed = objects.remove(o); - if (removed) - { - getUpdateState().update(); - } - return removed; - } - - /** - * This will remove an element from the array. - * This method will also remove a reference to the object. - * - * @param o The object to remove. - * @return true if the object was removed, false - * otherwise - */ - public boolean removeObject(COSBase o) - { - boolean removed = this.remove(o); - if (!removed) - { - for (int i = 0; i < this.size(); i++) - { - COSBase entry = this.get(i); - if (entry instanceof COSObject) - { - COSObject objEntry = (COSObject) entry; - if (objEntry.getObject().equals(o)) - { - return this.remove(entry); - } - } - } - } - return removed; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSArray{" + objects + "}"; - } - - /** - * Get access to the list. - * - * @return an iterator over the array elements - */ - @Override - public Iterator iterator() - { - return objects.iterator(); - } - - /** - * This will return the index of the entry or -1 if it is not found. - * - * @param object The object to search for. - * @return The index of the object or -1. - */ - public int indexOf(COSBase object) - { - for (int i = 0; i < size(); i++) - { - COSBase item = get(i); - if (item == null) - { - if (object == null) - { - return i; - } - } - else if (item.equals(object)) - { - return i; - } - } - return -1; - } - - /** - * This will return the index of the entry or -1 if it is not found. - * This method will also find references to indirect objects. - * - * @param object The object to search for. - * @return The index of the object or -1. 
- */ - public int indexOfObject(COSBase object) - { - for (int i = 0; i < this.size(); i++) - { - COSBase item = this.get(i); - if (item == null) - { - if (item == object) - { - return i; - } - } - else if (item.equals(object)) - { - return i; - } - else if (item instanceof COSObject && ((COSObject) item).getObject() != null && - ((COSObject) item).getObject().equals(object)) - { - return i; - } - } - return -1; - } - - /** - * This will add null values until the size of the array is at least - * as large as the parameter. If the array is already larger than the - * parameter then nothing is done. - * - * @param size The desired size of the array. - */ - public void growToSize( int size ) - { - growToSize( size, null ); - } - - /** - * This will add the object until the size of the array is at least - * as large as the parameter. If the array is already larger than the - * parameter then nothing is done. - * - * @param size The desired size of the array. - * @param object The object to fill the array with. - */ - public void growToSize( int size, COSBase object ) - { - objects.ensureCapacity(size); - while( size() < size ) - { - add( object ); - getUpdateState().update(object); - } - getUpdateState().update(); - } - - /** - * Visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromArray(this); - } - - /** - * This will take an COSArray of numbers and convert it to a float[]. - * - * @return This COSArray as an array of float numbers. - */ - public float[] toFloatArray() - { - float[] retval = new float[size()]; - for (int i = 0; i < retval.length; i++) - { - COSBase base = getObject(i); - retval[i] = base instanceof COSNumber ? 
((COSNumber) base).floatValue() : 0; - } - return retval; - } - - /** - * Clear the current contents of the COSArray and set it with the float[]. - * - * @param value The new value of the float array. - */ - public void setFloatArray( float[] value ) - { - this.clear(); - for (float aValue : value) - { - add(new COSFloat(aValue)); - } - } - - /** - * Return contents of COSArray as a Java List. - * - * @return the COSArray as List - */ - public List toList() - { - return new ArrayList<>(objects); - } - - /** - * This will return a list of names if the COSArray consists of COSNames only. - * - * @return the list of names of the COSArray of COSNames - */ - public List toCOSNameStringList() - { - return objects.stream() // - .map(o -> ((COSName) o).getName()) // - .collect(Collectors.toList()); - } - - /** - * This will return a list of names if the COSArray consists of COSStrings only. - * - * @return the list of names of the COSArray of COSStrings - */ - public List toCOSStringStringList() - { - return objects.stream() // - .map(o -> ((COSString) o).getString()) // - .collect(Collectors.toList()); - } - - /** - * This will return a list of float values if the COSArray consists of COSNumbers only. - * - * @return the list of float values of the COSArray of COSNumbers - */ - public List toCOSNumberFloatList() - { - List numbers = new ArrayList<>(size()); - for (int i = 0; i < size(); i++) - { - COSBase num = getObject(i); - if (num instanceof COSNumber) - { - numbers.add(((COSNumber) num).floatValue()); - } - else - { - numbers.add(null); - } - } - return numbers; - } - - /** - * This will return a list of int values if the COSArray consists of COSNumbers only. 
- * - * @return the list of int values of the COSArray of COSNumbers - */ - public List toCOSNumberIntegerList() - { - List numbers = new ArrayList<>(size()); - for (int i = 0; i < size(); i++) - { - COSBase num = getObject(i); - if (num instanceof COSNumber) - { - numbers.add(((COSNumber) num).intValue()); - } - else - { - numbers.add(null); - } - } - return numbers; - } - - /** - * This will take a list of integer objects and return a COSArray of COSInteger objects. - * - * @param integer A list of integers - * - * @return An array of COSInteger objects - */ - public static COSArray ofCOSIntegers(List integer) - { - COSArray retval = new COSArray(); - integer.forEach(s -> retval.add(COSInteger.get(s.longValue()))); - return retval; - } - - /** - * This will take a list of string objects and return a COSArray of COSName objects. - * - * @param strings A list of strings - * - * @return An array of COSName objects - */ - public static COSArray ofCOSNames(List strings) - { - COSArray retval = new COSArray(); - strings.forEach(s -> retval.add(COSName.getPDFName(s))); - return retval; - } - - /** - * This will take a list of string objects and return a COSArray of COSName objects. - * - * @param strings A list of strings - * - * @return An array of COSName objects - */ - public static COSArray ofCOSStrings(List strings) - { - COSArray retval = new COSArray(); - strings.forEach(s -> retval.add(new COSString(s))); - return retval; - } - - /** - * Returns the current {@link COSUpdateState} of this {@link COSArray}. - * - * @return The current {@link COSUpdateState} of this {@link COSArray}. - * @see COSUpdateState - */ - @Override - public COSUpdateState getUpdateState() - { - return updateState; - } - - /** - * Collects all indirect objects numbers within this COSArray and all included dictionaries. It is used to avoid - * mixed up object numbers when importing an existing page to another pdf. - * - * Expert use only. 
You might run into an endless recursion if choosing a wrong starting point. - * - * @param indirectObjects a collection of already found indirect objects. - * - */ - public void getIndirectObjectKeys(Collection indirectObjects) - { - if (indirectObjects == null) - { - return; - } - COSObjectKey key = getKey(); - if (key != null) - { - // avoid endless recursions - if (indirectObjects.contains(key)) - { - return; - } - else - { - indirectObjects.add(key); - } - } - - for (COSBase cosBase : objects) - { - if (cosBase == null) - { - continue; - } - COSObjectKey cosBaseKey = cosBase.getKey(); - if (cosBaseKey != null && indirectObjects.contains(cosBaseKey)) - { - continue; - } - if (cosBase instanceof COSObject) - { - // dereference object - cosBase = ((COSObject) cosBase).getObject(); - } - if (cosBase instanceof COSDictionary) - { - // descend to included dictionary to collect all included indirect objects - ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects); - } - else if (cosBase instanceof COSArray) - { - // descend to included array to collect all included indirect objects - ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects); - } - else if (cosBaseKey != null) - { - // add key for all indirect objects other than COSDictionary/COSArray - indirectObjects.add(cosBaseKey); - } - } - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java deleted file mode 100644 index 86dedf4839e..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSArrayList.java +++ /dev/null @@ -1,587 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; - -/** - * This is an implementation of a List that will sync its contents to a COSArray. - * - * @author Ben Litchfield - * @param Element type. - */ -public class COSArrayList implements List -{ - private final COSArray array; - private final List actual; - - // indicates that the list has been filtered - // i.e. the number of entries in array and actual differ - private boolean isFiltered = false; - - private COSDictionary parentDict; - private COSName dictKey; - - /** - * Default constructor. - */ - public COSArrayList() - { - array = new COSArray(); - actual = new ArrayList<>(); - } - - /** - * Create the COSArrayList specifying the List and the backing COSArray. - * - *

User of this constructor need to ensure that the entries in the List and - * the backing COSArray are matching i.e. the COSObject of the List entry is - * included in the COSArray. - * - *

If the number of entries in the List and the COSArray differ - * it is assumed that the List has been filtered. In that case the COSArrayList - * shall only be used for reading purposes and no longer for updating. - * - * @param actualList The list of standard java objects - * @param cosArray The COS array object to sync to. - */ - public COSArrayList( List actualList, COSArray cosArray ) - { - actual = actualList; - array = cosArray; - - // if the number of entries differs this may come from a filter being - // applied at the PDModel level - if (actual.size() != array.size()) { - isFiltered = true; - } - } - - /** - * This constructor is to be used if the array doesn't exist, but is to be created and added to - * the parent dictionary as soon as the first element is added to the array. - * - * @param dictionary The dictionary that holds the item, and will hold the array if an item is - * added. - * @param dictionaryKey The key into the dictionary to set the item. - */ - public COSArrayList(COSDictionary dictionary, COSName dictionaryKey) - { - array = new COSArray(); - actual = new ArrayList<>(); - parentDict = dictionary; - dictKey = dictionaryKey; - } - - /** - * This is a really special constructor. Sometimes the PDF spec says - * that a dictionary entry can either be a single item or an array of those - * items. But in the PDModel interface we really just want to always return - * a java.util.List. In the case were we get the list and never modify it - * we don't want to convert to COSArray and put one element, unless we append - * to the list. So here we are going to create this object with a single - * item instead of a list, but allow more items to be added and then converted - * to an array. - * - * @param actualObject The PDModel object. - * @param item The COS Model object. - * @param dictionary The dictionary that holds the item, and will hold the array if an item is added. - * @param dictionaryKey The key into the dictionary to set the item. 
- */ - public COSArrayList( E actualObject, COSBase item, COSDictionary dictionary, COSName dictionaryKey ) - { - array = new COSArray(); - array.add( item ); - actual = new ArrayList<>(); - actual.add( actualObject ); - - parentDict = dictionary; - dictKey = dictionaryKey; - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return actual.size(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return actual.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(Object o) - { - return actual.contains(o); - } - - /** - * {@inheritDoc} - */ - @Override - public Iterator iterator() - { - return actual.iterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public Object[] toArray() - { - return actual.toArray(); - } - - /** - * {@inheritDoc} - */ - @Override - public X[] toArray(X[] a) - { - return actual.toArray(a); - - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(E o) - { - //when adding if there is a parentDict then change the item - //in the dictionary from a single item to an array. - if( parentDict != null ) - { - parentDict.setItem( dictKey, array ); - //clear the parent dict so it doesn't happen again, there might be - //a usecase for keeping the parentDict around but not now. 
- parentDict = null; - } - //string is a special case because we can't subclass to be COSObjectGetter - if( o instanceof String ) - { - array.add( new COSString( (String)o ) ); - } - else - { - if(array != null) - { - array.add(((COSObjectGetter)o).getCOSObject()); - } - } - return actual.add(o); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(Object o) - { - - if (isFiltered) { - throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); - } - - boolean retval = true; - int index = actual.indexOf( o ); - if( index >= 0 ) - { - actual.remove( index ); - array.remove( index ); - } - else - { - retval = false; - } - return retval; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll( Collection c) - { - return actual.containsAll( c ); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(Collection c) - { - if (isFiltered) { - throw new UnsupportedOperationException("Adding to a filtered List is not permitted"); - } - - //when adding if there is a parentDict then change the item - //in the dictionary from a single item to an array. - if( parentDict != null && !c.isEmpty()) - { - parentDict.setItem( dictKey, array ); - //clear the parent dict so it doesn't happen again, there might be - //a usecase for keeping the parentDict around but not now. - parentDict = null; - } - array.addAll( toCOSObjectList( c ) ); - return actual.addAll( c ); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(int index, Collection c) - { - if (isFiltered) { - throw new UnsupportedOperationException("Inserting to a filtered List is not permitted"); - } - - //when adding if there is a parentDict then change the item - //in the dictionary from a single item to an array. 
- if( parentDict != null && !c.isEmpty()) - { - parentDict.setItem( dictKey, array ); - //clear the parent dict so it doesn't happen again, there might be - //a usecase for keeping the parentDict around but not now. - parentDict = null; - } - - array.addAll( index, toCOSObjectList( c ) ); - return actual.addAll( index, c ); - } - - /** - * This will convert a list of COSObjectGetters to an array list of COSBase objects. - * - * @param COSObjectGetterList A list of COSObjectGetter. - * - * @return A list of COSBase. - * @throws IllegalArgumentException if an object type is not supported for conversion to a - * COSBase object. - */ - public static COSArray converterToCOSArray(List COSObjectGetterList) - { - COSArray array = null; - if( COSObjectGetterList != null ) - { - if( COSObjectGetterList instanceof COSArrayList ) - { - //if it is already a COSArrayList then we don't want to recreate the array, we want to reuse it. - array = ((COSArrayList)COSObjectGetterList).array; - } - else - { - array = new COSArray(); - for (Object next : COSObjectGetterList) - { - if( next instanceof String ) - { - array.add( new COSString( (String)next ) ); - } - else if( next instanceof Integer || next instanceof Long ) - { - array.add( COSInteger.get( ((Number)next).longValue() ) ); - } - else if( next instanceof Float || next instanceof Double ) - { - array.add( new COSFloat( ((Number)next).floatValue() ) ); - } - else if( next instanceof COSObjectGetter) - { - COSObjectGetter object = (COSObjectGetter)next; - array.add( object.getCOSObject() ); - } - else if( next == null ) - { - array.add( COSNull.NULL ); - } - else - { - throw new IllegalArgumentException( "Error: Don't know how to convert type to COSBase '" + - next.getClass().getName() + "'" ); - } - } - } - } - return array; - } - - private List toCOSObjectList( Collection list ) - { - List cosObjects = new ArrayList<>(list.size()); - list.forEach(next -> - { - if( next instanceof String ) - { - cosObjects.add( new COSString( 
(String)next ) ); - } - else - { - COSObjectGetter cos = (COSObjectGetter)next; - cosObjects.add( cos.getCOSObject() ); - } - }); - return cosObjects; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(Collection c) - { - c.forEach(item -> { - COSBase itemCOSBase = ((COSObjectGetter)item).getCOSObject(); - // remove all indirect objects too by dereferencing them - // before doing the comparison - for (int i=array.size()-1; i>=0; i--) - { - if (itemCOSBase.equals(array.getObject(i))) - { - array.remove(i); - } - } - }); - - return actual.removeAll( c ); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(Collection c) - { - c.forEach(item -> { - COSBase itemCOSBase = ((COSObjectGetter)item).getCOSObject(); - // remove all indirect objects too by dereferencing them - // before doing the comparison - for (int i=array.size()-1; i>=0; i--) - { - if (!itemCOSBase.equals(array.getObject(i))) - { - array.remove(i); - } - } - }); - - return actual.retainAll( c ); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - //when adding if there is a parentDict then change the item - //in the dictionary from a single item to an array. 
- if( parentDict != null ) - { - parentDict.setItem( dictKey, null ); - } - actual.clear(); - array.clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object o) - { - return actual.equals( o ); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return actual.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public E get(int index) - { - return actual.get( index ); - - } - - /** - * {@inheritDoc} - */ - @Override - public E set(int index, E element) - { - if (isFiltered) { - throw new UnsupportedOperationException("Replacing an element in a filtered List is not permitted"); - } - - if( element instanceof String ) - { - COSString item = new COSString( (String)element ); - if( parentDict != null && index == 0 ) - { - parentDict.setItem( dictKey, item ); - } - array.set( index, item ); - } - else - { - if( parentDict != null && index == 0 ) - { - parentDict.setItem( dictKey, ((COSObjectGetter)element).getCOSObject() ); - } - array.set( index, ((COSObjectGetter)element).getCOSObject() ); - } - return actual.set( index, element ); - } - - /** - * {@inheritDoc} - */ - @Override - public void add(int index, E element) - { - if (isFiltered) { - throw new UnsupportedOperationException("Adding an element in a filtered List is not permitted"); - } - - //when adding if there is a parentDict then change the item - //in the dictionary from a single item to an array. - if( parentDict != null ) - { - parentDict.setItem( dictKey, array ); - //clear the parent dict so it doesn't happen again, there might be - //a usecase for keeping the parentDict around but not now. 
- parentDict = null; - } - actual.add( index, element ); - if( element instanceof String ) - { - array.add( index, new COSString( (String)element ) ); - } - else - { - array.add( index, ((COSObjectGetter)element).getCOSObject() ); - } - } - - /** - * {@inheritDoc} - */ - @Override - public E remove(int index) - { - if (isFiltered) { - throw new UnsupportedOperationException("removing entries from a filtered List is not permitted"); - } - - array.remove( index ); - return actual.remove( index ); - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(Object o) - { - return actual.indexOf( o ); - } - - /** - * {@inheritDoc} - */ - @Override - public int lastIndexOf(Object o) - { - return actual.lastIndexOf( o ); - - } - - /** - * {@inheritDoc} - */ - @Override - public ListIterator listIterator() - { - return actual.listIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public ListIterator listIterator(int index) - { - return actual.listIterator( index ); - } - - /** - * {@inheritDoc} - */ - @Override - public List subList(int fromIndex, int toIndex) - { - return actual.subList( fromIndex, toIndex ); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSArrayList{" + array.toString() + "}"; - } - - /** - * This will return then underlying COSArray. - * - * @return the COSArray - */ - public COSArray toList() - { - return array; - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java deleted file mode 100644 index 6ce7e4b8e8e..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBase.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * The base object that all objects in the PDF document will extend. - * - * @author Ben Litchfield - */ -public abstract class COSBase implements COSObjectGetter -{ - protected boolean direct; // probably unnecessary - private COSObjectKey key; - - /** - * Constructor. - */ - public COSBase() - { - } - - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. - */ - @Override - public COSBase getCOSObject() - { - return this; - } - - /** - * visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - public abstract void accept(ICOSVisitor visitor) throws IOException; - - /** - * If the state is set true, the dictionary will be written direct into the called object. - * This means, no indirect object will be created. - * - * @return the state - */ - public boolean isDirect() - { - return direct; - } - - /** - * Set the state true, if the dictionary should be written as a direct object and not indirect. - * - * @param direct set it true, for writing direct object - */ - public void setDirect(boolean direct) - { - this.direct = direct; - } - - /** - * This will return the COSObjectKey of an indirect object. 
- * - * @return the COSObjectKey - */ - public COSObjectKey getKey() - { - return key; - } - - /** - * Set the COSObjectKey of an indirect object. - * - * @param key the COSObjectKey of the indirect object - */ - public void setKey(COSObjectKey key) - { - this.key = key; - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java deleted file mode 100644 index 2e7dbd7fb3a..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSBoolean.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * This class represents a boolean value in the PDF document. - * - * @author Ben Litchfield - */ -public final class COSBoolean extends COSBase -{ - /** - * The true boolean token. - */ - public static final byte[] TRUE_BYTES = { 116, 114, 117, 101 }; // "true".getBytes("ISO-8859-1") - /** - * The false boolean token. - */ - public static final byte[] FALSE_BYTES = { 102, 97, 108, 115, 101 }; // "false".getBytes("ISO-8859-1") - - /** - * The PDF true value. 
- */ - public static final COSBoolean TRUE = new COSBoolean( true ); - - /** - * The PDF false value. - */ - public static final COSBoolean FALSE = new COSBoolean( false ); - - private final boolean value; - - /** - * Constructor. - * - * @param aValue The boolean value. - */ - private COSBoolean(boolean aValue) - { - value = aValue; - } - - /** - * This will get the value that this object wraps. - * - * @return The boolean value of this object. - */ - public boolean getValue() - { - return value; - } - - /** - * This will get the value that this object wraps. - * - * @return The boolean value of this object. - */ - public Boolean getValueAsObject() - { - return value ? Boolean.TRUE : Boolean.FALSE; - } - - /** - * This will get the boolean value. - * - * @param value Parameter telling which boolean value to get. - * - * @return The single boolean instance that matches the parameter. - */ - public static COSBoolean getBoolean( boolean value ) - { - return value ? TRUE : FALSE; - } - - /** - * This will get the boolean value. - * - * @param value Parameter telling which boolean value to get. - * - * @return The single boolean instance that matches the parameter. - */ - public static COSBoolean getBoolean( Boolean value ) - { - return getBoolean( value.booleanValue() ); - } - - /** - * visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromBoolean(this); - } - - /** - * Return a string representation of this object. - * - * @return The string value of this object. - */ - @Override - public String toString() - { - return String.valueOf( value ); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() { - //taken from java.lang.Boolean - return value ? 
1231 : 1237; - } - - /** - * {@inheritDoc} - */ - public boolean equals(Object obj) - { - return this == obj; // this is correct because there are only two COSBoolean objects. - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java deleted file mode 100644 index b259486d473..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionary.java +++ /dev/null @@ -1,1561 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import java.io.InputStream; -import java.util.ArrayList; -// import java.util.Arrays; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.util.DateConverter; - -import java.io.IOException; -// import java.io.InputStream; -import java.util.function.BiConsumer; - -import static org.apache.pdfbox.cos.COSName.getPDFName; - -/** - * This class represents a dictionary where name/value pairs reside. - * - * @author Ben Litchfield - * - */ -public class COSDictionary extends COSBase implements COSUpdateInfo -{ - - /** - * Log instance. - */ - private static final Logger LOG = LogManager.getLogger(COSDictionary.class); - - private static final String PATH_SEPARATOR = "/"; - - /** - * The name-value pairs of this dictionary. The pairs are kept in the order they were added to the dictionary. - */ - protected Map items = new LinkedHashMap<>(); - private final COSUpdateState updateState; - - /** - * Constructor. - */ - public COSDictionary() - { - updateState = new COSUpdateState(this); - } - - /** - * Copy Constructor. This will make a shallow copy of this dictionary. - * - * @param dict The dictionary to copy. - */ - public COSDictionary(COSDictionary dict) - { - updateState = new COSUpdateState(this); - addAll(dict); - } - - /** - * @see Map#containsValue(Object) - * - * @param value The value to find in the map. - * - * @return true if the map contains this value. 
- */ - public boolean containsValue(Object value) - { - boolean contains = items.containsValue(value); - if (!contains && value instanceof COSObject) - { - contains = items.containsValue(((COSObject) value).getObject()); - } - return contains; - } - - /** - * Search in the map for the value that matches the parameter and - * return the first key that maps to that value. - * - * @param value The value to search for in the map. - * @return The key for the value in the map or null if it does not exist. - */ - public COSName getKeyForValue( Object value ) - { - for (Entry entry : items.entrySet()) - { - Object nextValue = entry.getValue(); - if (nextValue.equals(value) - || (nextValue instanceof COSObject && ((COSObject) nextValue).getObject() - .equals(value))) - { - return entry.getKey(); - } - } - return null; - } - - /** - * This will return the number of elements in this dictionary. - * - * @return The number of elements in the dictionary. - */ - public int size() - { - return items.size(); - } - - /** - * This will clear all items in the map. - */ - public void clear() - { - items.clear(); - getUpdateState().update(); - } - - /** - * This will get an object from this dictionary based on its key. If the - * object is a proxy object ({@link COSObject} then it will dereference - * it and get it from the document. If the object is COSNull then - * null will be returned. - * - * @param key The key to the object that we are getting. - * - * @return The object that matches the key. - */ - public COSBase getObjectFromDictionary( String key) - { - return getObjectFromDictionary( getPDFName( key)); - } - - /** - * This is a special case of getDictionaryObject that takes multiple keys. - * It will handle the situation where multiple keys could get the same value, - * i.e. if either CS or ColorSpace is used to get the colorspace. This will - * get an object from this dictionary. 
If the object is a proxy object then - * it will dereference it and get the underlying concrete object. - * If the object is COSNull then null will be returned. - * - * @param firstKey The first key to try. - * @param secondKey The second key to try. - * - * @return The object that matches the key. - */ - public COSBase getAlternateObjectFromDictionary( COSName firstKey, COSName secondKey) - { - COSBase retval = getObjectFromDictionary( firstKey); - if (retval == null && secondKey != null) - { - retval = getObjectFromDictionary( secondKey); - } - return retval; - } - - /** - * This will get an object from this dictionary. If the object is a reference - * then it will dereference it and return the concrete object. If the object - * is COSNull then null will be returned. - * - * @param key The key to the object that we are getting. - * - * @return The object that matches the key. - */ - public COSBase getObjectFromDictionary( COSName key) - { - COSBase retval = items.get(key); - if (retval instanceof COSObject) - { - retval = ((COSObject) retval).getObject(); - } - else if (retval instanceof COSNull) - { - retval = null; - } - return retval; - } - - /** - * This will set an item in this dictionary. If value is null then the result - * will be the same as removeItem( key ). - * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setItem(COSName key, COSBase value) - { - if (value == null) - { - removeItem(key); - } - else - { - if ((value instanceof COSDictionary || value instanceof COSArray) && !value.isDirect() - && value.getKey() != null) - { - COSObject cosObject = new COSObject(value, value.getKey()); - items.put(key, cosObject); - getUpdateState().update(cosObject); - } - else - { - items.put(key, value); - getUpdateState().update(value); - } - } - } - - /** - * This will set an item in this dictionary. If value is null - * then the result will be the same as removeItem( key ). 
- * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setItem(COSName key, COSObjectGetter value) - { - COSBase base = null; - if (value != null) - { - base = value.getCOSObject(); - } - setItem(key, base); - } - - /** - * This will set an item in this dictionary. If value is null then the - * result will be the same as removeItem( key ). - * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setItem(String key, COSObjectGetter value) - { - setItem( getPDFName(key), value); - } - - /** - * This will set an item in this dictionary. - * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setBoolean(String key, boolean value) - { - setItem( getPDFName(key), COSBoolean.getBoolean(value)); - } - - /** - * This will set an item in this dictionary. - * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setBoolean(COSName key, boolean value) - { - setItem(key, COSBoolean.getBoolean(value)); - } - - /** - * This will set an item in the dictionary. If value is null then the result will be the same as removeItem( key ). - * - * @param key The key to the dictionary object. - * @param value The value to the dictionary object. - */ - public void setItem( String key, COSBase value ) - { - setItem( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSName - * object. If it is null then the object will be removed. - * - * @param key The key to the object, - * @param value The string value for the name. - */ - public void setName(String key, String value) - { - setName( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSName object. - * If it is null then the object will be removed. 
- * - * @param key The key to the object, - * @param value The string value for the name. - */ - public void setName(COSName key, String value) - { - COSName name = null; - if (value != null) - { - name = getPDFName(value); - } - setItem(key, name); - } - - /** - * Set the value of a date entry in this dictionary. - * - * @param key The key to the date value. - * @param date The date value. - */ - public void setDate(String key, Calendar date) - { - setDate( getPDFName(key), date); - } - - /** - * Set the date object. - * - * @param key The key to the date. - * @param date The date to set. - */ - public void setDate(COSName key, Calendar date) - { - setString(key, DateConverter.toString(date)); - } - - /** - * Set the date object. - * - * @param embedded The embedded dictionary. - * @param key The key to the date. - * @param date The date to set. - */ - public void setEmbeddedDate(COSName embedded, COSName key, Calendar date) - { - COSDictionary dic = getCOSDictionary(embedded); - if (dic == null && date != null) - { - dic = new COSDictionary(); - setItem(embedded, dic); - } - if (dic != null) - { - dic.setDate(key, date); - } - } - - /** - * This is a convenience method that will convert the value to a COSString object. If it is null then the object - * will be removed. - * - * @param key The key to the object, - * @param value The string value for the name. - */ - public void setString(String key, String value) - { - setString( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSString object. If it is null then the object - * will be removed. - * - * @param key The key to the object, - * @param value The string value for the name. - */ - public void setString(COSName key, String value) - { - COSString name = null; - if (value != null) - { - name = new COSString(value); - } - setItem(key, name); - } - - /** - * This is a convenience method that will convert the value to a COSString object. 
If it is null then the object - * will be removed. - * - * @param embedded The embedded dictionary to set the item in. - * @param key The key to the object, - * @param value The string value for the name. - */ - public void setEmbeddedString(COSName embedded, COSName key, String value) - { - COSDictionary dic = getCOSDictionary(embedded); - if (dic == null && value != null) - { - dic = new COSDictionary(); - setItem(embedded, dic); - } - if (dic != null) - { - dic.setString(key, value); - } - } - - /** - * This is a convenience method that will convert the value to a COSInteger object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setInt(String key, int value) - { - setInt( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSInteger object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setInt(COSName key, int value) - { - setItem(key, COSInteger.get(value)); - } - - /** - * This is a convenience method that will convert the value to a COSInteger object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setLong(String key, long value) - { - setLong( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSInteger object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setLong(COSName key, long value) - { - COSInteger intVal = COSInteger.get(value); - setItem(key, intVal); - } - - /** - * This is a convenience method that will convert the value to a COSInteger object. - * - * @param embeddedDictionary The embedded dictionary. - * @param key The key to the object, - * @param value The int value for the name. 
- */ - public void setEmbeddedInt(COSName embeddedDictionary, COSName key, int value) - { - COSDictionary embedded = getCOSDictionary(embeddedDictionary); - if (embedded == null) - { - embedded = new COSDictionary(); - setItem(embeddedDictionary, embedded); - } - embedded.setInt(key, value); - } - - /** - * This is a convenience method that will convert the value to a COSFloat object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setFloat(String key, float value) - { - setFloat( getPDFName(key), value); - } - - /** - * This is a convenience method that will convert the value to a COSFloat object. - * - * @param key The key to the object, - * @param value The int value for the name. - */ - public void setFloat(COSName key, float value) - { - COSFloat fltVal = new COSFloat(value); - setItem(key, fltVal); - } - - /** - * Sets the given boolean value at bitPos in the flags. - * - * @param field The COSName of the field to set the value into. - * @param bitFlag the bit position to set the value in. - * @param value the value the bit position should have. - */ - public void setFlag(COSName field, int bitFlag, boolean value) - { - int currentFlags = getInt(field, 0); - if (value) - { - currentFlags = currentFlags | bitFlag; - } - else - { - currentFlags &= ~bitFlag; - } - setInt(field, currentFlags); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a COSName. Null is returned if the entry does not - * exist in the dictionary, or if the referenced object is not a COSName. - * - * @param key The key to the item in the dictionary. - * @return The COS name. 
- */ - public COSName getCOSName(COSName key) - { - COSBase name = getObjectFromDictionary( key); - if (name instanceof COSName) - { - return (COSName) name; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a proxy object ({@link COSObject}. Null is returned - * if the entry does not exist in the dictionary or if the referenced object - * is not a COSObject. - * - * @param key The key to the item in the dictionary. - * @return The COSObject. - */ - public COSObject getCOSObject(COSName key) - { - COSBase object = getItem(key); - if (object instanceof COSObject) - { - return (COSObject) object; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a COSDictionary. Null is returned if the entry - * does not exist in the dictionary or if the referenced object is not a - * COSDictionary. - * - * @param key The key to the item in the dictionary. - * @return The COSDictionary. - */ - public COSDictionary getCOSDictionary(COSName key) - { - COSBase dictionary = getObjectFromDictionary( key); - if (dictionary instanceof COSDictionary) - { - return (COSDictionary) dictionary; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a COSDictionary. If the object is not stored under - * the first key name, an entry using the second key name will be sought. - * Null is returned if neither entry exists in the dictionary, or if the - * entry found is not an instance of COSDictionary. - * - * @param firstKey The first key to the item in the dictionary. - * @param secondKey The second key to the item in the dictionary. - * @return The COSDictionary. 
- */ - public COSDictionary getAlternateCOSDictionary(COSName firstKey, COSName secondKey) - { - COSBase dictionary = getAlternateObjectFromDictionary( firstKey, secondKey ); - if (dictionary instanceof COSDictionary) - { - return (COSDictionary) dictionary; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a {@link COSStream}. Null is returned if the entry - * does not exist in the dictionary or if the object found is not an - * instance of COSStream. - * - * @param key The key to the item in the dictionary. - * @return The COSStream. - */ - public COSStream getCOSStream(COSName key) - { - COSBase base = getObjectFromDictionary( key); - if (base instanceof COSStream) - { - return (COSStream) base; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a {@link COSArray}. Null is returned if the entry - * does not exist in the dictionary or if the object found is not an - * instance of COSArray. - * - * @param key The key to the item in the dictionary. - * @return The COSArray. - */ - public COSArray getCOSArray(COSName key) - { - COSBase array = getObjectFromDictionary( key); - if (array instanceof COSArray) - { - return (COSArray) array; - } - return null; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a {@link COSName}. The default name is - * returned if the entry does not exist in the dictionary. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The COS name. 
- */ - public COSName getCOSName(COSName key, COSName defaultValue) - { - COSBase name = getObjectFromDictionary( key); - if (name instanceof COSName) - { - return (COSName) name; - } - return defaultValue; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a {@link COSName} and convert it to a Java string. - * Null is returned if the entry does not exist in the dictionary or if the - * object is neither a COSName nor a COSString. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public String getNameAsString(String key) - { - return getNameAsString( getPDFName(key)); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a name and convert it to a Java string. - * Null is returned if the entry does not exist in the dictionary or if the - * object is neither a COSName nor a COSString. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public String getNameAsString( COSName key ) - { - String retval = null; - COSBase name = getObjectFromDictionary( key); - if (name instanceof COSName) - { - retval = ((COSName) name).getName(); - } - else if (name instanceof COSString) - { - retval = ((COSString) name).getString(); - } - return retval; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a name and convert it to a Java string. - * Null is returned if the entry does not exist in the dictionary or if the - * object is neither a COSName nor a COSString. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The name converted to a string. 
- */ - public String getNameAsString( String key, String defaultValue) - { - return getNameAsString( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a name and convert it to a Java string. - * Null is returned if the entry does not exist in the dictionary or if the - * object is neither a COSName nor a COSString. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The name converted to a string. - */ - public String getNameAsString( COSName key, String defaultValue ) - { - String retval = getNameAsString(key); - if (retval == null) - { - retval = defaultValue; - } - return retval; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be converted to a Java string. Null is returned if - * the entry does not exist in the dictionary. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public String getString(String key) - { - return getString( getPDFName(key)); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a {@link COSString}. Null is returned if the entry - * does not exist in the dictionary or if it is not an instance of COSString. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public String getString(COSName key) - { - String retval = null; - COSBase value = getObjectFromDictionary( key); - if (value instanceof COSString) - { - retval = ((COSString) value).getString(); - } - return retval; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be converted to a Java string. If the entry does not - * exist in the dictionary the supplied default value will be returned. 
- * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a string. - */ - public String getString(String key, String defaultValue) - { - return getString( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be converted to a Java string. If the entry does not - * exist in the dictionary the supplied default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a string. - */ - public String getString(COSName key, String defaultValue) - { - String retval = getString(key); - if (retval == null) - { - retval = defaultValue; - } - return retval; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a name and convert it to a Java string. Null is - * returned if the entry does not exist in the dictionary. - * - * @param embedded The name of the embedded dictionary. - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public String getEmbeddedString(COSName embedded, COSName key) - { - return getEmbeddedString(embedded, key, null); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a name and convert it to a Java string. If the entry - * does not exist in the dictionary the supplied default value will be returned. - * - * @param embedded The name of the embedded dictionary. - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return. - * @return The name converted to a string. - */ - public String getEmbeddedString( COSName embedded, COSName key, String defaultValue ) - { - COSDictionary eDic = getCOSDictionary(embedded); - return eDic != null ? 
eDic.getString(key, defaultValue) : defaultValue; - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from this dictionary, which is expected to be a date expression, and - * convert it to a Java {@link Calendar}. Null is returned if the entry - * does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a date. - */ - public Calendar getDate(String key) - { - return getDate( getPDFName(key)); - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from this dictionary, which is expected to be a date expression, and - * convert it to a Java {@link Calendar}. Null is returned if the entry - * does not exist in the dictionary or if the date was invalid. - * - * @param key The key to the item in the dictionary. - * @return The name converted to a date. - */ - public Calendar getDate(COSName key) - { - COSBase base = getObjectFromDictionary( key); - if (base instanceof COSString) - { - return DateConverter.toCalendar((COSString) base); - } - return null; - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from this dictionary, which is expected to be a date expression, and - * convert it to a Java {@link Calendar}. If the entry does not exist in - * the dictionary or if the date is invalid the provided default value - * will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return if the entry does not exist in the dictionary or if the date was invalid. - * @return The name converted to a date. 
- */ - public Calendar getDate(String key, Calendar defaultValue) - { - return getDate( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from this dictionary, which is expected to be a date expression, and - * convert it to a Java {@link Calendar}. If the entry does not exist in - * the dictionary or if the date is invalid the provided default value - * will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The default value to return if the entry does not exist in the dictionary or if the date was invalid. - * @return The name converted to a date. - */ - public Calendar getDate(COSName key, Calendar defaultValue) - { - Calendar retval = getDate(key); - if (retval == null) - { - retval = defaultValue; - } - return retval; - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from an embedded dictionary, which is expected to be a date expression, - * and convert it to a Java {@link Calendar}. Null is returned if the entry - * does not exist in the dictionary or if the date was invalid. - * - * @param embedded The name of the embedded dictionary to use. - * @param key The key to the item in the dictionary. - * @return The name converted to a string. - */ - public Calendar getEmbeddedDate( COSName embedded, COSName key ) - { - return getEmbeddedDate(embedded, key, null); - } - - /** - * This is a convenience method that will get a {@link COSString} object - * from an embedded dictionary, which is expected to be a date expression, - * and convert it to a Java {@link Calendar} If the entry does not exist in - * the embedded dictionary or if the date is invalid the provided default - * value will be returned. - * - * @param embedded The embedded dictionary to get. - * @param key The key to the item in the dictionary. 
- * @param defaultValue The default value to return if the entry does not exist in the dictionary or if the date was - * invalid. - * @return The name converted to a string. - */ - public Calendar getEmbeddedDate(COSName embedded, COSName key, Calendar defaultValue) - { - COSDictionary eDic = getCOSDictionary(embedded); - return eDic != null ? eDic.getDate(key, defaultValue) : defaultValue; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a cos boolean and convert - * it to a primitive boolean. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value returned if the entry is null. - * - * @return The value converted to a boolean. - */ - public boolean getBoolean(String key, boolean defaultValue) - { - return getBoolean( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a COSBoolean and convert it to a primitive boolean. - * If the entry does not exist, or if it is not a valid boolean value, the - * provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value returned if the entry is null. - * - * @return The entry converted to a boolean. - */ - public boolean getBoolean(COSName key, boolean defaultValue) - { - return getBoolean(key, null, defaultValue); - } - - /** - * This is a convenience method that will an object from this dictionary - * that is expected to be a COSBoolean and convert it to a primitive boolean. - * If the entry does not exist, or if it is not a valid boolean value, the - * provided default value will be returned. - * - * @param firstKey The first key to the item in the dictionary. - * @param secondKey The second key to the item in the dictionary. - * @param defaultValue The value returned if the entry is null. - * - * @return The entry converted to a boolean. 
- */ - public boolean getBoolean(COSName firstKey, COSName secondKey, boolean defaultValue) - { - boolean retval = defaultValue; - COSBase bool = getAlternateObjectFromDictionary( firstKey, secondKey); - if (bool instanceof COSBoolean) - { - retval = bool == COSBoolean.TRUE; - } - return retval; - } - - /** - * Get an integer from an embedded dictionary. Useful for 1-1 mappings. default:-1 - * - * @param embeddedDictionary The name of the embedded dictionary. - * @param key The key in the embedded dictionary. - * - * @return The value of the embedded integer. - */ - public int getEmbeddedInt( COSName embeddedDictionary, COSName key ) - { - return getEmbeddedInt(embeddedDictionary, key, -1); - } - - /** - * Get an integer from an embedded dictionary. Useful for 1-1 mappings. - * - * @param embeddedDictionary The name of the embedded dictionary. - * @param key The key in the embedded dictionary. - * @param defaultValue The value if there is no embedded dictionary or it does not contain the key. - * - * @return The value of the embedded integer. - */ - public int getEmbeddedInt( COSName embeddedDictionary, COSName key, int defaultValue ) - { - COSDictionary embedded = getCOSDictionary(embeddedDictionary); - return embedded != null ? embedded.getInt(key, defaultValue) : defaultValue; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an int. -1 is returned if - * there is no value. - * - * @param key The key to the item in the dictionary. - * @return The integer value. - */ - public int getInt( String key ) - { - return getInt( getPDFName(key), -1); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an int. -1 is returned if - * there is no value. - * - * @param key The key to the item in the dictionary. - * @return The integer value.. 
- */ - public int getInt( COSName key ) - { - return getInt(key, -1); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an integer. If the dictionary value is null then - * the provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The integer value. - */ - public int getInt(String key, int defaultValue) - { - return getInt( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an integer. If the dictionary value is null then - * the provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The integer value. - */ - public int getInt(COSName key, int defaultValue) - { - return getInt(key, null, defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an integer. If the dictionary value is null then - * the -1 will be returned. - * - * @param firstKey The first key to the item in the dictionary. - * @param secondKey The second key to the item in the dictionary. - * @return The integer value. - */ - public int getInt(COSName firstKey, COSName secondKey) - { - return getInt(firstKey, secondKey, -1); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be an integer. If the dictionary value is null then - * the provided default value will be returned. - * - * @param firstKey The first key to the item in the dictionary. - * @param secondKey The second key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The integer value. 
- */ - public int getInt(COSName firstKey, COSName secondKey, int defaultValue) - { - int retval = defaultValue; - COSBase obj = getAlternateObjectFromDictionary( firstKey, secondKey); - if (obj instanceof COSNumber) - { - retval = ((COSNumber) obj).intValue(); - } - return retval; - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a long. -1 is returned if there is no value. - * - * @param key The key to the item in the dictionary. - * - * @return The long value. - */ - public long getLong(String key) - { - return getLong( getPDFName(key), -1L); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a long. -1 is returned if there is no value. - * - * @param key The key to the item in the dictionary. - * @return The long value. - */ - public long getLong(COSName key) - { - return getLong(key, -1L); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a long integer. If the dictionary value is null - * then the provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The long value. - */ - public long getLong( String key, long defaultValue ) - { - return getLong( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a long integer. If the dictionary value is null - * then the provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The integer value. 
- */ - public long getLong(COSName key, long defaultValue) - { - long retval = defaultValue; - COSBase obj = getObjectFromDictionary( key); - if (obj instanceof COSNumber) - { - retval = ((COSNumber) obj).longValue(); - } - return retval; - } - - /** - * This is a convenience method that will get an object from this - * dictionary that is expected to be a float. -1 is returned - * if there is no value. - * - * @param key The key to the item in the dictionary. - * @return The float value. - */ - public float getFloat(String key) - { - return getFloat( getPDFName(key), -1); - } - - /** - * This is a convenience method that will get an object from this - * dictionary that is expected to be a float. -1 is returned - * if there is no value. - * - * @param key The key to the item in the dictionary. - * @return The float value. - */ - public float getFloat(COSName key) - { - return getFloat(key, -1); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a float. If the dictionary value is null then the - * provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The float value. - */ - public float getFloat( String key, float defaultValue ) - { - return getFloat( getPDFName(key), defaultValue); - } - - /** - * This is a convenience method that will get an object from this dictionary - * that is expected to be a float. If the dictionary value is null then the - * provided default value will be returned. - * - * @param key The key to the item in the dictionary. - * @param defaultValue The value to return if the dictionary item is null. - * @return The float value. 
- */ - public float getFloat(COSName key, float defaultValue) - { - float retval = defaultValue; - COSBase obj = getObjectFromDictionary( key); - if (obj instanceof COSNumber) - { - retval = ((COSNumber) obj).floatValue(); - } - return retval; - } - - /** - * Gets the boolean value from the flags at the given bit position. - * - * @param field The COSName of the field to get the flag from. - * @param bitFlag the bitPosition to get the value from. - * - * @return true if the number at bitPos is '1' - */ - public boolean getFlag(COSName field, int bitFlag) - { - int ff = getInt(field, 0); - return (ff & bitFlag) == bitFlag; - } - - /** - * This will remove an item from this dictionary. This will do nothing - * if the object does not exist. - * - * @param key The key to the item to remove from this dictionary. - */ - public void removeItem(COSName key) - { - items.remove(key); - getUpdateState().update(); - } - - /** - * This will do a lookup from this dictionary. - * - * @param key The key to the object. - * - * @return The item that matches the key. - */ - public COSBase getItem(COSName key) - { - return items.get(key); - } - - /** - * This will do a lookup into the dictionary. - * - * @param key The key to the object. - * - * @return The item that matches the key. - */ - public COSBase getItem(String key) - { - return getItem( getPDFName(key)); - } - - /** - * This is a special case of getItem that takes multiple keys, it will handle the situation - * where multiple keys could get the same value, ie if either CS or ColorSpace is used to get - * the colorspace. This will get an object from this dictionary. - * - * @param firstKey The first key to try. - * @param secondKey The second key to try. - * - * @return The object that matches the key. 
- */ - public COSBase getAlternateItem(COSName firstKey, COSName secondKey) - { - COSBase retval = getItem(firstKey); - if (retval == null && secondKey != null) - { - retval = getItem(secondKey); - } - return retval; - } - - /** - * Returns the names of the entries in this dictionary. The returned set is in the order the entries were added to - * the dictionary. - * - * @since Apache PDFBox 1.1.0 - * @return names of the entries in this dictionary - */ - public Set keySet() - { - return items.keySet(); - } - - /** - * Returns the name-value entries in this dictionary. The returned set is in the order the entries were added to the - * dictionary. - * - * @since Apache PDFBox 1.1.0 - * @return name-value entries in this dictionary - */ - public Set> entrySet() - { - return items.entrySet(); - } - - /** - * Convenience method that calls {@link Map#forEach(BiConsumer) Map.forEach(BiConsumer)}. - * - * @param action The action to be performed for each entry - * - */ - public void forEach(BiConsumer action) - { - items.forEach(action); - } - - /** - * This will get all the values for the dictionary. - * - * @return All the values for the dictionary. - */ - public Collection getValues() - { - return items.values(); - } - - /** - * visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If there is an error visiting this object. - */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromDictionary(this); - } - - /** - * This will add all the dictionary's keys/values to this dictionary. - * Existing key/value pairs will be overwritten. - * - * @param dict The dictionaries to get the key/value pairs from. - */ - public void addAll(COSDictionary dict) - { - items.putAll(dict.items); - } - - /** - * @see Map#containsKey(Object) - * - * @param name The key to find in the map. - * @return true if the map contains this key. 
- */ - public boolean containsKey(COSName name) - { - return this.items.containsKey(name); - } - - /** - * @see Map#containsKey(Object) - * - * @param name The key to find in the map. - * @return true if the map contains this key. - */ - public boolean containsKey(String name) - { - return containsKey( getPDFName(name)); - } - - /** - * Nice method, gives you every object you want Arrays works properly too. Try "P/Annots/[k]/Rect" where k means the - * index of the Annots array. - * - * @param objPath the relative path to the object. - * @return the object - */ - public COSBase getObjectFromPath(String objPath) - { - String[] path = objPath.split(PATH_SEPARATOR); - COSBase retval = this; - for (String pathString : path) - { - if (retval instanceof COSArray) - { - int idx = Integer.parseInt(pathString.replace("\\[", "").replace("\\]", "")); - retval = ((COSArray) retval).getObject(idx); - } - else if (retval instanceof COSDictionary) - { - retval = ((COSDictionary) retval).getObjectFromDictionary( pathString); - } - } - return retval; - } - - /** - * Returns an unmodifiable view of this dictionary. 
- * - * @return an unmodifiable view of this dictionary - */ - public COSDictionary asUnmodifiableDictionary() - { - return new UnmodifiableCOSDictionary(this); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - try - { - return getDictionaryString(this, new ArrayList<>()); - } - catch (IOException e) - { - LOG.debug("An exception occurred trying - returning error message instead", e); - return "COSDictionary{" + e.getMessage() + "}"; - } - } - - private static String getDictionaryString(COSBase base, List objs) throws IOException - { - if (base == null) - { - return "null"; - } - if (objs.contains(base)) - { - // avoid endless recursion - return "hash:" + base.hashCode(); - } - if (base instanceof COSDictionary) - { - objs.add(base); - StringBuilder sb = new StringBuilder("COSDictionary{"); - for (Entry x : ((COSDictionary) base).entrySet()) - { - sb.append(x.getKey()); - sb.append(":"); - sb.append(getDictionaryString(x.getValue(), objs)); - sb.append(";"); - } - sb.append("}"); - if (base instanceof COSStream) - { - try (InputStream stream = ((COSStream) base).createRawInputStream()) - { - byte[] b = stream.readAllBytes(); - sb.append("COSStream{").append( Arrays.hashCode( b)).append( "}"); - } - } - return sb.toString(); - } - if (base instanceof COSArray) - { - objs.add(base); - StringBuilder sb = new StringBuilder("COSArray{"); - for (COSBase x : (COSArray) base) - { - sb.append(getDictionaryString(x, objs)); - sb.append(";"); - } - sb.append("}"); - return sb.toString(); - } - if (base instanceof COSObject) - { - objs.add(base); - COSObject obj = (COSObject) base; - return "COSObject{" - + getDictionaryString( - obj.isObjectNull() ? COSNull.NULL : obj.getObject(), objs) - + "}"; - } - return base.toString(); - } - - /** - * Returns the current {@link COSUpdateState} of this {@link COSDictionary}. - * - * @return The current {@link COSUpdateState} of this {@link COSDictionary}. 
- * @see COSUpdateState - */ - @Override - public COSUpdateState getUpdateState() - { - return updateState; - } - - /** - * Collects all indirect objects numbers within this dictionary and all included dictionaries. It is used to avoid - * mixed up object numbers when importing an existing page to another pdf. - * - * Expert use only. You might run into an endless recursion if choosing a wrong starting point. - * - * @param indirectObjects a collection of already found indirect objects. - * - */ - public void getIndirectObjectKeys(Collection indirectObjects) - { - if (indirectObjects == null) - { - return; - } - COSObjectKey key = getKey(); - if (key != null) - { - // avoid endless recursions - if (indirectObjects.contains(key)) - { - return; - } - else - { - indirectObjects.add(key); - } - } - for (Entry entry : items.entrySet()) - { - COSBase cosBase = entry.getValue(); - COSObjectKey cosBaseKey = cosBase != null ? cosBase.getKey() : null; - // avoid endless recursions - if (COSName.PARENT.equals(entry.getKey()) - || (cosBaseKey != null && indirectObjects.contains(cosBaseKey))) - { - continue; - } - if (cosBase instanceof COSObject) - { - // dereference object - cosBase = ((COSObject) cosBase).getObject(); - } - if (cosBase instanceof COSDictionary) - { - // descend to included dictionary to collect all included indirect objects - ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects); - } - else if (cosBase instanceof COSArray) - { - // descend to included array to collect all included indirect objects - ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects); - } - else if (cosBaseKey != null) - { - // add key for all indirect objects other than COSDictionary/COSArray - indirectObjects.add(cosBaseKey); - } - } - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java deleted file mode 100644 index 645dd20f508..00000000000 --- 
a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDictionaryMap.java +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -/** - * This is a Map that will automatically sync the contents to a COSDictionary. - * - * @author Ben Litchfield - */ -public class COSDictionaryMap implements Map -{ - private final COSDictionary map; - private final Map actuals; - - /** - * Constructor for this map. - * - * @param actualsMap The map with standard java objects as values. - * @param dicMap The map with COSBase objects as values. 
- */ - public COSDictionaryMap( Map actualsMap, COSDictionary dicMap ) - { - actuals = actualsMap; - map = dicMap; - } - - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return map.size(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return size() == 0; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsKey(Object key) - { - return actuals.containsKey( key ); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsValue(Object value) - { - return actuals.containsValue( value ); - } - - /** - * {@inheritDoc} - */ - @Override - public V get(Object key) - { - return actuals.get( key ); - } - - /** - * {@inheritDoc} - */ - @Override - public V put(K key, V value) - { - COSObjectGetter object = (COSObjectGetter)value; - - map.setItem( COSName.getPDFName( (String)key ), object.getCOSObject() ); - return actuals.put( key, value ); - } - - /** - * {@inheritDoc} - */ - @Override - public V remove(Object key) - { - map.removeItem( COSName.getPDFName( (String)key ) ); - return actuals.remove( key ); - } - - /** - * {@inheritDoc} - */ - @Override - public void putAll(Map t) - { - throw new UnsupportedOperationException("Not yet implemented"); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - map.clear(); - actuals.clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public Set keySet() - { - return actuals.keySet(); - } - - /** - * {@inheritDoc} - */ - @Override - public Collection values() - { - return actuals.values(); - } - - /** - * {@inheritDoc} - */ - @Override - public Set> entrySet() - { - return Collections.unmodifiableSet(actuals.entrySet()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object o) - { - boolean retval = false; - if( o instanceof COSDictionaryMap ) - { - COSDictionaryMap other = (COSDictionaryMap) o; - retval = other.map.equals( this.map ); - } - return retval; - } - - /** - * {@inheritDoc} - */ - @Override 
- public String toString() - { - return actuals.toString(); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return map.hashCode(); - } - - /** - * This will take a map<java.lang.String,org.apache.pdfbox.pdmodel.COSObjectGetter> - * and convert it into a COSDictionary. - * - * @param someMap A map containing COSObjectGetters - * - * @return A proper COSDictionary - */ - public static COSDictionary convert(Map someMap) - { - COSDictionary dic = new COSDictionary(); - someMap.forEach((name, objectable) -> - { - COSObjectGetter object = (COSObjectGetter) objectable; - dic.setItem(COSName.getPDFName(name), object.getCOSObject()); - }); - return dic; - } - - /** - * This will take a COS dictionary and convert it into COSDictionaryMap. All cos - * objects will be converted to their primitive form. - * - * @param map The COS mappings. - * @return A standard java map. - * @throws IOException If there is an error during the conversion. - */ - public static COSDictionaryMap convertBasicTypesToMap( COSDictionary map ) throws IOException - { - COSDictionaryMap retval = null; - if( map != null ) - { - Map actualMap = new HashMap<>(); - for( COSName key : map.keySet() ) - { - COSBase cosObj = map.getObjectFromDictionary( key ); - Object actualObject = null; - if( cosObj instanceof COSString ) - { - actualObject = ((COSString)cosObj).getString(); - } - else if( cosObj instanceof COSInteger ) - { - actualObject = ((COSInteger)cosObj).intValue(); - } - else if( cosObj instanceof COSName ) - { - actualObject = ((COSName)cosObj).getName(); - } - else if( cosObj instanceof COSFloat ) - { - actualObject = ((COSFloat)cosObj).floatValue(); - } - else if( cosObj instanceof COSBoolean ) - { - actualObject = ((COSBoolean)cosObj).getValue() ? 
Boolean.TRUE : Boolean.FALSE; - } - else - { - throw new IOException( "Error:unknown type of object to convert:" + cosObj ); - } - actualMap.put( key.getName(), actualObject ); - } - retval = new COSDictionaryMap<>( actualMap, map ); - } - - return retval; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java deleted file mode 100644 index 03e4581cd4f..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocument.java +++ /dev/null @@ -1,633 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.RandomAccessStreamCache; -import org.apache.pdfbox.io.RandomAccessStreamCache.StreamCacheCreateFunction; - -import java.io.Closeable; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.stream.Collectors; - -/** - * This is the in-memory representation of the PDF document. 
You need to call - * close() on this object when you are done using it!! - * - * @author Ben Litchfield - * - */ -public class COSDocument extends COSBase implements Closeable -{ - /** - * Log instance. - */ - private static final Logger LOG = LogManager.getLogger(COSDocument.class); - - private float version = 1.4f; - - /** - * Maps ObjectKeys to a COSObject. Note that references to these objects - * are also stored in COSDictionary objects that map a name to a specific object. - */ - private final Map objectPool = - new HashMap<>(); - - /** - * Maps object and generation id to object byte offsets. - */ - private final Map xrefTable = - new HashMap<>(); - - /** - * List containing COSStream objects which are associated with this document. - */ - private final List cosStreamList = new ArrayList<>(); - - /** - * Document trailer dictionary. - */ - private COSDictionary trailer; - - /** - * Signal that document is already decrypted. - */ - private boolean isDecrypted = false; - - private long startXref; - - private boolean closed = false; - - private boolean isXRefStream; - - private boolean hasHybridXRef = false; - - /** - * TODO: what is this used for? - */ - private final RandomAccessStreamCache streamCache ; - - /** - * Used for incremental saving, to avoid XRef object numbers from being reused. - */ - private long highestXRefObjectNumber; - - /** - * A parser that can deference a COSProxyObject. - */ - private final ICOSParser parser; - - private final COSDocumentState documentState = new COSDocumentState(); - private COSDictionary encryption; - - /** - * Constructor. Uses main memory to buffer PDF streams. - */ - public COSDocument() - { - this( IOUtils.createMemoryOnlyStreamCache()); - } - - /** - * Constructor. Uses main memory to buffer PDF streams. 
- * - * @param parser Parser to be used to parse the document on demand - */ - public COSDocument(ICOSParser parser) - { - this(IOUtils.createMemoryOnlyStreamCache(), parser); - } - - /** - * Constructor that will use the provided function to create a stream cache for the storage of the PDF streams. - * - * @param streamCacheCreateFunction a function to create an instance of a stream cache - * - */ - public COSDocument( RandomAccessStreamCache.StreamCacheCreateFunction streamCacheCreateFunction) - { - this(streamCacheCreateFunction, null); - } - - /** - * Constructor that will use the provided function to create a stream cache - * for the storage of the PDF streams. Note again that we are not talking - * i/o streams, but about dereferenced COSStream objects - * - * @param streamCacheCreateFunction a function to create an instance of a stream cache - * @param parser Parser to be used to parse the document on demand - * - */ - public COSDocument( StreamCacheCreateFunction streamCacheCreateFunction, ICOSParser parser) - { - streamCache = getStreamCache(streamCacheCreateFunction); - this.parser = parser; - } - - private RandomAccessStreamCache getStreamCache( StreamCacheCreateFunction streamCacheCreateFunction) - { - if (streamCacheCreateFunction == null) - { - return null; - } - try - { - return streamCacheCreateFunction.create(); - } - catch (IOException exception1) - { - LOG.warn( - "An error occured when creating stream cache. Using memory only cache as fallback.", - exception1); - } - try - { - return IOUtils.createMemoryOnlyStreamCache().create(); - } - catch (IOException exception2) - { - LOG.warn("An error occured when creating stream cache for fallback.", exception2); - } - return null; - } - - /** - * This will get the encryption dictionary for this document. This will still return the parameters if the document - * was decrypted. 
As the encryption architecture in PDF documents is pluggable this returns an abstract class, - * but the only supported subclass at this time is a - * PDStandardEncryption object. - * - * @return The encryption dictionary(most likely a PDStandardEncryption object) - */ - public COSDictionary getEncryption() - { - if (encryption == null && isEncrypted()) - { - encryption = trailer.getCOSDictionary( COSName.ENCRYPT ); - } - return encryption; - } - - /** - * Creates a new COSStream using the current configuration for scratch files. - * Note that a COSStream is not a traditional stream object, but rather a wrapper - * for the stream object defined By adobe COS - * - * @return the new COSStream - */ - public COSStream createCOSStream() - { - COSStream stream = new COSStream( streamCache ); - // collect all COSStreams so that they can be closed when closing the COSDocument. - // This is limited to newly created pdfs as all COSStreams of an existing pdf are - // collected within the map objectPool - cosStreamList.add( stream); - return stream; - } - - /** - * Creates a new COSStream using the current configuration for scratch files. Not for public use. - * Only COSParser should call this method. - * - * @param dictionary the corresponding dictionary - * @param startPosition the start position within the source - * @param streamLength the stream length - * @return the new COSStream - * @throws IOException if the random access view can't be read - */ - public COSStream createCOSStream(COSDictionary dictionary, long startPosition, - long streamLength) throws IOException - { - COSStream stream = new COSStream(streamCache, - parser.createRandomAccessReadView(startPosition, streamLength)); - dictionary.forEach(stream::setItem); - stream.setKey(dictionary.getKey()); - return stream; - } - - /** - * Get the dictionary containing the linearization information if the pdf is linearized. 
- * - * @return the dictionary containing the linearization information - */ - public COSDictionary getLinearizedDictionary() - { - // get all keys with a positive offset in ascending order, as the linearization dictionary shall be the first - // within the pdf - List objectKeys = xrefTable.entrySet().stream() // - .filter(e -> e.getValue() > 0L) // - .sorted(Entry.comparingByValue()) // - .map(Entry::getKey) // - .collect(Collectors.toList()); - for (COSObjectKey objectKey : objectKeys) - { - COSObject objectFromPool = getObjectFromPool(objectKey); - COSBase realObject = objectFromPool.getObject(); - if (realObject instanceof COSDictionary) - { - COSDictionary dic = (COSDictionary) realObject; - if (dic.getItem(COSName.LINEARIZED) != null) - { - return dic; - } - } - } - return null; - } - - /** - * This will get all dictionaries objects by type. - * - * @param type The type of the object. - * - * @return This will return all objects with the specified type. - */ - public List getObjectsByType(COSName type) - { - return getObjectsByType(type, null); - } - - /** - * This will get all dictionaries objects by type. - * - * @param type1 The first possible type of the object, mandatory. - * @param type2 The second possible type of the object, usually an abbreviation, optional. - * - * @return This will return all objects with the specified type(s). 
- */ - public List getObjectsByType(COSName type1, COSName type2) - { - List originKeys = new ArrayList<>(xrefTable.keySet()); - List retval = getObjectsByType(originKeys, type1, type2); - // there might be some additional objects if the brute force parser was triggered - // due to a broken cross-reference table/stream - if (originKeys.size() < xrefTable.size()) - { - List additionalKeys = new ArrayList<>(xrefTable.keySet()); - additionalKeys.removeAll(originKeys); - retval.addAll(getObjectsByType(additionalKeys, type1, type2)); - } - return retval; - } - - private List getObjectsByType(List keys, COSName type1, COSName type2) - { - List retval = new ArrayList<>(); - for (COSObjectKey objectKey : keys) - { - COSObject objectFromPool = getObjectFromPool(objectKey); - COSBase realObject = objectFromPool.getObject(); - if (realObject instanceof COSDictionary) - { - COSName dictType = ((COSDictionary) realObject).getCOSName(COSName.TYPE); - if (type1.equals(dictType) || (type2 != null && type2.equals(dictType))) - { - retval.add(objectFromPool); - } - } - } - return retval; - } - - /** - * This will set the header version of this PDF document. - * - * @param versionValue The version of the PDF document. - */ - public void setVersion( float versionValue ) - { - version = versionValue; - } - - /** - * This will get the version extracted from the header of this PDF document. - * - * @return The header version. - */ - public float getVersion() - { - return version; - } - - /** - * Signals that the document is decrypted completely. - */ - public void setDecrypted() - { - isDecrypted = true; - } - - /** - * Indicates if a encrypted pdf is already decrypted after parsing. - * - * @return true indicates that the pdf is decrypted. - */ - public boolean isDecrypted() - { - return isDecrypted; - } - - /** - * This will tell if this is an encrypted document. - * - * @return true If this document is encrypted. 
- */ - public boolean isEncrypted() - { - return trailer != null && trailer.getCOSDictionary(COSName.ENCRYPT) != null; - } - - /** - * This will get the encryption dictionary if the document is encrypted or null if the document - * is not encrypted. - * - * @return The encryption dictionary. - */ - public COSDictionary getEncryptionDictionary() - { - return trailer.getCOSDictionary(COSName.ENCRYPT); - } - - /** - * This will set the encryption dictionary, this should only be called when - * encrypting the document. - * - * @param encDictionary The encryption dictionary. - */ - public void setEncryptionDictionary( COSDictionary encDictionary ) - { - trailer.setItem( COSName.ENCRYPT, encDictionary ); - } - - /** - * This will get the document ID. - * - * @return The document id. - */ - public COSArray getDocumentID() - { - return getTrailer().getCOSArray(COSName.ID); - } - - /** - * This will set the document ID. This should be an array of two strings. This method cannot be - * used to remove the document id by passing null or an empty array; it will be recreated. Only - * the first existing string is used when writing, the second one is always recreated. If you - * don't want this, you'll have to modify the {@code COSWriter} class, look for {@link COSName#ID}. - * - * @param id The document id. - */ - public void setDocumentID( COSArray id ) - { - getTrailer().setItem(COSName.ID, id); - } - - /** - * This will get the document trailer. - * - * @return the document trailer dict - */ - public COSDictionary getTrailer() - { - return trailer; - } - - /** - * // MIT added, maybe this should not be supported as trailer is a persistence construct. - * This will set the document trailer. - * - * @param newTrailer the document trailer dictionary - */ - public void setTrailer(COSDictionary newTrailer) - { - trailer = newTrailer; - trailer.getUpdateState().setOriginDocumentState(documentState); - } - - /** - * Internal PDFBox use only. 
Get the object number of the highest XRef stream. This is needed to - * avoid reusing such a number in incremental saving. - * - * @return The object number of the highest XRef stream, or 0 if there was no XRef stream. - */ - public long getHighestXRefObjectNumber() - { - return highestXRefObjectNumber; - } - - /** - * Internal PDFBox use only. Sets the object number of the highest XRef stream. This is needed - * to avoid reusing such a number in incremental saving. - * - * @param highestXRefObjectNumber The object number of the highest XRef stream. - */ - public void setHighestXRefObjectNumber(long highestXRefObjectNumber) - { - this.highestXRefObjectNumber = highestXRefObjectNumber; - } - - /** - * visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromDocument(this); - } - - /** - * This will close all storage and delete the tmp files. - * - * @throws IOException If there is an error close resources. 
- */ - @Override - public void close() throws IOException - { - if (closed) - { - return; - } - - // Make sure that: - // - first Exception is kept - // - all COSStreams are closed - // - stream cache is closed - // - there's a way to see which errors occurred - IOException firstException = null; - - // close all open I/O streams - for (COSObject object : objectPool.values()) - { - if (!object.isObjectNull()) - { - COSBase cosObject = object.getObject(); - if (cosObject instanceof COSStream) - { - firstException = IOUtils.closeAndLogException((COSStream) cosObject, LOG, - "COSStream", firstException); - } - } - } - - for (COSStream stream : cosStreamList) - { - firstException = IOUtils.closeAndLogException(stream, LOG, "COSStream", firstException); - } - - if (streamCache != null) - { - firstException = IOUtils.closeAndLogException(streamCache, LOG, "Stream Cache", - firstException); - } - closed = true; - - // rethrow first exception to keep method contract - if (firstException != null) - { - throw firstException; - } - } - - /** - * Returns true if this document has been closed. - * - * @return true if the document is already closed, false otherwise - */ - public boolean isClosed() - { - return closed; - } - - /** - * This will get an object from the pool. - * - * @param key The object key. - * - * @return The object in the pool or a new one if it has not been parsed yet. - */ - public COSObject getObjectFromPool(COSObjectKey key) - { - COSObject obj = null; - if( key != null ) - { - // make "proxy" object if this was a forward reference - obj = objectPool.computeIfAbsent(key, k -> new COSObject(k, parser)); - } - return obj; - } - - /** - * Populate XRef HashMap with given values. - * Each entry maps ObjectKeys to byte offsets in the file. 
- * @param xrefTableValues xref table entries to be added - */ - public void addXRefTable( Map xrefTableValues ) - { - xrefTable.putAll( xrefTableValues ); - } - - /** - * Returns the xrefTable which is a mapping of ObjectKeys - * to byte offsets in the file. - * @return mapping of ObjectsKeys to byte offsets - */ - public Map getXrefTable() - { - return xrefTable; - } - - /** - * This method set the startxref value of the document. This will only - * be needed for incremental updates. - * - * @param startXrefValue the value for startXref - */ - public void setStartXref(long startXrefValue) - { - startXref = startXrefValue; - } - - /** - * Return the startXref Position of the parsed document. This will only be needed for incremental updates. - * - * @return a long with the old position of the startxref - */ - public long getStartXref() - { - return startXref; - } - - /** - * Determines if the trailer is a XRef stream or not. - * - * @return true if the trailer is a XRef stream - */ - public boolean isXRefStream() - { - return isXRefStream; - } - - /** - * Sets isXRefStream to the given value. You need to take care that the version of your PDF is - * 1.5 or higher. - * - * @param isXRefStreamValue the new value for isXRefStream - */ - public void setIsXRefStream(boolean isXRefStreamValue) - { - isXRefStream = isXRefStreamValue; - } - - /** - * Determines if the pdf has hybrid cross references, both plain tables and streams. - * - * @return true if the pdf has hybrid cross references - */ - public boolean hasHybridXRef() - { - return hasHybridXRef; - } - - /** - * Marks the pdf as document using hybrid cross references. - */ - public void setHasHybridXRef() - { - hasHybridXRef = true; - } - - /** - * Returns the {@link COSDocumentState} of this {@link COSDocument}. - * - * @return The {@link COSDocumentState} of this {@link COSDocument}. 
- * @see COSDocumentState - */ - public COSDocumentState getDocumentState() - { - return documentState; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java deleted file mode 100644 index 628148927ef..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSDocumentState.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -/** - * An instance of {@link COSDocumentState} collects all known states a {@link COSDocument} may have and shall allow - * their evaluation. - * - * @author Christian Appl - * @see COSDocument - */ -public class COSDocumentState -{ - - /** - * The parsing state of the document. - *

    - *
  • {@code true}, if the document is currently being parsed. (initial state)
  • - *
  • {@code false}, if the document's parsing completed and it may be edited and updated.
  • - *
- */ - private boolean parsing = true; - - /** - * Sets the {@link #parsing} state of the document. - * - * @param parsing The {@link #parsing} state to set. - */ - public void setParsing(boolean parsing) - { - this.parsing = parsing; - } - - /** - * Returns {@code true}, if the document´s {@link #parsing} is completed and it may be updated. - * - * @return {@code true}, if the document´s {@link #parsing} is completed and it may be updated. - */ - public boolean isAcceptingUpdates() - { - return !parsing; - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java deleted file mode 100644 index 4f9e7dd3d51..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSFloat.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; -import java.math.BigDecimal; - -/** - * This class represents a floating point number in a PDF document. 
- * - * @author Ben Litchfield - * - */ -public class COSFloat extends COSNumber -{ - private final float value; - private String valueAsString; - - public static final COSFloat ZERO = new COSFloat(0f, "0.0"); - public static final COSFloat ONE = new COSFloat(1f, "1.0"); - - /** - * Constructor. - * - * @param aFloat The primitive float object that this object wraps. - */ - public COSFloat( float aFloat ) - { - value = aFloat; - } - - /** - * An internal constructor to avoid formatting for the predefined constants. - * - * @param aFloat - * @param valueString - */ - private COSFloat(float aFloat, String valueString) - { - value = aFloat; - valueAsString = valueString; - } - - /** - * Constructor. - * - * @param aFloat The primitive float object that this object wraps. - * - * @throws IOException If aFloat is not a float. - */ - public COSFloat( String aFloat ) throws IOException - { - float parsedValue; - String stringValue = null; - try - { - float f = Float.parseFloat(aFloat); - parsedValue = coerce(f); - stringValue = f == parsedValue ? aFloat : null; - } - catch( NumberFormatException e ) - { - if (aFloat.startsWith("--")) - { - // PDFBOX-4289 has --16.33 - aFloat = aFloat.substring(1); - } - else if (aFloat.matches("^0\\.0*-\\d+")) - { - // PDFBOX-2990 has 0.00000-33917698 - // PDFBOX-3369 has 0.00-35095424 - // PDFBOX-3500 has 0.-262 - aFloat = "-" + aFloat.replaceFirst("-", ""); - } - else if (aFloat.matches("^-\\d+\\.-\\d+")) - { - // PDFBOX-5829 has -12.-1 - aFloat = "-" + aFloat.replace("-", ""); - } - else - { - throw new IOException("Error expected floating point number actual='" + aFloat + "'", e); - } - - try - { - parsedValue = coerce(Float.parseFloat(aFloat)); - } - catch (NumberFormatException e2) - { - throw new IOException("Error expected floating point number actual='" + aFloat + "'", e2); - } - } - value = parsedValue; - valueAsString = stringValue; - } - - /** - * Check and coerce the value field to be between MIN_NORMAL and MAX_VALUE. 
- * - * @param floatValue the value to be checked - * @return the coerced value - */ - private float coerce(float floatValue) - { - if (floatValue == Float.POSITIVE_INFINITY) - { - return Float.MAX_VALUE; - } - if (floatValue == Float.NEGATIVE_INFINITY) - { - return -Float.MAX_VALUE; - } - if (Math.abs(floatValue) < Float.MIN_NORMAL) - { - // values smaller than the smallest possible float value are converted to 0 - // see PDF spec, chapter 2 of Appendix C Implementation Limits - return 0f; - } - return floatValue; - } - - /** - * The value of the float object that this one wraps. - * - * @return The value of this object. - */ - @Override - public float floatValue() - { - return value; - } - - /** - * This will get the long value of this object. - * - * @return The long value of this object, - */ - @Override - public long longValue() - { - return (long) value; - } - - /** - * This will get the integer value of this object. - * - * @return The int value of this object, - */ - @Override - public int intValue() - { - return (int) value; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals( Object o ) - { - return o instanceof COSFloat && - Float.floatToIntBits(((COSFloat)o).value) == Float.floatToIntBits(value); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return Float.hashCode(value); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSFloat{" + formatString() + "}"; - } - - /** - * Builds, if needed, and returns the string representation of the current value. - * @return current value as string. - */ - public String formatString() - { - if (valueAsString == null) - { - String s = String.valueOf(value); - boolean simpleFormat = s.indexOf('E') < 0; - valueAsString = simpleFormat ? s - : new BigDecimal(s).stripTrailingZeros().toPlainString(); - } - return valueAsString; - } - - /** - * Visitor pattern double dispatch method. 
- * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromFloat(this); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java deleted file mode 100644 index 8b9d3b7590d..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSIncrement.java +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.Set; - -/** - * A {@link COSIncrement} starts at a given {@link COSUpdateInfo} to collect updates, that have been made to a - * {@link COSDocument} and therefore should be added to it´s next increment. - * - * @author Christian Appl - * @see COSUpdateState - * @see COSUpdateInfo - */ -public class COSIncrement implements Iterable -{ - - /** - * Contains the {@link COSBase}s, that shall be added to the increment at top level. 
- */ - private final Set objects = new LinkedHashSet<>(); - /** - * Contains the direct {@link COSBase}s, that are either contained written directly by structures contained in - * {@link #objects} or that must be excluded from being written as indirect {@link COSObject}s for other reasons. - */ - private final Set excluded = new HashSet<>(); - /** - * Contains all {@link COSObject}s, that have already been processed by this {@link COSIncrement} and shall not be - * processed again. - */ - private final Set processedObjects = new HashSet<>(); - /** - * Contains the {@link COSUpdateInfo} that this {@link COSIncrement} creates an increment for. - */ - private final COSUpdateInfo incrementOrigin; - /** - * Whether this {@link COSIncrement} has already been determined, or must still be evaluated. - */ - private boolean initialized = false; - - /** - * Creates a new {@link COSIncrement} for the given {@link COSUpdateInfo}, the increment will use it´s - * {@link COSDocumentState} as it´s own origin and shall collect all updates contained in the given - * {@link COSUpdateInfo}.
- * Should the given object be {@code null}, the resulting increment shall be empty. - * - * @param incrementOrigin The {@link COSUpdateInfo} serving as an update source for this {@link COSIncrement}. - */ - public COSIncrement(COSUpdateInfo incrementOrigin) - { - this.incrementOrigin = incrementOrigin; - } - - /** - * Collect all updates made to the given {@link COSBase} and it's contained structures.
- * This shall forward all {@link COSUpdateInfo} objects to the proper specialized collection methods. - * - * @param base The {@link COSBase} updates shall be collected for. - * @return Returns {@code true}, if the {@link COSBase} represents a direct child structure, that would require it´s - * parent to be updated instead. - * @see #collect(COSDictionary) - * @see #collect(COSArray) - * @see #collect(COSObject) - */ - private boolean collect(COSBase base) - { - if(contains(base)) - { - return false; - } - // handle updatable objects: - if(base instanceof COSDictionary) - { - return collect((COSDictionary) base); - } - else if(base instanceof COSObject) - { - return collect((COSObject) base); - } - else if(base instanceof COSArray) - { - return collect((COSArray) base); - } - return false; - } - - /** - * Collect all updates made to the given {@link COSDictionary} and it's contained structures. - * - * @param dictionary The {@link COSDictionary} updates shall be collected for. - * @return Returns {@code true}, if the {@link COSDictionary} represents a direct child structure, that would - * require it´s parent to be updated instead. - */ - private boolean collect(COSDictionary dictionary) - { - COSUpdateState updateState = dictionary.getUpdateState(); - // Is definitely part of the increment? - if(!isExcluded(dictionary) && !contains(dictionary) && updateState.isUpdated()) - { - add(dictionary); - } - boolean childDemandsParentUpdate = false; - // Collect children: - for(COSBase entry : dictionary.getValues()) - { - // Primitives can not be part of an increment. (on top level) - if(!(entry instanceof COSUpdateInfo) || contains(entry)) - { - continue; - } - COSUpdateInfo updatableEntry = (COSUpdateInfo) entry; - COSUpdateState entryUpdateState = updatableEntry.getUpdateState(); - // Entries with different document origin must be part of the increment! - updateDifferentOrigin(entryUpdateState); - // Always attempt to write COSArrays as direct objects. 
- if(updatableEntry.isNeedToBeUpdated() && - ((!(entry instanceof COSObject) && entry.isDirect()) || entry instanceof COSArray)) - { - // Exclude direct entries from the increment! - exclude(entry); - childDemandsParentUpdate = true; - } - // Collect descendants: - childDemandsParentUpdate = collect(entry) || childDemandsParentUpdate; - } - - if(isExcluded(dictionary)) - { - return childDemandsParentUpdate; - } - else - { - if(childDemandsParentUpdate && !contains(dictionary)) - { - add(dictionary); - } - return false; - } - } - - /** - * Collect all updates made to the given {@link COSArray} and it's contained structures. - * - * @param array The {@link COSDictionary} updates shall be collected for. - * @return Returns {@code true}, if the {@link COSArray}´s elements changed. A {@link COSArray} shall always be - * treated as a direct structure, that would require it´s parent to be updated instead. - */ - private boolean collect(COSArray array) - { - COSUpdateState updateState = array.getUpdateState(); - boolean childDemandsParentUpdate = updateState.isUpdated(); - for(COSBase entry : array) - { - // Primitives can not be part of an increment. (on top level) - if(!(entry instanceof COSUpdateInfo) || contains(entry)) - { - continue; - } - COSUpdateState entryUpdateState = ((COSUpdateInfo) entry).getUpdateState(); - // Entries with different document origin must be part of the increment! - updateDifferentOrigin(entryUpdateState); - // Collect descendants: - childDemandsParentUpdate = collect(entry) || childDemandsParentUpdate; - } - return childDemandsParentUpdate; - } - - /** - * Collect all updates made to the given {@link COSObject} and it's contained structures. - * - * @param object The {@link COSObject} updates shall be collected for. - * @return Always returns {@code false}. {@link COSObject}s by definition are indirect and shall never cause a - * parent structure to be updated. 
- */ - private boolean collect(COSObject object) - { - if(contains(object)) - { - return false; - } - addProcessedObject(object); - COSUpdateState updateState = object.getUpdateState(); - // Objects with different document origin must be part of the increment! - updateDifferentOrigin(updateState); - // determine actual, if necessary or possible without dereferencing: - COSUpdateInfo actual = null; - if(updateState.isUpdated() || object.isDereferenced()) - { - COSBase base = object.getObject(); - if(base instanceof COSUpdateInfo) - { - actual = (COSUpdateInfo) base; - } - } - // Skip? - if(actual == null || contains(actual.getCOSObject())) - { - return false; - } - boolean childDemandsParentUpdate = false; - COSUpdateState actualUpdateState = actual.getUpdateState(); - if(actualUpdateState.isUpdated()) - { - childDemandsParentUpdate = true; - } - exclude(actual.getCOSObject()); - childDemandsParentUpdate = collect(actual.getCOSObject()) || childDemandsParentUpdate; - if(updateState.isUpdated() || childDemandsParentUpdate) - { - add(actual.getCOSObject()); - } - return false; - } - - /** - * Returns {@code true}, if the given {@link COSBase} is already known to and has been processed by this - * {@link COSIncrement}. - * - * @param base The {@link COSBase} to check. - * @return {@code true}, if the given {@link COSBase} is already known to and has been processed by this - * {@link COSIncrement}. - * @see #objects - * @see #processedObjects - */ - public boolean contains(COSBase base) - { - return objects.contains(base) || (base instanceof COSObject && processedObjects.contains((COSObject) base)); - } - - /** - * Check whether the given {@link COSUpdateState}´s {@link COSDocumentState} differs from the {@link COSIncrement}´s - * known {@link #incrementOrigin}.
- * Should that be the case, the {@link COSUpdateState} originates from another {@link COSDocument} and must be added - * to the {@link COSIncrement}, hence call {@link COSUpdateState#update()}. - * - * @param updateState The {@link COSUpdateState} that shall be updated, if it's originating from another - * {@link COSDocument}. - * @see #incrementOrigin - */ - private void updateDifferentOrigin(COSUpdateState updateState) - { - if(incrementOrigin != null && updateState != null && - incrementOrigin.getUpdateState().getOriginDocumentState() != updateState.getOriginDocumentState()) - { - updateState.update(); - } - } - - /** - * The given object and actual {COSBase}s shall be part of the increment and must be added to {@link #objects}, - * if possible.
- * {@code null} values shall be skipped. - * - * @param object The {@link COSBase} to add to {@link #objects}. - * @see #objects - */ - private void add(COSBase object) - { - if(object != null) - { - objects.add(object); - } - } - - /** - * The given {@link COSObject} has been processed, or is being processed. It shall be added to - * {@link #processedObjects} to skip it, should it be encountered again.
- * {@code null} values shall be ignored. - * - * @param base The {@link COSObject} to add to {@link #processedObjects}. - * @see #processedObjects - */ - private void addProcessedObject(COSObject base) - { - if(base != null) - { - processedObjects.add(base); - } - } - - /** - * The given {@link COSBase}s are not fit for inclusion in an increment and shall be added to {@link #excluded}.
- * {@code null} values shall be ignored. - * - * @param base The {@link COSBase}s to add to {@link #excluded}. - * @return The {@link COSIncrement} itself, to allow method chaining. - * @see #excluded - */ - public COSIncrement exclude(COSBase... base) - { - if(base != null) - { - excluded.addAll( Arrays.asList(base)); - } - return this; - } - - /** - * Returns {@code true}, if the given {@link COSBase} has been excluded from the increment, and hence is contained - * in {@link #excluded}. - * - * @param base The {@link COSBase} to check for exclusion. - * @return {@code true}, if the given {@link COSBase} has been excluded from the increment, and hence is contained - * in {@link #excluded}. - * @see #excluded - */ - private boolean isExcluded(COSBase base) - { - return excluded.contains(base); - } - - /** - * Returns all indirect {@link COSBase}s, that shall be written to an increment as top level {@link COSObject}s.
- * Calling this method will cause the increment to be initialized. - * - * @return All indirect {@link COSBase}s, that shall be written to an increment as top level {@link COSObject}s. - * @see #objects - */ - public Set getObjects() - { - if(!initialized && incrementOrigin != null) - { - collect(incrementOrigin.getCOSObject()); - initialized = true; - } - return objects; - } - - /** - * Return an iterator for the determined {@link #objects} contained in this {@link COSIncrement}. - * - * @return An iterator for the determined {@link #objects} contained in this {@link COSIncrement}. - */ - @Override - public Iterator iterator() - { - return getObjects().iterator(); - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java deleted file mode 100644 index 14d5bf1b9df..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSInteger.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * This class represents an integer number in a PDF document. 
- * - * @author Ben Litchfield - */ -public final class COSInteger extends COSNumber -{ - - /** - * The lowest integer to be kept in the {@link #STATIC} array. - */ - private static final int LOW = -100; - - /** - * The highest integer to be kept in the {@link #STATIC} array. - */ - private static final int HIGH = 256; - - /** - * Static instances of all COSIntegers in the range from {@link #LOW} - * to {@link #HIGH}. - */ - private static final COSInteger[] STATIC = new COSInteger[HIGH - LOW + 1]; - - /** - * Constant for the number zero. - * @since Apache PDFBox 1.1.0 - */ - public static final COSInteger ZERO = get(0); - - /** - * Constant for the number one. - * @since Apache PDFBox 1.1.0 - */ - public static final COSInteger ONE = get(1); - - /** - * Constant for the number two. - * @since Apache PDFBox 1.1.0 - */ - public static final COSInteger TWO = get(2); - - /** - * Constant for the number three. - * @since Apache PDFBox 1.1.0 - */ - public static final COSInteger THREE = get(3); - - /** - * Constant for an out of range value which is bigger than Log.MAX_VALUE. - */ - protected static final COSInteger OUT_OF_RANGE_MAX = getInvalid(true); - - /** - * Constant for an out of range value which is smaller than Log.MIN_VALUE. - */ - protected static final COSInteger OUT_OF_RANGE_MIN = getInvalid(false); - - /** - * Returns a COSInteger instance with the given value. - * - * @param val integer value - * @return COSInteger instance - */ - public static COSInteger get(long val) - { - if (LOW <= val && val <= HIGH) - { - int index = (int) val - LOW; - // no synchronization needed - if (STATIC[index] == null) - { - STATIC[index] = new COSInteger(val, true); - } - return STATIC[index]; - } - return new COSInteger(val, true); - } - - private static COSInteger getInvalid(boolean maxValue) - { - return maxValue ? 
new COSInteger(Long.MAX_VALUE, false) - : new COSInteger(Long.MIN_VALUE, false); - } - - private final long value; - private final boolean isValid; - - /** - * constructor. - * - * @param val The integer value of this object. - * @param valid indicates if the value is valid. - */ - private COSInteger(long val, boolean valid) - { - value = val; - isValid = valid; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object o) - { - return o instanceof COSInteger && ((COSInteger)o).intValue() == intValue(); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - //taken from java.lang.Long - return (int)(value ^ (value >> 32)); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSInt{" + value + "}"; - } - - /** - * polymorphic access to value as float. - * - * @return The float value of this object. - */ - @Override - public float floatValue() - { - return value; - } - - /** - * Polymorphic access to value as int - * This will get the integer value of this object. - * - * @return The int value of this object, - */ - @Override - public int intValue() - { - return (int)value; - } - - /** - * Polymorphic access to value as int - * This will get the integer value of this object. - * - * @return The int value of this object, - */ - @Override - public long longValue() - { - return value; - } - - /** - * Indicates whether this instance represents a valid value. - * - * @return true if the value is valid - */ - public boolean isValid() - { - return isValid; - } - - /** - * visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. 
- */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromInt(this); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java deleted file mode 100644 index 4be540aecdd..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSName.java +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -// import org.apache.pdfbox.util.Hex; - -import java.io.IOException; -import java.lang.ref.Cleaner; -import java.lang.ref.WeakReference; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -/** - * A PDF Name object. 
- * - * @author Ben Litchfield - */ -public final class COSName extends COSBase implements Comparable -{ - // using ConcurrentHashMap because this can be accessed by multiple threads - private static final Map> NAME_MAP = // - new ConcurrentHashMap<>(8192); - private static final Cleaner CLEANER = Cleaner.create(); - - // - // IMPORTANT: this list is *alphabetized* and does not need any JavaDoc - // - - // A - public static final COSName A = getPDFName("A"); - public static final COSName AA = getPDFName("AA"); - public static final COSName ABSOLUTE_COLORIMETRIC = getPDFName("AbsoluteColorimetric"); - public static final COSName AC = getPDFName("AC"); - public static final COSName ACRO_FORM = getPDFName("AcroForm"); - public static final COSName ACTUAL_TEXT = getPDFName("ActualText"); - public static final COSName ADBE = getPDFName("ADBE"); - public static final COSName ADBE_PKCS7_DETACHED = getPDFName("adbe.pkcs7.detached"); - public static final COSName ADBE_PKCS7_SHA1 = getPDFName("adbe.pkcs7.sha1"); - public static final COSName ADBE_X509_RSA_SHA1 = getPDFName("adbe.x509.rsa_sha1"); - public static final COSName ADOBE_PPKLITE = getPDFName("Adobe.PPKLite"); - public static final COSName AESV2 = getPDFName("AESV2"); - public static final COSName AESV3 = getPDFName("AESV3"); - public static final COSName AF = getPDFName("AF"); - public static final COSName AF_RELATIONSHIP = COSName.getPDFName("AFRelationship"); - public static final COSName AFTER = getPDFName("After"); - public static final COSName AI_META_DATA = getPDFName("AIMetaData"); - public static final COSName AIS = getPDFName("AIS"); - public static final COSName ALL_OFF = getPDFName("AllOff"); - public static final COSName ALL_ON = getPDFName("AllOn"); - public static final COSName ALT = getPDFName("Alt"); - public static final COSName ALPHA = getPDFName("Alpha"); - public static final COSName ALTERNATE = getPDFName("Alternate"); - public static final COSName ANNOT = getPDFName("Annot"); - public static 
final COSName ANNOTS = getPDFName("Annots"); - public static final COSName ANTI_ALIAS = getPDFName("AntiAlias"); - public static final COSName ANY_OFF = getPDFName("AnyOff"); - public static final COSName ANY_ON = getPDFName("AnyOn"); - public static final COSName AP = getPDFName("AP"); - public static final COSName AP_REF = getPDFName("APRef"); - public static final COSName APP = getPDFName("App"); - public static final COSName ART_BOX = getPDFName("ArtBox"); - public static final COSName ARTIFACT = getPDFName("Artifact"); - public static final COSName AS = getPDFName("AS"); - public static final COSName ASCENT = getPDFName("Ascent"); - public static final COSName ASCII_HEX_DECODE = getPDFName("ASCIIHexDecode"); - public static final COSName ASCII_HEX_DECODE_ABBREVIATION = getPDFName("AHx"); - public static final COSName ASCII85_DECODE = getPDFName("ASCII85Decode"); - public static final COSName ASCII85_DECODE_ABBREVIATION = getPDFName("A85"); - public static final COSName ATTACHED = getPDFName("Attached"); - public static final COSName AUTHOR = getPDFName("Author"); - public static final COSName AVG_WIDTH = getPDFName("AvgWidth"); - // B - public static final COSName B = getPDFName("B"); - public static final COSName BACKGROUND = getPDFName("Background"); - public static final COSName BASE_ENCODING = getPDFName("BaseEncoding"); - public static final COSName BASE_FONT = getPDFName("BaseFont"); - public static final COSName BASE_STATE = getPDFName("BaseState"); - public static final COSName BASE_VERSION = getPDFName("BaseVersion"); - public static final COSName BBOX = getPDFName("BBox"); - public static final COSName BC = getPDFName("BC"); - public static final COSName BE = getPDFName("BE"); - public static final COSName BEAD = getPDFName("BEAD"); - public static final COSName BEFORE = getPDFName("Before"); - public static final COSName BG = getPDFName("BG"); - public static final COSName BITS_PER_COMPONENT = getPDFName("BitsPerComponent"); - public static final 
COSName BITS_PER_COORDINATE = getPDFName("BitsPerCoordinate"); - public static final COSName BITS_PER_FLAG = getPDFName("BitsPerFlag"); - public static final COSName BITS_PER_SAMPLE = getPDFName("BitsPerSample"); - public static final COSName BL = getPDFName("Bl"); - public static final COSName BLACK_IS_1 = getPDFName("BlackIs1"); - public static final COSName BLACK_POINT = getPDFName("BlackPoint"); - public static final COSName BLEED_BOX = getPDFName("BleedBox"); - public static final COSName BM = getPDFName("BM"); - public static final COSName BORDER = getPDFName("Border"); - public static final COSName BOUNDS = getPDFName("Bounds"); - public static final COSName BPC = getPDFName("BPC"); - public static final COSName BS = getPDFName("BS"); - //** Acro form field type for button fields. - public static final COSName BTN = getPDFName("Btn"); - public static final COSName BYTERANGE = getPDFName("ByteRange"); - // C - public static final COSName C = getPDFName("C"); - public static final COSName C0 = getPDFName("C0"); - public static final COSName C1 = getPDFName("C1"); - public static final COSName CA = getPDFName("CA"); - public static final COSName CA_NS = getPDFName("ca"); - public static final COSName CALGRAY = getPDFName("CalGray"); - public static final COSName CALRGB = getPDFName("CalRGB"); - public static final COSName CAP = getPDFName("Cap"); - public static final COSName CAP_HEIGHT = getPDFName("CapHeight"); - public static final COSName CATALOG = getPDFName("Catalog"); - public static final COSName CCITTFAX_DECODE = getPDFName("CCITTFaxDecode"); - public static final COSName CCITTFAX_DECODE_ABBREVIATION = getPDFName("CCF"); - public static final COSName CENTER_WINDOW = getPDFName("CenterWindow"); - public static final COSName CERT = getPDFName("Cert"); - public static final COSName CERTS = getPDFName("Certs"); - public static final COSName CF = getPDFName("CF"); - public static final COSName CFM = getPDFName("CFM"); - //** Acro form field type for choice 
fields. - public static final COSName CH = getPDFName("Ch"); - public static final COSName CHAR_PROCS = getPDFName("CharProcs"); - public static final COSName CHAR_SET = getPDFName("CharSet"); - public static final COSName CHECK_SUM = getPDFName("CheckSum"); - public static final COSName CI = getPDFName("CI"); - public static final COSName CICI_SIGNIT = getPDFName("CICI.SignIt"); - public static final COSName CID_FONT_TYPE0 = getPDFName("CIDFontType0"); - public static final COSName CID_FONT_TYPE2 = getPDFName("CIDFontType2"); - public static final COSName CID_TO_GID_MAP = getPDFName("CIDToGIDMap"); - public static final COSName CID_SET = getPDFName("CIDSet"); - public static final COSName CIDSYSTEMINFO = getPDFName("CIDSystemInfo"); - public static final COSName CL = getPDFName("CL"); - public static final COSName CLASS_MAP = getPDFName("ClassMap"); - public static final COSName CLR_F = getPDFName("ClrF"); - public static final COSName CLR_FF = getPDFName("ClrFf"); - public static final COSName CMAP = getPDFName("CMap"); - public static final COSName CMAPNAME = getPDFName("CMapName"); - public static final COSName CMYK = getPDFName("CMYK"); - public static final COSName CO = getPDFName("CO"); - public static final COSName COLOR = getPDFName("Color"); - public static final COSName COLLECTION = getPDFName("Collection"); - public static final COSName COLLECTION_ITEM = getPDFName("CollectionItem"); - public static final COSName COLLECTION_FIELD = getPDFName("CollectionField"); - public static final COSName COLLECTION_SCHEMA = getPDFName("CollectionSchema"); - public static final COSName COLLECTION_SORT = getPDFName("CollectionSort"); - public static final COSName COLLECTION_SUBITEM = getPDFName("CollectionSubitem"); - public static final COSName COLOR_BURN = getPDFName("ColorBurn"); - public static final COSName COLOR_DODGE = getPDFName("ColorDodge"); - public static final COSName COLORANTS = getPDFName("Colorants"); - public static final COSName COLORS = 
getPDFName("Colors"); - public static final COSName COLORSPACE = getPDFName("ColorSpace"); - public static final COSName COLUMNS = getPDFName("Columns"); - public static final COSName COMPATIBLE = getPDFName("Compatible"); - public static final COSName COMPONENTS = getPDFName("Components"); - public static final COSName CONTACT_INFO = getPDFName("ContactInfo"); - public static final COSName CONTENTS = getPDFName("Contents"); - public static final COSName COORDS = getPDFName("Coords"); - public static final COSName COUNT = getPDFName("Count"); - public static final COSName CP = getPDFName("CP"); - public static final COSName CREATION_DATE = getPDFName("CreationDate"); - public static final COSName CREATOR = getPDFName("Creator"); - public static final COSName CRL = getPDFName("CRL"); - public static final COSName CRLS = getPDFName("CRLS"); - public static final COSName CROP_BOX = getPDFName("CropBox"); - public static final COSName CRYPT = getPDFName("Crypt"); - public static final COSName CS = getPDFName("CS"); - public static final COSName CYX = getPDFName("CYX"); - // D - public static final COSName D = getPDFName("D"); - public static final COSName DA = getPDFName("DA"); - public static final COSName DARKEN = getPDFName("Darken"); - public static final COSName DATE = getPDFName("Date"); - public static final COSName DCT_DECODE = getPDFName("DCTDecode"); - public static final COSName DCT_DECODE_ABBREVIATION = getPDFName("DCT"); - public static final COSName DECODE = getPDFName("Decode"); - public static final COSName DECODE_PARMS = getPDFName("DecodeParms"); - public static final COSName DEFAULT = getPDFName("default"); - public static final COSName DEFAULT_CMYK = getPDFName("DefaultCMYK"); - public static final COSName DEFAULT_CRYPT_FILTER = getPDFName("DefaultCryptFilter"); - public static final COSName DEFAULT_GRAY = getPDFName("DefaultGray"); - public static final COSName DEFAULT_RGB = getPDFName("DefaultRGB"); - public static final COSName DESC = 
getPDFName("Desc"); - public static final COSName DESCENDANT_FONTS = getPDFName("DescendantFonts"); - public static final COSName DESCENT = getPDFName("Descent"); - public static final COSName DEST = getPDFName("Dest"); - public static final COSName DEST_OUTPUT_PROFILE = getPDFName("DestOutputProfile"); - public static final COSName DESTS = getPDFName("Dests"); - public static final COSName DEVICECMYK = getPDFName("DeviceCMYK"); - public static final COSName DEVICEGRAY = getPDFName("DeviceGray"); - public static final COSName DEVICEN = getPDFName("DeviceN"); - public static final COSName DEVICERGB = getPDFName("DeviceRGB"); - public static final COSName DI = getPDFName("Di"); - public static final COSName DIFFERENCE = getPDFName("Difference"); - public static final COSName DIFFERENCES = getPDFName("Differences"); - public static final COSName DIGEST_METHOD = getPDFName("DigestMethod"); - public static final COSName DIGEST_RIPEMD160 = getPDFName("RIPEMD160"); - public static final COSName DIGEST_SHA1 = getPDFName("SHA1"); - public static final COSName DIGEST_SHA256 = getPDFName("SHA256"); - public static final COSName DIGEST_SHA384 = getPDFName("SHA384"); - public static final COSName DIGEST_SHA512 = getPDFName("SHA512"); - public static final COSName DIRECTION = getPDFName("Direction"); - public static final COSName DISPLAY_DOC_TITLE = getPDFName("DisplayDocTitle"); - public static final COSName DL = getPDFName("DL"); - public static final COSName DM = getPDFName("Dm"); - public static final COSName DOC = getPDFName("Doc"); - public static final COSName DOC_CHECKSUM = getPDFName("DocChecksum"); - public static final COSName DOC_TIME_STAMP = getPDFName("DocTimeStamp"); - public static final COSName DOCMDP = getPDFName("DocMDP"); - public static final COSName DOCUMENT = getPDFName("Document"); - public static final COSName DOMAIN = getPDFName("Domain"); - public static final COSName DOS = getPDFName("DOS"); - public static final COSName DP = getPDFName("DP"); - 
public static final COSName DR = getPDFName("DR"); - public static final COSName DS = getPDFName("DS"); - public static final COSName DSS = getPDFName("DSS"); - public static final COSName DUPLEX = getPDFName("Duplex"); - public static final COSName DUR = getPDFName("Dur"); - public static final COSName DV = getPDFName("DV"); - public static final COSName DW = getPDFName("DW"); - public static final COSName DW2 = getPDFName("DW2"); - // E - public static final COSName E = getPDFName("E"); - public static final COSName EARLY_CHANGE = getPDFName("EarlyChange"); - public static final COSName EF = getPDFName("EF"); - public static final COSName EMBEDDED_FDFS = getPDFName("EmbeddedFDFs"); - public static final COSName EMBEDDED_FILE = getPDFName("EmbeddedFile"); - public static final COSName EMBEDDED_FILES = getPDFName("EmbeddedFiles"); - public static final COSName EMPTY = getPDFName(""); - public static final COSName ENCODE = getPDFName("Encode"); - public static final COSName ENCODED_BYTE_ALIGN = getPDFName("EncodedByteAlign"); - public static final COSName ENCODING = getPDFName("Encoding"); - public static final COSName ENCODING_90MS_RKSJ_H = getPDFName("90ms-RKSJ-H"); - public static final COSName ENCODING_90MS_RKSJ_V = getPDFName("90ms-RKSJ-V"); - public static final COSName ENCODING_ETEN_B5_H = getPDFName("ETen-B5-H"); - public static final COSName ENCODING_ETEN_B5_V = getPDFName("ETen-B5-V"); - public static final COSName ENCRYPT = getPDFName("Encrypt"); - public static final COSName ENCRYPT_META_DATA = getPDFName("EncryptMetadata"); - public static final COSName ENCRYPTED_PAYLOAD = getPDFName("EncryptedPayload"); - public static final COSName END_OF_LINE = getPDFName("EndOfLine"); - public static final COSName ENTRUST_PPKEF = getPDFName("Entrust.PPKEF"); - public static final COSName EXCLUSION = getPDFName("Exclusion"); - public static final COSName EXTENSIONS = getPDFName("Extensions"); - public static final COSName EXTENSION_LEVEL = 
getPDFName("ExtensionLevel"); - public static final COSName EX_DATA = getPDFName("ExData"); - public static final COSName EXPORT = getPDFName("Export"); - public static final COSName EXPORT_STATE = getPDFName("ExportState"); - public static final COSName EXT_G_STATE = getPDFName("ExtGState"); - public static final COSName EXTEND = getPDFName("Extend"); - public static final COSName EXTENDS = getPDFName("Extends"); - // F - public static final COSName F = getPDFName("F"); - public static final COSName F_DECODE_PARMS = getPDFName("FDecodeParms"); - public static final COSName F_FILTER = getPDFName("FFilter"); - public static final COSName FB = getPDFName("FB"); - public static final COSName FDF = getPDFName("FDF"); - public static final COSName FF = getPDFName("Ff"); - public static final COSName FIELDS = getPDFName("Fields"); - public static final COSName FILESPEC = getPDFName("Filespec"); - public static final COSName FILTER = getPDFName("Filter"); - public static final COSName FIRST = getPDFName("First"); - public static final COSName FIRST_CHAR = getPDFName("FirstChar"); - public static final COSName FIT_WINDOW = getPDFName("FitWindow"); - public static final COSName FL = getPDFName("FL"); - public static final COSName FLAGS = getPDFName("Flags"); - public static final COSName FLATE_DECODE = getPDFName("FlateDecode"); - public static final COSName FLATE_DECODE_ABBREVIATION = getPDFName("Fl"); - public static final COSName FO = getPDFName("Fo"); - public static final COSName FOLDERS = getPDFName("Folders"); - public static final COSName FONT = getPDFName("Font"); - public static final COSName FONT_BBOX = getPDFName("FontBBox"); - public static final COSName FONT_DESC = getPDFName("FontDescriptor"); - public static final COSName FONT_FAMILY = getPDFName("FontFamily"); - public static final COSName FONT_FILE = getPDFName("FontFile"); - public static final COSName FONT_FILE2 = getPDFName("FontFile2"); - public static final COSName FONT_FILE3 = 
getPDFName("FontFile3"); - public static final COSName FONT_MATRIX = getPDFName("FontMatrix"); - public static final COSName FONT_NAME = getPDFName("FontName"); - public static final COSName FONT_STRETCH = getPDFName("FontStretch"); - public static final COSName FONT_WEIGHT = getPDFName("FontWeight"); - public static final COSName FORM = getPDFName("Form"); - public static final COSName FORMTYPE = getPDFName("FormType"); - public static final COSName FRM = getPDFName("FRM"); - public static final COSName FS = getPDFName("FS"); - public static final COSName FT = getPDFName("FT"); - public static final COSName FUNCTION = getPDFName("Function"); - public static final COSName FUNCTION_TYPE = getPDFName("FunctionType"); - public static final COSName FUNCTIONS = getPDFName("Functions"); - // G - public static final COSName G = getPDFName("G"); - public static final COSName GAMMA = getPDFName("Gamma"); - public static final COSName GROUP = getPDFName("Group"); - public static final COSName GTS_PDFA1 = getPDFName("GTS_PDFA1"); - // H - public static final COSName H = getPDFName("H"); - public static final COSName HARD_LIGHT = getPDFName("HardLight"); - public static final COSName HEIGHT = getPDFName("Height"); - public static final COSName HELV = getPDFName("Helv"); - public static final COSName HIDE_MENUBAR = getPDFName("HideMenubar"); - public static final COSName HIDE_TOOLBAR = getPDFName("HideToolbar"); - public static final COSName HIDE_WINDOWUI = getPDFName("HideWindowUI"); - public static final COSName HUE = getPDFName("Hue"); - // I - public static final COSName I = getPDFName("I"); - public static final COSName IC = getPDFName("IC"); - public static final COSName ICCBASED = getPDFName("ICCBased"); - public static final COSName ID = getPDFName("ID"); - public static final COSName ID_TREE = getPDFName("IDTree"); - public static final COSName IDENTITY = getPDFName("Identity"); - public static final COSName IDENTITY_H = getPDFName("Identity-H"); - public static final 
COSName IDENTITY_V = getPDFName("Identity-V"); - public static final COSName IF = getPDFName("IF"); - public static final COSName ILLUSTRATOR = getPDFName("Illustrator"); - public static final COSName IM = getPDFName("IM"); - public static final COSName IMAGE = getPDFName("Image"); - public static final COSName IMAGE_MASK = getPDFName("ImageMask"); - public static final COSName INDEX = getPDFName("Index"); - public static final COSName INDEXED = getPDFName("Indexed"); - public static final COSName INFO = getPDFName("Info"); - public static final COSName INKLIST = getPDFName("InkList"); - public static final COSName INTENT = getPDFName("Intent"); - public static final COSName INTERPOLATE = getPDFName("Interpolate"); - public static final COSName IRT = getPDFName("IRT"); - public static final COSName IT = getPDFName("IT"); - public static final COSName ITALIC_ANGLE = getPDFName("ItalicAngle"); - public static final COSName ISSUER = getPDFName("Issuer"); - public static final COSName IX = getPDFName("IX"); - - // J - public static final COSName JAVA_SCRIPT = getPDFName("JavaScript"); - public static final COSName JBIG2_DECODE = getPDFName("JBIG2Decode"); - public static final COSName JBIG2_GLOBALS = getPDFName("JBIG2Globals"); - public static final COSName JPX_DECODE = getPDFName("JPXDecode"); - public static final COSName JS = getPDFName("JS"); - // K - public static final COSName K = getPDFName("K"); - public static final COSName KEYWORDS = getPDFName("Keywords"); - public static final COSName KEY_USAGE = getPDFName("KeyUsage"); - public static final COSName KIDS = getPDFName("Kids"); - // L - public static final COSName L = getPDFName("L"); - public static final COSName LAB = getPDFName("Lab"); - public static final COSName LANG = getPDFName("Lang"); - public static final COSName LAST = getPDFName("Last"); - public static final COSName LAST_CHAR = getPDFName("LastChar"); - public static final COSName LAST_MODIFIED = getPDFName("LastModified"); - public static final 
COSName LC = getPDFName("LC"); - public static final COSName LE = getPDFName("LE"); - public static final COSName LEADING = getPDFName("Leading"); - public static final COSName LEGAL_ATTESTATION = getPDFName("LegalAttestation"); - public static final COSName LENGTH = getPDFName("Length"); - public static final COSName LENGTH1 = getPDFName("Length1"); - public static final COSName LENGTH2 = getPDFName("Length2"); - public static final COSName LENGTH3 = getPDFName("Length3"); - public static final COSName LIGHTEN = getPDFName("Lighten"); - public static final COSName LIMITS = getPDFName("Limits"); - public static final COSName LINEARIZED = getPDFName("Linearized"); - public static final COSName LINK = getPDFName("Link"); - public static final COSName LJ = getPDFName("LJ"); - public static final COSName LL = getPDFName("LL"); - public static final COSName LLE = getPDFName("LLE"); - public static final COSName LLO = getPDFName("LLO"); - public static final COSName LOCATION = getPDFName("Location"); - public static final COSName LUMINOSITY = getPDFName("Luminosity"); - public static final COSName LW = getPDFName("LW"); - public static final COSName LZW_DECODE = getPDFName("LZWDecode"); - public static final COSName LZW_DECODE_ABBREVIATION = getPDFName("LZW"); - // M - public static final COSName M = getPDFName("M"); - public static final COSName MAC = getPDFName("Mac"); - public static final COSName MAC_EXPERT_ENCODING = getPDFName("MacExpertEncoding"); - public static final COSName MAC_ROMAN_ENCODING = getPDFName("MacRomanEncoding"); - public static final COSName MARK_INFO = getPDFName("MarkInfo"); - public static final COSName MASK = getPDFName("Mask"); - public static final COSName MATRIX = getPDFName("Matrix"); - public static final COSName MATTE = getPDFName("Matte"); - public static final COSName MAX_LEN = getPDFName("MaxLen"); - public static final COSName MAX_WIDTH = getPDFName("MaxWidth"); - public static final COSName MCID = getPDFName("MCID"); - public static 
final COSName MDP = getPDFName("MDP"); - public static final COSName MEDIA_BOX = getPDFName("MediaBox"); - public static final COSName MEASURE = getPDFName("Measure"); - public static final COSName METADATA = getPDFName("Metadata"); - public static final COSName MISSING_WIDTH = getPDFName("MissingWidth"); - public static final COSName MIX = getPDFName("Mix"); - public static final COSName MK = getPDFName("MK"); - public static final COSName ML = getPDFName("ML"); - public static final COSName MM_TYPE1 = getPDFName("MMType1"); - public static final COSName MOD_DATE = getPDFName("ModDate"); - public static final COSName MULTIPLY = getPDFName("Multiply"); - // N - public static final COSName N = getPDFName("N"); - public static final COSName NAME = getPDFName("Name"); - public static final COSName NAMES = getPDFName("Names"); - public static final COSName NAVIGATOR = getPDFName("Navigator"); - public static final COSName NEED_APPEARANCES = getPDFName("NeedAppearances"); - public static final COSName NEW_WINDOW = getPDFName("NewWindow"); - public static final COSName NEXT = getPDFName("Next"); - public static final COSName NM = getPDFName("NM"); - public static final COSName NON_EFONT_NO_WARN = getPDFName("NonEFontNoWarn"); - public static final COSName NON_FULL_SCREEN_PAGE_MODE = getPDFName("NonFullScreenPageMode"); - public static final COSName NONE = getPDFName("None"); - public static final COSName NORMAL = getPDFName("Normal"); - public static final COSName NUMS = getPDFName("Nums"); - // O - public static final COSName O = getPDFName("O"); - public static final COSName OBJ = getPDFName("Obj"); - public static final COSName OBJR = getPDFName("OBJR"); - public static final COSName OBJ_STM = getPDFName("ObjStm"); - public static final COSName OC = getPDFName("OC"); - public static final COSName OCG = getPDFName("OCG"); - public static final COSName OCGS = getPDFName("OCGs"); - public static final COSName OCMD = getPDFName("OCMD"); - public static final COSName 
OCPROPERTIES = getPDFName("OCProperties"); - public static final COSName OCSP = getPDFName("OCSP"); - public static final COSName OCSPS = getPDFName("OCSPs"); - public static final COSName OE = getPDFName("OE"); - public static final COSName OID = getPDFName("OID"); - - /** - * "OFF", to be used for OCGs, not for Acroform - */ - public static final COSName OFF = getPDFName("OFF"); - - /** - * "Off", to be used for Acroform, not for OCGs - */ - public static final COSName Off = getPDFName("Off"); - - public static final COSName ON = getPDFName("ON"); - public static final COSName OP = getPDFName("OP"); - public static final COSName OP_NS = getPDFName("op"); - public static final COSName OPEN_ACTION = getPDFName("OpenAction"); - public static final COSName OPEN_TYPE = getPDFName("OpenType"); - public static final COSName OPM = getPDFName("OPM"); - public static final COSName OPT = getPDFName("Opt"); - public static final COSName ORDER = getPDFName("Order"); - public static final COSName ORDERING = getPDFName("Ordering"); - public static final COSName OS = getPDFName("OS"); - public static final COSName OUTLINES = getPDFName("Outlines"); - public static final COSName OUTPUT_CONDITION = getPDFName("OutputCondition"); - public static final COSName OUTPUT_CONDITION_IDENTIFIER = getPDFName( - "OutputConditionIdentifier"); - public static final COSName OUTPUT_INTENT = getPDFName("OutputIntent"); - public static final COSName OUTPUT_INTENTS = getPDFName("OutputIntents"); - public static final COSName OVERLAY = getPDFName("Overlay"); - // P - public static final COSName P = getPDFName("P"); - public static final COSName PA = getPDFName("PA"); - public static final COSName PAGE = getPDFName("Page"); - public static final COSName PAGE_LABELS = getPDFName("PageLabels"); - public static final COSName PAGE_LAYOUT = getPDFName("PageLayout"); - public static final COSName PAGE_MODE = getPDFName("PageMode"); - public static final COSName PAGES = getPDFName("Pages"); - public static 
final COSName PAINT_TYPE = getPDFName("PaintType"); - public static final COSName PANOSE = getPDFName("Panose"); - public static final COSName PARAMS = getPDFName("Params"); - public static final COSName PARENT = getPDFName("Parent"); - public static final COSName PARENT_TREE = getPDFName("ParentTree"); - public static final COSName PARENT_TREE_NEXT_KEY = getPDFName("ParentTreeNextKey"); - public static final COSName PART = getPDFName("Part"); - public static final COSName PATH = getPDFName("Path"); - public static final COSName PATTERN = getPDFName("Pattern"); - public static final COSName PATTERN_TYPE = getPDFName("PatternType"); - public static final COSName PC = getPDFName("PC"); - public static final COSName PDF_DOC_ENCODING = getPDFName("PDFDocEncoding"); - public static final COSName PERMS = getPDFName("Perms"); - public static final COSName PERCEPTUAL = getPDFName("Perceptual"); - public static final COSName PIECE_INFO = getPDFName("PieceInfo"); - public static final COSName PG = getPDFName("Pg"); - public static final COSName PI = getPDFName("PI"); - public static final COSName PO = getPDFName("PO"); - public static final COSName POPUP = getPDFName("Popup"); - public static final COSName PRE_RELEASE = getPDFName("PreRelease"); - public static final COSName PREDICTOR = getPDFName("Predictor"); - public static final COSName PREV = getPDFName("Prev"); - public static final COSName PRINT = getPDFName("Print"); - public static final COSName PRINT_AREA = getPDFName("PrintArea"); - public static final COSName PRINT_CLIP = getPDFName("PrintClip"); - public static final COSName PRINT_SCALING = getPDFName("PrintScaling"); - public static final COSName PRINT_STATE = getPDFName("PrintState"); - public static final COSName PRIVATE = getPDFName("Private"); - public static final COSName PROC_SET = getPDFName("ProcSet"); - public static final COSName PROCESS = getPDFName("Process"); - public static final COSName PRODUCER = getPDFName("Producer"); - public static final 
COSName PROP_BUILD = getPDFName("Prop_Build"); - public static final COSName PROPERTIES = getPDFName("Properties"); - public static final COSName PS = getPDFName("PS"); - public static final COSName PUB_SEC = getPDFName("PubSec"); - public static final COSName PV = getPDFName("PV"); - // Q - public static final COSName Q = getPDFName("Q"); - public static final COSName QUADPOINTS = getPDFName("QuadPoints"); - // R - public static final COSName R = getPDFName("R"); - public static final COSName RANGE = getPDFName("Range"); - public static final COSName RC = getPDFName("RC"); - public static final COSName RD = getPDFName("RD"); - public static final COSName REASON = getPDFName("Reason"); - public static final COSName REASONS = getPDFName("Reasons"); - public static final COSName RECIPIENTS = getPDFName("Recipients"); - public static final COSName RECT = getPDFName("Rect"); - public static final COSName REFERENCE = getPDFName("Reference"); - public static final COSName REGISTRY = getPDFName("Registry"); - public static final COSName REGISTRY_NAME = getPDFName("RegistryName"); - public static final COSName RELATIVE_COLORIMETRIC = getPDFName("RelativeColorimetric"); - public static final COSName RENAME = getPDFName("Rename"); - public static final COSName REPEAT = getPDFName("Repeat"); - public static final COSName RES_FORK = getPDFName("ResFork"); - public static final COSName RESOURCES = getPDFName("Resources"); - public static final COSName RGB = getPDFName("RGB"); - public static final COSName RI = getPDFName("RI"); - public static final COSName ROLE_MAP = getPDFName("RoleMap"); - public static final COSName ROOT = getPDFName("Root"); - public static final COSName ROTATE = getPDFName("Rotate"); - public static final COSName ROWS = getPDFName("Rows"); - public static final COSName RT = getPDFName("RT"); - public static final COSName RUN_LENGTH_DECODE = getPDFName("RunLengthDecode"); - public static final COSName RUN_LENGTH_DECODE_ABBREVIATION = getPDFName("RL"); - 
public static final COSName RV = getPDFName("RV"); - // S - public static final COSName S = getPDFName("S"); - public static final COSName SA = getPDFName("SA"); - public static final COSName SATURATION = getPDFName("Saturation"); - public static final COSName SCHEMA = getPDFName("Schema"); - public static final COSName SCREEN = getPDFName("Screen"); - public static final COSName SE = getPDFName("SE"); - public static final COSName SEPARATION = getPDFName("Separation"); - public static final COSName SET_F = getPDFName("SetF"); - public static final COSName SET_FF = getPDFName("SetFf"); - public static final COSName SHADING = getPDFName("Shading"); - public static final COSName SHADING_TYPE = getPDFName("ShadingType"); - public static final COSName SIG = getPDFName("Sig"); - public static final COSName SIG_FLAGS = getPDFName("SigFlags"); - public static final COSName SIG_REF = getPDFName("SigRef"); - public static final COSName SIZE = getPDFName("Size"); - public static final COSName SM = getPDFName("SM"); - public static final COSName SMASK = getPDFName("SMask"); - public static final COSName SMASK_IN_DATA = getPDFName("SMaskInData"); - public static final COSName SOFT_LIGHT = getPDFName("SoftLight"); - public static final COSName SORT = getPDFName("Sort"); - public static final COSName SOUND = getPDFName("Sound"); - public static final COSName SPLIT = getPDFName("Split"); - public static final COSName SS = getPDFName("SS"); - public static final COSName ST = getPDFName("St"); - public static final COSName STANDARD_ENCODING = getPDFName("StandardEncoding"); - public static final COSName STATE = getPDFName("State"); - public static final COSName STATE_MODEL = getPDFName("StateModel"); - public static final COSName STATUS = getPDFName("Status"); - public static final COSName STD_CF = getPDFName("StdCF"); - public static final COSName STEM_H = getPDFName("StemH"); - public static final COSName STEM_V = getPDFName("StemV"); - public static final COSName STM_F = 
getPDFName("StmF"); - public static final COSName STR_F = getPDFName("StrF"); - public static final COSName STRUCT_ELEM = getPDFName("StructElem"); - public static final COSName STRUCT_PARENT = getPDFName("StructParent"); - public static final COSName STRUCT_PARENTS = getPDFName("StructParents"); - public static final COSName STRUCT_TREE_ROOT = getPDFName("StructTreeRoot"); - public static final COSName STYLE = getPDFName("Style"); - public static final COSName SUB_FILTER = getPDFName("SubFilter"); - public static final COSName SUBJ = getPDFName("Subj"); - public static final COSName SUBJECT = getPDFName("Subject"); - public static final COSName SUBJECT_DN = getPDFName("SubjectDN"); - public static final COSName SUBTYPE = getPDFName("Subtype"); - public static final COSName SUPPLEMENT = getPDFName("Supplement"); - public static final COSName SV = getPDFName("SV"); - public static final COSName SV_CERT = getPDFName("SVCert"); - public static final COSName SW = getPDFName("SW"); - public static final COSName SY = getPDFName("Sy"); - public static final COSName SYNCHRONOUS = getPDFName("Synchronous"); - // T - public static final COSName T = getPDFName("T"); - public static final COSName TARGET = getPDFName("Target"); - public static final COSName TEMPLATES = getPDFName("Templates"); - public static final COSName THREAD = getPDFName("Thread"); - public static final COSName THREADS = getPDFName("Threads"); - public static final COSName THREE_DD = getPDFName("3DD"); - public static final COSName THUMB = getPDFName("Thumb"); - public static final COSName TI = getPDFName("TI"); - public static final COSName TILING_TYPE = getPDFName("TilingType"); - public static final COSName TIME_STAMP = getPDFName("TimeStamp"); - public static final COSName TITLE = getPDFName("Title"); - public static final COSName TK = getPDFName("TK"); - public static final COSName TM = getPDFName("TM"); - public static final COSName TO_UNICODE = getPDFName("ToUnicode"); - public static final COSName 
TR = getPDFName("TR"); - public static final COSName TR2 = getPDFName("TR2"); - public static final COSName TRAPPED = getPDFName("Trapped"); - public static final COSName TRANS = getPDFName("Trans"); - public static final COSName TRANSFORM_METHOD = getPDFName("TransformMethod"); - public static final COSName TRANSFORM_PARAMS = getPDFName("TransformParams"); - public static final COSName TRANSPARENCY = getPDFName("Transparency"); - public static final COSName TREF = getPDFName("TRef"); - public static final COSName TRIM_BOX = getPDFName("TrimBox"); - public static final COSName TRUE_TYPE = getPDFName("TrueType"); - public static final COSName TRUSTED_MODE = getPDFName("TrustedMode"); - public static final COSName TU = getPDFName("TU"); - /** Acro form field type for text field. */ - public static final COSName TX = getPDFName("Tx"); - public static final COSName TYPE = getPDFName("Type"); - public static final COSName TYPE0 = getPDFName("Type0"); - public static final COSName TYPE1 = getPDFName("Type1"); - public static final COSName TYPE3 = getPDFName("Type3"); - // U - public static final COSName U = getPDFName("U"); - public static final COSName UE = getPDFName("UE"); - public static final COSName UF = getPDFName("UF"); - public static final COSName UNCHANGED = getPDFName("Unchanged"); - public static final COSName UNIX = getPDFName("Unix"); - public static final COSName URI = getPDFName("URI"); - public static final COSName URL = getPDFName("URL"); - public static final COSName URL_TYPE = getPDFName("URLType"); - public static final COSName USAGE = getPDFName("Usage"); - public static final COSName USE_CMAP = getPDFName("UseCMap"); - public static final COSName USER_UNIT = getPDFName("UserUnit"); - // V - public static final COSName V = getPDFName("V"); - public static final COSName VE = getPDFName("VE"); - public static final COSName VERISIGN_PPKVS = getPDFName("VeriSign.PPKVS"); - public static final COSName VERSION = getPDFName("Version"); - public static 
final COSName VERTICES = getPDFName("Vertices"); - public static final COSName VERTICES_PER_ROW = getPDFName("VerticesPerRow"); - public static final COSName VIEW = getPDFName("View"); - public static final COSName VIEW_AREA = getPDFName("ViewArea"); - public static final COSName VIEW_CLIP = getPDFName("ViewClip"); - public static final COSName VIEW_STATE = getPDFName("ViewState"); - public static final COSName VIEWER_PREFERENCES = getPDFName("ViewerPreferences"); - public static final COSName VOLUME = getPDFName("Volume"); - public static final COSName VP = getPDFName("VP"); - public static final COSName VRI = getPDFName("VRI"); - // W - public static final COSName W = getPDFName("W"); - public static final COSName W2 = getPDFName("W2"); - public static final COSName WC = getPDFName("WC"); - public static final COSName WHITE_POINT = getPDFName("WhitePoint"); - public static final COSName WIDGET = getPDFName("Widget"); - public static final COSName WIDTH = getPDFName("Width"); - public static final COSName WIDTHS = getPDFName("Widths"); - public static final COSName WIN = getPDFName("Win"); - public static final COSName WIN_ANSI_ENCODING = getPDFName("WinAnsiEncoding"); - public static final COSName WMODE = getPDFName("WMode"); - public static final COSName WP = getPDFName("WP"); - public static final COSName WS = getPDFName("WS"); - // X - public static final COSName X = getPDFName("X"); - public static final COSName XFA = getPDFName("XFA"); - public static final COSName X_STEP = getPDFName("XStep"); - public static final COSName XHEIGHT = getPDFName("XHeight"); - public static final COSName XOBJECT = getPDFName("XObject"); - public static final COSName XREF = getPDFName("XRef"); - public static final COSName XREF_STM = getPDFName("XRefStm"); - // Y - public static final COSName Y = getPDFName("Y"); - public static final COSName Y_STEP = getPDFName("YStep"); - public static final COSName YES = getPDFName("Yes"); - - // Z - public static final COSName ZA_DB = 
getPDFName("ZaDb"); - - // fields - private final String name; - - /** - * This will get a COSName object with that name. - * - * @param aName The name of the object. - * - * @return A COSName with the specified name. - */ - public static COSName getPDFName(String aName) - { - WeakReference weakRef = NAME_MAP.get(aName); - COSName name = weakRef != null ? weakRef.get() : null; - - if (name == null) - { - // Although we use a ConcurrentHashMap, we cannot use computeIfAbsent() because the returned reference - // might be stale (even the newly created one). - // Use double checked locking to make the code thread safe. - synchronized (NAME_MAP) - { - weakRef = NAME_MAP.get(aName); - name = weakRef != null ? weakRef.get() : null; - if (name == null) - { - name = new COSName(aName); - CLEANER.register(name, () -> NAME_MAP.remove(aName)); - NAME_MAP.put(aName, new WeakReference<>(name)); - } - } - } - - return name; - } - - /** - * Private constructor. This will limit the number of COSName objects that are created. - * - * @param aName The name of the COSName object. - */ - private COSName(String aName) - { - this.name = aName; - } - - /** - * This will get the name of this COSName object. - * - * @return The name of the object. - */ - public String getName() - { - return name; - } - - @Override - public String toString() - { - return "COSName{" + name + "}"; - } - - @Override - public boolean equals(Object object) - { - return object instanceof COSName && name.equals(((COSName) object).name); - } - - @Override - public int hashCode() - { - return name.hashCode(); - } - - @Override - public int compareTo(COSName other) - { - return name.compareTo(other.name); - } - - /** - * Returns true if the name is the empty string. - * @return true if the name is the empty string. 
- */ - public boolean isEmpty() - { - return name.isEmpty(); - } - - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromName(this); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java deleted file mode 100644 index 55bdf2733a5..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNull.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * This class represents a null PDF object. - * - * @author Ben Litchfield - */ -public final class COSNull extends COSBase -{ - /** - * The null token. - */ - public static final byte[] NULL_BYTES = {110, 117, 108, 108}; //"null".getBytes( "ISO-8859-1" ); - - /** - * The one null object in the system. - */ - public static final COSNull NULL = new COSNull(); - - /** - * Constructor. - */ - private COSNull() - { - //limit creation to one instance. - } - - /** - * Visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. 
- */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromNull(this); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSNull{}"; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java deleted file mode 100644 index 97d4fcbe894..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSNumber.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * This class represents an abstract number in a PDF document. - * - * @author Ben Litchfield - */ -public abstract class COSNumber extends COSBase -{ - /** - * This will get the float value of this number. - * - * @return The float value of this object. - */ - public abstract float floatValue(); - - /** - * This will get the integer value of this number. - * - * @return The integer value of this number. - */ - public abstract int intValue(); - - /** - * This will get the long value of this number. - * - * @return The long value of this number. 
- */ - public abstract long longValue(); - - /** - * This factory method will get the appropriate number object. - * - * @param number The string representation of the number. - * - * @return A number object, either float or int. - * - * @throws IOException If the string is not a number. - */ - public static COSNumber get( String number ) throws IOException - { - if (number.length() == 1) - { - char digit = number.charAt(0); - if ('0' <= digit && digit <= '9') - { - return COSInteger.get((long) digit - '0'); - } - if (digit == '-' || digit == '.') - { - // See https://issues.apache.org/jira/browse/PDFBOX-592 - return COSInteger.ZERO; - } - throw new IOException("Not a number: " + number); - } - if (isFloat(number)) - { - return new COSFloat(number); - } - try - { - return COSInteger.get(Long.parseLong(number)); - } - catch (NumberFormatException e) - { - // check if the given string could be a number at all - String numberString = number.startsWith("+") || number.startsWith("-") - ? number.substring(1) : number; - if (!numberString.matches("[0-9]*")) - { - throw new IOException("Not a number: " + number); - } - // return a limited COSInteger value which is marked as invalid - return number.startsWith("-") ? COSInteger.OUT_OF_RANGE_MIN - : COSInteger.OUT_OF_RANGE_MAX; - } - } - - private static boolean isFloat( String number ) - { - int length = number.length(); - for (int i = 0; i < length; i++) - { - char digit = number.charAt(i); - if (digit == '.' || digit == 'e') - { - return true; - } - } - return false; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java deleted file mode 100644 index 9ad4fd90d25..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObject.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.IOException; - -/** - * This class represents a PDF object. - * - * @author Ben Litchfield - * - */ -public class COSObject extends COSBase implements COSUpdateInfo -{ - private COSBase baseObject; - private ICOSParser parser; - private boolean isDereferenced = false; - private final COSUpdateState updateState; - - private static final Logger LOG = LogManager.getLogger(COSObject.class); - - /** - * Constructor. - * - * @param object The object that this encapsulates. - * - */ - public COSObject(COSBase object) - { - updateState = new COSUpdateState(this); - baseObject = object; - isDereferenced = true; - direct = false; - } - - /** - * Constructor. - * - * @param object The object that this encapsulates. - * @param objectKey The COSObjectKey of the encapsulated object - */ - public COSObject(COSBase object, COSObjectKey objectKey) - { - this(objectKey, null); - baseObject = object; - isDereferenced = true; - direct = false; - } - - /** - * Constructor. - * - * @param object The object that this encapsulates. 
- * @param parser The parser to be used to load the object on demand - * - */ - public COSObject(COSBase object, ICOSParser parser) - { - updateState = new COSUpdateState(this); - baseObject = object; - isDereferenced = object != null; - this.parser = parser; - direct = false; - } - - /** - * Constructor. - * - * @param key The object number of the encapsulated object. - * @param parser The parser to be used to load the object on demand - * - */ - public COSObject(COSObjectKey key, ICOSParser parser) - { - updateState = new COSUpdateState(this); - this.parser = parser; - setKey(key); - direct = false; - } - - /** - * Indicates if the referenced object is present or not. - * - * @return true if the indirect object is dereferenced - */ - public boolean isObjectNull() - { - return baseObject == null || baseObject.equals( COSNull.NULL ); - } - - /** - * Proxy objects can never be direct, so direct is always false - * @param direct ignored - */ - @Override - public void setDirect(boolean direct) - { - this.direct = false; - } - - /** - * This will get the object that this object encapsulates. - * - * @return The encapsulated object. - */ - public COSBase getObject() - { - if (!isDereferenced && parser != null) - { - try - { - // mark as dereferenced to avoid endless recursions - isDereferenced = true; - baseObject = parser.dereferenceCOSObject(this); - getUpdateState().dereferenceChild(baseObject); - } - catch (IOException e) - { - LOG.error(() -> "Can't dereference " + this, e); - } - finally - { - parser = null; - } - } - return baseObject; - } - - /** - * Sets the referenced object to COSNull and removes the initially assigned parser. - */ - public final void setToNull() - { - if(baseObject != null) - { - getUpdateState().update(); - } - baseObject = COSNull.NULL; - parser = null; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "COSObject{" + getKey() + "}"; - } - - /** - * visitor pattern double dispatch method. 
- * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. - */ - @Override - public void accept( ICOSVisitor visitor ) throws IOException - { - COSBase object = getObject(); - if (object != null) - { - object.accept(visitor); - } - else - { - COSNull.NULL.accept(visitor); - } - } - - /** - * Returns {@code true}, if the hereby referenced {@link COSBase} has already been parsed and loaded. - * - * @return {@code true}, if the hereby referenced {@link COSBase} has already been parsed and loaded. - */ - public boolean isDereferenced() - { - return isDereferenced; - } - - /** - * Returns the current {@link COSUpdateState} of this {@link COSObject}. - * - * @return The current {@link COSUpdateState} of this {@link COSObject}. - * @see COSUpdateState - */ - @Override - public COSUpdateState getUpdateState() - { - return updateState; - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java deleted file mode 100644 index a9465095c11..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectGetter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -/** - * This is an interface used to get/create the underlying COSObject. - * - * @author Ben Litchfield - */ -public interface COSObjectGetter -{ - /** - * Convert this standard java object to a COS object. - * - * @return The cos object that matches this Java object. - */ - COSBase getCOSObject(); -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java deleted file mode 100644 index 73238534a44..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSObjectKey.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -/** - * Object representing the physical reference to an indirect pdf object. 
- * - * @author Michael Traut - * - */ -public final class COSObjectKey implements Comparable -{ - private static final int NUMBER_OFFSET = Short.SIZE; - private static final long GENERATION_MASK = (long) Math.pow(2, NUMBER_OFFSET) - 1; - // combined number and generation - // The lowest 16 bits hold the generation 0-65535 - // The rest is used for the number (even though 34 bit are sufficient for 10 digits) - private final long numberAndGeneration; - // index within a compressed object stream if applicable otherwise -1 - private final int streamIndex; - - /** - * Constructor. - * - * @param num The object number. - * @param gen The object generation number. - */ - public COSObjectKey(long num, int gen) - { - this(num, gen, -1); - } - - /** - * Constructor. - * - * @param num The object number. - * @param gen The object generation number. - * @param index The index within a compressed object stream - */ - public COSObjectKey(long num, int gen, int index) - { - if (num < 0) - { - throw new IllegalArgumentException("Object number must not be a negative value"); - } - if (gen < 0) - { - throw new IllegalArgumentException("Generation number must not be a negative value"); - } - numberAndGeneration = computeInternalHash(num, gen); - this.streamIndex = index; - } - - /** - * Calculate the internal hash value for the given object number and generation number. - * - * @param num the object number - * @param gen the generation number - * @return the internal hash for the given values - */ - public static final long computeInternalHash(long num, int gen) - { - return num << NUMBER_OFFSET | (gen & GENERATION_MASK); - } - - /** - * Return the internal hash value which is based on the number and the generation. - * - * @return the internal hash value - */ - public long getInternalHash() - { - return numberAndGeneration; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - COSObjectKey objToBeCompared = obj instanceof COSObjectKey ? 
(COSObjectKey)obj : null; - return objToBeCompared != null - && objToBeCompared.numberAndGeneration == numberAndGeneration; - } - - /** - * This will get the object generation number. - * - * @return The object generation number. - */ - public int getGeneration() - { - return (int) (numberAndGeneration & GENERATION_MASK); - } - - /** - * This will get the object number. - * - * @return The object number. - */ - public long getNumber() - { - return numberAndGeneration >>> NUMBER_OFFSET; - } - - /** - * The index within a compressed object stream. - * - * @return the index within a compressed object stream if applicable otherwise -1 - */ - public int getStreamIndex() - { - return streamIndex; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return Long.hashCode(numberAndGeneration); - } - - @Override - public String toString() - { - return getNumber() + " " + getGeneration() + " R"; - } - - @Override - public int compareTo(COSObjectKey other) - { - return Long.compare(numberAndGeneration, other.numberAndGeneration); - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java deleted file mode 100644 index 5af35433c27..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSStream.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.FilterOutputStream; -import java.io.IOException; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.Closeable; -import java.io.InputStream; -import java.io.OutputStream; - -import java.util.ArrayList; -import java.util.List; -import org.apache.pdfbox.cos.filter.COSInputStream; -// import org.apache.pdfbox.cos.filter.COSOutputStream; -import org.apache.pdfbox.cos.filter.COSOutputStream; -import org.apache.pdfbox.cos.filter.DecodeOptions; -import org.apache.pdfbox.cos.filter.Filter; -import org.apache.pdfbox.cos.filter.FilterFactory; - -import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.RandomAccess; -import org.apache.pdfbox.io.RandomAccessInputStream; -import org.apache.pdfbox.io.RandomAccessOutputStream; -import org.apache.pdfbox.io.RandomAccessRead; -import org.apache.pdfbox.io.RandomAccessReadBuffer; -import org.apache.pdfbox.io.RandomAccessStreamCache; -import org.apache.pdfbox.io.RandomAccessReadView; - -/** - * This class represents a stream object in a PDF document. This is not an IO - * stream that can be written to and read from, but a COSStream as defined by - * the PDF specification which consists of a dictionary followed by zero or more - * bytes of data that must be decoded before use. - * - * The stream dictionary has a required "length" entry that indicates how many bytes - * are in the stream. This class holds a backing store which is the true IO stream - * for decoding or encoding the byte data. 
- * - * @author Ben Litchfield - */ -public class COSStream extends COSDictionary implements Closeable -{ - // backing store, in-memory or on-disk - private RandomAccess randomAccess; - // used as a temp buffer when creating a new stream - private RandomAccessStreamCache streamCache; - // indicates if the stream cache was created within this COSStream instance - private boolean closeStreamCache = false; - // true if there's an open OutputStream - private boolean isWriting; - // random access view to be read from - private RandomAccessReadView randomAccessReadView; - - private static final Logger LOG = LogManager.getLogger(COSStream.class); - - /** - * Creates a new stream with an empty dictionary. - *

- * Try to avoid using this constructor because it creates a new scratch file in memory. Instead, - * use {@link COSDocument#createCOSStream() document.getDocument().createCOSStream()} which will - * use the existing scratch file (in memory or in temp file) of the document. - *

- */ - public COSStream() - { - this(null); - } - - /** - * Creates a new stream with an empty dictionary. Data is stored in the given scratch file. - * - * @param streamCache Stream cache for writing stream data. - */ - public COSStream(RandomAccessStreamCache streamCache) - { - setInt(COSName.LENGTH, 0); - this.streamCache = streamCache; - } - - /** - * Creates a new stream with an empty dictionary. Data is read from the given random accessview. Written data is - * stored in the given scratch file. - * - * @param streamCache Stream cache for writing stream data. - * @param randomAccessReadView source for the data to be read - * @throws IOException if the length of the random access view isn't available - */ - public COSStream(RandomAccessStreamCache streamCache, RandomAccessReadView randomAccessReadView) - throws IOException - { - this(streamCache); - this.randomAccessReadView = randomAccessReadView; - setInt(COSName.LENGTH, (int) randomAccessReadView.length()); - } - - /** - * Throws if the random access backing store has been closed. Helpful for catching cases where - * a user tries to use a COSStream which has outlived its COSDocument. - */ - private void checkClosed() throws IOException - { - if (randomAccess != null && randomAccess.isClosed()) - { - throw new IOException("COSStream has been closed and cannot be read. " + - "Perhaps its enclosing PDDocument has been closed?"); - // Tip for debugging: look at the destination file with an editor, you'll see an - // incomplete stream at the bottom. - } - } - - private RandomAccessStreamCache getStreamCache() throws IOException - { - if (streamCache == null) - { - streamCache = IOUtils.createMemoryOnlyStreamCache().create(); - closeStreamCache = true; - } - return streamCache; - } - - /** - * Returns a new InputStream which reads the encoded PDF stream data. Experts only! - * - * @return InputStream containing raw, encoded PDF stream data. - * @throws IOException If the stream could not be read. 
- */ - public InputStream createRawInputStream() throws IOException - { - checkClosed(); - if (isWriting) - { - throw new IllegalStateException("Cannot read while there is an open stream writer"); - } - if (randomAccess == null) - { - if (randomAccessReadView != null) - { - randomAccessReadView.seek(0); - return new RandomAccessInputStream( randomAccessReadView); - } - else - { - throw new IOException( - "Create InputStream called without data being written before to stream."); - } - } - else - { - return new RandomAccessInputStream(randomAccess); - } - } - - /** - * TODO: fix so that this is in a filter class, using this object as input. - * - * Returns a new InputStream which reads the decoded stream data. - * - * @return InputStream containing decoded stream data. - * @throws IOException If the stream could not be read. - */ - public COSInputStream createInputStream() throws IOException - { - return createInputStream( DecodeOptions.DEFAULT); - } - - public COSInputStream createInputStream(DecodeOptions options) throws IOException - { - InputStream input = createRawInputStream(); - return COSInputStream.create(getFilterList(), this, input, options); - } - - /** - * Returns a new RandomAccessRead which reads the decoded stream data. - * - * @return RandomAccessRead containing decoded stream data. - * @throws IOException If the stream could not be read. - */ - public RandomAccessRead createView() throws IOException - { - List filterList = getFilterList(); - if (filterList.isEmpty()) - { - if (randomAccess == null && randomAccessReadView != null) - { - return new RandomAccessReadView(randomAccessReadView, 0, - randomAccessReadView.length()); - } - else - { - return new RandomAccessReadBuffer( createRawInputStream()); - } - } - return Filter.decode(createRawInputStream(), filterList, this, DecodeOptions.DEFAULT, null); - } - - /** - * Returns a new OutputStream for writing stream data, using the current filters. 
- * - * @return OutputStream for un-encoded stream data. - * @throws IOException If the output stream could not be created. - */ - public OutputStream createOutputStream() throws IOException - { - return createOutputStream(null); - } - - /** - * Returns a new OutputStream for writing stream data, using and the given filters. - * - * @param filters COSArray or COSName of filters to be used. - * @return OutputStream for un-encoded stream data. - * @throws IOException If the output stream could not be created. - */ - public OutputStream createOutputStream(COSBase filters) throws IOException - { - checkClosed(); - if (isWriting) - { - throw new IllegalStateException("Cannot have more than one open stream writer."); - } - // apply filters, if any - if (filters != null) - { - setItem(COSName.FILTER, filters); - } - if (randomAccess != null) - randomAccess.clear(); - else - randomAccess = getStreamCache().createBuffer(); - OutputStream randomOut = new RandomAccessOutputStream(randomAccess); - OutputStream cosOut = new COSOutputStream( getFilterList(), this, randomOut, - getStreamCache()); - isWriting = true; - return new FilterOutputStream(cosOut) - { - @Override - public void write(byte[] b, int off, int len) throws IOException - { - this.out.write(b, off, len); - } - - @Override - public void close() throws IOException - { - super.close(); - setInt(COSName.LENGTH, (int)randomAccess.length()); - isWriting = false; - } - }; - } - - /** - * Returns a new OutputStream for writing encoded PDF data. Experts only! - * - * @return OutputStream for raw PDF stream data. - * @throws IOException If the output stream could not be created. 
- */ - public OutputStream createRawOutputStream() throws IOException - { - checkClosed(); - if (isWriting) - { - throw new IllegalStateException("Cannot have more than one open stream writer."); - } - if (randomAccess != null) - randomAccess.clear(); - else - randomAccess = getStreamCache().createBuffer(); - OutputStream out = new RandomAccessOutputStream(randomAccess); - isWriting = true; - return new FilterOutputStream(out) - { - @Override - public void write(byte[] b, int off, int len) throws IOException - { - this.out.write(b, off, len); - } - - @Override - public void close() throws IOException - { - super.close(); - setInt(COSName.LENGTH, (int)randomAccess.length()); - isWriting = false; - } - }; - } - - /** - * Returns the list of filters. - */ - public List getFilterList() throws IOException - { - List filterList; - COSBase filters = getFilters(); - if (filters instanceof COSName) - { - filterList = new ArrayList<>(1); - filterList.add(FilterFactory.INSTANCE.getFilter((COSName)filters)); - } - else if (filters instanceof COSArray) - { - COSArray filterArray = (COSArray)filters; - filterList = new ArrayList<>(filterArray.size()); - for (int i = 0; i < filterArray.size(); i++) - { - COSBase base = filterArray.get(i); - if (!(base instanceof COSName)) - { - throw new IOException("Forbidden type in filter array: " + - (base == null ? "null" : base.getClass().getName())); - } - filterList.add(FilterFactory.INSTANCE.getFilter((COSName) base)); - } - } - else - { - filterList = new ArrayList<>(); - } - return filterList; - } - - /** - * Returns the length of the encoded stream. - * - * @return length in bytes - */ - public long getLength() - { - if (isWriting) - { - throw new IllegalStateException("There is an open OutputStream associated with this " + - "COSStream. It must be closed before querying the " + - "length of this COSStream."); - } - return getInt(COSName.LENGTH, 0); - } - - /** - * This will return the filters to apply to the byte stream. 
- * The method will return - *
    - *
  • null if no filters are to be applied - *
  • a COSName if one filter is to be applied - *
  • a COSArray containing COSNames if multiple filters are to be applied - *
- * - * @return the COSBase object representing the filters - */ - public COSBase getFilters() - { - return getObjectFromDictionary( COSName.FILTER); - } - - /** - * Returns the contents of the stream as a PDF "text string". - * - * @return the PDF string representation of the stream content - */ - public String toTextString() - { - try (InputStream input = COSInputStream.create(getFilterList(), this, createRawInputStream() )) - { - byte[] array = input.readAllBytes(); - COSString string = new COSString( array); - return string.getString(); - } - catch (IOException e) - { - LOG.debug("An exception occurred trying to get the content - returning empty string instead", e); - return ""; - } - } - - @Override - public void accept( ICOSVisitor visitor) throws IOException - { - visitor.visitFromStream(this); - } - - /** - * {@inheritDoc} - * - * Called by PDFBox when the PDDocument is closed, this closes the stream and removes the data. You will usually not - * need this. - * - * @throws IOException if something went wrong when closing the stream - */ - @Override - public void close() throws IOException - { - try - { - if (closeStreamCache && streamCache != null) - { - streamCache.close(); - streamCache = null; - } - } - finally - { - try - { - // marks the scratch file pages as free - if (randomAccess != null) - { - randomAccess.close(); - randomAccess = null; - } - } - finally - { - if (randomAccessReadView != null) - { - randomAccessReadView.close(); - randomAccessReadView = null; - } - } - } - } - - /** - * Indicates whether the stream contains any data or not. 
- * - * @return true if the stream contains any data - */ - public boolean hasData() - { - return randomAccess != null || randomAccessReadView != null; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java deleted file mode 100644 index 804213c4d63..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSString.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.util.Hex; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; - -/** - * A string object, which may be a text string, a PDFDocEncoded string, ASCII string, or byte string. - * - *

Text strings are used for character strings that contain information intended to be - * human-readable, such as text annotations, bookmark names, article names, document information, - * and so forth. - * - *

PDFDocEncoded strings are used for characters that are represented in a single byte. - * - *

ASCII strings are used for characters that are represented in a single byte using ASCII - * encoding. - * - *

Byte strings are used for binary data represented as a series of bytes, but the encoding is - * not known. The bytes of the string need not represent characters. - * - * @author Ben Litchfield - * @author John Hewson - */ -public final class COSString extends COSBase -{ - private static final Logger LOG = LogManager.getLogger(COSString.class); - - private final byte[] bytes; - private final boolean forceHexForm; - - // legacy behaviour for old PDFParser - public static final boolean FORCE_PARSING = - Boolean.getBoolean("org.apache.pdfbox.forceParsing"); - - /** - * Creates a new PDF string from a byte array. This method can be used to read a string from - * an existing PDF file, or to create a new byte string. - * - * @param bytes The raw bytes of the PDF text string or byte string. - */ - public COSString(byte[] bytes) - { - this(bytes, false); - } - - /** - * Creates a new PDF string from a byte array. This method can be used to read a string from an existing PDF file, - * or to create a new byte string. - * - * @param bytes The raw bytes of the PDF text string or byte string. - * @param forceHex forces the hexadecimal presentation of the string if set to true - * - */ - public COSString(byte[] bytes, boolean forceHex) - { - forceHexForm = forceHex; - this.bytes = Arrays.copyOf(bytes, bytes.length); - } - - /** - * Creates a new text string from a Java String. - * - * @param text The string value of the object. - */ - public COSString(String text) - { - this(text, false); - } - - /** - * Creates a new text string from a Java String. - * - * @param text The string value of the object. 
- * @param forceHex forces the hexadecimal presentation of the string if set to true - * - */ - public COSString(String text, boolean forceHex) - { - forceHexForm = forceHex; - // check whether the string uses only characters available in PDFDocEncoding - boolean isOnlyPDFDocEncoding = true; - for (char c : text.toCharArray()) - { - if (!PDFDocEncoding.containsChar(c)) - { - isOnlyPDFDocEncoding = false; - break; - } - } - - if (isOnlyPDFDocEncoding) - { - // PDFDocEncoded string - bytes = PDFDocEncoding.getBytes(text); - } - else - { - // UTF-16BE encoded string with a leading byte order marker - byte[] data = text.getBytes(StandardCharsets.UTF_16BE); - bytes = new byte[data.length + 2]; - bytes[0] = (byte) 0xFE; - bytes[1] = (byte) 0xFF; - System.arraycopy(data, 0, bytes, 2, data.length); - } - } - - /** - * This will create a COS string from a string of hex characters. - * - * @param hex A hex string. - * @return A cos string with the hex characters converted to their actual bytes. - * @throws IOException If there is an error with the hex string. - */ - public static COSString parseHex(String hex) throws IOException - { - ByteArrayOutputStream bytes = new ByteArrayOutputStream(); - StringBuilder hexBuffer = new StringBuilder(hex.trim()); - - // if odd number then the last hex digit is assumed to be 0 - if (hexBuffer.length() % 2 != 0) - { - hexBuffer.append('0'); - } - - int length = hexBuffer.length(); - for (int i = 0; i < length; i += 2) - { - try - { - bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16)); - } - catch (NumberFormatException e) - { - if (FORCE_PARSING) - { - LOG.warn("Encountered a malformed hex string"); - bytes.write('?'); // todo: what does Acrobat do? Any example PDFs? - } - else - { - throw new IOException("Invalid hex string: " + hex, e); - } - } - } - - return new COSString(bytes.toByteArray()); - } - - /** - * Returns true if the string is to be written in hex form. 
- * - * @return true if the COSString is written in hex form - */ - public boolean getForceHexForm() - { - return forceHexForm; - } - - /** - * Returns the content of this string as a PDF text string. - * - * @return the PDF string representation of the COSString - */ - public String getString() - { - // text string - BOM indicates Unicode - if (bytes.length >= 2) - { - if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF) - { - // UTF-16BE - return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16BE); - } - else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE) - { - // UTF-16LE - not in the PDF spec! - return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16LE); - } - } - - // otherwise use PDFDocEncoding - return PDFDocEncoding.toString(bytes); - } - - /** - * Returns the content of this string as a PDF ASCII string. - * - * @return the ASCII string representation of the COSString - */ - public String getASCII() - { - // ASCII string - return new String(bytes, StandardCharsets.US_ASCII); - } - - /** - * Returns the raw bytes of the string using a new byte array. Best used with a PDF byte string. - * - * @return a clone of the underlying byte[] representation of the COSString - */ - public byte[] getBytes() - { - return Arrays.copyOf(bytes, bytes.length); - } - - /** - * This will take this string and create a hex representation of the bytes that make the string. - * - * @return A hex string representing the bytes in this string. - */ - public String toHexString() - { - return Hex.getString(bytes); - } - - /** - * Visitor pattern double dispatch method. - * - * @param visitor The object to notify when visiting this object. - * @throws IOException If an error occurs while visiting this object. 
- */ - @Override - public void accept(ICOSVisitor visitor) throws IOException - { - visitor.visitFromString(this); - } - - @Override - public boolean equals(Object obj) - { - if (obj instanceof COSString) - { - COSString strObj = (COSString) obj; - return getString().equals(strObj.getString()) && - forceHexForm == strObj.forceHexForm; - } - return false; - } - - @Override - public int hashCode() - { - int result = Arrays.hashCode(bytes); - return result + (forceHexForm ? 17 : 0); - } - - @Override - public String toString() - { - return "COSString{" + getString() + "}"; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java deleted file mode 100644 index 0a425419c53..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateInfo.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -public interface COSUpdateInfo extends COSObjectGetter -{ - - /** - * Get the update state for the COSWriter. This indicates whether an object is to be written - * when there is an incremental save. - * - * @return the update state. 
- */ - default boolean isNeedToBeUpdated() - { - return getUpdateState().isUpdated(); - } - - /** - * Set the update state of the dictionary for the COSWriter. This indicates whether an object is - * to be written when there is an incremental save. - * - * @param flag the update state. - */ - default void setNeedToBeUpdated( boolean flag ) - { - getUpdateState().update(flag); - } - - /** - * Uses this {@link COSUpdateInfo} as the base object of a new {@link COSIncrement}. - * - * @return A {@link COSIncrement} based on this {@link COSUpdateInfo}. - * @see COSIncrement - */ - default COSIncrement toIncrement() - { - return getUpdateState().toIncrement(); - } - - /** - * Returns the current {@link COSUpdateState} of this {@link COSUpdateInfo}. - * - * @return The current {@link COSUpdateState} of this {@link COSUpdateInfo}. - * @see COSUpdateState - */ - COSUpdateState getUpdateState(); - -} \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java deleted file mode 100644 index ad663f8e706..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/COSUpdateState.java +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -/** - * A {@link COSUpdateState} instance manages update states for a {@link COSUpdateInfo}. Such states are used to create - * a {@link COSIncrement} for the incremental saving of a {@link COSDocument}. - * - * @author Christian Appl - * @see COSDocumentState - * @see COSUpdateInfo - * @see COSIncrement - */ -public class COSUpdateState -{ - - /** - * The {@link COSUpdateInfo} the {@link COSUpdateState} does manage update states for. - */ - private final COSUpdateInfo updateInfo; - /** - * The {@link COSDocumentState} the {@link #updateInfo} is linked to. - */ - private COSDocumentState originDocumentState = null; - /** - * The actual update state of {@link #updateInfo}. - *

    - *
  • {@code true}, if {@link #updateInfo} has been updated after the document completed parsing.
  • - *
  • {@code false}, if {@link #updateInfo} has remained unaltered since the document completed parsing.
  • - *
- */ - private boolean updated = false; - - /** - * Creates a new {@link COSUpdateState} for the given {@link COSUpdateInfo}. - * - * @param updateInfo The {@link COSUpdateInfo}, that shall be managed by this {@link COSUpdateState}. - */ - public COSUpdateState(COSUpdateInfo updateInfo) - { - this.updateInfo = updateInfo; - } - - /** - *

- * Links the given {@link COSDocumentState} to the {@link #updated} state of the managed {@link #updateInfo}.
- *

- *

- * This shall also initialize {@link #updated} accordingly and will also set the same {@link COSDocumentState} for - * all possibly contained substructures. - *

- *

- * Should {@link #originDocumentState} already have been set, by a prior call to this method, this shall deny to - * overwrite it. - *

- *

- * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are - * allowed. - *

- *

- * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. - *

- * - * @param originDocumentState The {@link COSDocumentState} that shall be linked to this {@link COSUpdateState}. - * @see #originDocumentState - * @see #updated - */ - public void setOriginDocumentState(COSDocumentState originDocumentState) - { - setOriginDocumentState(originDocumentState, false); - } - - /** - *

- * Links the given {@link COSDocumentState} to the {@link #updated} state of the managed {@link #updateInfo}.
- *

- *

- * This shall also initialize {@link #updated} accordingly and will also set the same {@link COSDocumentState} for - * all possibly contained substructures. - *

- *

- * Should {@link #originDocumentState} already have been set, by a prior call to this method, this shall deny to - * overwrite it. - *

- *

- * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are - * allowed. - *

- *

- * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. - *

- *

- * Additionally to {@link #setOriginDocumentState(COSDocumentState)}, this shall also deny changing - * {@link #updated}, should the flag {@code dereferencing} indicate, that this is caused by dereferencing a - * {@link COSObject}. - *

- * - * @param originDocumentState The {@link COSDocumentState} that shall be linked to this {@link COSUpdateState}. - * @param dereferencing {@code true}, if this update of the {@link COSDocumentState} is caused by - * dereferencing a {@link COSObject}. - * @see #originDocumentState - * @see #updated - */ - private void setOriginDocumentState(COSDocumentState originDocumentState, boolean dereferencing) - { - if(this.originDocumentState != null || originDocumentState == null) - { - return; - } - this.originDocumentState = originDocumentState; - if(!dereferencing) - { - update(); - } - - if(updateInfo instanceof COSDictionary) - { - COSDictionary dictionary = (COSDictionary) updateInfo; - for(COSBase entry : dictionary.getValues()) - { - if (entry instanceof COSUpdateInfo) - { - ((COSUpdateInfo) entry).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); - } - } - } - else if(updateInfo instanceof COSArray) - { - COSArray array = (COSArray) updateInfo; - for(COSBase entry : array) - { - if (entry instanceof COSUpdateInfo) - { - ((COSUpdateInfo) entry).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); - } - } - } - else if(updateInfo instanceof COSObject) - { - COSObject object = (COSObject) updateInfo; - COSBase reference; - if(object.isDereferenced() && (reference = object.getObject()) instanceof COSUpdateInfo) - { - ((COSUpdateInfo) reference).getUpdateState().setOriginDocumentState(originDocumentState, dereferencing); - } - } - } - - /** - *

- * Returns the {@link #originDocumentState}, that is linked to the managed {@link #updateInfo}. - *

- *

- * {@link COSDocumentState#isAcceptingUpdates()} shall determine, whether updates to {@link #updateInfo} are - * allowed. - *

- *

- * As long as no {@link COSDocumentState} is linked to this {@link COSUpdateState}, it shall not accept updates. - *

- * - * @return The {@link COSDocumentState} linked to this {@link COSUpdateState}. - * @see #setOriginDocumentState(COSDocumentState) - */ - public COSDocumentState getOriginDocumentState() - { - return originDocumentState; - } - - /** - * Returns {@code true}, if the linked {@link #originDocumentState} {@link COSDocumentState#isAcceptingUpdates()} - * and such a {@link COSDocumentState} has been linked to this {@link COSUpdateState}. - * - * @return {@code true}, if the linked {@link #originDocumentState} {@link COSDocumentState#isAcceptingUpdates()} - * and such a {@link COSDocumentState} has been linked to this {@link COSUpdateState}. - * @see #originDocumentState - * @see COSDocumentState#isAcceptingUpdates() - */ - boolean isAcceptingUpdates() - { - return originDocumentState != null && originDocumentState.isAcceptingUpdates(); - } - - /** - * Returns the actual {@link #updated} state of the managed {@link #updateInfo}. - * - * @return The actual {@link #updated} state of the managed {@link #updateInfo} - * @see #updated - */ - public boolean isUpdated() - { - return updated; - } - - /** - * Calls {@link #update(boolean)} with {@code true} as the new update state.
- * This shall only then have an effect, if {@link #isAcceptingUpdates()} returns {@code true}. - * - * @see #update(boolean) - * @see #updated - * @see #isAcceptingUpdates() - */ - void update() - { - update(true); - } - - /** - * Sets the {@link #updated} state of the managed {@link #updateInfo} to the given state.
- * This shall only then have an effect, if {@link #isAcceptingUpdates()} returns {@code true}. - * - * @param updated The state to set for {@link #updated}. - * @see #update(boolean) - * @see #updated - * @see #isAcceptingUpdates() - */ - void update(boolean updated) - { - if(isAcceptingUpdates()) - { - this.updated = updated; - } - } - - /** - *

- * Shall call {@link #update()} for this {@link COSUpdateState} and shall - * {@link #setOriginDocumentState(COSDocumentState)} for the given child, initializing it´s {@link #updated} state - * and {@link #originDocumentState}. - *

- *

- * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. - *

- * - * @param child The child that shall also be updated. - * @see #update() - * @see #setOriginDocumentState(COSDocumentState) - */ - void update(COSBase child) - { - update(); - if(child instanceof COSUpdateInfo) - { - ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState); - } - } - - /** - *

- * Shall call {@link #update()} for this {@link COSUpdateState} and shall - * {@link #setOriginDocumentState(COSDocumentState)} for the given children, initializing their {@link #updated} - * state and {@link #originDocumentState}. - *

- *

- * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. - *

- * - * @param children The children that shall also be updated. - * @see #update() - * @see #setOriginDocumentState(COSDocumentState) - */ - void update(COSArray children) - { - update((Iterable) children); - } - - /** - *

- * Shall call {@link #update()} for this {@link COSUpdateState} and shall - * {@link #setOriginDocumentState(COSDocumentState)} for the given children, initializing their {@link #updated} - * state and {@link #originDocumentState}. - *

- *

- * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo}. - *

- * - * @param children The children that shall also be updated. - * @see #update() - * @see #setOriginDocumentState(COSDocumentState) - */ - void update(Iterable children) - { - update(); - if(children == null) - { - return; - } - for(COSBase child : children) - { - if(child instanceof COSUpdateInfo) - { - ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState); - } - } - } - - /** - * This shall {@link #setOriginDocumentState(COSDocumentState, boolean)} for the dereferenced child, - * initializing its {@link #originDocumentState}. - *

- * This shall have no effect for a child, that is not an instance of {@link COSUpdateInfo} and will never change - * the child´s {@link #updated} state. - *

- * - * @param child The child, that has been dereferenced. - * @see #setOriginDocumentState(COSDocumentState, boolean) - */ - void dereferenceChild(COSBase child) - { - if(child instanceof COSUpdateInfo) - { - ((COSUpdateInfo) child).getUpdateState().setOriginDocumentState(originDocumentState, true); - } - } - - /** - * Uses the managed {@link #updateInfo} as the base object of a new {@link COSIncrement}. - * - * @return A {@link COSIncrement} based on the managed {@link #updateInfo}. - * @see COSUpdateInfo - * @see COSIncrement - */ - COSIncrement toIncrement() - { - return new COSIncrement(updateInfo); - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java deleted file mode 100644 index 34da698feb2..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSParser.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import org.apache.pdfbox.io.RandomAccessReadView; - -import java.io.IOException; - -/** - * Presumably this is a parser interface which requires that implementing classes - * be able to dereference (read/decode) indirect (Proxy) objects. 
Note that this - * is just an interface declaration. The actual parser implementations will be - * in the pdfparser package. - */ -public interface ICOSParser -{ - /** - * Dereference the COSBase object which is referenced by the given COSObject. - * - * @param obj the COSObject which references the COSBase object to be dereferenced. - * @return the referenced object - * @throws IOException if something went wrong when dereferencing the COSBase object - */ - COSBase dereferenceCOSObject( COSObject obj ) throws IOException; - - /** - * Creates {@link RandomAccessReadView} object for unknown reasons starting at the given position with the given length. - * - * @param startPosition start position within the underlying random access read - * @param streamLength stream length - * @return the random access read view - * @throws IOException if something went wrong when creating the view for the RandomAccessRead - */ - RandomAccessReadView createRandomAccessReadView( long startPosition, long streamLength ) - throws IOException; - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java deleted file mode 100644 index 142110322d8..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/ICOSVisitor.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.IOException; - -/** - * An interface for visiting a PDF document at the type (COS) level. - * - * @author Michael Traut - */ -public interface ICOSVisitor -{ - /** - * Notification of visit to Array object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromArray( COSArray obj ) throws IOException; - - /** - * Notification of visit to boolean object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromBoolean( COSBoolean obj ) throws IOException; - - /** - * Notification of visit to dictionary object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromDictionary( COSDictionary obj ) throws IOException; - - /** - * Notification of visit to document object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromDocument( COSDocument obj ) throws IOException; - - /** - * Notification of visit to float object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromFloat( COSFloat obj ) throws IOException; - - /** - * Notification of visit to integer object. - * - * @param obj The Object that is being visited. 
- * @throws IOException If there is an error while visiting this object. - */ - void visitFromInt( COSInteger obj ) throws IOException; - - /** - * Notification of visit to name object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromName( COSName obj ) throws IOException; - - /** - * Notification of visit to null object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromNull( COSNull obj ) throws IOException; - - /** - * Notification of visit to stream object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromStream( COSStream obj ) throws IOException; - - /** - * Notification of visit to string object. - * - * @param obj The Object that is being visited. - * @throws IOException If there is an error while visiting this object. - */ - void visitFromString( COSString obj ) throws IOException; -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java deleted file mode 100644 index 8885550ceca..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.util.HashMap; -import java.util.Map; - -/** - * The "PDFDocEncoding" encoding. PDFDocEncoding is a predefined text encoding - * unique to PDF. It supports a superset of the ISO Latin 1 character set which - * happens, as Adobe’s PDF Reference 1.2 puts it, to be “compatible with Unicode - * in that all Unicode codes less than 256 match PDFDocEncoding.” Note that - * this is *not* a Type 1 font encoding, it is used only within PDF "text strings". - */ -final class PDFDocEncoding -{ - private static final char REPLACEMENT_CHARACTER = '\uFFFD'; - - private static final int[] CODE_TO_UNI; - private static final Map UNI_TO_CODE; - - static - { - CODE_TO_UNI = new int[256]; - UNI_TO_CODE = new HashMap<>(256); - - // initialize with basically ISO-8859-1 - for (int i = 0; i < 256; i++) - { - // skip entries not in Unicode column - if (i > 0x17 && i < 0x20) - { - continue; - } - if (i > 0x7E && i < 0xA1) - { - continue; - } - if (i == 0xAD) - { - continue; - } - - set(i, (char)i); - } - - // then do all deviations (based on the table in ISO 32000-1:2008) - // block 1 - set(0x18, '\u02D8'); // BREVE - set(0x19, '\u02C7'); // CARON - set(0x1A, '\u02C6'); // MODIFIER LETTER CIRCUMFLEX ACCENT - set(0x1B, '\u02D9'); // DOT ABOVE - set(0x1C, '\u02DD'); // DOUBLE ACUTE ACCENT - set(0x1D, '\u02DB'); // OGONEK - set(0x1E, '\u02DA'); // RING ABOVE - set(0x1F, '\u02DC'); // SMALL TILDE - // block 2 - set(0x7F, REPLACEMENT_CHARACTER); // undefined - set(0x80, '\u2022'); // BULLET - 
set(0x81, '\u2020'); // DAGGER - set(0x82, '\u2021'); // DOUBLE DAGGER - set(0x83, '\u2026'); // HORIZONTAL ELLIPSIS - set(0x84, '\u2014'); // EM DASH - set(0x85, '\u2013'); // EN DASH - set(0x86, '\u0192'); // LATIN SMALL LETTER SCRIPT F - set(0x87, '\u2044'); // FRACTION SLASH (solidus) - set(0x88, '\u2039'); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - set(0x89, '\u203A'); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - set(0x8A, '\u2212'); // MINUS SIGN - set(0x8B, '\u2030'); // PER MILLE SIGN - set(0x8C, '\u201E'); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase) - set(0x8D, '\u201C'); // LEFT DOUBLE QUOTATION MARK (quotedblleft) - set(0x8E, '\u201D'); // RIGHT DOUBLE QUOTATION MARK (quotedblright) - set(0x8F, '\u2018'); // LEFT SINGLE QUOTATION MARK (quoteleft) - set(0x90, '\u2019'); // RIGHT SINGLE QUOTATION MARK (quoteright) - set(0x91, '\u201A'); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase) - set(0x92, '\u2122'); // TRADE MARK SIGN - set(0x93, '\uFB01'); // LATIN SMALL LIGATURE FI - set(0x94, '\uFB02'); // LATIN SMALL LIGATURE FL - set(0x95, '\u0141'); // LATIN CAPITAL LETTER L WITH STROKE - set(0x96, '\u0152'); // LATIN CAPITAL LIGATURE OE - set(0x97, '\u0160'); // LATIN CAPITAL LETTER S WITH CARON - set(0x98, '\u0178'); // LATIN CAPITAL LETTER Y WITH DIAERESIS - set(0x99, '\u017D'); // LATIN CAPITAL LETTER Z WITH CARON - set(0x9A, '\u0131'); // LATIN SMALL LETTER DOTLESS I - set(0x9B, '\u0142'); // LATIN SMALL LETTER L WITH STROKE - set(0x9C, '\u0153'); // LATIN SMALL LIGATURE OE - set(0x9D, '\u0161'); // LATIN SMALL LETTER S WITH CARON - set(0x9E, '\u017E'); // LATIN SMALL LETTER Z WITH CARON - set(0x9F, REPLACEMENT_CHARACTER); // undefined - set(0xA0, '\u20AC'); // EURO SIGN - // end of deviations - } - - private PDFDocEncoding() - { - } - - private static void set(int code, char unicode) - { - CODE_TO_UNI[code] = unicode; - UNI_TO_CODE.put(unicode, code); - } - - /** - * Returns the string representation of the given PDFDocEncoded bytes. 
- */ - public static String toString(byte[] bytes) - { - StringBuilder sb = new StringBuilder(bytes.length); - for (byte b : bytes) - { - if ((b & 0xff) >= CODE_TO_UNI.length) - { - sb.append('?'); - } - else - { - sb.append((char)CODE_TO_UNI[b & 0xff]); - } - } - return sb.toString(); - } - - /** - * Returns the given string encoded with PDFDocEncoding. - */ - public static byte[] getBytes(String text) - { - ByteArrayOutputStream out = new ByteArrayOutputStream(text.length()); - for (char c : text.toCharArray()) - { - out.write(UNI_TO_CODE.getOrDefault(c, 0)); - } - return out.toByteArray(); - } - - /** - * Returns true if the given character is available in PDFDocEncoding. - * - * @param character UTF-16 character - */ - public static boolean containsChar(char character) - { - return UNI_TO_CODE.containsKey(character); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java deleted file mode 100644 index 465a07e61b0..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionary.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import java.util.Collections; - -/** - * An unmodifiable COSDictionary. - * - * @author John Hewson - */ -final class UnmodifiableCOSDictionary extends COSDictionary -{ - /** - * {@inheritDoc} - */ - UnmodifiableCOSDictionary(COSDictionary dict) - { - super(); - items = Collections.unmodifiableMap(dict.items); - } - - /** - * {@inheritDoc} - */ - @Override - public void setNeedToBeUpdated(boolean flag) - { - throw new UnsupportedOperationException(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java deleted file mode 100644 index b53ac45702b..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85Filter.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.pdfbox.cos.COSDictionary; - -/** - * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data. 
- * @author Ben Litchfield - */ -final class ASCII85Filter extends Filter -{ - @Override - public DecodeResult decode( InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - try (ASCII85InputStream is = new ASCII85InputStream(encoded)) - { - is.transferTo(decoded); - } - decoded.flush(); - return new DecodeResult(parameters); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - try (ASCII85OutputStream os = new ASCII85OutputStream(encoded)) - { - input.transferTo(os); - } - encoded.flush(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java deleted file mode 100644 index cfd75c9ee95..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85InputStream.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; - -/** - * This class represents an ASCII85 stream. 
- * - * @author Ben Litchfield - * - */ -final class ASCII85InputStream extends FilterInputStream -{ - private int index; - private int n; - private boolean eof; - - private byte[] ascii; - private byte[] b; - - private static final char TERMINATOR = '~'; - private static final char OFFSET = '!'; - private static final char NEWLINE = '\n'; - private static final char RETURN = '\r'; - private static final char SPACE = ' '; - private static final char PADDING_U = 'u'; - private static final char Z = 'z'; - - /** - * Constructor. - * - * @param is The input stream to actually read from. - */ - ASCII85InputStream(InputStream is) - { - super(is); - index = 0; - n = 0; - eof = false; - ascii = new byte[5]; - b = new byte[4]; - } - - /** - * This will read the next byte from the stream. - * - * @return The next byte read from the stream. - * - * @throws IOException If there is an error reading from the wrapped stream. - */ - @Override - public int read() throws IOException - { - if (index >= n) - { - if (eof) - { - return -1; - } - index = 0; - int k; - byte z; - do - { - int zz = (byte) in.read(); - if (zz == -1) - { - eof = true; - return -1; - } - z = (byte) zz; - } while (z == NEWLINE || z == RETURN || z == SPACE); - - if (z == TERMINATOR) - { - eof = true; - ascii = b = null; - n = 0; - return -1; - } - else if (z == Z) - { - b[0] = b[1] = b[2] = b[3] = 0; - n = 4; - } - else - { - ascii[0] = z; // may be EOF here.... 
- for (k = 1; k < 5; ++k) - { - do - { - int zz = (byte) in.read(); - if (zz == -1) - { - eof = true; - return -1; - } - z = (byte) zz; - } while (z == NEWLINE || z == RETURN || z == SPACE); - ascii[k] = z; - if (z == TERMINATOR) - { - // don't include ~ as padding byte - ascii[k] = (byte) PADDING_U; - break; - } - } - n = k - 1; - if (n == 0) - { - eof = true; - ascii = null; - b = null; - return -1; - } - if (k < 5) - { - for (++k; k < 5; ++k) - { - // use 'u' for padding - ascii[k] = (byte) PADDING_U; - } - eof = true; - } - // decode stream - long t = 0; - for (k = 0; k < 5; ++k) - { - z = (byte) (ascii[k] - OFFSET); - if (z < 0 || z > 93) - { - n = 0; - eof = true; - ascii = null; - b = null; - throw new IOException("Invalid data in Ascii85 stream"); - } - t = (t * 85L) + z; - } - for (k = 3; k >= 0; --k) - { - b[k] = (byte) (t & 0xFFL); - t >>>= 8; - } - } - } - return b[index++] & 0xFF; - } - - /** - * This will read a chunk of data. - * - * @param data The buffer to write data to. - * @param offset The offset into the data stream. - * @param len The number of byte to attempt to read. - * - * @return The number of bytes actually read. - * - * @throws IOException If there is an error reading data from the underlying stream. - */ - @Override - public int read(byte[] data, int offset, int len) throws IOException - { - if (eof && index >= n) - { - return -1; - } - for (int i = 0; i < len; i++) - { - if (index < n) - { - data[i + offset] = b[index++]; - } - else - { - int t = read(); - if (t == -1) - { - return i; - } - data[i + offset] = (byte) t; - } - } - return len; - } - - /** - * This will close the underlying stream and release any resources. - * - * @throws IOException If there is an error closing the underlying stream. - */ - @Override - public void close() throws IOException - { - ascii = null; - eof = true; - b = null; - super.close(); - } - - /** - * non supported interface methods. - * - * @return False always. 
- */ - @Override - public boolean markSupported() - { - return false; - } - - /** - * Unsupported. - * - * @param nValue ignored. - * - * @return Always zero. - */ - @Override - public long skip(long nValue) - { - return 0; - } - - /** - * Unsupported. - * - * @return Always zero. - */ - @Override - public int available() - { - return 0; - } - - /** - * Unsupported. - * - * @param readlimit ignored. - */ - @Override - public synchronized void mark(int readlimit) - { - } - - /** - * Unsupported. - * - * @throws IOException telling that this is an unsupported action. - */ - @Override - public synchronized void reset() throws IOException - { - throw new IOException("Reset is not supported"); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java deleted file mode 100644 index 6b441bc73fe..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCII85OutputStream.java +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.io.FilterOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -/** - * This class represents an ASCII85 output stream. - * - * @author Ben Litchfield - * - */ -final class ASCII85OutputStream extends FilterOutputStream -{ - - private int lineBreak; - private int count; - - private byte[] indata; - private byte[] outdata; - - /** - * Function produces five ASCII printing characters from - * four bytes of binary data. - */ - private int maxline; - private boolean flushed; - private char terminator; - private static final char OFFSET = '!'; - private static final char NEWLINE = '\n'; - private static final char Z = 'z'; - - /** - * Constructor. - * - * @param out The output stream to write to. - */ - ASCII85OutputStream(OutputStream out) - { - super(out); - lineBreak = 36 * 2; - maxline = 36 * 2; - count = 0; - indata = new byte[4]; - outdata = new byte[5]; - flushed = true; - terminator = '~'; - } - - /** - * This will set the terminating character. - * - * @param term The terminating character. - */ - public void setTerminator(char term) - { - if (term < 118 || term > 126 || term == Z) - { - throw new IllegalArgumentException("Terminator must be 118-126 excluding z"); - } - terminator = term; - } - - /** - * This will get the terminating character. - * - * @return The terminating character. - */ - public char getTerminator() - { - return terminator; - } - - /** - * This will set the line length that will be used. - * - * @param l The length of the line to use. - */ - public void setLineLength(int l) - { - if (lineBreak > l) - { - lineBreak = l; - } - maxline = l; - } - - /** - * This will get the length of the line. - * - * @return The line length attribute. - */ - public int getLineLength() - { - return maxline; - } - - /** - * This will transform the next four ascii bytes. 
- */ - private void transformASCII85() - { - long word = ((((indata[0] << 8) | (indata[1] & 0xFF)) << 16) | ((indata[2] & 0xFF) << 8) | (indata[3] & 0xFF)) & 0xFFFFFFFFL; - - if (word == 0) - { - outdata[0] = (byte) Z; - outdata[1] = 0; - return; - } - long x; - x = word / (85L * 85L * 85L * 85L); - outdata[0] = (byte) (x + OFFSET); - word -= x * 85L * 85L * 85L * 85L; - - x = word / (85L * 85L * 85L); - outdata[1] = (byte) (x + OFFSET); - word -= x * 85L * 85L * 85L; - - x = word / (85L * 85L); - outdata[2] = (byte) (x + OFFSET); - word -= x * 85L * 85L; - - x = word / 85L; - outdata[3] = (byte) (x + OFFSET); - - outdata[4] = (byte) ((word % 85L) + OFFSET); - } - - /** - * This will write a single byte. - * - * @param b The byte to write. - * - * @throws IOException If there is an error writing to the stream. - */ - @Override - public void write(int b) throws IOException - { - flushed = false; - indata[count++] = (byte) b; - if (count < 4) - { - return; - } - transformASCII85(); - for (int i = 0; i < 5; i++) - { - if (outdata[i] == 0) - { - break; - } - out.write(outdata[i]); - if (--lineBreak == 0) - { - out.write(NEWLINE); - lineBreak = maxline; - } - } - count = 0; - } - - /** - * This will flush the data to the stream. - * - * @throws IOException If there is an error writing the data to the stream. 
- */ - @Override - public void flush() throws IOException - { - if (flushed) - { - return; - } - if (count > 0) - { - for (int i = count; i < 4; i++) - { - indata[i] = 0; - } - transformASCII85(); - if (outdata[0] == Z) - { - for (int i = 0; i < 5; i++) // expand 'z', - { - outdata[i] = (byte) OFFSET; - } - } - for (int i = 0; i < count + 1; i++) - { - out.write(outdata[i]); - if (--lineBreak == 0) - { - out.write(NEWLINE); - lineBreak = maxline; - } - } - } - if (--lineBreak == 0) - { - out.write(NEWLINE); - } - out.write(terminator); - out.write('>'); - out.write(NEWLINE); - count = 0; - lineBreak = maxline; - flushed = true; - super.flush(); - } - - /** - * This will close the stream. - * - * @throws IOException If there is an error closing the wrapped stream. - */ - @Override - public void close() throws IOException - { - try - { - flush(); - super.close(); - } - finally - { - indata = outdata = null; - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java deleted file mode 100644 index b0c5fbe8681..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/ASCIIHexFilter.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.util.Hex; - -/** - * Decodes data encoded in an ASCII hexadecimal form, reproducing the original binary data. - * - * @author Ben Litchfield - */ -final class ASCIIHexFilter extends Filter -{ - private static final Logger LOG = LogManager.getLogger(ASCIIHexFilter.class); - - private static final int[] REVERSE_HEX = { - /* 0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 30 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 40 */ -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, - /* 50 */ 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, - /* 60 */ -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, - /* 70 */ 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 90 */ -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, - /* 100 */ 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - /* 110 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 120 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 130 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 140 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 150 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 160 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 170 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 180 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 190 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 200 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 210 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 220 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 230 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - /* 240 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
- /* 250 */ -1, -1, -1, -1, -1, -1 - }; - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - int value, firstByte, secondByte; - while ((firstByte = encoded.read()) != -1) - { - // always after first char - while (isWhitespace(firstByte)) - { - firstByte = encoded.read(); - } - if (firstByte == -1 || isEOD(firstByte)) - { - break; - } - - if (REVERSE_HEX[firstByte] == -1) - { - LOG.error("Invalid hex, int: {} char: {}", firstByte, (char) firstByte); - } - value = REVERSE_HEX[firstByte] * 16; - secondByte = encoded.read(); - - if (secondByte == -1 || isEOD(secondByte)) - { - // second value behaves like 0 in case of EOD - decoded.write(value); - break; - } - if (REVERSE_HEX[secondByte] == -1) - { - LOG.error("Invalid hex, int: {} char: {}", secondByte, (char) secondByte); - } - value += REVERSE_HEX[secondByte]; - decoded.write(value); - } - decoded.flush(); - return new DecodeResult(parameters); - } - - // whitespace - // 0 0x00 Null (NUL) - // 9 0x09 Tab (HT) - // 10 0x0A Line feed (LF) - // 12 0x0C Form feed (FF) - // 13 0x0D Carriage return (CR) - // 32 0x20 Space (SP) - private static boolean isWhitespace(int c) - { - switch (c) - { - case 0: - case 9: - case 10: - case 12: - case 13: - case 32: - return true; - default: - return false; - } - } - - private static boolean isEOD(int c) - { - return c == '>'; - } - - @Override - public void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - int byteRead; - while ((byteRead = input.read()) != -1) - { - Hex.writeHexByte((byte)byteRead, encoded); - } - encoded.flush(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java deleted file mode 100644 index e07ab3d7f03..00000000000 --- 
a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxDecoderStream.java +++ /dev/null @@ -1,813 +0,0 @@ -/* - * Copyright (c) 2012, Harald Kuhr - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * * Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -package org.apache.pdfbox.cos.filter; - - -import java.io.EOFException; -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; - -/** - * CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression. 
- * - * @author Harald Kuhr - * @author Oliver Schmidtmer - * @author last modified by $Author: haraldk$ - * @version $Id: CCITTFaxDecoderStream.java,v 1.0 23.05.12 15:55 haraldk Exp$ - * - * Taken from commit 24c6682236e5a02151359486aa4075ddc5ab1534 of 18.08.2018 from twelvemonkeys/imageio/plugins/tiff/CCITTFaxDecoderStream.java - * - * Initial changes for PDFBox, discussed in PDFBOX-3338: - * - removed Validate() usages - * - catch VALUE_EOL in decode1D() - */ -final class CCITTFaxDecoderStream extends FilterInputStream { - // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43. - - private final int columns; - private final byte[] decodedRow; - - private final boolean optionG32D; - // Leading zeros for aligning EOL - private final boolean optionG3Fill; - private final boolean optionUncompressed; - private final boolean optionByteAligned; - - private final int type; - - private int decodedLength; - private int decodedPos; - - private int[] changesReferenceRow; - private int[] changesCurrentRow; - private int changesReferenceRowCount; - private int changesCurrentRowCount; - - private int lastChangingElement = 0; - - /** - * Creates a CCITTFaxDecoderStream. - * This constructor may be used for CCITT streams embedded in PDF files, - * which use EncodedByteAlign. - * - * @param stream the compressed CCITT stream. - * @param columns the number of columns in the stream. - * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE}, - * {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}. - * @param options CCITT T.4 or T.6 options. - * @param byteAligned enable byte alignment used in PDF files (EncodedByteAlign). 
- */ - public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, - final long options, final boolean byteAligned) { - super(stream); - - this.columns = columns; - this.type = type; - - // We know this is only used for b/w (1 bit) - decodedRow = new byte[(columns + 7) / 8]; - changesReferenceRow = new int[columns + 2]; - changesCurrentRow = new int[columns + 2]; - - switch (type) { - case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE: - optionByteAligned = byteAligned; - optionG32D = false; - optionG3Fill = false; - optionUncompressed = false; - break; - case TIFFExtension.COMPRESSION_CCITT_T4: - optionByteAligned = byteAligned; - optionG32D = (options & TIFFExtension.GROUP3OPT_2DENCODING) != 0; - optionG3Fill = (options & TIFFExtension.GROUP3OPT_FILLBITS) != 0; - optionUncompressed = (options & TIFFExtension.GROUP3OPT_UNCOMPRESSED) != 0; - break; - case TIFFExtension.COMPRESSION_CCITT_T6: - optionByteAligned = byteAligned; - optionG32D = false; - optionG3Fill = false; - optionUncompressed = (options & TIFFExtension.GROUP4OPT_UNCOMPRESSED) != 0; - break; - default: - throw new IllegalArgumentException("Illegal parameter: " + type); - } - - } - - private void fetch() throws IOException { - if (decodedPos >= decodedLength) { - decodedLength = 0; - - try { - decodeRow(); - } - catch (ArrayIndexOutOfBoundsException e) { - // Mask the AIOOBE as an IOException - throw new IOException("Malformed CCITT stream", e); - } - catch (EOFException e) { - // TODO: Rewrite to avoid throw/catch for normal flow... 
- if (decodedLength != 0) { - throw e; - } - - // ..otherwise, just let client code try to read past the - // end of stream - decodedLength = -1; - } - - decodedPos = 0; - } - } - - private void decode1D() throws IOException { - int index = 0; - boolean white = true; - changesCurrentRowCount = 0; - - do { - int completeRun; - - if (white) { - completeRun = decodeRun(whiteRunTree); - } - else { - completeRun = decodeRun(blackRunTree); - } - - index += completeRun; - changesCurrentRow[changesCurrentRowCount++] = index; - - // Flip color for next run - white = !white; - } while (index < columns); - } - - private void decode2D() throws IOException { - changesReferenceRowCount = changesCurrentRowCount; - int[] tmp = changesCurrentRow; - changesCurrentRow = changesReferenceRow; - changesReferenceRow = tmp; - - boolean white = true; - int index = 0; - changesCurrentRowCount = 0; - - mode: while (index < columns) { - // read mode - Node n = codeTree.root; - - while (true) { - n = n.walk(readBit()); - - if (n == null) { - continue mode; - } - else if (n.isLeaf) { - switch (n.value) { - case VALUE_HMODE: - int runLength; - runLength = decodeRun(white ? whiteRunTree : blackRunTree); - index += runLength; - changesCurrentRow[changesCurrentRowCount++] = index; - - runLength = decodeRun(white ? 
blackRunTree : whiteRunTree); - index += runLength; - changesCurrentRow[changesCurrentRowCount++] = index; - break; - - case VALUE_PASSMODE: - int pChangingElement = getNextChangingElement(index, white) + 1; - - if (pChangingElement >= changesReferenceRowCount) { - index = columns; - } - else { - index = changesReferenceRow[pChangingElement]; - } - - break; - - default: - // Vertical mode (-3 to 3) - int vChangingElement = getNextChangingElement(index, white); - - if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) { - index = columns + n.value; - } - else { - index = changesReferenceRow[vChangingElement] + n.value; - } - - changesCurrentRow[changesCurrentRowCount] = index; - changesCurrentRowCount++; - white = !white; - - break; - } - - continue mode; - } - } - } - } - - private int getNextChangingElement(final int a0, final boolean white) { - int start = (lastChangingElement & 0xFFFF_FFFE) + (white ? 0 : 1); - if (start > 2) { - start -= 2; - } - - if (a0 == 0) { - return start; - } - - for (int i = start; i < changesReferenceRowCount; i += 2) { - if (a0 < changesReferenceRow[i]) { - lastChangingElement = i; - return i; - } - } - - return -1; - } - - private void decodeRowType2() throws IOException { - if (optionByteAligned) { - resetBuffer(); - } - decode1D(); - } - - private void decodeRowType4() throws IOException { - if (optionByteAligned) { - resetBuffer(); - } - eof: while (true) { - // read till next EOL code - Node n = eolOnlyTree.root; - - while (true) { - n = n.walk(readBit()); - - if (n == null) { - continue eof; - } - - if (n.isLeaf) { - break eof; - } - } - } - - if (!optionG32D || readBit()) { - decode1D(); - } - else { - decode2D(); - } - } - - private void decodeRowType6() throws IOException { - if (optionByteAligned) { - resetBuffer(); - } - decode2D(); - } - - private void decodeRow() throws IOException { - switch (type) { - case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE: - decodeRowType2(); - break; - case 
TIFFExtension.COMPRESSION_CCITT_T4: - decodeRowType4(); - break; - case TIFFExtension.COMPRESSION_CCITT_T6: - decodeRowType6(); - break; - default: - throw new IllegalArgumentException("Illegal parameter: " + type); - } - - int index = 0; - boolean white = true; - - lastChangingElement = 0; - for (int i = 0; i <= changesCurrentRowCount; i++) { - int nextChange = columns; - - if (i != changesCurrentRowCount) { - nextChange = changesCurrentRow[i]; - } - - if (nextChange > columns) { - nextChange = columns; - } - - int byteIndex = index / 8; - - while (index % 8 != 0 && (nextChange - index) > 0) { - decodedRow[byteIndex] |= (white ? 0 : 1 << (7 - ((index) % 8))); - index++; - } - - if (index % 8 == 0) { - byteIndex = index / 8; - final byte value = (byte) (white ? 0x00 : 0xff); - - while ((nextChange - index) > 7) { - decodedRow[byteIndex] = value; - index += 8; - ++byteIndex; - } - } - - while ((nextChange - index) > 0) { - if (index % 8 == 0) { - decodedRow[byteIndex] = 0; - } - - decodedRow[byteIndex] |= (white ? 
0 : 1 << (7 - ((index) % 8))); - index++; - } - - white = !white; - } - - if (index != columns) { - throw new IOException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns); - } - - decodedLength = (index + 7) / 8; - } - - private int decodeRun(final Tree tree) throws IOException { - int total = 0; - - Node n = tree.root; - - while (true) { - boolean bit = readBit(); - n = n.walk(bit); - - if (n == null) { - throw new IOException("Unknown code in Huffman RLE stream"); - } - - if (n.isLeaf) { - total += n.value; - if (n.value >= 64) { - n = tree.root; - } - else if (n.value >= 0) { - return total; - } - else { - return columns; - } - } - } - } - - private void resetBuffer() { - bufferPos = -1; - } - - int buffer = -1; - int bufferPos = -1; - - private boolean readBit() throws IOException { - if (bufferPos < 0 || bufferPos > 7) { - buffer = in.read(); - - if (buffer == -1) { - throw new EOFException("Unexpected end of Huffman RLE stream"); - } - - bufferPos = 0; - } - - boolean isSet = (buffer & 0x80) != 0; - buffer <<= 1; - bufferPos++; - - return isSet; - } - - @Override - public int read() throws IOException { - if (decodedLength < 0) { - return 0x0; - } - - if (decodedPos >= decodedLength) { - fetch(); - - if (decodedLength < 0) { - return 0x0; - } - } - - return decodedRow[decodedPos++] & 0xff; - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - if (decodedLength < 0) { - Arrays.fill(b, off, off + len, (byte) 0x0); - return len; - } - - if (decodedPos >= decodedLength) { - fetch(); - - if (decodedLength < 0) { - Arrays.fill(b, off, off + len, (byte) 0x0); - return len; - } - } - - int read = Math.min(decodedLength - decodedPos, len); - System.arraycopy(decodedRow, decodedPos, b, off, read); - decodedPos += read; - - return read; - } - - @Override - public long skip(long n) throws IOException { - if (decodedLength < 0) { - return -1; - } - - if (decodedPos >= decodedLength) { - fetch(); - - if 
(decodedLength < 0) { - return -1; - } - } - - int skipped = (int) Math.min(decodedLength - decodedPos, n); - decodedPos += skipped; - - return skipped; - } - - @Override - public boolean markSupported() { - return false; - } - - @Override - public synchronized void reset() throws IOException { - throw new IOException("mark/reset not supported"); - } - - private static final class Node { - Node left; - Node right; - - int value; // > 63 non term. - - boolean canBeFill = false; - boolean isLeaf = false; - - void set(final boolean next, final Node node) { - if (!next) { - left = node; - } - else { - right = node; - } - } - - Node walk(final boolean next) { - return next ? right : left; - } - - @Override - public String toString() { - return "[leaf=" + isLeaf + ", value=" + value + ", canBeFill=" + canBeFill + "]"; - } - } - - private static final class Tree { - final Node root = new Node(); - - void fill(final int depth, final int path, final int value) throws IOException { - Node current = root; - - for (int i = 0; i < depth; i++) { - int bitPos = depth - 1 - i; - boolean isSet = ((path >> bitPos) & 1) == 1; - Node next = current.walk(isSet); - - if (next == null) { - next = new Node(); - - if (i == depth - 1) { - next.value = value; - next.isLeaf = true; - } - - if (path == 0) { - next.canBeFill = true; - } - - current.set(isSet, next); - } - else { - if (next.isLeaf) { - throw new IOException("node is leaf, no other following"); - } - } - - current = next; - } - } - - void fill(final int depth, final int path, final Node node) throws IOException { - Node current = root; - - for (int i = 0; i < depth; i++) { - int bitPos = depth - 1 - i; - boolean isSet = ((path >> bitPos) & 1) == 1; - Node next = current.walk(isSet); - - if (next == null) { - if (i == depth - 1) { - next = node; - } - else { - next = new Node(); - } - - if (path == 0) { - next.canBeFill = true; - } - - current.set(isSet, next); - } - else { - if (next.isLeaf) { - throw new IOException("node is 
leaf, no other following"); - } - } - - current = next; - } - } - } - - static final short[][] BLACK_CODES = { - { // 2 bits - 0x2, 0x3, - }, - { // 3 bits - 0x2, 0x3, - }, - { // 4 bits - 0x2, 0x3, - }, - { // 5 bits - 0x3, - }, - { // 6 bits - 0x4, 0x5, - }, - { // 7 bits - 0x4, 0x5, 0x7, - }, - { // 8 bits - 0x4, 0x7, - }, - { // 9 bits - 0x18, - }, - { // 10 bits - 0x17, 0x18, 0x37, 0x8, 0xf, - }, - { // 11 bits - 0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd, - }, - { // 12 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33, - 0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65, - 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3, - 0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb, - }, - { // 13 bits - 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, - 0x74, 0x75, 0x76, 0x77, - } - }; - static final short[][] BLACK_RUN_LENGTHS = { - { // 2 bits - 3, 2, - }, - { // 3 bits - 1, 4, - }, - { // 4 bits - 6, 5, - }, - { // 5 bits - 7, - }, - { // 6 bits - 9, 8, - }, - { // 7 bits - 10, 11, 12, - }, - { // 8 bits - 13, 14, - }, - { // 9 bits - 15, - }, - { // 10 bits - 16, 17, 0, 18, 64, - }, - { // 11 bits - 24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920, - }, - { // 12 bits - 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53, - 54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26, - 27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43, - }, - { // 13 bits - 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, - 1152, 1216, - } - }; - - public static final short[][] WHITE_CODES = { - { // 4 bits - 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, - }, - { // 5 bits - 0x12, 0x13, 0x14, 0x1b, 0x7, 0x8, - }, - { // 6 bits - 0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8, - }, - { 
// 7 bits - 0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc, - }, - { // 8 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, - 0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59, - 0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb, - }, - { // 9 bits - 0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, - }, - { // 10 bits - }, - { // 11 bits - 0x8, 0xc, 0xd, - }, - { // 12 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, - } - }; - - public static final short[][] WHITE_RUN_LENGTHS = { - { // 4 bits - 2, 3, 4, 5, 6, 7, - }, - { // 5 bits - 128, 8, 9, 64, 10, 11, - }, - { // 6 bits - 192, 1664, 16, 17, 13, 14, 15, 1, 12, - }, - { // 7 bits - 26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19, - }, - { // 8 bits - 33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45, - 59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48, - }, - { // 9 bits - 1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, - }, - { // 10 bits - }, - { // 11 bits - 1792, 1856, 1920, - }, - { // 12 bits - 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, - } - }; - - static final Node EOL; - static final Node FILL; - static final Tree blackRunTree; - static final Tree whiteRunTree; - static final Tree eolOnlyTree; - static final Tree codeTree; - - static final int VALUE_EOL = -2000; - static final int VALUE_FILL = -1000; - static final int VALUE_PASSMODE = -3000; - static final int VALUE_HMODE = -4000; - - static { - EOL = new Node(); - EOL.isLeaf = true; - EOL.value = VALUE_EOL; - FILL = new Node(); - FILL.value = VALUE_FILL; - FILL.left = FILL; - FILL.right = EOL; - - eolOnlyTree = new Tree(); - try { - eolOnlyTree.fill(12, 0, FILL); - eolOnlyTree.fill(12, 1, EOL); - } - catch (IOException e) { - throw new 
AssertionError(e); - } - - blackRunTree = new Tree(); - try { - for (int i = 0; i < BLACK_CODES.length; i++) { - for (int j = 0; j < BLACK_CODES[i].length; j++) { - blackRunTree.fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]); - } - } - blackRunTree.fill(12, 0, FILL); - blackRunTree.fill(12, 1, EOL); - } - catch (IOException e) { - throw new AssertionError(e); - } - - whiteRunTree = new Tree(); - try { - for (int i = 0; i < WHITE_CODES.length; i++) { - for (int j = 0; j < WHITE_CODES[i].length; j++) { - whiteRunTree.fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]); - } - } - - whiteRunTree.fill(12, 0, FILL); - whiteRunTree.fill(12, 1, EOL); - } - catch (IOException e) { - throw new AssertionError(e); - } - - codeTree = new Tree(); - try { - codeTree.fill(4, 1, VALUE_PASSMODE); // pass mode - codeTree.fill(3, 1, VALUE_HMODE); // H mode - codeTree.fill(1, 1, 0); // V(0) - codeTree.fill(3, 3, 1); // V_R(1) - codeTree.fill(6, 3, 2); // V_R(2) - codeTree.fill(7, 3, 3); // V_R(3) - codeTree.fill(3, 2, -1); // V_L(1) - codeTree.fill(6, 2, -2); // V_L(2) - codeTree.fill(7, 2, -3); // V_L(3) - } - catch (IOException e) { - throw new AssertionError(e); - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java deleted file mode 100644 index c0c3c75759f..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxEncoderStream.java +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2013, Harald Kuhr - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name "TwelveMonkeys" nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.OutputStream; - -/** - * CCITT Modified Group 4 (T6) fax compression. 
- * - * @author Oliver Schmidtmer - * - * Taken from commit 047884e3d9e1b30516c79b147ead763303dc9bcb of 21.4.2016 from - * twelvemonkeys/imageio/plugins/tiff/CCITTFaxEncoderStream.java - * - * Initial changes for PDFBox: - * - removed Validate - * - G4 compression only - * - removed options - */ -final class CCITTFaxEncoderStream extends OutputStream { - - private int currentBufferLength = 0; - private final byte[] inputBuffer; - private final int inputBufferLength; - private final int columns; - private final int rows; - - private int[] changesCurrentRow; - private int[] changesReferenceRow; - private int currentRow = 0; - private int changesCurrentRowLength = 0; - private int changesReferenceRowLength = 0; - private byte outputBuffer = 0; - private byte outputBufferBitLength = 0; - private final int fillOrder; - private final OutputStream stream; - - CCITTFaxEncoderStream(final OutputStream stream, final int columns, final int rows, final int fillOrder) { - - this.stream = stream; - this.columns = columns; - this.rows = rows; - this.fillOrder = fillOrder; - - this.changesReferenceRow = new int[columns]; - this.changesCurrentRow = new int[columns]; - - inputBufferLength = (columns + 7) / 8; - inputBuffer = new byte[inputBufferLength]; - } - - @Override - public void write(int b) throws IOException { - inputBuffer[currentBufferLength] = (byte) b; - currentBufferLength++; - - if (currentBufferLength == inputBufferLength) { - encodeRow(); - currentBufferLength = 0; - } - } - - @Override - public void flush() throws IOException { - stream.flush(); - } - - @Override - public void close() throws IOException { - stream.close(); - } - - private void encodeRow() throws IOException { - currentRow++; - int[] tmp = changesReferenceRow; - changesReferenceRow = changesCurrentRow; - changesCurrentRow = tmp; - changesReferenceRowLength = changesCurrentRowLength; - changesCurrentRowLength = 0; - - int index = 0; - boolean white = true; - while (index < columns) { - int byteIndex = 
index / 8; - int bit = index % 8; - if ((((inputBuffer[byteIndex] >> (7 - bit)) & 1) == 1) == (white)) { - changesCurrentRow[changesCurrentRowLength] = index; - changesCurrentRowLength++; - white = !white; - } - index++; - } - - encodeRowType6(); - - if (currentRow == rows) { - writeEOL(); - writeEOL(); - fill(); - } - } - - - private void encodeRowType6() throws IOException { - encode2D(); - } - - private int[] getNextChanges(int pos, boolean white) { - int[] result = {columns, columns}; - for (int i = 0; i < changesCurrentRowLength; i++) { - if (pos < changesCurrentRow[i] || (pos == 0 && white)) { - result[0] = changesCurrentRow[i]; - if ((i + 1) < changesCurrentRowLength) { - result[1] = changesCurrentRow[i + 1]; - } - break; - } - } - - return result; - } - - private void writeRun(int runLength, boolean white) throws IOException { - int nonterm = runLength / 64; - Code[] codes = white ? WHITE_NONTERMINATING_CODES : BLACK_NONTERMINATING_CODES; - while (nonterm > 0) { - if (nonterm >= codes.length) { - write(codes[codes.length - 1].code, codes[codes.length - 1].length); - nonterm -= codes.length; - } - else { - write(codes[nonterm - 1].code, codes[nonterm - 1].length); - nonterm = 0; - } - } - - Code c = white ? 
WHITE_TERMINATING_CODES[runLength % 64] : BLACK_TERMINATING_CODES[runLength % 64]; - write(c.code, c.length); - } - - private void encode2D() throws IOException { - boolean white = true; - int index = 0; // a0 - while (index < columns) { - int[] nextChanges = getNextChanges(index, white); // a1, a2 - - int[] nextRefs = getNextRefChanges(index, white); // b1, b2 - - int difference = nextChanges[0] - nextRefs[0]; - if (nextChanges[0] > nextRefs[1]) { - // PMODE - write(1, 4); - index = nextRefs[1]; - } - else if (difference > 3 || difference < -3) { - // HMODE - write(1, 3); - writeRun(nextChanges[0] - index, white); - writeRun(nextChanges[1] - nextChanges[0], !white); - index = nextChanges[1]; - - } - else { - // VMODE - switch (difference) { - case 0: - write(1, 1); - break; - case 1: - write(3, 3); - break; - case 2: - write(3, 6); - break; - case 3: - write(3, 7); - break; - case -1: - write(2, 3); - break; - case -2: - write(2, 6); - break; - case -3: - write(2, 7); - break; - default: - break; - } - white = !white; - index = nextRefs[0] + difference; - } - } - } - - private int[] getNextRefChanges(int a0, boolean white) { - int[] result = {columns, columns}; - for (int i = (white ? 0 : 1); i < changesReferenceRowLength; i += 2) { - if (changesReferenceRow[i] > a0 || (a0 == 0 && i == 0)) { - result[0] = changesReferenceRow[i]; - if ((i + 1) < changesReferenceRowLength) { - result[1] = changesReferenceRow[i + 1]; - } - break; - } - } - return result; - } - - private void write(int code, int codeLength) throws IOException { - - for (int i = 0; i < codeLength; i++) { - boolean codeBit = ((code >> (codeLength - i - 1)) & 1) == 1; - if (fillOrder == TIFFExtension.FILL_LEFT_TO_RIGHT) { - outputBuffer |= (codeBit ? 1 << (7 - ((outputBufferBitLength) % 8)) : 0); - } - else { - outputBuffer |= (codeBit ? 
1 << (((outputBufferBitLength) % 8)) : 0); - } - outputBufferBitLength++; - - if (outputBufferBitLength == 8) { - stream.write(outputBuffer); - clearOutputBuffer(); - } - } - } - - private void writeEOL() throws IOException { - write(1, 12); - } - - private void fill() throws IOException { - if (outputBufferBitLength != 0) { - stream.write(outputBuffer); - } - clearOutputBuffer(); - } - - private void clearOutputBuffer() { - outputBuffer = 0; - outputBufferBitLength = 0; - } - - private static class Code { - private Code(int code, int length) { - this.code = code; - this.length = length; - } - - final int code; - final int length; - } - - private static final Code[] WHITE_TERMINATING_CODES; - - private static final Code[] WHITE_NONTERMINATING_CODES; - - private static final Code[] BLACK_TERMINATING_CODES; - - private static final Code[] BLACK_NONTERMINATING_CODES; - - static { - // Setup HUFFMAN Codes - WHITE_TERMINATING_CODES = new Code[64]; - WHITE_NONTERMINATING_CODES = new Code[40]; - for (int i = 0; i < CCITTFaxDecoderStream.WHITE_CODES.length; i++) { - int bitLength = i + 4; - for (int j = 0; j < CCITTFaxDecoderStream.WHITE_CODES[i].length; j++) { - int value = CCITTFaxDecoderStream.WHITE_RUN_LENGTHS[i][j]; - int code = CCITTFaxDecoderStream.WHITE_CODES[i][j]; - - if (value < 64) { - WHITE_TERMINATING_CODES[value] = new Code(code, bitLength); - } - else { - WHITE_NONTERMINATING_CODES[(value / 64) - 1] = new Code(code, bitLength); - } - } - } - - BLACK_TERMINATING_CODES = new Code[64]; - BLACK_NONTERMINATING_CODES = new Code[40]; - for (int i = 0; i < CCITTFaxDecoderStream.BLACK_CODES.length; i++) { - int bitLength = i + 2; - for (int j = 0; j < CCITTFaxDecoderStream.BLACK_CODES[i].length; j++) { - int value = CCITTFaxDecoderStream.BLACK_RUN_LENGTHS[i][j]; - int code = CCITTFaxDecoderStream.BLACK_CODES[i][j]; - - if (value < 64) { - BLACK_TERMINATING_CODES[value] = new Code(code, bitLength); - } - else { - BLACK_NONTERMINATING_CODES[(value / 64) - 1] = new 
Code(code, bitLength); - } - } - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java deleted file mode 100644 index bd5341e58bd..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CCITTFaxFilter.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.PushbackInputStream; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; - -/** - * Decodes image data that has been encoded using either Group 3 or Group 4 - * CCITT facsimile (fax) encoding, and encodes image data to Group 4. 
- * - * @author Ben Litchfield - * @author Marcel Kammer - * @author Paul King - */ -final class CCITTFaxFilter extends Filter -{ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - // get decode parameters - COSDictionary decodeParms = getDecodeParams(parameters, index); - - // parse dimensions - int cols = decodeParms.getInt(COSName.COLUMNS, 1728); - int rows = decodeParms.getInt(COSName.ROWS, 0); - int height = parameters.getInt(COSName.HEIGHT, COSName.H, 0); - if (rows > 0 && height > 0) - { - // PDFBOX-771, PDFBOX-3727: rows in DecodeParms sometimes contains an incorrect value - rows = height; - } - else - { - // at least one of the values has to have a valid value - rows = Math.max(rows, height); - } - - // decompress data - int k = decodeParms.getInt(COSName.K, 0); - boolean encodedByteAlign = decodeParms.getBoolean(COSName.ENCODED_BYTE_ALIGN, false); - int arraySize = (cols + 7) / 8 * rows; - // TODO possible options?? 
- byte[] decompressed = new byte[arraySize]; - CCITTFaxDecoderStream s; - int type; - long tiffOptions = 0; - if (k == 0) - { - type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D - byte[] streamData = new byte[20]; - int bytesRead = encoded.read(streamData); - if (bytesRead == -1) - { - throw new IOException("EOF while reading CCITT header"); - } - PushbackInputStream pushbackInputStream = new PushbackInputStream(encoded, streamData.length); - pushbackInputStream.unread(streamData, 0, bytesRead); - encoded = pushbackInputStream; - if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1)) - { - // leading EOL (0b000000000001) not found, search further and try RLE if not - // found - type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE; - short b = (short) (((streamData[0] << 8) + (streamData[1] & 0xff)) >> 4); - for (int i = 12; i < bytesRead * 8; i++) - { - b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01)); - if ((b & 0xFFF) == 1) - { - type = TIFFExtension.COMPRESSION_CCITT_T4; - break; - } - } - } - } - else if (k > 0) - { - // Group 3 2D - type = TIFFExtension.COMPRESSION_CCITT_T4; - tiffOptions = TIFFExtension.GROUP3OPT_2DENCODING; - } - else - { - // Group 4 - type = TIFFExtension.COMPRESSION_CCITT_T6; - } - s = new CCITTFaxDecoderStream(encoded, cols, type, tiffOptions, encodedByteAlign); - readFromDecoderStream(s, decompressed); - - // invert bitmap - boolean blackIsOne = decodeParms.getBoolean(COSName.BLACK_IS_1, false); - if (!blackIsOne) - { - // Inverting the bitmap - // Note the previous approach with starting from an IndexColorModel didn't work - // reliably. In some cases the image wouldn't be painted for some reason. - // So a safe but slower approach was taken. 
- invertBitmap(decompressed); - } - - decoded.write(decompressed); - return new DecodeResult(parameters); - } - - void readFromDecoderStream(CCITTFaxDecoderStream decoderStream, byte[] result) - throws IOException - { - int pos = 0; - int read; - while ((read = decoderStream.read(result, pos, result.length - pos)) > -1) - { - pos += read; - if (pos >= result.length) - { - break; - } - } - } - - private void invertBitmap(byte[] bufferData) - { - for (int i = 0, c = bufferData.length; i < c; i++) - { - bufferData[i] = (byte) (~bufferData[i] & 0xFF); - } - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - int cols = parameters.getInt(COSName.COLUMNS); - int rows = parameters.getInt(COSName.ROWS); - CCITTFaxEncoderStream ccittFaxEncoderStream = - new CCITTFaxEncoderStream(encoded, cols, rows, TIFFExtension.FILL_LEFT_TO_RIGHT); - input.transferTo(ccittFaxEncoderStream); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java deleted file mode 100644 index cb359bc84ba..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSCryptFilterDictionary.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos.filter; - -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSBoolean; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObjectGetter; - -/** - * This class is a specialized view of the crypt filter dictionary of a PDF - * document It contains a low level dictionary (COSDictionary) and provides - * methods to manage its fields. - */ -public class COSCryptFilterDictionary implements COSObjectGetter -{ - - /** - * COS crypt filter dictionary. - */ - protected COSDictionary cryptFilterDictionary = null; - - /** - * creates a new empty crypt filter dictionary. - */ - public COSCryptFilterDictionary() - { - cryptFilterDictionary = new COSDictionary(); - } - - /** - * creates a new crypt filter dictionary from the low level dictionary provided. - * @param d the low level dictionary that will be managed by the newly created object - */ - public COSCryptFilterDictionary( COSDictionary d) - { - cryptFilterDictionary = d; - } - - /** - * This will get the dictionary associated with this crypt filter dictionary. - * - * @return The COS dictionary that this object wraps. - */ - @Override - public COSDictionary getCOSObject() - { - return cryptFilterDictionary; - } - - /** - * This will set the number of bits to use for the crypt filter algorithm. - * - * @param length The new key length. 
- */ - public void setLength(int length) - { - cryptFilterDictionary.setInt(COSName.LENGTH, length); - } - - /** - * This will return the Length entry of the crypt filter dictionary.

- * The length in bits for the crypt filter algorithm. This will return a multiple of 8. - * - * @return The length in bits for the encryption algorithm - */ - public int getLength() - { - return cryptFilterDictionary.getInt( COSName.LENGTH, 40 ); - } - - /** - * This will set the crypt filter method. - * Allowed values are: NONE, V2, AESV2, AESV3 - * - * @param cfm name of the crypt filter method. - * - */ - public void setCryptFilterMethod(COSName cfm) - { - cryptFilterDictionary.setItem( COSName.CFM, cfm ); - } - - /** - * This will return the crypt filter method. - * Allowed values are: NONE, V2, AESV2, AESV3 - * - * @return the name of the crypt filter method. - */ - public COSName getCryptFilterMethod() - { - return cryptFilterDictionary.getCOSName(COSName.CFM); - } - - /** - * Will get the EncryptMetaData dictionary info. - * - * @return true if EncryptMetaData is explicitly set (the default is true) - */ - public boolean isEncryptMetaData() - { - COSBase value = cryptFilterDictionary.getObjectFromDictionary( COSName.ENCRYPT_META_DATA ); - if (value instanceof COSBoolean) - { - return ((COSBoolean) value).getValue(); - } - - // default is true (see 7.6.3.2 Standard Encryption Dictionary PDF 32000-1:2008) - return true; - } - - /** - * Set the EncryptMetaData dictionary info. - * - * @param encryptMetaData true if EncryptMetaData shall be set. - */ - public void setEncryptMetaData(boolean encryptMetaData) - { - getCOSObject().setBoolean(COSName.ENCRYPT_META_DATA, encryptMetaData); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java deleted file mode 100644 index 2434863d609..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSEncryptFilterDictionary.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos.filter; - -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSBoolean; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSObjectGetter; - -/** - * This class is a specialized view of the encryption filter dictionary of a - * PDF document. It contains a low level dictionary (COSDictionary) and provides - * the methods to manage its fields. - */ -public class COSEncryptFilterDictionary implements COSObjectGetter -{ - /** - * COS crypt filter dictionary. - */ - protected COSDictionary cryptFilterDictionary = null; - - /** - * creates a new empty crypt filter dictionary. - */ - public COSEncryptFilterDictionary() - { - cryptFilterDictionary = new COSDictionary(); - } - - /** - * creates a new crypt filter dictionary from the low level dictionary provided. - * @param d the low level dictionary that will be managed by the newly created object - */ - public COSEncryptFilterDictionary( COSDictionary d) - { - cryptFilterDictionary = d; - } - - /** - * This will get the dictionary associated with this crypt filter dictionary. - * - * @return The COS dictionary that this object wraps. 
- */ - @Override - public COSDictionary getCOSObject() - { - return cryptFilterDictionary; - } - - /** - * This will set the number of bits to use for the crypt filter algorithm. - * - * @param length The new key length. - */ - public void setLength(int length) - { - cryptFilterDictionary.setInt( COSName.LENGTH, length); - } - - /** - * This will return the Length entry of the crypt filter dictionary.

- * The length in bits for the crypt filter algorithm. This will return a multiple of 8. - * - * @return The length in bits for the encryption algorithm - */ - public int getLength() - { - return cryptFilterDictionary.getInt( COSName.LENGTH, 40 ); - } - - /** - * This will set the crypt filter method. - * Allowed values are: NONE, V2, AESV2, AESV3 - * - * @param cfm name of the crypt filter method. - * - */ - public void setCryptFilterMethod(COSName cfm) - { - cryptFilterDictionary.setItem( COSName.CFM, cfm ); - } - - /** - * This will return the crypt filter method. - * Allowed values are: NONE, V2, AESV2, AESV3 - * - * @return the name of the crypt filter method. - */ - public COSName getCryptFilterMethod() - { - return cryptFilterDictionary.getCOSName(COSName.CFM); - } - - /** - * Will get the EncryptMetaData dictionary info. - * - * @return true if EncryptMetaData is explicitly set (the default is true) - */ - public boolean isEncryptMetaData() - { - COSBase value = getCOSObject().getObjectFromDictionary( COSName.ENCRYPT_META_DATA); - if (value instanceof COSBoolean) - { - return ((COSBoolean) value).getValue(); - } - - // default is true (see 7.6.3.2 Standard Encryption Dictionary PDF 32000-1:2008) - return true; - } - - /** - * Set the EncryptMetaData dictionary info. - * - * @param encryptMetaData true if EncryptMetaData shall be set. - */ - public void setEncryptMetaData(boolean encryptMetaData) - { - getCOSObject().setBoolean(COSName.ENCRYPT_META_DATA, encryptMetaData); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java deleted file mode 100644 index b86ca6847d1..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSInputStream.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos.filter; - -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.io.RandomAccessInputStream; -import org.apache.pdfbox.io.RandomAccessRead; - -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * An InputStream which reads from an encoded COS stream. - * - * @author John Hewson - */ -public final class COSInputStream extends FilterInputStream -{ - public static COSInputStream create( COSStream cosStreamObject ) throws IOException - { - return create( cosStreamObject.getFilterList(), cosStreamObject, cosStreamObject.createRawInputStream()); - } - - /** - * Creates a new COSInputStream from an encoded input stream. - * - * @param filters Filters to be applied. - * @param parameters Filter parameters. - * @param in Encoded input stream. - * @return Decoded stream. - * @throws IOException If the stream could not be read. 
- */ - public static COSInputStream create(List filters, COSDictionary parameters, InputStream in) - throws IOException - { - return create(filters, parameters, in, DecodeOptions.DEFAULT); - } - - /** - * Creates a new COSInputStream from an encoded input stream. - * - * @param filters Filters to be applied. - * @param parameters Filter parameters. - * @param in Encoded input stream. - * @param options decode options for the encoded stream - * @return Decoded stream. - * @throws IOException If the stream could not be read. - */ - public static COSInputStream create( List filters, COSDictionary parameters, InputStream in, - DecodeOptions options) throws IOException - { - if (filters.isEmpty()) - { - return new COSInputStream(in, Collections.emptyList()); - } - List results = new ArrayList<>(filters.size()); - RandomAccessRead decoded = Filter.decode(in, filters, parameters, options, results); - return new COSInputStream(new RandomAccessInputStream(decoded), results); - } - - private final List decodeResults; - - /** - * Constructor. - * - * @param input decoded stream - * @param decodeResults results of decoding - */ - private COSInputStream(InputStream input, List decodeResults) - { - super(input); - this.decodeResults = decodeResults; - } - - /** - * Returns the result of the last filter, for use by repair mechanisms. 
- * - * @return the result of the last filter - */ - public DecodeResult getDecodeResult() - { - if (decodeResults.isEmpty()) - { - return DecodeResult.createDefault(); - } - else - { - return decodeResults.get(decodeResults.size() - 1); - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java deleted file mode 100644 index 08a7ffe6728..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/COSOutputStream.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos.filter; - -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.io.RandomAccess; -import org.apache.pdfbox.io.RandomAccessInputStream; -import org.apache.pdfbox.io.RandomAccessOutputStream; -import org.apache.pdfbox.io.RandomAccessStreamCache; - -import java.io.FilterOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.List; - -/** - * An OutputStream which writes to an encoded COS stream. 
- * - * @author John Hewson - */ -public final class COSOutputStream extends FilterOutputStream -{ - private final List filters; - private final COSDictionary parameters; - private final RandomAccessStreamCache streamCache; - private RandomAccess buffer; - - /** - * * TODO: fix so that this is in a filter class, using this object as input. - * Returns a new OutputStream for writing stream data, using the current filters. - * - * @return OutputStream for un-encoded stream data. - * @throws IOException If the output stream could not be created. - */ -// public OutputStream createOutputStream() throws IOException -// { -// return createOutputStream(null); -// } -// -// /** -// * Returns a new OutputStream for writing stream data, using and the given filters. -// * -// * @param filters COSArray or COSName of filters to be used. -// * @return OutputStream for un-encoded stream data. -// * @throws IOException If the output stream could not be created. -// */ -// public OutputStream createOutputStream( COSBase filters) throws IOException -// { -// checkClosed(); -// if (isWriting) -// { -// throw new IllegalStateException("Cannot have more than one open stream writer."); -// } -// // apply filters, if any -// if (filters != null) -// { -// setItem(COSName.FILTER, filters); -// } -// if (randomAccess != null) -// randomAccess.clear(); -// else -// randomAccess = getStreamCache().createBuffer(); -// OutputStream randomOut = new RandomAccessOutputStream( randomAccess); -// OutputStream cosOut = new COSOutputStream( getFilterList(), this, randomOut, -// getStreamCache()); -// isWriting = true; -// return new FilterOutputStream( cosOut) -// { -// @Override -// public void write(byte[] b, int off, int len) throws IOException -// { -// this.out.write(b, off, len); -// } -// -// @Override -// public void close() throws IOException -// { -// super.close(); -// setInt(COSName.LENGTH, (int)randomAccess.length()); -// isWriting = false; -// } -// }; -// } - - /** - * Creates a new 
COSOutputStream writes to an encoded COS stream. - * - * @param filters Filters to apply. - * @param parameters Filter parameters. - * @param output Encoded stream. - * @param streamCache Stream cache to use. - * - * @throws IOException If there was an error creating a temporary buffer - */ - public COSOutputStream( List filters, COSDictionary parameters, OutputStream output, - RandomAccessStreamCache streamCache ) throws IOException - { - super(output); - this.filters = filters; - this.parameters = parameters; - this.streamCache = streamCache; - buffer = filters.isEmpty() ? null : streamCache.createBuffer(); - } - - @Override - public void write(byte[] b) throws IOException - { - write(b, 0, b.length); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException - { - if (buffer != null) - { - buffer.write(b, off, len); - } - else - { - super.write(b, off, len); - } - } - - @Override - public void write(int b) throws IOException - { - if (buffer != null) - { - buffer.write(b); - } - else - { - super.write(b); - } - } - - @Override - public void flush() throws IOException - { - if (buffer == null) - { - super.flush(); - } - } - - @Override - public void close() throws IOException - { - try - { - if (buffer != null) - { - try - { - // apply filters in reverse order - for (int i = filters.size() - 1; i >= 0; i--) - { - try (InputStream unfilteredIn = new RandomAccessInputStream(buffer)) - { - if (i == 0) - { - /* - * The last filter to run can encode directly to the enclosed output - * stream. 
- */ - filters.get(i).encode(unfilteredIn, out, parameters, i); - } - else - { - RandomAccess filteredBuffer = streamCache.createBuffer(); - try (OutputStream filteredOut = new RandomAccessOutputStream(filteredBuffer)) - { - filters.get(i).encode(unfilteredIn, filteredOut, parameters, i); - } - finally - { - buffer.close(); - buffer = filteredBuffer; - } - } - } - } - } - finally - { - buffer.close(); - buffer = null; - } - } - } - finally - { - super.close(); - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java deleted file mode 100644 index 5c1e2e2a11f..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/CryptFilter.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; - -/** - * Decrypts data encrypted by a security handler, reproducing the data as it was before encryption. 
- * @author Adam Nichols - */ -final class CryptFilter extends Filter -{ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - COSName encryptionName = parameters.getCOSName(COSName.NAME); - if(encryptionName == null || encryptionName.equals(COSName.IDENTITY)) - { - // currently the only supported implementation is the Identity crypt filter - Filter identityFilter = new IdentityFilter(); - identityFilter.decode(encoded, decoded, parameters, index); - return new DecodeResult(parameters); - } - throw new IOException("Unsupported crypt filter " + encryptionName.getName()); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - COSName encryptionName = parameters.getCOSName(COSName.NAME); - if(encryptionName == null || encryptionName.equals(COSName.IDENTITY)) - { - // currently the only supported implementation is the Identity crypt filter - Filter identityFilter = new IdentityFilter(); - identityFilter.encode(input, encoded, parameters); - } - else - { - throw new IOException("Unsupported crypt filter " + encryptionName.getName()); - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java deleted file mode 100644 index c5f3ac54ac1..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DCTFilter.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.awt.color.CMMException; -import java.awt.image.BufferedImage; -import java.awt.image.DataBufferByte; -import java.awt.image.Raster; -import java.awt.image.WritableRaster; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import javax.imageio.IIOException; -import javax.imageio.ImageIO; -import javax.imageio.ImageReadParam; -import javax.imageio.ImageReader; -import javax.imageio.metadata.IIOMetadata; -import javax.imageio.metadata.IIOMetadataNode; -import javax.imageio.stream.ImageInputStream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSDictionary; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; - -/** - * Decompresses data encoded using a DCT (discrete cosine transform) - * technique based on the JPEG standard. 
- * - * @author John Hewson - */ -final class DCTFilter extends Filter -{ - private static final Logger LOG = LogManager.getLogger(DCTFilter.class); - - private static final int POS_TRANSFORM = 11; - private static final String ADOBE = "Adobe"; - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary - parameters, int index, DecodeOptions options) throws IOException - { - ImageReader reader = findRasterReader("JPEG", "a suitable JAI I/O image filter is not installed"); - try (ImageInputStream iis = ImageIO.createImageInputStream(encoded)) - { - - // skip one LF if there - if (iis.read() != 0x0A) - { - iis.seek(0); - } - - reader.setInput(iis); - ImageReadParam irp = reader.getDefaultReadParam(); - irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), - options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); - irp.setSourceRegion(options.getSourceRegion()); - options.setFilterSubsampled(true); - - Raster raster = readImageRaster(reader, irp); - - // special handling for 4-component images - if (raster.getNumBands() == 4) - { - // get APP14 marker - Integer transform; - try - { - transform = getAdobeTransform(reader.getImageMetadata(0)); - } - catch (IIOException | NegativeArraySizeException e) - { - // we really tried asking nicely, now we're using brute force. - LOG.debug("Couldn't read usíng getAdobeTransform() - using getAdobeTransformByBruteForce() as fallback", e); - transform = getAdobeTransformByBruteForce(iis); - } - int colorTransform = transform != null ? 
transform : 0; - - // 0 = Unknown (RGB or CMYK), 1 = YCbCr, 2 = YCCK - // https://exiftool.org/TagNames/JPEG.html#Adobe - switch (colorTransform) - { - case 0: - // already CMYK - break; - case 1: - LOG.warn("There is no 4 channel YCbCr, using YCCK"); - // fallthrough - case 2: - raster = fromYCCKtoCMYK(raster); - break; - default: - throw new IllegalArgumentException("Unknown colorTransform"); - } - } - else if (raster.getNumBands() == 3) - { - // BGR to RGB - raster = fromBGRtoRGB(raster); - } - - DataBufferByte dataBuffer = (DataBufferByte)raster.getDataBuffer(); - decoded.write(dataBuffer.getData()); - } - catch (CMMException ex) - { - // PDFBOX-5732 - throw new IOException(ex); - } - finally - { - reader.dispose(); - } - return new DecodeResult(parameters); - } - - private Raster readImageRaster(ImageReader reader, ImageReadParam irp) throws IOException - { - String numChannels = getNumChannels(reader); - // get the raster using horrible JAI workarounds - ImageIO.setUseCache(false); - Raster raster; - // Strategy: use read() for RGB or "can't get metadata" - // use readRaster() for CMYK and gray and as fallback if read() fails - // after "can't get metadata" because "no meta" file was CMYK - if ("3".equals(numChannels) || numChannels.isEmpty()) - { - try - { - // I'd like to use ImageReader#readRaster but it is buggy and can't read RGB correctly - BufferedImage image = reader.read(0, irp); - if (image.getColorModel().getNumColorComponents() == 4) - { - throw new IIOException("CMYK image"); - } - raster = image.getRaster(); - } - catch (IIOException e) - { - // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but - // fortunately ImageReader#readRaster isn't buggy when reading 4-channel files - LOG.debug("Couldn't read use read() for RGB image - using readRaster() as fallback", e); - raster = reader.readRaster(0, irp); - } - } - else - { - // JAI can't read CMYK JPEGs using ImageReader#read or ImageIO.read but - // fortunately 
ImageReader#readRaster isn't buggy when reading 4-channel files - raster = reader.readRaster(0, irp); - } - return raster; - } - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); - } - - // reads the APP14 Adobe transform tag and returns its value, or 0 if unknown - private Integer getAdobeTransform(IIOMetadata metadata) - { - Element tree = (Element)metadata.getAsTree("javax_imageio_jpeg_image_1.0"); - Element markerSequence = (Element)tree.getElementsByTagName("markerSequence").item(0); - NodeList app14AdobeNodeList = markerSequence.getElementsByTagName("app14Adobe"); - if (app14AdobeNodeList != null) - { - int app14AdobeNodeListLength = app14AdobeNodeList.getLength(); - if (app14AdobeNodeListLength > 0) - { - if (app14AdobeNodeListLength > 1) - { - LOG.warn("app14Adobe entry appears several times, using the last one"); - } - Element adobe = (Element) app14AdobeNodeList.item(app14AdobeNodeListLength - 1); - return Integer.valueOf(adobe.getAttribute("transform")); - } - } - return 0; - } - - // See in https://github.com/haraldk/TwelveMonkeys - // com.twelvemonkeys.imageio.plugins.jpeg.AdobeDCT class for structure of APP14 segment - private int getAdobeTransformByBruteForce(ImageInputStream iis) throws IOException - { - int a = 0; - iis.seek(0); - int by; - while ((by = iis.read()) != -1) - { - if (ADOBE.charAt(a) == by) - { - ++a; - if (a != ADOBE.length()) - { - continue; - } - // match - a = 0; - long afterAdobePos = iis.getStreamPosition(); - iis.seek(afterAdobePos - 9); - int tag = iis.readUnsignedShort(); - if (tag != 0xFFEE) - { - iis.seek(afterAdobePos); - continue; - } - int len = iis.readUnsignedShort(); - if (len >= POS_TRANSFORM + 1) - { - byte[] app14 = new byte[Math.max(len, POS_TRANSFORM + 1)]; - if (iis.read(app14) >= POS_TRANSFORM + 1) - { - return app14[POS_TRANSFORM]; - } - } 
- } - else - { - a = 0; - } - } - return 0; - } - - // converts YCCK image to CMYK. YCCK is an equivalent encoding for - // CMYK data, so no color management code is needed here, nor does the - // PDF color space have to be consulted - private WritableRaster fromYCCKtoCMYK(Raster raster) - { - WritableRaster writableRaster = raster.createCompatibleWritableRaster(); - - int[] value = new int[4]; - for (int y = 0, height = raster.getHeight(); y < height; y++) - { - for (int x = 0, width = raster.getWidth(); x < width; x++) - { - raster.getPixel(x, y, value); - - // 4-channels 0..255 - float Y = value[0]; - float Cb = value[1]; - float Cr = value[2]; - float K = value[3]; - - // YCCK to RGB, see http://software.intel.com/en-us/node/442744 - int r = clamp(Y + 1.402f * Cr - 179.456f); - int g = clamp(Y - 0.34414f * Cb - 0.71414f * Cr + 135.45984f); - int b = clamp(Y + 1.772f * Cb - 226.816f); - - // naive RGB to CMYK - int cyan = 255 - r; - int magenta = 255 - g; - int yellow = 255 - b; - - // update new raster - value[0] = cyan; - value[1] = magenta; - value[2] = yellow; - value[3] = (int)K; - writableRaster.setPixel(x, y, value); - } - } - return writableRaster; - } - - // converts from BGR to RGB - private WritableRaster fromBGRtoRGB(Raster raster) - { - WritableRaster writableRaster = raster.createCompatibleWritableRaster(); - - int width = raster.getWidth(); - int height = raster.getHeight(); - int w3 = width * 3; - int[] tab = new int[w3]; - //BEWARE: handling the full image at a time is slower than one line at a time - for (int y = 0; y < height; y++) - { - raster.getPixels(0, y, width, 1, tab); - for (int off = 0; off < w3; off += 3) - { - int tmp = tab[off]; - tab[off] = tab[off + 2]; - tab[off + 2] = tmp; - } - writableRaster.setPixels(0, y, width, 1, tab); - } - return writableRaster; - } - - // returns the number of channels as a string, or an empty string if there is an error getting the meta data - private String getNumChannels(ImageReader reader) - { - 
try - { - IIOMetadata imageMetadata = reader.getImageMetadata(0); - if (imageMetadata == null) - { - return ""; - } - IIOMetadataNode metaTree = (IIOMetadataNode) imageMetadata.getAsTree("javax_imageio_1.0"); - Element numChannelsItem = (Element) metaTree.getElementsByTagName("NumChannels").item(0); - if (numChannelsItem == null) - { - return ""; - } - return numChannelsItem.getAttribute("value"); - } - catch (IOException | NegativeArraySizeException e) - { - LOG.debug("Couldn't read metadata - returning empty string", e); - return ""; - } - } - - // clamps value to 0-255 range - private int clamp(float value) - { - return (int)((value < 0) ? 0 : ((value > 255) ? 255 : value)); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - throw new UnsupportedOperationException("DCTFilter encoding not implemented, use the JPEGFactory methods instead"); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeOptions.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeOptions.java deleted file mode 100644 index a3ef43a3c1f..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeOptions.java +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.awt.Rectangle; - -/** - * Options that may be passed to a Filter to request special handling when decoding the stream. - * Filters may not honor some or all of the specified options, and so callers should check the - * honored flag if further processing relies on the options being used. - */ -public class DecodeOptions -{ - /** - * Default decode options. The honored flag for this instance is always true, as it represents - * the default behavior. - */ - public static final DecodeOptions DEFAULT = new FinalDecodeOptions(true); - - private Rectangle sourceRegion = null; - private int subsamplingX = 1; - private int subsamplingY = 1; - private int subsamplingOffsetX = 0; - private int subsamplingOffsetY = 0; - private boolean filterSubsampled = false; - - /** - * Constructs an empty DecodeOptions instance - */ - public DecodeOptions() - { - // this constructor is intentionally left empty - } - - /** - * Constructs an instance specifying the region of the image that should be decoded. The actual - * region will be clipped to the dimensions of the image. - * - * @param sourceRegion Region of the source image that should be decoded - */ - public DecodeOptions(Rectangle sourceRegion) - { - this.sourceRegion = sourceRegion; - } - - /** - * Constructs an instance specifying the region of the image that should be decoded. The actual - * region will be clipped to the dimensions of the image. 
- * - * @param x x-coordinate of the top-left corner of the region to be decoded - * @param y y-coordinate of the top-left corner of the region to be decoded - * @param width Width of the region to be decoded - * @param height Height of the region to be decoded - */ - public DecodeOptions(int x, int y, int width, int height) - { - this(new Rectangle(x, y, width, height)); - } - - /** - * Constructs an instance specifying the image should be decoded using subsampling. The - * subsampling will be the same for the X and Y axes. - * - * @param subsampling The number of rows and columns to advance in the source for each pixel in - * the decoded image. - */ - public DecodeOptions(int subsampling) - { - subsamplingX = subsampling; - subsamplingY = subsampling; - } - - /** - * When decoding an image, the part of the image that should be decoded, or null if the entire - * image is needed. - * - * @return The source region to decode, or null if the entire image should be decoded - */ - public Rectangle getSourceRegion() - { - return sourceRegion; - } - - /** - * Sets the region of the source image that should be decoded. The region will be clipped to the - * dimensions of the source image. Setting this value to null will result in the entire image - * being decoded. - * - * @param sourceRegion The source region to decode, or null if the entire image should be - * decoded. - */ - public void setSourceRegion(Rectangle sourceRegion) - { - this.sourceRegion = sourceRegion; - } - - /** - * When decoding an image, the number of columns to advance in the source for every pixel - * decoded. 
- * - * @return The x-axis subsampling value - */ - public int getSubsamplingX() - { - return subsamplingX; - } - - /** - * Sets the number of columns to advance in the source for every pixel decoded - * - * @param ssX The x-axis subsampling value - */ - public void setSubsamplingX(int ssX) - { - this.subsamplingX = ssX; - } - - /** - * When decoding an image, the number of rows to advance in the source for every pixel decoded. - * - * @return The y-axis subsampling value - */ - public int getSubsamplingY() - { - return subsamplingY; - } - - /** - * Sets the number of rows to advance in the source for every pixel decoded - * - * @param ssY The y-axis subsampling value - */ - public void setSubsamplingY(int ssY) - { - this.subsamplingY = ssY; - } - - /** - * When decoding an image, the horizontal offset for subsampling - * - * @return The x-axis subsampling offset - */ - public int getSubsamplingOffsetX() - { - return subsamplingOffsetX; - } - - /** - * Sets the horizontal subsampling offset for decoding images - * - * @param ssOffsetX The x-axis subsampling offset - */ - public void setSubsamplingOffsetX(int ssOffsetX) - { - this.subsamplingOffsetX = ssOffsetX; - } - - /** - * When decoding an image, the vertical offset for subsampling - * - * @return The y-axis subsampling offset - */ - public int getSubsamplingOffsetY() - { - return subsamplingOffsetY; - } - - /** - * Sets the vertical subsampling offset for decoding images - * - * @param ssOffsetY The y-axis subsampling offset - */ - public void setSubsamplingOffsetY(int ssOffsetY) - { - this.subsamplingOffsetY = ssOffsetY; - } - - /** - * Flag used by the filter to specify if it performed subsampling. - * - * Some filters may be unable or unwilling to apply subsampling, and so the caller must check - * this flag after decoding. - * - * @return True if the filter applied the options specified by this instance, false otherwise. 
- */ - public boolean isFilterSubsampled() - { - return filterSubsampled; - } - - /** - * Used internally by filters to signal they have applied subsampling as requested by this - * options instance. - * - * @param filterSubsampled Value specifying if the filter could meet the requested options. - * Usually a filter will only call this with the value true, as the default value - * for the flag is false. - */ - void setFilterSubsampled(boolean filterSubsampled) - { - this.filterSubsampled = filterSubsampled; - } - - /** - * Helper class for reusable instances which may not be modified. - */ - private static class FinalDecodeOptions extends DecodeOptions - { - FinalDecodeOptions(boolean filterSubsampled) - { - super.setFilterSubsampled(filterSubsampled); - } - - @Override - public void setSourceRegion(Rectangle sourceRegion) - { - throw new UnsupportedOperationException("This instance may not be modified."); - } - - @Override - public void setSubsamplingX(int ssX) - { - throw new UnsupportedOperationException("This instance may not be modified."); - } - - @Override - public void setSubsamplingY(int ssY) - { - throw new UnsupportedOperationException("This instance may not be modified."); - } - - @Override - public void setSubsamplingOffsetX(int ssOffsetX) - { - throw new UnsupportedOperationException("This instance may not be modified."); - } - - @Override - public void setSubsamplingOffsetY(int ssOffsetY) - { - throw new UnsupportedOperationException("This instance may not be modified."); - } - - @Override - void setFilterSubsampled(boolean filterSubsampled) - { - // Silently ignore the request. 
- } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java deleted file mode 100644 index b8eb65242a6..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/DecodeResult.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.awt.image.BufferedImage; -import org.apache.pdfbox.cos.COSDictionary; -// import org.apache.pdfbox.pdmodel.graphics.color.PDJPXColorSpace; - -/** - * The result of a filter decode operation. Allows information such as color space to be - * extracted from image streams, and for stream parameters to be repaired during reading. - * - * @author John Hewson - */ -public final class DecodeResult -{ - private final COSDictionary parameters; -// private PDJPXColorSpace colorSpace; - private BufferedImage smask; - - DecodeResult(COSDictionary parameters) - { - this.parameters = parameters; - } - -// DecodeResult(COSDictionary parameters, PDJPXColorSpace colorSpace) -// { -// this.parameters = parameters; -// this.colorSpace = colorSpace; -// } - - /** - * Return a default DecodeResult. 
- * - * @return a default instance of DecodeResult - */ - public static DecodeResult createDefault() - { - return new DecodeResult(new COSDictionary()); - } - - /** - * Returns the stream parameters, repaired using the embedded stream data. - * @return the repaired stream parameters, or an empty dictionary - */ - public COSDictionary getParameters() - { - return parameters; - } - - /** - * Returns the embedded JPX color space, if any. - * @return the embedded JPX color space, or null if there is none. - */ -// public PDJPXColorSpace getJPXColorSpace() -// { -// return colorSpace; -// } - - // Sets the JPX color space -// void setColorSpace(PDJPXColorSpace colorSpace) -// { -// this.colorSpace = colorSpace; -// } - - void setJPXSMask(BufferedImage smask) - { - this.smask = smask; - } - - public BufferedImage getJPXSMask() - { - return smask; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java deleted file mode 100644 index 42419c1c68a..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Filter.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.zip.Deflater; -import javax.imageio.ImageIO; -import javax.imageio.ImageReader; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.io.IOUtils; -import org.apache.pdfbox.io.RandomAccessInputStream; -import org.apache.pdfbox.io.RandomAccessOutputStream; -import org.apache.pdfbox.io.RandomAccessRead; -import org.apache.pdfbox.io.RandomAccessReadBuffer; -import org.apache.pdfbox.io.RandomAccessReadWriteBuffer; - -/** - * A filter for stream data. - * - * @author Ben Litchfield - * @author John Hewson - */ -public abstract class Filter -{ - private static final Logger LOG = LogManager.getLogger(Filter.class); - - /** - * Compression Level System Property. Set this to a value from 0 to 9 to change the zlib deflate - * compression level used to compress /Flate streams. The default value is -1 which is - * {@link Deflater#DEFAULT_COMPRESSION}. To set maximum compression, use - * {@code System.setProperty(Filter.SYSPROP_DEFLATELEVEL, "9");} - */ - public static final String SYSPROP_DEFLATELEVEL = "org.apache.pdfbox.filter.deflatelevel"; - - /** - * Constructor. - */ - protected Filter() - { - } - - /** - * Decodes data, producing the original non-encoded data. 
- * @param encoded the encoded byte stream - * @param decoded the stream where decoded data will be written - * @param parameters the parameters used for decoding - * @param index the index to the filter being decoded - * @return repaired parameters dictionary, or the original parameters dictionary - * @throws IOException if the stream cannot be decoded - */ - public abstract DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, - int index) throws IOException; - - /** - * Decodes data, with optional DecodeOptions. Not all filters support all options, and so - * callers should check the options' honored flag to test if they were applied. - * - * @param encoded the encoded byte stream - * @param decoded the stream where decoded data will be written - * @param parameters the parameters used for decoding - * @param index the index to the filter being decoded - * @param options additional options for decoding - * @return repaired parameters dictionary, or the original parameters dictionary - * @throws IOException if the stream cannot be decoded - */ - public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, - int index, DecodeOptions options) throws IOException - { - return decode(encoded, decoded, parameters, index); - } - - /** - * Encodes data. 
- * @param input the byte stream to encode - * @param encoded the stream where encoded data will be written - * @param parameters the parameters used for encoding - * @param index the index to the filter being encoded - * @throws IOException if the stream cannot be encoded - */ - public final void encode(InputStream input, OutputStream encoded, COSDictionary parameters, - int index) throws IOException - { - encode(input, encoded, parameters.asUnmodifiableDictionary()); - } - - // implemented in subclasses - protected abstract void encode(InputStream input, OutputStream encoded, - COSDictionary parameters) throws IOException; - - // gets the decode params for a specific filter index, this is used to - // normalise the DecodeParams entry so that it is always a dictionary - protected COSDictionary getDecodeParams(COSDictionary dictionary, int index) - { - COSBase filter = dictionary.getAlternateObjectFromDictionary( COSName.F, COSName.FILTER ); - COSBase obj = dictionary.getAlternateObjectFromDictionary( COSName.DP, COSName.DECODE_PARMS ); - if (filter instanceof COSName && obj instanceof COSDictionary) - { - // PDFBOX-3932: The PDF specification requires "If there is only one filter and that - // filter has parameters, DecodeParms shall be set to the filter’s parameter dictionary" - // but tests show that Adobe means "one filter name object". - return (COSDictionary)obj; - } - else if (filter instanceof COSArray && obj instanceof COSArray) - { - COSArray array = (COSArray)obj; - if (index < array.size()) - { - COSBase objAtIndex = array.getObject(index); - if (objAtIndex instanceof COSDictionary) - { - return (COSDictionary) objAtIndex; - } - } - } - else if (obj != null && !(filter instanceof COSArray || obj instanceof COSArray)) - { - LOG.error("Expected DecodeParams to be an Array or Dictionary but found {}", - obj.getClass().getName()); - } - return new COSDictionary(); - } - - /** - * Finds a suitable image reader for an image format. 
- * - * @param formatName The image format to search for. - * @param errorCause The probably cause if something goes wrong. - * @return The image reader for the format. - * @throws MissingImageReaderException if no image reader is found. - */ - public static final ImageReader findImageReader(String formatName, String errorCause) - throws MissingImageReaderException - { - Iterator readers = ImageIO.getImageReadersByFormatName(formatName); - while (readers.hasNext()) - { - ImageReader reader = readers.next(); - if (reader != null) - { - return reader; - } - } - throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); - } - - /** - * Finds a suitable image raster reader for an image format. - * - * @param formatName The image format to search for. - * @param errorCause The probably cause if something goes wrong. - * @return The image reader for the format. - * @throws MissingImageReaderException if no image reader is found. - */ - public static final ImageReader findRasterReader(String formatName, String errorCause) - throws MissingImageReaderException - { - Iterator readers = ImageIO.getImageReadersByFormatName(formatName); - while (readers.hasNext()) - { - ImageReader reader = readers.next(); - if (reader != null) - { - if (reader.canReadRaster()) - { - return reader; - } - reader.dispose(); - } - } - throw new MissingImageReaderException("Cannot read " + formatName + " image: " + errorCause); - } - - /** - * @return the ZIP compression level configured for PDFBox - */ - public static int getCompressionLevel() - { - int compressionLevel = Deflater.DEFAULT_COMPRESSION; - try - { - compressionLevel = Integer.parseInt(System.getProperty(Filter.SYSPROP_DEFLATELEVEL, "-1")); - } - catch (NumberFormatException ex) - { - LOG.warn(ex.getMessage(), ex); - } - return Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); - } - - /** - * Decodes data, with optional DecodeOptions. 
Not all filters support all options, and so callers should check the - * options' honored flag to test if they were applied. - * - * @param encoded the input stream holding the encoded data - * @param filterList list of filters to be used for decoding - * @param parameters the parameters used for decoding - * @param options additional options for decoding - * @param results list of optional decoding results for each filter - * @return the decoded stream data - * @throws IOException if the stream cannot be decoded - * @throws IllegalArgumentException if filterList is empty - */ - public static RandomAccessRead decode(InputStream encoded, List filterList, - COSDictionary parameters, DecodeOptions options, List results) - throws IOException - { - long length = parameters.getLong(COSName.LENGTH, - RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB); - if (filterList.isEmpty()) - { - throw new IllegalArgumentException("Empty filterList"); - } - if (filterList.size() > 1) - { - Set filterSet = new HashSet<>(filterList); - if (filterSet.size() != filterList.size()) - { - List reducedFilterList = new ArrayList<>(); - for (Filter filter : filterList) - { - if (!reducedFilterList.contains(filter)) - { - reducedFilterList.add(filter); - } - } - // replace origin list with the reduced one - filterList = reducedFilterList; - LOG.warn("Removed duplicated filter entries"); - } - } - InputStream input = encoded; - RandomAccessReadWriteBuffer randomAccessWriteBuffer = null; - OutputStream output = null; - // apply filters - for (int i = 0; i < filterList.size(); i++) - { - if (i > 0) - { - randomAccessWriteBuffer.seek(0); - input = new RandomAccessInputStream(randomAccessWriteBuffer); - length = randomAccessWriteBuffer.length(); - } - // we don't know the size of the decoded stream, just estimate a 4 times bigger size than the encoded stream - // use the estimated stream size as chunk size, use the default chunk size as limit to avoid to big values - if (length <= 0 || length >= 
RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB / 4) - { - length = RandomAccessReadBuffer.DEFAULT_CHUNK_SIZE_4KB; - } - else - { - length = length * 4; - } - randomAccessWriteBuffer = new RandomAccessReadWriteBuffer((int) length); - output = new RandomAccessOutputStream(randomAccessWriteBuffer); - try - { - DecodeResult result = filterList.get(i).decode(input, output, parameters, i, - options); - if (results != null) - { - results.add(result); - } - } - finally - { - IOUtils.closeQuietly(input); - } - } - randomAccessWriteBuffer.seek(0); - return randomAccessWriteBuffer; - } - -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java deleted file mode 100644 index 38fd0b32518..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FilterFactory.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import org.apache.pdfbox.cos.COSName; - -/** - * Factory for Filter classes. 
- * - * @author Ben Litchfield - */ -public final class FilterFactory -{ - /** - * Singleton instance. - */ - public static final FilterFactory INSTANCE = new FilterFactory(); - - private final Map filters = new HashMap<>(); - - private FilterFactory() - { - Filter flate = new FlateFilter(); - Filter dct = new DCTFilter(); - Filter ccittFax = new CCITTFaxFilter(); - Filter lzw = new LZWFilter(); - Filter asciiHex = new ASCIIHexFilter(); - Filter ascii85 = new ASCII85Filter(); - Filter runLength = new RunLengthDecodeFilter(); - Filter crypt = new CryptFilter(); - Filter jpx = new JPXFilter(); - Filter jbig2 = new JBIG2Filter(); - - filters.put(COSName.FLATE_DECODE, flate); - filters.put(COSName.FLATE_DECODE_ABBREVIATION, flate); - filters.put(COSName.DCT_DECODE, dct); - filters.put(COSName.DCT_DECODE_ABBREVIATION, dct); - filters.put(COSName.CCITTFAX_DECODE, ccittFax); - filters.put(COSName.CCITTFAX_DECODE_ABBREVIATION, ccittFax); - filters.put(COSName.LZW_DECODE, lzw); - filters.put(COSName.LZW_DECODE_ABBREVIATION, lzw); - filters.put(COSName.ASCII_HEX_DECODE, asciiHex); - filters.put(COSName.ASCII_HEX_DECODE_ABBREVIATION, asciiHex); - filters.put(COSName.ASCII85_DECODE, ascii85); - filters.put(COSName.ASCII85_DECODE_ABBREVIATION, ascii85); - filters.put(COSName.RUN_LENGTH_DECODE, runLength); - filters.put(COSName.RUN_LENGTH_DECODE_ABBREVIATION, runLength); - filters.put(COSName.CRYPT, crypt); - filters.put(COSName.JPX_DECODE, jpx); - filters.put(COSName.JBIG2_DECODE, jbig2); - } - - /** - * Returns a filter instance given its name as a string. - * @param filterName the name of the filter to retrieve - * @return the filter that matches the name - * @throws IOException if the filter name was invalid - */ - public Filter getFilter(String filterName) throws IOException - { - return getFilter(COSName.getPDFName(filterName)); - } - - /** - * Returns a filter instance given its COSName. 
- * @param filterName the name of the filter to retrieve - * @return the filter that matches the name - * @throws IOException if the filter name was invalid - */ - public Filter getFilter(COSName filterName) throws IOException - { - Filter filter = filters.get(filterName); - if (filter == null) - { - throw new IOException("Invalid filter: " + filterName); - } - return filter; - } - - // returns all available filters, for testing - Collection getAllFilters() - { - return filters.values(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java deleted file mode 100644 index d3cbde5ec46..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilter.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import org.apache.pdfbox.cos.COSDictionary; - -/** - * Decompresses data encoded using the zlib/deflate compression method, - * reproducing the original text or binary data. - * - * @author Ben Litchfield - * @author Marcel Kammer - */ -final class FlateFilter extends Filter -{ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - final COSDictionary decodeParams = getDecodeParams(parameters, index); - - try (FlateFilterDecoderStream decoderStream = new FlateFilterDecoderStream(encoded)) - { - OutputStream wrapPredictor = Predictor.wrapPredictor(decoded, decodeParams); - decoderStream.transferTo(wrapPredictor); - wrapPredictor.flush(); - } - return new DecodeResult(parameters); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - int compressionLevel = getCompressionLevel(); - Deflater deflater = new Deflater(compressionLevel); - try (DeflaterOutputStream out = new DeflaterOutputStream(encoded,deflater)) - { - input.transferTo(out); - } - encoded.flush(); - deflater.end(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java deleted file mode 100644 index 1ca4dfb9114..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/FlateFilterDecoderStream.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -/** - * Stream based decoder for the flate filter which uses zlib/deflate compression. - * - * Use Inflater instead of InflateInputStream to avoid an EOFException due to a probably missing Z_STREAM_END, see - * PDFBOX-1232 for details. - * - */ -public final class FlateFilterDecoderStream extends FilterInputStream -{ - private static final Logger LOG = LogManager.getLogger(FlateFilterDecoderStream.class); - - private boolean isEOF = false; - private int currentDataIndex = 0; - private int bytesDecoded = 0; - - private byte[] buffer = new byte[2048]; - private byte[] decodedData = new byte[4096]; - // use nowrap mode to bypass zlib-header and checksum to avoid a DataFormatException - private final Inflater inflater = new Inflater(true); - - /** - * Constructor. - * - * @param inputStream The input stream to actually read from. 
- */ - public FlateFilterDecoderStream(InputStream inputStream) throws IOException - { - super(inputStream); - // skip zlib header - in.read(); - in.read(); - } - - private boolean fetch() throws IOException - { - currentDataIndex = 0; - if (isEOF || inflater.finished()) - { - isEOF = true; - bytesDecoded = 0; - return false; - } - if (inflater.needsInput()) - { - int bytesRead = in.read(buffer); - if (bytesRead > -1) - { - inflater.setInput(buffer, 0, bytesRead); - } - else - { - isEOF = true; - return false; - } - } - try - { - // overwrite formerly read bytes - if (bytesDecoded > 0) - { - Arrays.fill(decodedData, 0, bytesDecoded, (byte) 0); - } - bytesDecoded = inflater.inflate(decodedData); - } - catch (DataFormatException exception) - { - isEOF = true; - // check if some bytes could be read at all - int countZeros = 0; - for (int i = 0; i < decodedData.length; i++) - { - if (decodedData[i] == 0) - { - countZeros++; - } - else - { - countZeros = 0; - } - } - bytesDecoded = decodedData.length - countZeros; - // don't throw an exception, use the already read data or an empty stream - LOG.warn("FlateFilter: premature end of stream due to a DataFormatException = {}", - exception.getMessage()); - return bytesDecoded > 0; - } - return true; - } - - /** - * This will read the next byte from the stream. - * - * @return The next byte read from the stream. - * - * @throws IOException If there is an error reading from the wrapped stream. - */ - @Override - public int read() throws IOException - { - if (isEOF) - { - return -1; - } - if (currentDataIndex == bytesDecoded && !fetch()) - { - return -1; - } - return decodedData[currentDataIndex++] & 0xFF; - } - - /** - * This will read a chunk of data. - * - * @param data The buffer to write data to. - * @param offset The offset into the data stream. - * @param length The number of byte to attempt to read. - * - * @return The number of bytes actually read. 
- * - * @throws IOException If there is an error reading data from the underlying stream. - */ - @Override - public int read(byte[] data, int offset, int length) throws IOException - { - if (isEOF) - { - return -1; - } - int numberOfBytesRead = 0; - while (numberOfBytesRead < length) - { - int available = bytesDecoded - currentDataIndex; - if (available > 0) - { - int bytes2Copy = Math.min(length - numberOfBytesRead, available); - System.arraycopy(decodedData, currentDataIndex, data, numberOfBytesRead + offset, - bytes2Copy); - currentDataIndex += bytes2Copy; - numberOfBytesRead += bytes2Copy; - } - else if (!fetch()) - { - break; - } - } - return numberOfBytesRead; - } - - /** - * This will close the underlying stream and release any resources. - * - * @throws IOException If there is an error closing the underlying stream. - */ - @Override - public void close() throws IOException - { - inflater.end(); - super.close(); - } - - /** - * mark/reset isn't supported. - * - * @return always false. - */ - @Override - public boolean markSupported() - { - return false; - } - - /** - * Unsupported. - * - * @param n ignored. - * - * @return always zero. - */ - @Override - public long skip(long n) - { - return 0; - } - - /** - * Unsupported. - * - * @return always zero. - */ - @Override - public int available() - { - return 0; - } - - /** - * Unsupported. - * - * @param readlimit ignored. - */ - @Override - public synchronized void mark(int readlimit) - { - } - - /** - * Unsupported. - * - * @throws IOException always throw as reset is an unsupported feature. 
- */ - @Override - public synchronized void reset() throws IOException - { - throw new IOException("reset is not supported"); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java deleted file mode 100644 index 11ad49e1498..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/IdentityFilter.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.pdfbox.cos.COSDictionary; - -/** - * The IdentityFilter filter passes the data through without any modifications. - * It is defined in section 7.6.5 of the PDF 1.7 spec and also stated in table 26. 
- * - * @author Adam Nichols - */ -final class IdentityFilter extends Filter -{ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) - throws IOException - { - encoded.transferTo(decoded); - decoded.flush(); - return new DecodeResult(parameters); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - input.transferTo(encoded); - encoded.flush(); - } -} \ No newline at end of file diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java deleted file mode 100644 index be685c311c5..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JBIG2Filter.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.awt.image.BufferedImage; -import java.awt.image.DataBuffer; -import java.awt.image.DataBufferByte; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.SequenceInputStream; -import javax.imageio.ImageIO; -import javax.imageio.ImageReadParam; -import javax.imageio.ImageReader; -import javax.imageio.stream.ImageInputStream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSStream; - -/** - * Decompresses data encoded using the JBIG2 standard, reproducing the original - * monochrome (1 bit per pixel) image data (or an approximation of that data). - * - * Requires a JBIG2 plugin for Java Image I/O to be installed. A known working - * plug-in is the Apache PDFBox JBIG2 plugin. - * - * @author Timo Boehme - */ -final class JBIG2Filter extends Filter -{ - private static final Logger LOG = LogManager.getLogger(JBIG2Filter.class); - - private static boolean levigoLogged = false; - - private static synchronized void logLevigoDonated() - { - if (!levigoLogged) - { - LOG.info("The Levigo JBIG2 plugin has been donated to the Apache Foundation"); - LOG.info("and an improved version is available for download at " - + "https://pdfbox.apache.org/download.cgi"); - levigoLogged = true; - } - } - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary - parameters, int index, DecodeOptions options) throws IOException - { - ImageReader reader = findImageReader("JBIG2", "jbig2-imageio is not installed"); - if (reader.getClass().getName().contains("levigo")) - { - logLevigoDonated(); - } - - int bits = parameters.getInt(COSName.BITS_PER_COMPONENT, 1); - COSDictionary params = getDecodeParams(parameters, index); - - ImageReadParam irp = reader.getDefaultReadParam(); - 
irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), - options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); - irp.setSourceRegion(options.getSourceRegion()); - options.setFilterSubsampled(true); - - InputStream source = encoded; - if (params != null) - { - COSStream globals = params.getCOSStream(COSName.JBIG2_GLOBALS); - if (globals != null) - { - source = new SequenceInputStream( COSInputStream.create( globals ), encoded); - } - } - - try (ImageInputStream iis = ImageIO.createImageInputStream(source)) - { - reader.setInput(iis); - - BufferedImage image; - try - { - image = reader.read(0, irp); - } - catch (Exception e) - { - // wrap and rethrow any exceptions - throw new IOException("Could not read JBIG2 image", e); - } - - // I am assuming since JBIG2 is always black and white - // depending on your renderer this might or might be needed - if (image.getColorModel().getPixelSize() != bits) - { - if (bits != 1) - { - LOG.warn("Attempting to handle a JBIG2 with more than 1-bit depth"); - } - BufferedImage packedImage = new BufferedImage(image.getWidth(), image.getHeight(), - BufferedImage.TYPE_BYTE_BINARY); -// Graphics graphics = packedImage.getGraphics(); -// graphics.drawImage(image, 0, 0, null); -// graphics.dispose(); - image = packedImage; - } - - DataBuffer dBuf = image.getData().getDataBuffer(); - if (dBuf.getDataType() == DataBuffer.TYPE_BYTE) - { - decoded.write(((DataBufferByte) dBuf).getData()); - } - else - { - throw new IOException("Unexpected image buffer type"); - } - } - finally - { - reader.dispose(); - } - - return new DecodeResult(parameters); - } - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - throw 
new UnsupportedOperationException("JBIG2 encoding not implemented"); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java deleted file mode 100644 index 8f41f9e97e4..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/JPXFilter.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -// import java.awt.color.ColorSpace; -import java.awt.image.BufferedImage; -import java.awt.image.DataBuffer; -import java.awt.image.DataBufferByte; -import java.awt.image.DataBufferUShort; -import java.awt.image.IndexColorModel; -import java.awt.image.MultiPixelPackedSampleModel; -import java.awt.image.Raster; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import javax.imageio.ImageReadParam; -import javax.imageio.ImageReader; -import javax.imageio.stream.ImageInputStream; -import javax.imageio.stream.MemoryCacheImageInputStream; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -// import org.apache.pdfbox.pdmodel.graphics.color.PDJPXColorSpace; - -/** - * Decompress data encoded using the wavelet-based JPEG 2000 standard, - * reproducing the original data. - * - * Requires the Java Advanced Imaging (JAI) Image I/O Tools to be installed from java.net, see - * jai-imageio. - * Alternatively you can build from the source available in the - * jai-imageio-core svn repo. - * - * Mac OS X users should download the tar.gz file for linux and unpack it to obtain the - * required jar files. The .so file can be safely ignored. 
- * - * @author John Hewson - * @author Timo Boehme - */ -public final class JPXFilter extends Filter -{ - /** - * {@inheritDoc} - */ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary - parameters, int index, DecodeOptions options) throws IOException - { - DecodeResult result = new DecodeResult(new COSDictionary()); - result.getParameters().addAll(parameters); - BufferedImage image = readJPX(encoded, options, result); - - Raster raster = image.getRaster(); - switch (raster.getDataBuffer().getDataType()) - { - case DataBuffer.TYPE_BYTE: - DataBufferByte byteBuffer = (DataBufferByte) raster.getDataBuffer(); - decoded.write(byteBuffer.getData()); - return result; - - case DataBuffer.TYPE_USHORT: - DataBufferUShort wordBuffer = (DataBufferUShort) raster.getDataBuffer(); - for (short w : wordBuffer.getData()) - { - decoded.write(w >> 8); - decoded.write(w); - } - return result; - - case DataBuffer.TYPE_INT: - // not yet used (as of October 2018) but works as fallback - // if we decide to convert to BufferedImage.TYPE_INT_RGB - int[] ar = new int[raster.getNumBands()]; - for (int y = 0; y < image.getHeight(); ++y) - { - for (int x = 0; x < image.getWidth(); ++x) - { - raster.getPixel(x, y, ar); - for (int i = 0; i < ar.length; ++i) - { - decoded.write(ar[i]); - } - } - } - return result; - - default: - throw new IOException("Data type " + raster.getDataBuffer().getDataType() + " not implemented"); - } - } - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - return decode(encoded, decoded, parameters, index, DecodeOptions.DEFAULT); - } - - // try to read using JAI Image I/O - private BufferedImage readJPX(InputStream input, DecodeOptions options, DecodeResult result) throws IOException - { - ImageReader reader = findImageReader("JPEG2000", "Java Advanced Imaging (JAI) Image I/O Tools are not installed"); - // PDFBOX-4121: 
ImageIO.createImageInputStream() is much slower - try (ImageInputStream iis = new MemoryCacheImageInputStream(input)) - { - reader.setInput(iis, true, true); - ImageReadParam irp = reader.getDefaultReadParam(); - irp.setSourceRegion(options.getSourceRegion()); - irp.setSourceSubsampling(options.getSubsamplingX(), options.getSubsamplingY(), - options.getSubsamplingOffsetX(), options.getSubsamplingOffsetY()); - options.setFilterSubsampled(true); - - BufferedImage image; - try - { - image = reader.read(0, irp); - } - catch (Exception e) - { - // wrap and rethrow any exceptions - throw new IOException("Could not read JPEG 2000 (JPX) image", e); - } - - COSDictionary parameters = result.getParameters(); - - // "If the image stream uses the JPXDecode filter, this entry is optional - // and shall be ignored if present" - // - // note that indexed color spaces make the BPC logic tricky, see PDFBOX-2204 - int bpc = image.getColorModel().getPixelSize() / image.getRaster().getNumBands(); - parameters.setInt(COSName.BITS_PER_COMPONENT, bpc); - - // "Decode shall be ignored, except in the case where the image is treated as a mask" - if (!parameters.getBoolean(COSName.IMAGE_MASK, false)) - { - parameters.setItem(COSName.DECODE, null); - } - - // override dimensions, see PDFBOX-1735 - parameters.setInt(COSName.WIDTH, reader.getWidth(0)); - parameters.setInt(COSName.HEIGHT, reader.getHeight(0)); - - // extract embedded color space - if (!parameters.containsKey(COSName.COLORSPACE)) - { - if (image.getSampleModel() instanceof MultiPixelPackedSampleModel && - image.getColorModel().getPixelSize() == 1 && - image.getRaster().getNumBands() == 1 && - image.getColorModel() instanceof IndexColorModel) - { - // PDFBOX-4326: - // force CS_GRAY colorspace because colorspace in IndexColorModel - // has 3 colors despite that there is only 1 color per pixel - // in raster -// result.setColorSpace(new PDJPXColorSpace(ColorSpace.getInstance(ColorSpace.CS_GRAY))); - } -// else if 
(image.getTransparency() == Transparency.TRANSLUCENT && -// parameters.getInt(COSName.SMASK_IN_DATA) > 0) -// { -// // PDFBOX-5657: save the soft mask in DecodeResult and use it later -// // we never had SMaskInData = 2, maybe more work is needed -// BufferedImage smask = new BufferedImage( -// image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); -// smask.setData(image.getAlphaRaster()); -// result.setJPXSMask(smask); -// // create opaque image -// BufferedImage bim = new BufferedImage( -// image.getWidth(), image.getHeight(), BufferedImage.TYPE_INT_RGB); -// Graphics2D g2d = (Graphics2D) bim.getGraphics(); -// g2d.drawImage(image, 0, 0, null); -// g2d.dispose(); -// image = bim; -// result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); -// } -// else -// { -// result.setColorSpace(new PDJPXColorSpace(image.getColorModel().getColorSpace())); -// } - } - - return image; - } - finally - { - reader.dispose(); - } - } - - /** - * {@inheritDoc} - */ - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - throw new UnsupportedOperationException("JPX encoding not implemented"); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java deleted file mode 100644 index 926ea8c236b..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/LZWFilter.java +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright 2014 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import javax.imageio.stream.MemoryCacheImageInputStream; -import javax.imageio.stream.MemoryCacheImageOutputStream; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; - -/** - * - * This is the filter used for the LZWDecode filter. - * - * @author Ben Litchfield - * @author Tilman Hausherr - */ -public class LZWFilter extends Filter -{ - /** - * Log instance. - */ - private static final Logger LOG = LogManager.getLogger(LZWFilter.class); - - /** - * The LZW clear table code. - */ - public static final long CLEAR_TABLE = 256; - - /** - * The LZW end of data code. 
- */ - public static final long EOD = 257; - - //BEWARE: codeTable must be local to each method, because there is only - // one instance of each filter - - /** - * {@inheritDoc} - */ - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - COSDictionary decodeParams = getDecodeParams(parameters, index); - boolean earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1) != 0; - doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange); - return new DecodeResult(parameters); - } - - private static void doLZWDecode(InputStream encoded, OutputStream decoded, boolean earlyChange) throws IOException - { - List codeTable = new ArrayList<>(); - int chunk = 9; - final MemoryCacheImageInputStream in = new MemoryCacheImageInputStream(encoded); - long nextCommand; - long prevCommand = -1; - - try - { - while ((nextCommand = in.readBits(chunk)) != EOD) - { - if (nextCommand == CLEAR_TABLE) - { - chunk = 9; - codeTable = createCodeTable(); - prevCommand = -1; - } - else - { - if (nextCommand < codeTable.size()) - { - byte[] data = codeTable.get((int) nextCommand); - byte firstByte = data[0]; - decoded.write(data); - if (prevCommand != -1) - { - checkIndexBounds(codeTable, prevCommand, in); - data = codeTable.get((int) prevCommand); - byte[] newData = Arrays.copyOf(data, data.length + 1); - newData[data.length] = firstByte; - codeTable.add(newData); - } - } - else - { - checkIndexBounds(codeTable, prevCommand, in); - byte[] data = codeTable.get((int) prevCommand); - byte[] newData = Arrays.copyOf(data, data.length + 1); - newData[data.length] = data[0]; - decoded.write(newData); - codeTable.add(newData); - } - - chunk = calculateChunk(codeTable.size(), earlyChange); - prevCommand = nextCommand; - } - } - } - catch (EOFException ex) - { - LOG.warn("Premature EOF in LZW stream, EOD code missing", ex); - } - decoded.flush(); - } - - private static void 
checkIndexBounds(List codeTable, long index, MemoryCacheImageInputStream in) - throws IOException - { - if (index < 0) - { - throw new IOException("negative array index: " + index + " near offset " - + in.getStreamPosition()); - } - if (index >= codeTable.size()) - { - throw new IOException("array index overflow: " + index + - " >= " + codeTable.size() + " near offset " - + in.getStreamPosition()); - } - } - - /** - * {@inheritDoc} - */ - @Override - protected void encode(InputStream rawData, OutputStream encoded, COSDictionary parameters) - throws IOException - { - List codeTable = createCodeTable(); - int chunk = 9; - - byte[] inputPattern = null; - try (MemoryCacheImageOutputStream out = new MemoryCacheImageOutputStream(encoded)) - { - out.writeBits(CLEAR_TABLE, chunk); - int foundCode = -1; - int r; - while ((r = rawData.read()) != -1) - { - byte by = (byte) r; - if (inputPattern == null) - { - inputPattern = new byte[] { by }; - foundCode = by & 0xff; - } - else - { - inputPattern = Arrays.copyOf(inputPattern, inputPattern.length + 1); - inputPattern[inputPattern.length - 1] = by; - int newFoundCode = findPatternCode(codeTable, inputPattern); - if (newFoundCode == -1) - { - // use previous - chunk = calculateChunk(codeTable.size() - 1, true); - out.writeBits(foundCode, chunk); - // create new table entry - codeTable.add(inputPattern); - - if (codeTable.size() == 4096) - { - // code table is full - out.writeBits(CLEAR_TABLE, chunk); - codeTable = createCodeTable(); - } - - inputPattern = new byte[] { by }; - foundCode = by & 0xff; - } - else - { - foundCode = newFoundCode; - } - } - } - if (foundCode != -1) - { - chunk = calculateChunk(codeTable.size() - 1, true); - out.writeBits(foundCode, chunk); - } - - // PPDFBOX-1977: the decoder wouldn't know that the encoder would output - // an EOD as code, so he would have increased his own code table and - // possibly adjusted the chunk. 
Therefore, the encoder must behave as - // if the code table had just grown and thus it must be checked it is - // needed to adjust the chunk, based on an increased table size parameter - chunk = calculateChunk(codeTable.size(), true); - - out.writeBits(EOD, chunk); - - // pad with 0 - out.writeBits(0, 7); - - // must do or file will be empty :-( - out.flush(); - } - } - - /** - * Find a matching pattern in the code table. - * - * @param codeTable The LZW code table. - * @param pattern The pattern to be searched for. - * @return The index of the matching pattern or -1 if nothing is found. - */ - private static int findPatternCode(List codeTable, byte[] pattern) - { - // for the first 256 entries, index matches value - if (pattern.length == 1) - { - return pattern[0]; - } - - // no need to test the first 256 + 2 entries against longer patterns - for (int i = 257; i < codeTable.size(); i++) - { - if (Arrays.equals(codeTable.get(i), pattern)) - { - return i; - } - } - - return -1; - } - - /** - * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE markers. - */ - private static List createCodeTable() - { - List codeTable = new ArrayList<>(4096); - codeTable.addAll(INITIAL_CODE_TABLE); - return codeTable; - } - - private static final List INITIAL_CODE_TABLE = createInitialCodeTable(); - - private static List createInitialCodeTable() - { - List codeTable = new ArrayList<>(258); - for (int i = 0; i < 256; ++i) - { - codeTable.add(new byte[] { (byte) (i & 0xFF) }); - } - codeTable.add(null); // 256 EOD - codeTable.add(null); // 257 CLEAR_TABLE - return codeTable; - } - - /** - * Calculate the appropriate chunk size - * - * @param tabSize the size of the code table - * @param earlyChange true for early chunk increase - * - * @return a value between 9 and 12 - */ - private static int calculateChunk(int tabSize, boolean earlyChange) - { - int i = tabSize + (earlyChange ? 
1 : 0); - if (i >= 2048) - { - return 12; - } - if (i >= 1024) - { - return 11; - } - if (i >= 512) - { - return 10; - } - return 9; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java deleted file mode 100644 index 0ff744829a0..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/MissingImageReaderException.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; - -/** - * Thrown when a required JAI ImageReader is missing. 
- * - * @author John Hewson - */ -public class MissingImageReaderException extends IOException -{ - /** - * - */ - private static final long serialVersionUID = 1L; - - public MissingImageReaderException(String message) - { - super(message); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java deleted file mode 100644 index b3f6f922129..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/Predictor.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright 2014 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import java.io.FilterOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; - -/** - * Helper class to contain predictor decoding used by Flate and LZW filter. - * To see the history, look at the FlateFilter class. - */ -public final class Predictor -{ - - private Predictor() - { - } - - /** - * Decodes a single line of data in-place. - * @param predictor Predictor value for the current line - * @param colors Number of color components, from decode parameters. - * @param bitsPerComponent Number of bits per components, from decode parameters. - * @param columns Number samples in a row, from decode parameters. 
- * @param actline Current (active) line to decode. Data will be decoded in-place, - * i.e. - the contents of this buffer will be modified. - * @param lastline The previous decoded line. When decoding the first line, this - * parameter should be an empty byte array of the same length as - * actline. - */ - static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, byte[] actline, byte[] lastline) - { - if (predictor == 1) - { - // no prediction - return; - } - final int bitsPerPixel = colors * bitsPerComponent; - final int bytesPerPixel = (bitsPerPixel + 7) / 8; - final int rowlength = actline.length; - switch (predictor) - { - case 2: - // PRED TIFF SUB - if (bitsPerComponent == 8) - { - // for 8 bits per component it is the same algorithm as PRED SUB of PNG format - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p] & 0xff; - int left = actline[p - bytesPerPixel] & 0xff; - actline[p] = (byte) (sub + left); - } - break; - } - if (bitsPerComponent == 16) - { - for (int p = bytesPerPixel; p < rowlength - 1; p += 2) - { - int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); - int left = (((actline[p - bytesPerPixel] & 0xff) << 8) - + (actline[p - bytesPerPixel + 1] & 0xff)); - actline[p] = (byte) (((sub + left) >> 8) & 0xff); - actline[p + 1] = (byte) ((sub + left) & 0xff); - } - break; - } - if (bitsPerComponent == 1 && colors == 1) - { - // bytesPerPixel cannot be used: - // "A row shall occupy a whole number of bytes, rounded up if necessary. - // Samples and their components shall be packed into bytes - // from high-order to low-order bits." 
- for (int p = 0; p < rowlength; p++) - { - for (int bit = 7; bit >= 0; --bit) - { - int sub = (actline[p] >> bit) & 1; - if (p == 0 && bit == 7) - { - continue; - } - int left; - if (bit == 7) - { - // use bit #0 from previous byte - left = actline[p - 1] & 1; - } - else - { - // use "previous" bit - left = (actline[p] >> (bit + 1)) & 1; - } - if (((sub + left) & 1) == 0) - { - // reset bit - actline[p] &= ~(1 << bit); - } - else - { - // set bit - actline[p] |= 1 << bit; - } - } - } - break; - } - // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too - int elements = columns * colors; - for (int p = colors; p < elements; ++p) - { - int bytePosSub = p * bitsPerComponent / 8; - int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; - int bytePosLeft = (p - colors) * bitsPerComponent / 8; - int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; - - int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); - int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); - actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent, sub + left); - } - break; - case 10: - // PRED NONE - // do nothing - break; - case 11: - // PRED SUB - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p]; - int left = actline[p - bytesPerPixel]; - actline[p] = (byte) (sub + left); - } - break; - case 12: - // PRED UP - for (int p = 0; p < rowlength; p++) - { - int up = actline[p] & 0xff; - int prior = lastline[p] & 0xff; - actline[p] = (byte) ((up + prior) & 0xff); - } - break; - case 13: - // PRED AVG - for (int p = 0; p < rowlength; p++) - { - int avg = actline[p] & 0xff; - int left = p - bytesPerPixel >= 0 ? 
actline[p - bytesPerPixel] & 0xff : 0; - int up = lastline[p] & 0xff; - actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); - } - break; - case 14: - // PRED PAETH - for (int p = 0; p < rowlength; p++) - { - int paeth = actline[p] & 0xff; - int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left - int b = lastline[p] & 0xff;// upper - int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft - int value = a + b - c; - int absa = Math.abs(value - a); - int absb = Math.abs(value - b); - int absc = Math.abs(value - c); - - if (absa <= absb && absa <= absc) - { - actline[p] = (byte) ((paeth + a) & 0xff); - } - else if (absb <= absc) - { - actline[p] = (byte) ((paeth + b) & 0xff); - } - else - { - actline[p] = (byte) ((paeth + c) & 0xff); - } - } - break; - default: - break; - } - } - - static int calculateRowLength(int colors, int bitsPerComponent, int columns) - { - final int bitsPerPixel = colors * bitsPerComponent; - return (columns * bitsPerPixel + 7) / 8; - } - - // get value from bit interval from a byte - static int getBitSeq(int by, int startBit, int bitSize) - { - int mask = ((1 << bitSize) - 1); - return (by >>> startBit) & mask; - } - - // set value in a bit interval and return that value - static int calcSetBitSeq(int by, int startBit, int bitSize, int val) - { - int mask = ((1 << bitSize) - 1); - int truncatedVal = val & mask; - mask = ~(mask << startBit); - return (by & mask) | (truncatedVal << startBit); - } - - /** - * Wraps and OutputStream in a predictor decoding stream as necessary. - * If no predictor is specified by the parameters, the original stream is returned as is. - * - * @param out The stream to which decoded data should be written - * @param decodeParams Decode parameters for the stream - * @return An OutputStream is returned, which will write decoded data - * into the given stream. If no predictor is specified, the original stream is returned. 
- */ - static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams) - { - int predictor = decodeParams.getInt(COSName.PREDICTOR); - if (predictor > 1) - { - int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); - int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); - int columns = decodeParams.getInt(COSName.COLUMNS, 1); - - return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns); - } - else - { - return out; - } - } - - /** - * Output stream that implements predictor decoding. Data is buffered until a complete - * row is available, which is then decoded and written to the underlying stream. - * The previous row is retained for decoding the next row. - */ - private static final class PredictorOutputStream extends FilterOutputStream - { - // current predictor type - private int predictor; - // image decode parameters - private final int colors; - private final int bitsPerComponent; - private final int columns; - private final int rowLength; - // PNG predictor (predictor>=10) means every row has a (potentially different) - // predictor value - private final boolean predictorPerRow; - - // data buffers - private byte[] currentRow; - private byte[] lastRow; - // amount of data in the current row - private int currentRowData = 0; - // was the per-row predictor value read for the current row being processed - private boolean predictorRead = false; - - PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, int columns) - { - super(out); - this.predictor = predictor; - this.colors = colors; - this.bitsPerComponent = bitsPerComponent; - this.columns = columns; - this.rowLength = calculateRowLength(colors, bitsPerComponent, columns); - this.predictorPerRow = predictor >= 10; - currentRow = new byte[rowLength]; - lastRow = new byte[rowLength]; - } - - @Override - public void write(byte[] bytes) throws IOException - { - write(bytes, 0, bytes.length); - } - - @Override - 
public void write(byte[] bytes, int off, int len) throws IOException - { - int currentOffset = off; - int maxOffset = currentOffset + len; - while (currentOffset < maxOffset) - { - if (predictorPerRow && currentRowData == 0 && !predictorRead) - { - // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) - // read per line predictor, add 10 to tread value 0 as 10, 1 as 11, ... - predictor = bytes[currentOffset] + 10; - currentOffset++; - predictorRead = true; - } - else - { - int toRead = Math.min(rowLength - currentRowData, maxOffset - currentOffset); - System.arraycopy(bytes, currentOffset, currentRow, currentRowData, toRead); - currentRowData += toRead; - currentOffset += toRead; - - // current row is filled, decode it, write it to underlying stream, - // and reset the state. - if (currentRowData == currentRow.length) - { - decodeAndWriteRow(); - } - } - } - } - - private void decodeAndWriteRow() throws IOException - { - decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow); - out.write(currentRow); - flipRows(); - } - - /** - * Flips the row buffers (to avoid copying), and resets the current-row index - * and predictorRead flag - */ - private void flipRows() - { - byte[] temp = lastRow; - lastRow = currentRow; - currentRow = temp; - currentRowData = 0; - predictorRead = false; - } - - @Override - public void flush() throws IOException - { - // The last row is allowed to be incomplete, and should be completed with zeros. 
- if (currentRowData > 0) - { - Arrays.fill(currentRow, currentRowData, rowLength, (byte)0); - decodeAndWriteRow(); - } - super.flush(); - } - - @Override - public void write(int i) throws IOException - { - throw new UnsupportedOperationException("Not supported"); - } - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java deleted file mode 100644 index cb6665f7f46..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/RunLengthDecodeFilter.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.pdfbox.cos.COSDictionary; - -/** - * Decompresses data encoded using a byte-oriented run-length encoding algorithm, - * reproducing the original text or binary data - * - * @author Ben Litchfield - * @author Tilman Hausherr - */ -final class RunLengthDecodeFilter extends Filter -{ - private static final int RUN_LENGTH_EOD = 128; - - @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException - { - int dupAmount; - byte[] buffer = new byte[128]; - while ((dupAmount = encoded.read()) != -1 && dupAmount != RUN_LENGTH_EOD) - { - if (dupAmount <= 127) - { - int amountToCopy = dupAmount + 1; - int compressedRead; - while (amountToCopy > 0) - { - compressedRead = encoded.read(buffer, 0, amountToCopy); - // EOF reached? - if (compressedRead == -1) - { - break; - } - decoded.write(buffer, 0, compressedRead); - amountToCopy -= compressedRead; - } - } - else - { - int dupByte = encoded.read(); - // EOF reached? - if (dupByte == -1) - { - break; - } - for (int i = 0; i < 257 - dupAmount; i++) - { - decoded.write(dupByte); - } - } - } - return new DecodeResult(parameters); - } - - @Override - protected void encode(InputStream input, OutputStream encoded, COSDictionary parameters) - throws IOException - { - // Not used in PDFBox except for testing the decoder. 
- int lastVal = -1; - int byt; - int count = 0; - boolean equality = false; - - // buffer for "unequal" runs, size between 2 and 128 - byte[] buf = new byte[128]; - - while ((byt = input.read()) != -1) - { - if (lastVal == -1) - { - // first time - lastVal = byt; - count = 1; - } - else - { - if (count == 128) - { - if (equality) - { - // max length of equals - encoded.write(129); // = 257 - 128 - encoded.write(lastVal); - } - else - { - // max length of unequals - encoded.write(127); - encoded.write(buf, 0, 128); - } - equality = false; - lastVal = byt; - count = 1; - } - else if (count == 1) - { - if (byt == lastVal) - { - equality = true; - } - else - { - buf[0] = (byte) lastVal; - buf[1] = (byte) byt; - lastVal = byt; - } - count = 2; - } - else - { - // 1 < count < 128 - if (byt == lastVal) - { - if (equality) - { - ++count; - } - else - { - // write all we got except the last - encoded.write(count - 2); - encoded.write(buf, 0, count - 1); - count = 2; - equality = true; - } - } - else - { - if (equality) - { - // equality ends here - encoded.write(257 - count); - encoded.write(lastVal); - equality = false; - count = 1; - } - else - { - buf[count] = (byte) byt; - ++count; - } - lastVal = byt; - } - } - } - } - if (count > 0) - { - if (count == 1) - { - encoded.write(0); - encoded.write(lastVal); - } - else if (equality) - { - encoded.write(257 - count); - encoded.write(lastVal); - } - else - { - encoded.write(count - 1); - encoded.write(buf, 0, count); - } - } - encoded.write(RUN_LENGTH_EOD); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java deleted file mode 100644 index f5625f59996..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/TIFFExtension.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2012, Harald Kuhr - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name "TwelveMonkeys" nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -package org.apache.pdfbox.cos.filter; - -/** - * TIFFExtension - * - * @author Harald Kuhr - * @author last modified by $Author: haraldk$ - * @version $Id: TIFFExtension.java,v 1.0 08.05.12 16:45 haraldk Exp$ - */ -interface TIFFExtension { - /** CCITT T.4/Group 3 Fax compression. */ - int COMPRESSION_CCITT_T4 = 3; - /** CCITT T.6/Group 4 Fax compression. */ - int COMPRESSION_CCITT_T6 = 4; - /** LZW Compression. 
Was baseline, but moved to extension due to license issues in the LZW algorithm. */ - int COMPRESSION_LZW = 5; - /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ - int COMPRESSION_OLD_JPEG = 6; - /** JPEG Compression (lossy). */ - int COMPRESSION_JPEG = 7; - /** Custom: PKZIP-style Deflate. */ - int COMPRESSION_DEFLATE = 32946; - /** Adobe-style Deflate. */ - int COMPRESSION_ZLIB = 8; - - int PHOTOMETRIC_SEPARATED = 5; - int PHOTOMETRIC_YCBCR = 6; - int PHOTOMETRIC_CIELAB = 8; - int PHOTOMETRIC_ICCLAB = 9; - int PHOTOMETRIC_ITULAB = 10; - - int PLANARCONFIG_PLANAR = 2; - - int PREDICTOR_HORIZONTAL_DIFFERENCING = 2; - int PREDICTOR_HORIZONTAL_FLOATINGPOINT = 3; - - int FILL_RIGHT_TO_LEFT = 2; - - int SAMPLEFORMAT_INT = 2; - int SAMPLEFORMAT_FP = 3; - int SAMPLEFORMAT_UNDEFINED = 4; - - int YCBCR_POSITIONING_CENTERED = 1; - int YCBCR_POSITIONING_COSITED = 2; - - /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ - int JPEG_PROC_BASELINE = 1; - /** Deprecated. For backwards compatibility only ("Old-style" JPEG). */ - int JPEG_PROC_LOSSLESS = 14; - - /** For use with Photometric: 5 (Separated), when image data is in CMYK color space. */ - int INKSET_CMYK = 1; - - /** - * For use with Photometric: 5 (Separated), when image data is in a color space other than CMYK. - * See {@link com.twelvemonkeys.imageio.metadata.exif.TIFF#TAG_INK_NAMES InkNames} field for a - * description of the inks to be used. 
- */ - int INKSET_NOT_CMYK = 2; - - int ORIENTATION_TOPRIGHT = 2; - int ORIENTATION_BOTRIGHT = 3; - int ORIENTATION_BOTLEFT = 4; - int ORIENTATION_LEFTTOP = 5; - int ORIENTATION_RIGHTTOP = 6; - int ORIENTATION_RIGHTBOT = 7; - int ORIENTATION_LEFTBOT = 8; - - int GROUP3OPT_2DENCODING = 1; - int GROUP3OPT_UNCOMPRESSED = 2; - int GROUP3OPT_FILLBITS = 4; - int GROUP3OPT_BYTEALIGNED = 8; - int GROUP4OPT_UNCOMPRESSED = 2; - int GROUP4OPT_BYTEALIGNED = 4; - int COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE = 2; - int FILL_LEFT_TO_RIGHT = 1; // Default -} - diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html b/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html deleted file mode 100644 index 1d1f771a2b2..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/filter/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -This package will hold the PDFBox implementations of the filters that are used in PDF documents. - - diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html b/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html deleted file mode 100644 index beecb1289c1..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/package.html +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - -

COS stands for Carousel Object Syntax, which is the syntax used to structure - PDF files. Although Carousel was only a code name for what later became Acrobat, - the name is still used to refer to the way a PDF file is composed. COS objects - are the building blocks of PDF files, and represent document components like - pages, bookmarks, fonts, and annotations. -

-

- The official PDF documentation claims that PDF is composed of 8 types - of COS objects, however it goes on to state that there are two types - of numeric objects, integer and real numbers, which brings the actual - number of object types to 9. Virtually all COS objects have a label - so they can be referenced indirectly. -

-

- The following classes encapsulate COS objects: COSArray, COSBoolean, - COSDictionary, COSName, COSNumber, COSString, COSStream, and COSNull. - All of these objects are derived from COSBase which holds the object label - ("key") and mandates the implementation of certain abstract methods. - The object label is encapsulated in the COSObjectKey class. This package - also includes the classes COSFloat and COSInteger which extend COSNumber -

-

Also defined are COSDocument, which represents the collection of - all the COS objects in a PDF document, and COSObject which is a - proxy object for all other COS objects. Both the COSDictionary and - COSObject classes extend COSBase despite the fact that they are not - true COS objects. -

-

- The proxy object, COSObject, is not derived from COSBase, but implements - COSObjectGetter and has the same label as whichever concrete class it - represents. Typically, the associated concrete class is not instantiated - until the COSObject's getCOSObject() method is called, after which it - will contain a reference to the decoded concrete class. The abstract class - COSObjectGetter requires the implementation of the getCOSObject() method, - which will return whichever COSBase derived class is associated with the - object. The use of COSObjectGetter is not limited to objects in this - package but is used throughout pdfbox. -

-

- Other classes are defined in this package to directly support the - base classes, such as COSObjectKey and COSInputStream. These classes - do not derive from COSBase, and most do not implement COSObjectGetter(). -

-

- A brief summary of the PDF file structure can be found at - Medium.com -

-See also the PDF Reference 1.7. - - diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java deleted file mode 100644 index 1baf1bb5b32..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/DateConverter.java +++ /dev/null @@ -1,737 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.util; - -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.Locale; -import java.util.SimpleTimeZone; -import java.util.TimeZone; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.pdfbox.cos.COSString; - -import java.text.ParsePosition; -import java.text.SimpleDateFormat; - -/* - * Date format is described in PDF Reference 1.7 section 3.8.2 - * (www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf) - * and also in PDF 32000-1:2008 - * (http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf)) - * although the latter inexplicably omits the trailing apostrophe. - * - * The interpretation of dates without timezones is unclear. 
- * The code below assumes that such dates are in UTC+00 (aka GMT). - * This is in keeping with the PDF Reference's assertion that: - * numerical fields default to zero values. - * However, the Reference does go on to make the cryptic remark: - * If no UT information is specified, the relationship of the specified - * time to UT is considered to be unknown. Whether or not the time - * zone is known, the rest of the date should be specified in local time. - * I understand this to refer to _creating_ a pdf date value. That is, - * code that can get the wall clock time and cannot get the timezone - * should write the wall clock time with a time zone of zero. - * When _parsing_ a PDF date, the statement talks about "the rest of the date" - * being local time, thus explicitly excluding the use of the local time - * for the time zone. -*/ - -/** - * Converts dates to strings and back using the PDF date standard - * in section 3.8.2 of PDF Reference 1.7. - * - * @author Ben Litchfield - * @author Fred Hansen - * - * TODO Move members of this class elsewhere for shared use in pdfbox and xmpbox. - */ -public final class DateConverter -{ - private static final Logger LOG = LogManager.getLogger(DateConverter.class); - - private DateConverter() - { - } - - // milliseconds/1000 = seconds; seconds / 60 = minutes; minutes/60 = hours - private static final int MINUTES_PER_HOUR = 60; - private static final int SECONDS_PER_MINUTE = 60; - private static final int MILLIS_PER_MINUTE = SECONDS_PER_MINUTE*1000; - private static final int MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE; - private static final int HALF_DAY = 12 * MINUTES_PER_HOUR * MILLIS_PER_MINUTE, DAY = 2*HALF_DAY; - - /* - * The Date format is supposed to be the PDF_DATE_FORMAT, but other - * forms appear. These lists offer alternatives to be tried - * if parseBigEndianDate fails. - * - * The time zone offset generally trails the date string, so it is processed - * separately with parseTZoffset. 
(This does not preclude having time - * zones in the elements below; one does.) - * - * Alas, SimpleDateFormat is badly non-reentrant -- it modifies its - * calendar field (PDFBox-402), so these lists are strings to create - * SimpleDate format as needed. - * - * Some past entries have been elided because they duplicate existing - * entries. See the API for SimpleDateFormat, which says - * "For parsing, the number of pattern letters is ignored - * unless it's needed to separate two adjacent fields." - * - * toCalendar(String, String[]) tests to see that the entire input text - * has been consumed. Therefore the ordering of formats is important. - * If one format begins with the entirety of another, the longer - * must precede the other in the list. - * - * HH is for 0-23 hours and hh for 1-12 hours; an "a" field must follow "hh" - * Where year is yy, four digit years are accepted - * and two digit years are converted to four digits in the range - * [thisyear-79...thisyear+20] - */ - private static final String[] ALPHA_START_FORMATS = - { - "EEEE, dd MMM yy hh:mm:ss a", - "EEEE, MMM dd, yy hh:mm:ss a", - "EEEE, MMM dd, yy 'at' hh:mma", // Acrobat Net Distiller 1.0 for Windows - "EEEE, MMM dd, yy", // Acrobat Distiller 1.0.2 for Macintosh && PDFBOX-465 - "EEEE MMM dd, yy HH:mm:ss", // ECMP5 - "EEEE MMM dd HH:mm:ss z yy", // GNU Ghostscript 7.0.7 - "EEEE MMM dd HH:mm:ss yy", // GNU Ghostscript 7.0.7 variant - }; - - private static final String[] DIGIT_START_FORMATS = - { - "dd MMM yy HH:mm:ss", // for 26 May 2000 11:25:00 - "dd MMM yy HH:mm", // for 26 May 2000 11:25 - "yyyy MMM d", // ambiguity resolved only by omitting time - "yyyymmddhh:mm:ss", // test case "200712172:2:3" - "H:m M/d/yy", // test case "9:47 5/12/2008" - "M/d/yy HH:mm:ss", - "M/d/yy HH:mm", - "M/d/yy", - - // proposed rule that is unreachable due to "dd MMM yy HH:mm:ss" - // "yyyy MMM d HH:mm:ss", - - // rules made unreachable by "M/d/yy HH:mm:ss" "M/d/yy HH:mm" "M/d/yy", - // (incoming digit 
strings do not mark themselves as y, m, or d!) - // "d/MM/yyyy HH:mm:ss", // PDFBOX-164 and PDFBOX-170 - // "M/dd/yyyy hh:mm:ss", - // "MM/d/yyyy hh:mm:ss", - // "M/d/yyyy HH:mm:ss", - // "M/dd/yyyy", - // "MM/d/yyyy", - // "M/d/yyyy", - // "M/d/yyyy HH:mm:ss", - // "M/d/yy HH:mm:ss", - // subsumed by big-endian parse - // "yyyy-MM-dd'T'HH:mm:ss", - // "yyyy-MM-dd'T'HH:mm:ss", - // "yyyymmdd hh:mm:ss", - // "yyyymmdd", - // "yyyymmddX''00''", // covers 24 cases - // (originally the above ended with '+00''00'''; - // the first apostrophe quoted the plus, - // '' mapped to a single ', and the ''' was invalid) - }; - - /** - * Converts a Calendar to a string formatted as: - * D:yyyyMMddHHmmss#hh'mm' where # is Z, +, or -. - * - * @param cal The date to convert to a string. May be null. - * The DST_OFFSET is included when computing the output time zone. - * - * @return The date as a String to be used in a PDF document, - * or null if the cal value is null - */ - public static String toString( Calendar cal) - { - if (cal == null) - { - return null; - } - String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + - cal.get(Calendar.DST_OFFSET), "'"); - return String.format( Locale.US, "D:" - + "%1$4tY%1$2tm%1$2td" // yyyyMMdd - + "%1$2tH%1$2tM%1$2tS" // HHmmss - + "%2$s" // time zone - + "'", // trailing apostrophe - cal, offset); - } - - /** - * Converts the date to ISO 8601 string format: - * yyyy-mm-ddThh:MM:ss#hh:mm (where '#" is '+' or '-'). - * - * @param cal The date to convert. Must not be null. - * The DST_OFFSET is included in the output value. - * - * @return The date represented as an ISO 8601 string. 
- */ - public static String toISO8601(Calendar cal) - { - String offset = formatTZoffset(cal.get(Calendar.ZONE_OFFSET) + - cal.get(Calendar.DST_OFFSET), ":"); - return String.format(Locale.US, - "%1$4tY" // yyyy - + "-%1$2tm" // -mm (%tm adds one to cal month value) - + "-%1$2td" // -dd (%tm adds one to cal month value) - + "T" // T - + "%1$2tH:%1$2tM:%1$2tS" // HHmmss - + "%2$s", // time zone - cal, offset); - } - - /* - * Constrain a timezone offset to the range [-14:00 thru +14:00]. - * by adding or subtracting multiples of a full day. - */ - private static int restrainTZoffset(long proposedOffset) - { - if (proposedOffset <= 14 * MILLIS_PER_HOUR && proposedOffset >= -14 * MILLIS_PER_HOUR) - { - // https://www.w3.org/TR/xmlschema-2/#dateTime-timezones - // Timezones between 14:00 and -14:00 are valid - return (int) proposedOffset; - } - // Constrain a timezone offset to the range [-11:59 thru +12:00]. - proposedOffset = ((proposedOffset + HALF_DAY) % DAY + DAY) % DAY; - if (proposedOffset == 0) - { - return HALF_DAY; - } - // 0 <= proposedOffset < DAY - proposedOffset = (proposedOffset - HALF_DAY) % HALF_DAY; - // -HALF_DAY < proposedOffset < HALF_DAY - return (int)proposedOffset; - } - - /* - * Formats a time zone offset as #hh^mm - * where # is + or -, hh is hours, ^ is a separator, and mm is minutes. - * Any separator may be specified by the second argument; - * the usual values are ":" (ISO 8601), "" (RFC 822), and "'" (PDF). - * The returned value is constrained to the range -11:59 ... 11:59. - * For offset of 0 millis, the String returned is "+00^00", never "Z". - * To get a "general" offset in form GMT#hh:mm, write - * "GMT"+DateConverter.formatTZoffset(offset, ":"); - * - * Take thought in choosing the source for the millis value. - * It can come from calendarValue.getTimeZone() or from - * calendarValue.get(Calendar.ZONE_OFFSET). If a TimeZone was created - * from a valid time zone ID, then it may have a daylight savings rule. 
- * (As of July 4, 2013, the data base at http://www.iana.org/time-zones - * recognized 629 time zone regions. But a TimeZone created as - * new SimpleTimeZone(millisOffset, "ID"), - * will not have a daylight savings rule. (Not even if there is a - * known time zone with the given ID. To get the TimeZone named "xDT" - * with its DST rule, use an ID of EST5EDT, CST6CDT, MST7MDT, or PST8PDT. - * - * When parsing PDF dates, the incoming values DOES NOT have a TIMEZONE value. - * At most it has an OFFSET value like -04'00'. It is generally impossible to - * determine what TIMEZONE corresponds to a given OFFSET. If the date is - * in the summer when daylight savings is in effect, an offset of -0400 - * might correspond to any one of the 38 regions (of 53) with standard time - * offset -0400 and no daylight saving. Or it might correspond to - * any one of the 31 regions (out of 43) that observe daylight savings - * and have standard time offset of -0500. - * - * If a Calendar has not been assigned a TimeZone with setTimeZone(), - * it will have by default the local TIMEZONE, not just the OFFSET. In the - * USA, this TimeZone will have a daylight savings rule. - * - * The offset assigned with calVal.set(Calendar.ZONE_OFFSET) differs - * from the offset in the TimeZone set by Calendar.setTimeZone(). Example: - * Suppose my local TimeZone is America/New_York. It has an offset of -05'00'. - * And suppose I set a GregorianCalendar's ZONE_OFFSET to -07'00' - * calVal = new GregorianCalendar(); // TimeZone is the local default - * calVal.set(Calendar.ZONE_OFFSET, -7* MILLIS_PER_HOUR); - * Four different offsets can be computed from calVal: - * calVal.get(Calendar.ZONE_OFFSET) => -07:00 - * calVal.get(Calendar.ZONE_OFFSET) + calVal.get(Calendar.DST_OFFSET) => -06:00 - * calVal.getTimeZone().getRawOffset() => -05:00 - * calVal.getTimeZone().getOffset(calVal.getTimeInMillis()) => -04:00 - * - * Which is correct??? 
I dunno, though setTimeZone() does seem to affect - * ZONE_OFFSET, and not vice versa. One cannot even test whether TimeZone - * or ZONE_OFFSET has been set; both have been set by initialization code. - * TimeZone is initialized to the local default time zone - * and ZONE_OFFSET is set from it. - * - * My choice in this DateConverter class has been to set the - * initial TimeZone of a GregorianCalendar to GMT. Thereafter - * the TimeZone is modified with {@link #adjustTimeZoneNicely}. - * - * package-private for testing - */ - static String formatTZoffset(long millis, String sep) - { - SimpleDateFormat sdf = new SimpleDateFormat("Z", Locale.ENGLISH); // #hhmm - sdf.setTimeZone(new SimpleTimeZone(restrainTZoffset(millis),"unknown")); - String tz = sdf.format(new Date()); - return tz.substring(0,3) + sep + tz.substring(3); - } - - /* - * Parses an integer from a string, starting at and advancing a ParsePosition. - * Returns The integer that was at the given parse position, or the remedy value - * if no digits were found. - * - * The ParsePosition will be incremented by the number of digits found, but no - * more than maxlen. That is, the ParsePosition will advance across at most - * maxlen initial digits in text. The error index is ignored and unchanged. - * - * maxlen is the maximum length of the integer to parse, usually 2, but 4 for - * year fields. If the field of length maxlen begins with a digit, but contains - * a non-digit, no error is signaled and the integer value is returned. 
- */ - private static int parseTimeField(String text, ParsePosition where, int maxlen, int remedy) - { - if (text == null) - { - return remedy; - } - // it would seem that DecimalFormat.parse() would be simpler; - // but that class blithely ignores setMaximumIntegerDigits - int retval = 0; - int index = where.getIndex(); - int limit = index + Math.min(maxlen, text.length()-index); - for (; index < limit; index++) - { - // convert digit to integer - int cval = text.charAt(index) - '0'; - // test to see if we got a digit - if (cval < 0 || cval > 9) - { - // no digit at index - break; - } - // append the digit to the return value - retval = retval * 10 + cval; - } - if (index == where.getIndex()) - { - return remedy; - } - where.setIndex(index); - return retval; - } - - /* - * Advances the ParsePosition past any and all the characters that match - * those in the optionals list. In particular, a space will skip all spaces. - * - * The start value is incremented by the number of optionals found. The error - * index is ignored and unchanged. - * - * Returns the last non-space character passed over (even if space is not in - * the optionals list.) - */ - private static char skipOptionals(String text, ParsePosition where, String optionals) - { - char retval = ' '; - char currch; - while (where.getIndex() < text.length() && - optionals.indexOf((currch = text.charAt(where.getIndex()))) >= 0) - { - retval = (currch != ' ') ? currch : retval; - where.setIndex(where.getIndex() + 1); - } - return retval; - } - - /* - * If the victim string is at the given position in the text, this method - * advances the position past that string. - * - * `where` is the initial position to look at. After return, this will have - * been incremented by the length of the victim if it was found. The error - * index is ignored and unchanged. 
- */ - private static boolean skipString(String text, String victim, ParsePosition where) - { - if (text.startsWith(victim, where.getIndex())) - { - where.setIndex(where.getIndex()+victim.length()); - return true; - } - return false; - } - - /* - * Construct a new GregorianCalendar and set defaults. - * Locale is ENGLISH. - * TimeZone is "UTC" (zero offset and no DST). - * Parsing is NOT lenient. Milliseconds are zero. - * - * package-private for testing - */ - static GregorianCalendar newGreg() - { - GregorianCalendar retCal = new GregorianCalendar(new SimpleTimeZone(0, "UTC"), Locale.ENGLISH); - retCal.setLenient(false); - retCal.set(Calendar.MILLISECOND, 0); - return retCal; - } - - /* - * Install a TimeZone on a GregorianCalendar without changing the - * hours value. A plain GregorianCalendat.setTimeZone() - * adjusts the Calendar.HOUR value to compensate. This is *BAD* - * (not to say *EVIL*) when we have already set the time. - */ - private static void adjustTimeZoneNicely(GregorianCalendar cal, TimeZone tz) - { - cal.setTimeZone(tz); - int offset = (cal.get(Calendar.ZONE_OFFSET) + cal.get(Calendar.DST_OFFSET)) / - MILLIS_PER_MINUTE; - cal.add(Calendar.MINUTE, -offset); - } - - /* - * Parses the end of a date string for a time zone and, if one is found, - * sets the time zone of the GregorianCalendar. Otherwise the calendar - * time zone is unchanged. - * - * The text is parsed as - * (Z|GMT|UTC)? [+- ]* h [': ]? m '? - * where the leading String is optional, h is two digits by default, - * but may be a single digit if followed by one of space, apostrophe, - * colon, or the end of string. Similarly, m is one or two digits. - * This scheme accepts the format of PDF, RFC 822, and ISO8601. - * If none of these applies (as for a time zone name), we try - * TimeZone.getTimeZone(). - * - * Scanning begins at where.index. After success, the returned index - * is that of the next character after the recognized string. 
- * - * package-private for testing - */ - static boolean parseTZoffset(String text, GregorianCalendar cal, - ParsePosition initialWhere) - { - ParsePosition where = new ParsePosition(initialWhere.getIndex()); - TimeZone tz = new SimpleTimeZone(0, "GMT"); - int tzHours, tzMin; - char sign = skipOptionals(text, where, "Z+- "); - boolean hadGMT = (sign == 'Z' || skipString(text, "GMT", where) || - skipString(text, "UTC", where)); - sign = (!hadGMT) ? sign : skipOptionals(text, where, "+- "); - - tzHours = parseTimeField(text, where, 2, -999); - skipOptionals(text, where, "': "); - tzMin = parseTimeField(text, where, 2, 0); - skipOptionals(text, where, "' "); - - if (tzHours != -999) - { - // we parsed a time zone in default format - int hrSign = (sign == '-' ? -1 : 1); - tz.setRawOffset(restrainTZoffset(hrSign * (tzHours * (long) MILLIS_PER_HOUR + - tzMin * (long) MILLIS_PER_MINUTE))); - updateZoneId(tz); - } - else if ( ! hadGMT) - { - // try to process as a name; "GMT" or "UTC" has already been processed - String tzText = text.substring(initialWhere.getIndex()).trim(); - tz = TimeZone.getTimeZone(tzText); - // getTimeZone returns "GMT" for unknown ids - if ("GMT".equals(tz.getID())) - { - // no timezone in text, cal amd initialWhere are unchanged - return false; - } - else - { - // we got a tz by name; use it - where.setIndex(text.length()); - } - } - adjustTimeZoneNicely(cal, tz); - initialWhere.setIndex(where.getIndex()); - return true; - } - - /** - * Update the zone ID based on the raw offset. This is either GMT, GMT+hh:mm or GMT-hh:mm, where - * n is between 1 and 14. The highest negative hour is -14, the highest positive hour is 12. - * Zones that don't fit in this schema are set to zone ID "unknown". - * - * @param tz the time zone to update. 
- */ - private static void updateZoneId(TimeZone tz) - { - int offset = tz.getRawOffset(); - char pm = '+'; - if (offset < 0) - { - pm = '-'; - offset = -offset; - } - int hh = offset / 3600000; - int mm = offset % 3600000 / 60000; - if (offset == 0) - { - tz.setID("GMT"); - } - else if (pm == '+' && hh <= 12) - { - tz.setID(String.format(Locale.US, "GMT+%02d:%02d", hh, mm)); - } - else if (pm == '-' && hh <= 14) - { - tz.setID(String.format(Locale.US, "GMT-%02d:%02d", hh, mm)); - } - else - { - tz.setID("unknown"); - } - } - - /* - * Parses a big-endian date: year month day hour min sec. - * The year must be four digits. Other fields may be adjacent - * and delimited by length or they may follow appropriate delimiters. - * year [ -/]* month [ -/]* dayofmonth [ T]* hour [:] min [:] sec [.secFraction] - * If any numeric field is omitted, all following fields must also be omitted. - * No time zone is processed. - * - * Ambiguous dates can produce unexpected results. For example: - * 1970 12 23:08 will parse as 1970 December 23 00:08:00 - * - * The parse begins at `where, on return the index - * is advanced to just beyond the last character processed. - * The error index is ignored and unchanged. 
- */ - private static GregorianCalendar parseBigEndianDate(String text, - ParsePosition initialWhere) - { - ParsePosition where = new ParsePosition(initialWhere.getIndex()); - int year = parseTimeField(text, where, 4, 0); - if (where.getIndex() != 4 + initialWhere.getIndex()) - { - return null; - } - skipOptionals(text, where, "/- "); - int month = parseTimeField(text, where, 2, 1) - 1; // Calendar months are 0...11 - skipOptionals(text, where, "/- "); - int day = parseTimeField(text, where, 2, 1); - skipOptionals(text, where, " T"); - int hour = parseTimeField(text, where, 2, 0); - skipOptionals(text, where, ": "); - int minute = parseTimeField(text, where, 2, 0); - skipOptionals(text, where, ": "); - int second = parseTimeField(text, where, 2, 0); - char nextC = skipOptionals(text, where, "."); - if (nextC == '.') - { - // fractions of a second: skip up to 19 digits - parseTimeField(text, where, 19, 0); - } - - GregorianCalendar dest = newGreg(); - try - { - dest.set(year, month, day, hour, minute, second); - // trigger limit tests - dest.getTimeInMillis(); - } - catch (IllegalArgumentException ill) - { - LOG.debug("Couldn't parse arguments text:{} initialWhere:{}", text, initialWhere, ill); - return null; - } - initialWhere.setIndex(where.getIndex()); - skipOptionals(text, initialWhere, " "); - // dest has at least a year value - return dest; - } - - /* - * See if text can be parsed as a date according to any of a list of - * formats. The time zone may be included as part of the format, or - * omitted in favor of later testing for a trailing time zone. - * - * The parse starts at `where`, upon return it will have been - * incremented to refer to the next non-space character after the date. - * If no date was found, the value is unchanged. - * The error index is ignored and unchanged. - * - * If there is a failure to find a date, or the GregorianCalendar - * for the date that was found. 
Unless a time zone was - * part of the format, the time zone will be GMT+0 - */ - private static GregorianCalendar parseSimpleDate(String text, String[] fmts, - ParsePosition initialWhere) - { - for(String fmt : fmts) - { - ParsePosition where = new ParsePosition(initialWhere.getIndex()); - SimpleDateFormat sdf = new SimpleDateFormat(fmt, Locale.ENGLISH); - GregorianCalendar retCal = newGreg(); - sdf.setCalendar(retCal); - if (sdf.parse(text, where) != null) - { - initialWhere.setIndex(where.getIndex()); - skipOptionals(text, initialWhere, " "); - return retCal; - } - } - return null; - } - - /* - * Parses a String to see if it begins with a date, and if so, - * returns that date. The date must be strictly correct--no - * field may exceed the appropriate limit. - * (That is, the Calendar has setLenient(false).) - * Skips initial spaces, but does NOT check for "D:" - * - * The scan first tries parseBigEndianDate and parseTZoffset - * and then tries parseSimpleDate with appropriate formats, - * again followed by parseTZoffset. If at any stage the entire - * text is consumed, that date value is returned immediately. - * Otherwise the date that consumes the longest initial part - * of the text is returned. - * - * - PDF format dates are among those recognized by parseBigEndianDate. - * - The formats tried are alphaStartFormats or digitStartFormat and - * any listed in the value of moreFmts. 
- */ - private static Calendar parseDate(String text, ParsePosition initialWhere) - { - if (text == null || text.isEmpty() || "D:".equals(text.trim())) - { - return null; - } - - // remember longestr date string - int longestLen = -999999; - // theorem: the above value will never be used - // proof: longestLen is only used if longestDate is not null - - GregorianCalendar longestDate = null; // null says no date found yet - int whereLen; // tempcopy of where.getIndex() - - ParsePosition where = new ParsePosition(initialWhere.getIndex()); - // check for null (throws exception) and trim off surrounding spaces - skipOptionals(text, where, " "); - int startPosition = where.getIndex(); - - // try big-endian parse - GregorianCalendar retCal = parseBigEndianDate(text, where); - // check for success and a timezone - if (retCal != null && (where.getIndex() == text.length() || - parseTZoffset(text, retCal, where))) - { - // if text is fully consumed, return the date else remember it and its length - whereLen = where.getIndex(); - if (whereLen == text.length()) - { - initialWhere.setIndex(whereLen); - return retCal; - } - longestLen = whereLen; - longestDate = retCal; - } - - // try one of the sets of standard formats - where.setIndex(startPosition); - String [] formats - = Character.isDigit(text.charAt(startPosition)) - ? 
DIGIT_START_FORMATS - : ALPHA_START_FORMATS; - retCal = parseSimpleDate(text, formats, where); - // check for success and a timezone - if (retCal != null && - (where.getIndex() == text.length() || - parseTZoffset(text, retCal, where))) - { - // if text is fully consumed, return the date else remember it and its length - whereLen = where.getIndex(); - if (whereLen == text.length()) - { - initialWhere.setIndex(whereLen); - return retCal; - } - if (whereLen > longestLen) - { - longestLen = whereLen; - longestDate = retCal; - } - } - - if (longestDate != null) - { - initialWhere.setIndex(longestLen); - return longestDate; - } - return retCal; - } - - /** - * Returns the Calendar for a given COS string containing a date, - * or {@code null} if it cannot be parsed. - * - * The returned value will have 0 for DST_OFFSET. - * - * @param text A COS string containing a date. - * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed. - */ - public static Calendar toCalendar(COSString text) - { - if (text == null) - { - return null; - } - return toCalendar(text.getString()); - } - - /** - * Returns the Calendar for a given string containing a date, - * or {@code null} if it cannot be parsed. - * - * The returned value will have 0 for DST_OFFSET. - * - * @param text A COS string containing a date. - * @return The Calendar that the text string represents, or {@code null} if it cannot be parsed. 
- */ - public static Calendar toCalendar(String text) - { - if (text == null || text.trim().isEmpty()) - { - return null; - } - - ParsePosition where = new ParsePosition(0); - skipOptionals(text, where, " "); - skipString(text, "D:", where); - Calendar calendar = parseDate(text, where); - - if (calendar == null || where.getIndex() != text.length()) - { - // the date string is invalid - return null; - } - return calendar; - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java deleted file mode 100644 index 538f8cb2f13..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/Hex.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos.util; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Base64; - -/** - * Utility functions for hex encoding. - * - * @author John Hewson - */ -public final class Hex -{ - private static final Logger LOG = LogManager.getLogger(Hex.class); - - /** - * for hex conversion. 
- * - * https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal - * - */ - private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; - private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; - - private Hex() {} - - /** - * Returns a hex string of the given byte. - * - * @param b the byte to be converted - * @return the hex string representing the given byte - */ - public static String getString(byte b) - { - char[] chars = {HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]}; - return new String(chars); - } - - /** - * Returns a hex string of the given byte array. - * - * @param bytes the bytes to be converted - * @return the hex string representing the given bytes - */ - public static String getString(byte[] bytes) - { - StringBuilder string = new StringBuilder(bytes.length * 2); - for (byte b : bytes) - { - string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]); - } - return string.toString(); - } - - /** - * Returns the bytes corresponding to the ASCII hex encoding of the given byte. - * - * @param b the byte to be converted - * @return the ASCII hex encoding of the given byte - */ - public static byte[] getBytes(byte b) - { - return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]}; - } - - /** - * Returns the bytes corresponding to the ASCII hex encoding of the given bytes. - * - * @param bytes the bytey to be converted - * @return the ASCII hex encoding of the given bytes - */ - public static byte[] getBytes(byte[] bytes) - { - byte[] asciiBytes = new byte[bytes.length*2]; - for(int i=0; i< bytes.length; i++) - { - asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])]; - asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])]; - } - return asciiBytes; - } - - /** - * Returns the characters corresponding to the ASCII hex encoding of the given short. 
- * - * @param num the short value to be converted - * @return the ASCII hex encoding of the given short value - */ - public static char[] getChars(short num) - { - char[] hex = new char[4]; - hex[0] = HEX_CHARS[(num >> 12) & 0x0F]; - hex[1] = HEX_CHARS[(num >> 8) & 0x0F]; - hex[2] = HEX_CHARS[(num >> 4) & 0x0F]; - hex[3] = HEX_CHARS[num & 0x0F]; - return hex; - } - - /** - * Takes the characters in the given string, convert it to bytes in UTF16-BE format - * and build a char array that corresponds to the ASCII hex encoding of the resulting - * bytes. - * - * Example: - *
-     *   getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
-     * 
- * - * @param text The string to convert - * @return The string converted to hex - */ - public static char[] getCharsUTF16BE(String text) - { - // Note that the internal representation of string in Java is already UTF-16. Therefore - // we do not need to use an encoder to convert the string to its byte representation. - char[] hex = new char[text.length()*4]; - - for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++) - { - char c = text.charAt(stringIdx); - hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F]; - hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F]; - hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F]; - hex[charIdx++] = HEX_CHARS[c & 0x0F]; - } - - return hex; - } - - /** - * Writes the given byte as hex value to the given output stream. - * @param b the byte to be written - * @param output the output stream to be written to - * @throws IOException exception if anything went wrong - */ - public static void writeHexByte(byte b, OutputStream output) throws IOException - { - output.write(HEX_BYTES[getHighNibble(b)]); - output.write(HEX_BYTES[getLowNibble(b)]); - } - - /** - * Writes the given byte array as hex value to the given output stream. - * @param bytes the byte array to be written - * @param output the output stream to be written to - * @throws IOException exception if anything went wrong - */ - public static void writeHexBytes(byte[] bytes, OutputStream output) throws IOException - { - for (byte b : bytes) - { - writeHexByte(b, output); - } - } - - /** - * Get the high nibble of the given byte. - * - * @param b the given byte - * @return the high nibble - */ - private static int getHighNibble(byte b) - { - return (b & 0xF0) >> 4; - } - - /** - * Get the low nibble of the given byte. - * - * @param b the given byte - * @return the low nibble - */ - private static int getLowNibble(byte b) - { - return b & 0x0F; - } - - /** - * Decode a base64 String. - * - * @param base64Value a base64 encoded String. 
- * - * @return the decoded String as a byte array. - * - * @throws IllegalArgumentException if this isn't a base64 encoded string. - */ - public static byte[] decodeBase64(String base64Value) - { - return Base64.getDecoder(). - decode(StringUtil.PATTERN_SPACE.matcher(base64Value).replaceAll("")); - } - - /** - * Decodes a hex String into a byte array. - * - * @param s A String with ASCII hex. - * @return decoded byte array. - */ - public static byte[] decodeHex(String s) - { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - int i = 0; - while (i < s.length() - 1) - { - if (s.charAt(i) == '\n' || s.charAt(i) == '\r') - { - ++i; - } - else - { - String hexByte = s.substring(i, i + 2); - try - { - baos.write(Integer.parseInt(hexByte, 16)); // Byte.parseByte won't work with "9C" - } - catch (NumberFormatException ex) - { - LOG.error(() -> "Can't parse " + hexByte + ", aborting decode", ex); - break; - } - i += 2; - } - } - return baos.toByteArray(); - } -} diff --git a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java b/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java deleted file mode 100644 index 743c3bf9a8c..00000000000 --- a/pdfcos/src/main/java/org/apache/pdfbox/cos/util/StringUtil.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.util; - -import java.util.Arrays; -import java.util.regex.Pattern; - -public final class StringUtil -{ - public static final Pattern PATTERN_SPACE = Pattern.compile("\\s"); - - public static String[] splitOnSpace(String s) - { - return PATTERN_SPACE.split(s); - } - - /** - * Split at spaces but keep them - * - * @param s - * @return - */ - public static String[] tokenizeOnSpace(String s) - { - return Arrays.stream(s.split("(?<=" + StringUtil.PATTERN_SPACE + ")|(?=" + StringUtil.PATTERN_SPACE + ")")) - .toArray(String[]::new); - } -} diff --git a/pdfcos/src/test/java/org.apache.pdfbox.cos.encryption.properties b/pdfcos/src/test/java/org.apache.pdfbox.cos.encryption.properties deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java deleted file mode 100644 index 0a6e0c344f6..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/COSDictionaryTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; - -class COSDictionaryTest -{ - @Test - void testCOSDictionaryNotEqualsCOSStream() - { - COSDictionary cosDictionary = new COSDictionary(); - COSStream cosStream = new COSStream(); - cosDictionary.setItem( COSName.BE, COSName.BE); - cosDictionary.setInt(COSName.LENGTH, 0); - cosStream.setItem(COSName.BE, COSName.BE); - assertNotEquals(cosDictionary, cosStream, - "a COSDictionary shall not be equal to a COSStream with the same dictionary entries"); - assertNotEquals(cosStream, cosDictionary, - "a COSStream shall not be equal to a COSDictionary with the same dictionary entries"); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java deleted file mode 100644 index 2d70bc9436d..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/PDFDocEncodingTest.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.pdfbox.cos; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -/** - * Test for PDFDocEncoding. - * - */ -class PDFDocEncodingTest -{ - - static final List deviations = new ArrayList<>(); - - static - { - // all deviations (based on the table in ISO 32000-1:2008) - // block 1 - deviations.add(String.valueOf('\u02D8')); // BREVE - deviations.add(String.valueOf('\u02C7')); // CARON - deviations.add(String.valueOf('\u02C6')); // MODIFIER LETTER CIRCUMFLEX ACCENT - deviations.add(String.valueOf('\u02D9')); // DOT ABOVE - deviations.add(String.valueOf('\u02DD')); // DOUBLE ACUTE ACCENT - deviations.add(String.valueOf('\u02DB')); // OGONEK - deviations.add(String.valueOf('\u02DA')); // RING ABOVE - deviations.add(String.valueOf('\u02DC')); // SMALL TILDE - // block 2 - deviations.add(String.valueOf('\u2022')); // BULLET - deviations.add(String.valueOf('\u2020')); // DAGGER - deviations.add(String.valueOf('\u2021')); // DOUBLE DAGGER - deviations.add(String.valueOf('\u2026')); // HORIZONTAL ELLIPSIS - deviations.add(String.valueOf('\u2014')); // EM DASH - deviations.add(String.valueOf('\u2013')); // EN DASH - deviations.add(String.valueOf('\u0192')); // LATIN SMALL LETTER SCRIPT F - deviations.add(String.valueOf('\u2044')); // FRACTION SLASH (solidus) - deviations.add(String.valueOf('\u2039')); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - deviations.add(String.valueOf('\u203A')); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - deviations.add(String.valueOf('\u2212')); // MINUS SIGN - deviations.add(String.valueOf('\u2030')); // PER MILLE SIGN - deviations.add(String.valueOf('\u201E')); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase) - deviations.add(String.valueOf('\u201C')); // LEFT DOUBLE QUOTATION MARK (quotedblleft) - deviations.add(String.valueOf('\u201D')); // RIGHT DOUBLE QUOTATION MARK (quotedblright) - 
deviations.add(String.valueOf('\u2018')); // LEFT SINGLE QUOTATION MARK (quoteleft) - deviations.add(String.valueOf('\u2019')); // RIGHT SINGLE QUOTATION MARK (quoteright) - deviations.add(String.valueOf('\u201A')); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase) - deviations.add(String.valueOf('\u2122')); // TRADE MARK SIGN - deviations.add(String.valueOf('\uFB01')); // LATIN SMALL LIGATURE FI - deviations.add(String.valueOf('\uFB02')); // LATIN SMALL LIGATURE FL - deviations.add(String.valueOf('\u0141')); // LATIN CAPITAL LETTER L WITH STROKE - deviations.add(String.valueOf('\u0152')); // LATIN CAPITAL LIGATURE OE - deviations.add(String.valueOf('\u0160')); // LATIN CAPITAL LETTER S WITH CARON - deviations.add(String.valueOf('\u0178')); // LATIN CAPITAL LETTER Y WITH DIAERESIS - deviations.add(String.valueOf('\u017D')); // LATIN CAPITAL LETTER Z WITH CARON - deviations.add(String.valueOf('\u0131')); // LATIN SMALL LETTER DOTLESS I - deviations.add(String.valueOf('\u0142')); // LATIN SMALL LETTER L WITH STROKE - deviations.add(String.valueOf('\u0153')); // LATIN SMALL LIGATURE OE - deviations.add(String.valueOf('\u0161')); // LATIN SMALL LETTER S WITH CARON - deviations.add(String.valueOf('\u017E')); // LATIN SMALL LETTER Z WITH CARON - deviations.add(String.valueOf('\u20AC')); // EURO SIGN - // end of deviations - } - - @Test - void testDeviations() - { - deviations.forEach(deviation -> - { - COSString cosString = new COSString( deviation); - assertEquals(cosString.getString(), deviation); - }); - } - - /** - * PDFBOX-3864: Test that chars smaller than 256 which are NOT part of PDFDocEncoding are - * handled correctly. 
- * - * @throws IOException - */ - @Test - void testPDFBox3864() throws IOException - { - for (int i = 0; i < 256; i++) - { - String hex = String.format("FEFF%04X", i); - COSString cs1 = COSString.parseHex(hex); - COSString cs2 = new COSString(cs1.getString()); - assertEquals(cs1, cs2); - } - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java deleted file mode 100644 index 44dd846556f..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSArray.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright 2018 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.pdfbox.cos; - -import java.util.Arrays; -import java.util.List; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Unittests for {@link COSArray} - */ -class TestCOSArray -{ - @Test - void testCreate() - { - COSArray cosArray = new COSArray(); - assertEquals(0, cosArray.size()); - Assertions.assertThrows(NullPointerException.class, () -> new COSArray( - (List) null), - "Constructor should have thrown an exception"); - - cosArray = new COSArray(Arrays.asList( COSName.A, COSName.B, COSName.C)); - assertEquals(3, cosArray.size()); - assertEquals(COSName.A, cosArray.get(0)); - assertEquals(COSName.B, cosArray.get(1)); - assertEquals(COSName.C, cosArray.get(2)); - } - - @Test - void testConvertString2COSNameAndBack() - { - COSArray cosArray = COSArray.ofCOSNames( - Arrays.asList(COSName.A.getName(), COSName.B.getName(), COSName.C.getName())); - assertEquals(3, cosArray.size()); - assertEquals(COSName.A, cosArray.get(0)); - assertEquals(COSName.B, cosArray.get(1)); - assertEquals(COSName.C, cosArray.get(2)); - - List cosNameStringList = cosArray.toCOSNameStringList(); - assertEquals(3, cosNameStringList.size()); - assertEquals(COSName.A.getName(), cosNameStringList.get(0)); - assertEquals(COSName.B.getName(), cosNameStringList.get(1)); - assertEquals(COSName.C.getName(), cosNameStringList.get(2)); - } - - @Test - void testConvertString2COSStringAndBack() - { - COSArray cosArray = COSArray - .ofCOSStrings(Arrays.asList("A", "B", "C")); - assertEquals(3, cosArray.size()); - assertEquals("A", cosArray.getString(0)); - assertEquals("B", cosArray.getString(1)); - assertEquals("C", cosArray.getString(2)); - - List cosStringStringList = 
cosArray.toCOSStringStringList(); - assertEquals(3, cosStringStringList.size()); - assertEquals("A", cosStringStringList.get(0)); - assertEquals("B", cosStringStringList.get(1)); - assertEquals("C", cosStringStringList.get(2)); - } - - @Test - void testConvertInteger2COSStringAndBack() - { - COSArray cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3)); - assertEquals(3, cosArray.size()); - assertEquals(1, cosArray.getInt(0)); - assertEquals(2, cosArray.getInt(1)); - assertEquals(3, cosArray.getInt(2)); - - List cosNumberIntegerList = cosArray.toCOSNumberIntegerList(); - assertEquals(3, cosNumberIntegerList.size()); - assertEquals(1, (int) cosNumberIntegerList.get(0)); - assertEquals(2, (int) cosNumberIntegerList.get(1)); - assertEquals(3, (int) cosNumberIntegerList.get(2)); - - // check arrays with null values - cosArray = new COSArray(Arrays.asList( COSInteger.get( 1), null, COSInteger.get( 3))); - assertEquals(3, cosArray.size()); - assertEquals(1, cosArray.getInt(0)); - assertNull(cosArray.get(1)); - assertEquals(3, cosArray.getInt(2)); - cosNumberIntegerList = cosArray.toCOSNumberIntegerList(); - assertEquals(3, cosNumberIntegerList.size()); - assertEquals(1, (int) cosNumberIntegerList.get(0)); - assertNull(cosNumberIntegerList.get(1)); - assertEquals(3, (int) cosNumberIntegerList.get(2)); - } - - @Test - void testConvertFloat2COSStringAndBack() - { - float[] floatArrayStart = { 1.0f, 0.1f, 0.02f }; - COSArray cosArray = new COSArray(); - cosArray.setFloatArray(floatArrayStart); - - assertEquals(3, cosArray.size()); - assertEquals( COSFloat.ONE, cosArray.get( 0)); - assertEquals(new COSFloat(0.1f), cosArray.get(1)); - assertEquals(new COSFloat(0.02f), cosArray.get(2)); - - List cosNumberFloatList = cosArray.toCOSNumberFloatList(); - assertEquals(3, cosNumberFloatList.size()); - assertEquals(1.0f, (float) cosNumberFloatList.get(0), 0); - assertEquals(0.1f, (float) cosNumberFloatList.get(1), 0); - assertEquals(0.02f, (float) cosNumberFloatList.get(2), 0); - 
- float[] floatArrayEnd = cosArray.toFloatArray(); - assertEquals(1.0f, (float) cosNumberFloatList.get(0), 0); - assertEquals(0.1f, (float) cosNumberFloatList.get(1), 0); - assertEquals(0.02f, (float) cosNumberFloatList.get(2), 0); - assertArrayEquals(floatArrayStart, floatArrayEnd, 0); - - // check arrays with null values - cosArray = new COSArray(Arrays.asList(COSFloat.ONE, null, new COSFloat(0.02f))); - assertEquals(3, cosArray.size()); - assertEquals(COSFloat.ONE, cosArray.get(0)); - assertNull(cosArray.get(1)); - assertEquals(new COSFloat(0.02f), cosArray.get(2)); - - cosNumberFloatList = cosArray.toCOSNumberFloatList(); - assertEquals(3, cosNumberFloatList.size()); - assertEquals(1.0f, (float) cosNumberFloatList.get(0), 0); - assertNull(cosNumberFloatList.get(1)); - assertEquals(0.02f, (float) cosNumberFloatList.get(2), 0); - - floatArrayEnd = cosArray.toFloatArray(); - // due to the null value the second value of the array is set to 0 - assertArrayEquals(new float[] { 1.0f, 0f, 0.02f }, floatArrayEnd, 0); - - } - - @Test - void testGetSetName() - { - COSArray cosArray = new COSArray(); - cosArray.growToSize(3); - cosArray.setName(0, "A"); - cosArray.setName(1, "B"); - cosArray.setName(2, "C"); - assertEquals(3, cosArray.size()); - assertEquals("A", cosArray.getName(0)); - assertEquals("B", cosArray.getName(1)); - assertEquals("C", cosArray.getName(2)); - assertEquals("NULL", cosArray.getName(3, "NULL")); - assertEquals(0, cosArray.indexOf(COSName.A)); - assertEquals(1, cosArray.indexOf(COSName.B)); - assertEquals(2, cosArray.indexOf(COSName.C)); - assertEquals(-1, cosArray.indexOf(COSName.D)); - cosArray.setName(1, "D"); - assertEquals(3, cosArray.size()); - assertEquals("D", cosArray.getName(1)); - } - - @Test - void testGetSetInt() - { - COSArray cosArray = new COSArray(); - cosArray.growToSize(3); - cosArray.setInt(0, 0); - cosArray.setInt(1, 1); - cosArray.setInt(2, 2); - assertEquals(3, cosArray.size()); - assertEquals(0, cosArray.getInt(0)); - 
assertEquals(1, cosArray.getInt(1)); - assertEquals(2, cosArray.getInt(2)); - assertEquals(0, cosArray.getInt(3, 0)); - assertEquals(0, cosArray.indexOf(COSInteger.get(0))); - assertEquals(1, cosArray.indexOf(COSInteger.get(1))); - assertEquals(2, cosArray.indexOf(COSInteger.get(2))); - assertEquals(-1, cosArray.indexOf(COSInteger.get(3))); - cosArray.setInt(1, 3); - assertEquals(3, cosArray.size()); - assertEquals(3, cosArray.getInt(1)); - } - - @Test - void testGetSetString() - { - COSArray cosArray = new COSArray(); - cosArray.growToSize(3); - cosArray.setString(0, "Test1"); - cosArray.setString(1, "Test2"); - cosArray.setString(2, "Test3"); - assertEquals(3, cosArray.size()); - assertEquals("Test1", cosArray.getString(0)); - assertEquals("Test2", cosArray.getString(1)); - assertEquals("Test3", cosArray.getString(2)); - assertEquals("NULL", cosArray.getString(3, "NULL")); - assertEquals(0, cosArray.indexOf(new COSString( "Test1"))); - assertEquals(1, cosArray.indexOf(new COSString("Test2"))); - assertEquals(2, cosArray.indexOf(new COSString("Test3"))); - assertEquals(-1, cosArray.indexOf(new COSString("Test4"))); - cosArray.setString(1, "Test4"); - assertEquals(3, cosArray.size()); - assertEquals("Test4", cosArray.getString(1)); - } - - @Test - void testRemove() - { - COSArray cosArray = COSArray - .ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6)); - cosArray.clear(); - assertEquals(0, cosArray.size()); - - cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6)); - assertEquals(COSInteger.get(3), cosArray.remove(2)); - // 1,2,4,5,6 should be left - assertEquals(5, cosArray.size()); - assertEquals(1, cosArray.getInt(0)); - assertEquals(4, cosArray.getInt(2)); - - // 1,2,4,6 should be left - assertTrue(cosArray.removeObject(COSInteger.get(5))); - assertEquals(4, cosArray.size()); - assertEquals(1, cosArray.getInt(0)); - assertEquals(4, cosArray.getInt(2)); - assertEquals(6, cosArray.getInt(3)); - - cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 
4, 5, 6)); - cosArray.removeAll(Arrays.asList(COSInteger.get(3), COSInteger.get(4))); - // 1,2,5,6 should be left - assertEquals(4, cosArray.size()); - assertEquals(2, cosArray.getInt(1)); - assertEquals(5, cosArray.getInt(2)); - - cosArray = COSArray.ofCOSIntegers(Arrays.asList(1, 2, 3, 4, 5, 6)); - cosArray.retainAll(Arrays.asList(COSInteger.get(3), COSInteger.get(4))); - // 3,4 should be left - assertEquals(2, cosArray.size()); - assertEquals(3, cosArray.getInt(0)); - assertEquals(4, cosArray.getInt(1)); - - } - - @Test - void testGrowToSize() - { - COSArray cosArray = new COSArray(); - assertEquals(0, cosArray.size()); - cosArray.growToSize(2); - // COSArray has 2 empty elements - assertEquals(2, cosArray.size()); - // size is already 2 -> nothing happens - cosArray.growToSize(2, COSInteger.get(0)); - assertEquals(2, cosArray.size()); - // increase size, fill the new elements with the given value - cosArray.growToSize(4, COSInteger.get(1)); - assertEquals(4, cosArray.size()); - List cosNumberIntegerList = cosArray.toCOSNumberIntegerList(); - assertEquals(4, cosNumberIntegerList.size()); - assertNull(cosNumberIntegerList.get(0)); - assertEquals(1, (int) cosNumberIntegerList.get(2)); - assertEquals(1, (int) cosNumberIntegerList.get(3)); - } - - @Test - void testToList() - { - COSArray cosArray = COSArray - .ofCOSIntegers(Arrays.asList(0, 1, 2, 3, 4, 5)); - List list = cosArray.toList(); - assertEquals(6, list.size()); - assertEquals(COSInteger.get(0), list.get(0)); - assertEquals(COSInteger.get(5), list.get(5)); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java deleted file mode 100644 index f21356ddda9..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBase.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import java.io.IOException; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Test class for {@link COSBase}. - */ -abstract class TestCOSBase -{ - /** The COSBase abstraction of the object being tested. */ - protected static COSBase testCOSBase; - - /** - * Tests getCOSObject() - tests that the underlying object is returned. - * In the case of objects derived from COSBase this is always an identity - * function. - */ - @Test - void testGetCOSObject() - { - assertEquals(testCOSBase, testCOSBase.getCOSObject()); - } - - /** - * Test accept() - tests the interface for visiting a document at the COS level. - */ - abstract void testAccept() throws IOException; - - /** - * Tests isDirect() and setDirect() - tests the getter/setter methods. - */ - @Test - void testIsSetDirect() - { - testCOSBase.setDirect(true); - assertTrue(testCOSBase.isDirect()); - testCOSBase.setDirect(false); - assertFalse(testCOSBase.isDirect()); - } - - /** - * A simple utility function to compare two byte arrays. 
- * @param byteArr1 the expected byte array - * @param byteArr2 the byte array being compared - */ - @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity - protected void testByteArrays(byte[] byteArr1, byte[] byteArr2) - { - assertEquals(byteArr1.length, byteArr1.length); - for (int i = 0; i < byteArr1.length; i++) - { - assertEquals(byteArr1[i], byteArr2[i]); - } - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java deleted file mode 100644 index 07c8b52bda4..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSBoolean.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -//import java.io.IOException; -//import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * Unittests for {@link COSBoolean} - */ -class TestCOSBoolean extends TestCOSBase -{ - final COSBoolean cosBooleanTrue = COSBoolean.TRUE; - final COSBoolean cosBooleanFalse = COSBoolean.FALSE; - - @BeforeAll - static void setUp() - { - testCOSBase = COSBoolean.TRUE; - } - - @Test - void testGetValue() - { - assertTrue(cosBooleanTrue.getValue()); - assertFalse(cosBooleanFalse.getValue()); - } - - @Test - void testGetValueAsObject() - { - assertTrue(cosBooleanTrue.getValueAsObject() instanceof Boolean); - assertEquals(Boolean.TRUE, cosBooleanTrue.getValueAsObject()); - assertTrue(cosBooleanFalse.getValueAsObject() instanceof Boolean); - assertEquals(Boolean.FALSE, cosBooleanFalse.getValueAsObject()); - } - - @Test - void testGetBoolean() - { - assertEquals(cosBooleanTrue, COSBoolean.getBoolean(Boolean.TRUE)); - assertEquals(cosBooleanFalse, COSBoolean.getBoolean(Boolean.FALSE)); - } - - @Test - void testEquals() - { - COSBoolean test1 = COSBoolean.TRUE; - COSBoolean test2 = COSBoolean.TRUE; - COSBoolean test3 = COSBoolean.TRUE; - // Reflexive (x == x) - assertEquals(test1, test1); - // Symmetric is preserved ( x==y then y===x) - assertEquals(test2, test1); - assertEquals(test1, test2); - // Transitive (if x==y && y==z then x===z) - assertEquals(test1, test2); - assertEquals(test2, test3); - assertEquals(test1, test3); - - assertNotEquals(COSBoolean.TRUE, COSBoolean.FALSE); - // same 'value' but different 
type - assertNotEquals(Boolean.TRUE, COSBoolean.TRUE); - assertNotEquals(Boolean.FALSE, COSBoolean.FALSE); - assertNotEquals(true, COSBoolean.TRUE); - assertNotEquals(true, COSBoolean.FALSE); - } - - @Override - @Test - void testAccept() - { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - TestVisitor visitor = new TestVisitor( outStream ); - int index = 0; - try - { - cosBooleanTrue.accept( visitor ); - testByteArrays(String.valueOf(cosBooleanTrue) - .getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); - outStream.reset(); - cosBooleanFalse.accept( visitor ); - testByteArrays(String.valueOf(cosBooleanFalse) - .getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); - outStream.reset(); - } - catch (Exception e) - { - fail("Failed to write " + index + " exception: " + e.getMessage()); - } - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java deleted file mode 100644 index 6c91f1040c9..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSFloat.java +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.Date; -import java.util.Random; -// import org.apache.pdfbox.pdfwriter.COSWriter; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * Tests {@link COSFloat}. - */ -class TestCOSFloat extends TestCOSNumber -{ - @BeforeAll - static void setUp() - { - try - { - testCOSBase = COSNumber.get( "1.1"); - } - catch (IOException e) - { - fail("Failed to create a COSNumber in setUp()"); - } - } - - /** - * Base class to run looped tests with float numbers. - * - * To use it, derive a class and just implement runTest(). Then either call - * runTests for a series of random and pseudorandom tests, or runTest to - * test with corner values. 
- */ - abstract class BaseTester - { - private int low = -100000; - private int high = 300000; - private int step = 20000; - - public void setLoop(int low, int high, int step) - { - this.low = low; - this.high = high; - this.step = step; - } - - // deterministic and non-deterministic test - public void runTests() - { - // deterministic test - loop(123456); - - // non-deterministic test - loop(System.currentTimeMillis()); - } - - // look through a series of pseudorandom tests influenced by a seed - private void loop(long seed) - { - Date currentDate = new Date(); - Random rnd = new Random( currentDate.getTime() ); - for (int i = low; i < high; i += step) - { - float num = i * rnd.nextFloat(); - try - { - runTest(num); - } - catch (AssertionError a) - { - fail("num = " + num + ", seed = " + seed + ", message: " + a.getMessage()); - } - } - } - - abstract void runTest(float num); - - } - - /** - * Tests equals() - ensures that the Object.equals() contract is obeyed. - * These are tested over a range of arbitrary values to ensure Consistency, - * Reflexivity, Symmetry, Transitivity and non-nullity. 
- */ - @Test - void testEquals() - { - new BaseTester() - { - @Override - @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity - void runTest(float num) - { - - COSFloat test1 = new COSFloat(num); - COSFloat test2 = new COSFloat(num); - COSFloat test3 = new COSFloat(num); - // Reflexive (x == x) - assertEquals(test1, test1); - // Symmetric is preserved ( x==y then y==x) - assertEquals(test2, test3); - assertEquals(test3, test2); - // Transitive (if x==y && y==z then x==z) - assertEquals(test1, test2); - assertEquals(test2, test3); - assertEquals(test1, test3); - - float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); - COSFloat test4 = new COSFloat(nf); - assertNotEquals(test4, test1); - } - }.runTests(); - } - - class HashCodeTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat test1 = new COSFloat(num); - COSFloat test2 = new COSFloat(num); - assertEquals(test1.hashCode(), test2.hashCode()); - - float nf = Float.intBitsToFloat(Float.floatToIntBits(num) + 1); - COSFloat test3 = new COSFloat(nf); - assertNotSame(test3.hashCode(), test1.hashCode()); - } - } - - /** - * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed - * over a range of arbitrary values. 
- */ - @Test - void testHashCode() - { - new HashCodeTester().runTests(); - } - - class FloatValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals(num, testFloat.floatValue()); - } - - } - - @Override - @Test - void testFloatValue() - { - new FloatValueTester().runTests(); - } - - class IntValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals((int) num, testFloat.intValue()); - } - - } - - @Override - @Test - void testIntValue() - { - new IntValueTester().runTests(); - } - - class LongValueTester extends BaseTester - { - - @Override - void runTest(float num) - { - COSFloat testFloat = new COSFloat(num); - assertEquals((long) num, testFloat.longValue()); - } - - } - - @Override - @Test - void testLongValue() - { - new LongValueTester().runTests(); - } - - class AcceptTester extends BaseTester - { - final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - final TestVisitor visitor = new TestVisitor(outStream); // writing gets tested elsewhere - - @Override - void runTest(float num) - { - COSFloat cosFloat = new COSFloat(num); - try - { - cosFloat.accept(visitor); - } - catch (IOException e) - { - throw new RuntimeException( e ); - } - String expected = "COSFloat{" + floatToString( cosFloat.floatValue() ) + "}"; - assertEquals(expected, outStream.toString( StandardCharsets.ISO_8859_1 )); - testByteArrays(expected.getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); - outStream.reset(); - } - - } - - @Override - @Test - void testAccept() - { - new AcceptTester().runTests(); - } - -// class WritePDFTester extends BaseTester -// { -// final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); -// -// WritePDFTester() -// { -// setLoop(-1000, 3000, 200); -// } -// -// @Override -// void runTest(float num) -// { -// try -// { -// COSFloat cosFloat = new COSFloat(num); -// // 
cosFloat.writePDF(outStream); // writing gets tested elsewhere -// -// String expected = floatToString(cosFloat.floatValue()); -// assertEquals(expected, outStream.toString( StandardCharsets.ISO_8859_1 )); -// assertEquals("COSFloat{" + expected + "}", cosFloat.toString()); -// -// expected = floatToString(num); -// assertEquals(expected, outStream.toString( StandardCharsets.ISO_8859_1 )); -// assertEquals("COSFloat{" + expected + "}", cosFloat.toString()); -// testByteArrays(expected.getBytes(StandardCharsets.ISO_8859_1), -// outStream.toByteArray()); -// -// outStream.reset(); -// } -// catch (IOException e) -// { -// fail("Failed to write " + num + " exception: " + e.getMessage()); -// } -// } -// -// } - - /** - * Tests writePDF() - this method takes an {@link java.io.OutputStream} and writes - * this object to it. - */ -// @Test -// void testWritePDF() // writing gets tested elsewhere -// { -// WritePDFTester writePDFTester = new WritePDFTester(); -// writePDFTester.runTests(); -// -// // test a corner case as described in PDFBOX-1778 -// writePDFTester.runTest(0.000000000000000000000000000000001f); -// } - - @Test - void testDoubleNegative() throws IOException - { - // PDFBOX-4289 - COSFloat cosFloat = new COSFloat("--16.33"); - assertEquals(-16.33f, cosFloat.floatValue()); - } - - @Test - void testVerySmallValues() throws IOException - { - double smallValue = Float.MIN_VALUE / 10d; - - assertEquals(-1, Double.compare(smallValue, Float.MIN_VALUE), - "Test must be performed with a value smaller than Float.MIN_VALUE."); - - // 1.4012984643248171E-46 - String asString = String.valueOf(smallValue); - COSFloat cosFloat = new COSFloat(asString); - assertEquals(0.0f, cosFloat.floatValue()); - - // 0.00000000000000000000000000000000000000000000014012984643248171 - asString = new BigDecimal(asString).toPlainString(); - cosFloat = new COSFloat(asString); - assertEquals(0.0f, cosFloat.floatValue()); - - smallValue *= -1; - - // -1.4012984643248171E-46 - asString = 
String.valueOf(smallValue); - cosFloat = new COSFloat(asString); - assertEquals(0.0f, cosFloat.floatValue()); - - // -0.00000000000000000000000000000000000000000000014012984643248171 - asString = new BigDecimal(asString).toPlainString(); - cosFloat = new COSFloat(asString); - assertEquals(0.0f, cosFloat.floatValue()); - } - - @Test - void testVeryLargeValues() throws IOException - { - double largeValue = Float.MAX_VALUE * 10d; - - assertEquals(1, Double.compare(largeValue, Float.MAX_VALUE), - "Test must be performed with a value larger than Float.MAX_VALUE."); - - // 1.4012984643248171E-46 - String asString = String.valueOf(largeValue); - COSFloat cosFloat = new COSFloat(asString); - assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); - - // 0.00000000000000000000000000000000000000000000014012984643248171 - asString = new BigDecimal(asString).toPlainString(); - cosFloat = new COSFloat(asString); - assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); - - largeValue *= -1; - - // -1.4012984643248171E-46 - asString = String.valueOf(largeValue); - cosFloat = new COSFloat(asString); - assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); - - // -0.00000000000000000000000000000000000000000000014012984643248171 - asString = new BigDecimal(asString).toPlainString(); - cosFloat = new COSFloat(asString); - assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); - } - - @Test - void testMisplacedNegative() throws IOException - { - // PDFBOX-2990, PDFBOX-3369 have 0.00000-33917698 - // PDFBOX-3500 has 0.-262 - - COSFloat cosFloat = new COSFloat("0.00000-33917698"); - assertEquals(new COSFloat("-0.0000033917698"), cosFloat); - - cosFloat = new COSFloat("0.-262"); - assertEquals(new COSFloat("-0.262"), cosFloat); - - cosFloat = new COSFloat("-0.-262"); - assertEquals(new COSFloat("-0.262"), cosFloat); - - cosFloat = new COSFloat("-12.-1"); - assertEquals(new COSFloat("-12.1"), cosFloat); - } - - @Test - void testDuplicateMisplacedNegative() - { - 
assertThrows(IOException.class, () -> new COSFloat("0.-26-2")); - assertThrows(IOException.class, () -> new COSFloat("---0.262")); - assertThrows(IOException.class, () -> new COSFloat("--0.2-62")); - } - - @Test - void testStubOperatorMinMaxValues() - { - float largeValue = 32768f; - float largeNegativeValue = -32768f; - - assertEquals(largeValue, new COSFloat(largeValue).floatValue()); - assertEquals(largeNegativeValue, new COSFloat(largeNegativeValue).floatValue()); - } - - private String floatToString(float value) - { - // use a BigDecimal as intermediate state to avoid - // a floating point string representation of the float value - return removeTrailingNull(new BigDecimal(String.valueOf(value)).toPlainString()); - } - - private String removeTrailingNull(String value) - { - // remove fraction digit "0" only - if (value.indexOf('.') > -1 && !value.endsWith(".0")) - { - while (value.endsWith("0") && !value.endsWith(".0")) - { - value = value.substring(0,value.length()-1); - } - } - return value; - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java deleted file mode 100644 index 94a4f734ed1..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSInteger.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -//import org.apache.pdfbox.pdfwriter.COSWriter; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * A test case for COSInteger - * - * @author Koch - */ -class TestCOSInteger extends TestCOSNumber -{ - @BeforeAll - static void setUp() - { - try - { - testCOSBase = COSNumber.get( "0"); - } - catch (IOException e) - { - fail("Failed to create a COSNumber in setUp()"); - } - } - - /** - * Tests equals() - ensures that the Object.equals() contract is obeyed. These are tested over - * a range of arbitrary values to ensure Consistency, Reflexivity, Symmetry, Transitivity and - * non-nullity. 
- */ - @Test - void testEquals() - { - // Consistency - for (int i = -1000; i < 3000; i += 200) - { - COSInteger test1 = COSInteger.get( i); - COSInteger test2 = COSInteger.get(i); - COSInteger test3 = COSInteger.get(i); - // Reflexive (x == x) - assertEquals(test1, test1); - // Symmetric is preserved ( x==y then y===x) - assertEquals(test2, test1); - assertEquals(test1, test2); - // Transitive (if x==y && y==z then x===z) - assertEquals(test1, test2); - assertEquals(test2, test3); - assertEquals(test1, test3); - - COSInteger test4 = COSInteger.get(i + 1); - assertNotEquals(test4, test1); - } - } - - /** - * Tests hashCode() - ensures that the Object.hashCode() contract is obeyed over a range of - * arbitrary values. - */ - @Test - void testHashCode() - { - for (int i = -1000; i < 3000; i += 200) - { - COSInteger test1 = COSInteger.get(i); - COSInteger test2 = COSInteger.get(i); - assertEquals(test1.hashCode(), test2.hashCode()); - - COSInteger test3 = COSInteger.get(i + 1); - assertNotSame(test3.hashCode(), test1.hashCode()); - } - } - - @Override - @Test - void testFloatValue() - { - for (int i = -1000; i < 3000; i += 200) - { - assertEquals((float) i, COSInteger.get(i).floatValue()); - } - } - - @Override - @Test - void testIntValue() - { - for (int i = -1000; i < 3000; i += 200) - { - assertEquals(i, COSInteger.get(i).intValue()); - } - } - - @Override - @Test - void testLongValue() - { - for (int i = -1000; i < 3000; i += 200) - { - assertEquals((long) i, COSInteger.get(i).longValue()); - } - } - - @Override - @Test - void testAccept() - { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - TestVisitor visitor = new TestVisitor(outStream); - int index = 0; - try - { - // 197 is a prime number, used just to get some variation in the digits. 
- for (int i = -1000; i < 3000; i += 197) - { - index = i; - COSInteger cosInt = COSInteger.get(i); - String expected = Integer.toString( i ); - cosInt.accept(visitor); - testByteArrays(expected.getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); - outStream.reset(); - } - } - catch (Exception e) - { - fail("Failed to write " + index + " exception: " + e.getMessage()); - } - } - - /** - * Tests writePDF() - this method takes an {@link java.io.OutputStream} and writes this object to it. - */ -// @Test -// void testWritePDF() -// { -// ByteArrayOutputStream outStream = new ByteArrayOutputStream(); -// int index = 0; -// try -// { -// for (int i = -1000; i < 3000; i += 200) -// { -// index = i; -// COSInteger cosInt = COSInteger.get(i); -// testByteArrays(String.valueOf(i).getBytes(StandardCharsets.ISO_8859_1), outStream.toByteArray()); -// outStream.reset(); -// } -// } -// catch (Exception e) -// { -// fail("Failed to write " + index + " exception: " + e.getMessage()); -// } -// } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java deleted file mode 100644 index 69b1d5814e3..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSName.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2018 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.pdfbox.cos; - -//import java.io.ByteArrayOutputStream; -import java.io.IOException; -//import org.apache.pdfbox.Loader; -//import org.apache.pdfbox.pdmodel.PDDocument; -//import org.apache.pdfbox.pdmodel.PDPage; -import org.junit.jupiter.api.Test; - -//import static org.junit.jupiter.api.Assertions.assertEquals; -//import static org.junit.jupiter.api.Assertions.assertTrue; - -class TestCOSName -{ - /** - * PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?". - * - * @throws IOException - */ - @Test - void PDFBox4076() throws IOException - { - /* - String special = "中国你好!"; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - try (PDDocument document = new PDDocument()) - { - PDPage page = new PDPage(); - document.addPage(page); - document.getDocumentCatalog().getCOSObject().setString( COSName.getPDFName( special), special); - - document.save(baos); - } - try (PDDocument document = Loader.loadPDF(baos.toByteArray())) - { - COSDictionary catalogDict = document.getDocumentCatalog().getCOSObject(); - assertTrue(catalogDict.containsKey(special)); - assertEquals(special, catalogDict.getString(special)); - } - */ - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java deleted file mode 100644 index 58015601cba..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSNumber.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import java.io.IOException; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * Test class for {@link COSNumber} - */ -abstract class TestCOSNumber extends TestCOSBase -{ - /** - * Test floatValue() - test that the correct float value is returned. - */ - abstract void testFloatValue(); - - /** - * Test intValue() - test that the correct int value is returned. - */ - abstract void testIntValue(); - - /** - * Test longValue() - test that the correct long value is returned. - */ - abstract void testLongValue(); - - /** - * Tests get() - tests a static constructor for COSNumber classes. 
- */ - @Test - void testGet() - { - try - { - - // Ensure the basic static numbers are recognized - assertEquals( COSInteger.ZERO, COSNumber.get( "0")); - assertEquals(COSInteger.ZERO, COSNumber.get("-")); - assertEquals(COSInteger.ZERO, COSNumber.get(".")); - assertEquals(COSInteger.ONE, COSNumber.get("1")); - assertEquals(COSInteger.TWO, COSNumber.get("2")); - assertEquals(COSInteger.THREE, COSNumber.get("3")); - // Test some arbitrary ints - assertEquals(COSInteger.get(100), COSNumber.get("100")); - assertEquals(COSInteger.get(256), COSNumber.get("256")); - assertEquals(COSInteger.get(-1000), COSNumber.get("-1000")); - assertEquals(COSInteger.get(2000), COSNumber.get("+2000")); - // Some arbitrary floats - assertEquals( new COSFloat( 1.1f), COSNumber.get( "1.1")); - assertEquals(new COSFloat(100f), COSNumber.get("100.0")); - assertEquals(new COSFloat(-100.001f), COSNumber.get("-100.001")); - // according to the specs the exponential shall not be used - // but obviously there some - assertNotNull(COSNumber.get("-2e-006")); - assertNotNull(COSNumber.get("-8e+05")); - - assertThrows(NullPointerException.class, () -> COSNumber.get(null)); - assertThrows(IOException.class, () -> COSNumber.get("a")); - } - catch (IOException e) - { - fail("Failed to convert a number " + e.getMessage()); - } - } - - /** - * PDFBOX-5176: large number, too big for a long leads to an COSInteger value which is marked as invalid. 
- * - * @throws IOException - */ - public void testLargeNumber() throws IOException - { - // max value - COSNumber cosNumber = COSNumber.get(Long.toString(Long.MAX_VALUE)); - assertTrue(cosNumber instanceof COSInteger); - COSInteger cosInteger = (COSInteger) cosNumber; - assertTrue(cosInteger.isValid()); - // min value - cosNumber = COSNumber.get(Long.toString(Long.MIN_VALUE)); - assertTrue(cosNumber instanceof COSInteger); - cosInteger = (COSInteger) cosNumber; - assertTrue(cosInteger.isValid()); - - // out of range, max value - cosNumber = COSNumber.get("18446744073307448448"); - assertTrue(cosNumber instanceof COSInteger); - cosInteger = (COSInteger) cosNumber; - assertFalse(cosInteger.isValid()); - // out of range, min value - cosNumber = COSNumber.get("-18446744073307448448"); - assertTrue(cosNumber instanceof COSInteger); - cosInteger = (COSInteger) cosNumber; - assertFalse(cosInteger.isValid()); - } - - @Test - void testInvalidNumber() - { - try - { - COSNumber.get("18446744073307F448448"); - fail("Was expecting an IOException"); - } - catch (IOException e) - { - } - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java deleted file mode 100644 index d0678bc6a3a..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSObject.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import org.apache.pdfbox.io.RandomAccessReadView; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import static org.apache.pdfbox.cos.TestCOSString.ESC_CHAR_STRING_PDF_FORMAT; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Test class for {@link COSObject}. - */ -class TestCOSObject extends TestCOSBase implements ICOSParser -{ - static COSObjectKey key = new COSObjectKey( 121L, 0 ); - static COSString cosString; - - @BeforeAll - static void setUp() - { - cosString = new COSString( "test string" ); - cosString.setKey( key ); // same key as the proxy object, because - // this will be the dereferenced object. - cosString.setDirect( true ); // If we were writing this as the value - // in a COSDictionary (which we are not) we would use this object - // directly and not create a reference for it. 
- testCOSBase = new COSObject( cosString ); - } - - @Test - void testGetCOSObject() - { - assert( testCOSBase.getCOSObject() instanceof COSObject ); - } - - @Test - @Override - void testIsSetDirect() - { - testCOSBase.setDirect(true); - assertFalse(testCOSBase.isDirect()); - testCOSBase.setDirect(false); - assertFalse(testCOSBase.isDirect()); - } - - @Test - void testGetObject() - { - // if I'm not mistaken, a proxy object will /never/ be direct - assertFalse( testCOSBase.isDirect() ); - COSBase base = ((COSObject) testCOSBase).getObject(); - // testCOSBase has no parser, so the object returned should be the - // string object we initialized it with. - assertEquals( cosString, base ); - assertTrue( ((COSObject) testCOSBase).isDereferenced() ); - - - final COSObject testCOSObject = new COSObject( key, this ); - - // start by making sure that the test object is indirect and is not dereferenced. - assertFalse( testCOSObject.isDereferenced() ); - - // getObject should cause the referenced object to be dereferenced - base = testCOSObject.getObject(); - assertTrue( testCOSObject.isDereferenced() ); - assertEquals( cosString, base ); - } - - /** - * Test accept() - tests the interface for visiting a document at the COS level. - * In the case of proxy {@link COSObject} the visitor is passed either to the - * encapsulated object, if it is present or can be dereferenced, or to the - * {@link COSNull#NULL} global object. - */ - @Test - void testAccept() throws IOException - { - String expected = "(" + ESC_CHAR_STRING_PDF_FORMAT + ")"; - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - TestVisitor visitor = new TestVisitor(outStream); - testCOSBase.accept( visitor ); - // the base test object encapsulates a string. - assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); - outStream.reset(); - // this new COSObject will dereference to the same string object as above. 
- COSObject testCOSObject = new COSObject( key, this ); - testCOSObject.accept( visitor ); - assertEquals( expected, outStream.toString( StandardCharsets.ISO_8859_1 )); - outStream.reset(); - testCOSObject.setToNull(); - testCOSObject.accept( visitor ); - assertEquals( "COSNull.NULL", outStream.toString( StandardCharsets.ISO_8859_1 )); - } - - @Test - void isCOSObjectNull() - { - COSObject testCOSObject = new COSObject( key, this ); - // The object has not been dereferenced, so it should still be null - assertTrue( testCOSObject.isObjectNull()); - testCOSObject.getObject(); // This should dereference the object - assertFalse( testCOSObject.isObjectNull()); - // this should set the encapsulated object to COSNull.NULL - testCOSObject.setToNull(); - assertTrue( testCOSObject.isObjectNull()); - // set to null should have zeroed out the parser, so no further - // dereferencing should be possible. - COSBase base = testCOSObject.getObject(); - assertEquals( COSNull.NULL, base ); - } - - /** - * A simple utility function to compare two byte arrays. 
- * @param byteArr1 the expected byte array - * @param byteArr2 the byte array being compared - */ - @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity - protected void testByteArrays(byte[] byteArr1, byte[] byteArr2) - { - assertEquals(byteArr1.length, byteArr1.length); - for (int i = 0; i < byteArr1.length; i++) - { - assertEquals(byteArr1[i], byteArr2[i]); - } - } - - @Override - public COSBase dereferenceCOSObject( COSObject obj ) throws IOException - { - return cosString; - } - - @Override - public RandomAccessReadView createRandomAccessReadView( long startPosition, long streamLength ) throws IOException - { - return null; - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java deleted file mode 100644 index 2adb0aa3bd5..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSStream.java +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import org.apache.pdfbox.cos.filter.Filter; -import org.apache.pdfbox.cos.filter.FilterFactory; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -class TestCOSStream -{ - /** - * Tests encoding of a stream without any filter applied. - * - * @throws IOException - */ - @Test - void testUncompressedStreamEncode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - COSStream stream = createStream( testString, null); - validateEncoded(stream, testString); - } - - /** - * Tests decoding of a stream without any filter applied. - * - * @throws IOException - */ - @Test - void testUncompressedStreamDecode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - COSStream stream = createStream(testString, null); - validateDecoded(stream, testString); - } - - /** - * Tests encoding of a stream with one filter applied. - * - * @throws IOException - */ - @Test - void testCompressedStream1Encode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - byte[] testStringEncoded = encodeData( testString, COSName.FLATE_DECODE); - COSStream stream = createStream(testString, COSName.FLATE_DECODE); - validateEncoded(stream, testStringEncoded); - } - - /** - * Tests decoding of a stream with one filter applied. 
- * - * @throws IOException - */ - @Test - void testCompressedStream1Decode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); - COSStream stream = new COSStream(); - - try (OutputStream output = stream.createRawOutputStream()) - { - output.write(testStringEncoded); - } - - stream.setItem(COSName.FILTER, COSName.FLATE_DECODE); - validateDecoded(stream, testString); - } - - /** - * Tests encoding of a stream with 2 filters applied. - * - * @throws IOException - */ - @Test - void testCompressedStream2Encode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); - testStringEncoded = encodeData(testStringEncoded, COSName.ASCII85_DECODE); - - COSArray filters = new COSArray(); - filters.add(COSName.ASCII85_DECODE); - filters.add(COSName.FLATE_DECODE); - - COSStream stream = createStream(testString, filters); - validateEncoded(stream, testStringEncoded); - } - - /** - * Tests decoding of a stream with 2 filters applied. 
- * - * @throws IOException - */ - @Test - void testCompressedStream2Decode() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); - testStringEncoded = encodeData(testStringEncoded, COSName.ASCII85_DECODE); - COSStream stream = new COSStream(); - - COSArray filters = new COSArray(); - filters.add(COSName.ASCII85_DECODE); - filters.add(COSName.FLATE_DECODE); - stream.setItem(COSName.FILTER, filters); - - try (OutputStream output = stream.createRawOutputStream()) - { - output.write(testStringEncoded); - } - - validateDecoded(stream, testString); - } - - /** - * Tests tests that encoding is done correctly even if the the stream is closed twice. - * Closeable.close() allows streams to be closed multiple times. The second and subsequent - * close() calls should have no effect. - * - * @throws IOException - */ - @Test - void testCompressedStreamDoubleClose() throws IOException - { - byte[] testString = "This is a test string to be used as input for TestCOSStream".getBytes(StandardCharsets.US_ASCII); - byte[] testStringEncoded = encodeData(testString, COSName.FLATE_DECODE); - COSStream stream = new COSStream(); - OutputStream output = stream.createOutputStream(COSName.FLATE_DECODE); - output.write(testString); - output.close(); - output.close(); - validateEncoded(stream, testStringEncoded); - } - - @Test - void testHasStreamData() throws IOException - { - try (COSStream stream = new COSStream()) - { - assertFalse(stream.hasData()); - Assertions.assertThrows(IOException.class, () -> stream.createInputStream(), - "createInputStream should have thrown an IOException"); - - byte[] testString = "This is a test string to be used as input for TestCOSStream" - .getBytes(StandardCharsets.US_ASCII); - try (OutputStream output = stream.createOutputStream()) - { - output.write(testString); - } - 
assertTrue(stream.hasData()); - } - } - - private byte[] encodeData(byte[] original, COSName filter) throws IOException - { - Filter encodingFilter = FilterFactory.INSTANCE.getFilter(filter); - ByteArrayOutputStream encoded = new ByteArrayOutputStream(); - encodingFilter.encode( new ByteArrayInputStream(original), encoded, new COSDictionary(), 0); - return encoded.toByteArray(); - } - - private COSStream createStream(byte[] testString, COSBase filters) throws IOException - { - COSStream stream = new COSStream(); - try (OutputStream output = stream.createOutputStream(filters)) - { - output.write(testString); - } - return stream; - } - - private void validateEncoded(COSStream stream, byte[] expected) throws IOException - { - InputStream in = stream.createRawInputStream(); - byte[] decoded = in.readAllBytes(); - stream.close(); - assertTrue(Arrays.equals(expected, decoded), "Encoded data doesn't match input"); - } - - private void validateDecoded(COSStream stream, byte[] expected) throws IOException - { - InputStream in = stream.createInputStream(); - byte[] encoded = in.readAllBytes(); - stream.close(); - assertTrue(Arrays.equals(expected, encoded), "Decoded data doesn't match input"); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java deleted file mode 100644 index 2f93a0d54db..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSString.java +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -// import org.apache.pdfbox.pdfwriter.COSWriter; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -/** - * This will test all of the filters in the PDFBox system. - * - * Ben Litchfield - */ -class TestCOSString extends TestCOSBase -{ - static final String ESC_CHAR_STRING = - "( test#some) escaped< \\chars>!~1239857 "; - static final String ESC_CHAR_STRING_PDF_FORMAT = - "\\( test#some\\) escaped< \\\\chars>!~1239857 "; - - @BeforeAll - static void setUp() - { - testCOSBase = new COSString( "test cos string"); - } - - /** - * TODO: writing gets tested elsewhere - * - * Test setForceHexForm() and setForceLiteralForm() - tests these two - * constructors do enforce the different String output forms within PDF. 
- */ -// @Test -// void testSetForceHexLiteralForm() -// { -// String inputString = "Test with a text and a few numbers 1, 2 and 3"; -// String pdfHex = "<" + createHex(inputString) + ">"; -// COSString cosStr = new COSString(inputString, true); -// writePDFTests(pdfHex, cosStr); -// -// COSString escStr = new COSString(ESC_CHAR_STRING); -// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", escStr); -// COSString escStrHex = new COSString(ESC_CHAR_STRING, true); -// // Escape characters not escaped in hex version -// writePDFTests("<" + createHex(ESC_CHAR_STRING) + ">", escStrHex); -// } -// -// /** -// * TODO: writing gets tested elsewhere -// * -// * Helper method for testing writePDF(). -// * -// * @param expected the String expected when writePDF() is invoked -// * @param testSubj the test subject -// */ -// private void writePDFTests(String expected, COSString testSubj) -// { -// ByteArrayOutputStream outStream = new ByteArrayOutputStream(); -// try -// { -// COSWriter visitor = new COSWriter( outStream ); -// visitor.writeString(testSubj.getBytes(), testSubj.getForceHexForm(), outStream); -// } -// catch (IOException e) -// { -// fail("IOException: " + e.getMessage()); -// } -// assertEquals(expected, outStream.toString()); -// } - - /** - * Test parseHex() - tests that the proper String is created from a hex string input. 
- */ - @Test - void testFromHex() - { - String expected = "Quick and simple test"; - String hexForm = createHex(expected); - try - { - COSString test1 = COSString.parseHex(hexForm); -// writePDFTests("(" + expected + ")", test1); - COSString test2 = COSString.parseHex(createHex(ESC_CHAR_STRING)); -// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", test2); - } - catch (IOException e) - { - fail("IOException thrown: " + e.getMessage()); - } - assertThrows(IOException.class, () -> COSString.parseHex(hexForm + "xx"), - "Should have thrown an IOException here"); - } - - private String createHex(String str) - { - StringBuilder sb = new StringBuilder(); - for (char c : str.toCharArray()) - { - sb.append(Integer.toString(c, 16)); - } - return sb.toString().toUpperCase(); - } - - /** - * Tests getHex() - ensure the hex String returned is properly formatted. - */ - @Test - void testGetHex() - { - String expected = "Test subject for testing getHex"; - COSString test1 = new COSString(expected); - String hexForm = createHex(expected); - assertEquals(hexForm, test1.toHexString()); - COSString escCS = new COSString(ESC_CHAR_STRING); - // Not sure whether the escaped characters should be escaped or not, presumably since - // writePDF() gives you the proper formatted text, getHex() should ONLY convert to hex. - assertEquals(createHex(ESC_CHAR_STRING), escCS.toHexString()); - } - - /** - * Test testGetString() - ensure getString() are returned in the correct format. 
- */ - @Test - void testGetString() - { - try - { - String testStr = "Test subject for getString()"; - COSString test1 = new COSString(testStr); - assertEquals(testStr, test1.getString()); - - COSString hexStr = COSString.parseHex(createHex(testStr)); - assertEquals(testStr, hexStr.getString()); - - COSString escapedString = new COSString(ESC_CHAR_STRING); - assertEquals(ESC_CHAR_STRING, escapedString.getString()); - - testStr = "Line1\nLine2\nLine3\n"; - COSString lineFeedString = new COSString(testStr); - assertEquals(testStr, lineFeedString.getString()); - } - catch (IOException e) - { - fail("IOException thrown: " + e.getMessage()); - } - } - - /** - * Test getBytes() - again not much to test, just ensure the proper byte array is returned. - */ - @Test - void testGetBytes() - { - COSString str = new COSString(ESC_CHAR_STRING); - testByteArrays(ESC_CHAR_STRING.getBytes(), str.getBytes()); - } - - /** - * TODO: writing output is tested elsewhere. - * Tests writePDF() - tests that the string is in PDF format. - */ -// @Test -// void testWritePDF() -// { -// // This has been tested quite thorougly above but do a couple tests anyway -// COSString testSubj = new COSString(ESC_CHAR_STRING); -// writePDFTests("(" + ESC_CHAR_STRING_PDF_FORMAT + ")", testSubj); -// String textString = "This is just an arbitrary piece of text for testing"; -// COSString testSubj2 = new COSString(textString); -// writePDFTests("(" + textString + ")", testSubj2); -// } - - /** - * This will test all of the filters in the system. - * - * @throws IOException If there is an exception while encoding. - */ - @Test - void testUnicode() throws IOException - { - String theString = "\u4e16"; - COSString string = new COSString(theString); - assertEquals( theString, string.getString() ); - - String textAscii = "This is some regular text. It should all be expressible in ASCII"; - /** En français où les choses sont accentués. 
En español, así */ - String text8Bit = "En fran\u00e7ais o\u00f9 les choses sont accentu\u00e9s. En espa\u00f1ol, as\u00ed"; - /** をクリックしてく */ - String textHighBits = "\u3092\u30af\u30ea\u30c3\u30af\u3057\u3066\u304f"; - - // Testing the getString method - COSString stringAscii = new COSString( textAscii ); - assertEquals( stringAscii.getString(), textAscii ); - - COSString string8Bit = new COSString( text8Bit ); - assertEquals( string8Bit.getString(), text8Bit ); - - COSString stringHighBits = new COSString( textHighBits ); - assertEquals( stringHighBits.getString(), textHighBits ); - - - // Testing the getBytes method - // The first two strings should be stored as ISO-8859-1 because they only contain chars in the range 0..255 - assertEquals(textAscii, new String(stringAscii.getBytes(), StandardCharsets.ISO_8859_1)); - // likewise for the 8bit characters. - assertEquals(text8Bit, new String(string8Bit.getBytes(), StandardCharsets.ISO_8859_1)); - - // The japanese text contains high bits so must be stored as big endian UTF-16 - assertEquals(textHighBits, new String(stringHighBits.getBytes(), "UnicodeBig")); - - - // Test the writePDF method to ensure that the Strings are correct when written into PDF. 
-// ByteArrayOutputStream out = new ByteArrayOutputStream(); -// COSWriter.writeString(stringAscii, out); -// assertEquals("(" + textAscii + ")", out.toString("ASCII")); - -// out.reset(); -// COSWriter.writeString(string8Bit, out); -// StringBuffer hex = new StringBuffer(); -// for(char c : text8Bit.toCharArray()) -// { -// hex.append( Integer.toHexString(c).toUpperCase() ); -// } -// assertEquals("<"+ hex +">", out.toString("ASCII")); - -// out.reset(); -// COSWriter.writeString(stringHighBits, out); -// hex = new StringBuffer(); -// hex.append("FEFF"); // Byte Order Mark -// for(char c : textHighBits.toCharArray()) -// { -// hex.append( Integer.toHexString(c).toUpperCase() ); -// } -// assertEquals("<"+ hex +">", out.toString("ASCII")); - } - - @Override - @Test - void testAccept() throws IOException - { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - TestVisitor visitor = new TestVisitor( outStream ); - COSString testSubj = new COSString(ESC_CHAR_STRING); - testSubj.accept(visitor); - String expected = "(" + ESC_CHAR_STRING_PDF_FORMAT + ")"; - assertEquals(expected, outStream.toString()); - outStream.reset(); - COSString testSubjHex = new COSString(ESC_CHAR_STRING, true); - testSubjHex.accept(visitor); - expected = "<" + createHex(ESC_CHAR_STRING) + ">"; - assertEquals(expected, outStream.toString()); - } - - /** - * Tests equals(Object) - ensure that the Object.equals() contract is obeyed. - */ - @Test - void testEquals() - { - // Check all these several times for consistency - for (int i = 0; i < 10; i++) - { - // Reflexive - COSString x1 = new COSString("Test"); - assertEquals(x1, x1); - - // Symmetry i.e. 
if x == y then y == x - COSString y1 = new COSString("Test"); - assertEquals(x1, y1); - assertEquals(y1, x1); - COSString x2 = new COSString("Test", true); - // also if x != y then y != x - assertNotEquals(x1, x2); - assertNotEquals(x2, x1); - - // Transitive if x == y && y == z then x == z - COSString z1 = new COSString("Test"); - assertEquals(x1, y1); - assertEquals(y1, z1); - assertEquals(x1, z1); - // Test the negative as well if x1 == y1 && y1 != x2 then x1 != x2 - assertEquals(x1, y1); - assertNotEquals(y1, x2); - assertNotEquals(x1, x2); - } - } - - /** - * Test hashCode() - tests that the Object.hashCode() contract is obeyed. - */ - @Test - void testHashCode() - { - COSString str1 = new COSString("Test1"); - COSString str2 = new COSString("Test2"); - assertNotEquals(str1.hashCode(), str2.hashCode()); - COSString str3 = new COSString("Test1"); - assertEquals(str1.hashCode(), str3.hashCode()); - COSString str3Hex = new COSString("Test1", true); - assertNotEquals(str1.hashCode(), str3Hex.hashCode()); - } - - /** - * Test testCompareFromHexString() - tests that Strings created from hex - * compare correctly (PDFBOX-2401) - * - * @throws IOException - */ - @SuppressWarnings({"java:S5863"}) // don't flag tests for reflexivity - @Test - void testCompareFromHexString() throws IOException - { - COSString test1 = COSString.parseHex("000000FF000000"); - COSString test2 = COSString.parseHex("000000FF00FFFF"); - assertEquals(test1, test1); - assertEquals(test2, test2); - assertNotEquals(test1.toHexString(), test2.toHexString()); - assertFalse(Arrays.equals(test1.getBytes(), test2.getBytes())); - assertNotEquals(test1, test2); - assertNotEquals(test2, test1); - assertNotEquals(test1.getString(), test2.getString()); - } - - /** - * PDFBOX-3881: Test that if String has only the BOM, that it be an empty string. 
- * - * @throws IOException - */ - @Test - void testEmptyStringWithBOM() throws IOException - { - assertTrue(COSString.parseHex("FEFF").getString().isEmpty()); - assertTrue(COSString.parseHex("FFFE").getString().isEmpty()); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java deleted file mode 100644 index eb02974e244..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestCOSUpdateInfo.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.pdfbox.cos; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Test class for {@link COSUpdateInfo}. - */ -class TestCOSUpdateInfo -{ - - /** - * Tests isNeedToBeUpdate() and setNeedToBeUpdate() - tests the getter/setter methods. 
- */ - @Test - void testIsSetNeedToBeUpdate() - { - COSDocumentState origin = new COSDocumentState(); - origin.setParsing(false); - // COSDictionary - COSUpdateInfo testCOSDictionary = new COSDictionary(); - testCOSDictionary.setNeedToBeUpdated(true); - assertFalse(testCOSDictionary.isNeedToBeUpdated()); - testCOSDictionary.getUpdateState().setOriginDocumentState(origin); - testCOSDictionary.setNeedToBeUpdated(true); - assertTrue(testCOSDictionary.isNeedToBeUpdated()); - testCOSDictionary.setNeedToBeUpdated(false); - assertFalse(testCOSDictionary.isNeedToBeUpdated()); - - // COSObject - COSUpdateInfo testCOSObject; - testCOSObject = new COSObject( null); - testCOSObject.setNeedToBeUpdated(true); - assertFalse(testCOSObject.isNeedToBeUpdated()); - testCOSObject.getUpdateState().setOriginDocumentState(origin); - testCOSObject.setNeedToBeUpdated(true); - assertTrue(testCOSObject.isNeedToBeUpdated()); - testCOSObject.setNeedToBeUpdated(false); - assertFalse(testCOSObject.isNeedToBeUpdated()); - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java deleted file mode 100644 index 2821207c6f3..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/TestVisitor.java +++ /dev/null @@ -1,96 +0,0 @@ -package org.apache.pdfbox.cos; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; - -import static org.apache.pdfbox.cos.TestCOSString.ESC_CHAR_STRING_PDF_FORMAT; - -public class TestVisitor implements ICOSVisitor -{ - private final ByteArrayOutputStream output; - public TestVisitor( ByteArrayOutputStream outStream ) - { - output = outStream; - } - - @Override - public void visitFromArray( COSArray obj ) throws IOException - { - // Write something to the output buffer just so we know that the visitor got called. 
- - } - - @Override - public void visitFromBoolean( COSBoolean cosBoolean ) throws IOException - { - if( cosBoolean.getValue() ) - { - output.write( COSBoolean.TRUE_BYTES ); - } - else - { - output.write( COSBoolean.FALSE_BYTES ); - } - - } - - @Override - public void visitFromDictionary( COSDictionary obj ) throws IOException - { - // Write something to the output buffer just so we know that the visitor got called. - - } - - @Override - public void visitFromDocument( COSDocument obj ) throws IOException - { - // Write something to the output buffer just so we know that the visitor got called. - - } - - @Override - public void visitFromFloat( COSFloat cosFloat ) throws IOException - { - // Write something to the output buffer just so we know that the visitor got called. - output.write( cosFloat.toString().getBytes( StandardCharsets.ISO_8859_1 ) ); - } - - @Override - public void visitFromInt( COSInteger cosInteger ) throws IOException - { - // Write something to the output buffer just so we know that the visitor got called. 
- output.write( Integer.toString( cosInteger.intValue() ).getBytes( StandardCharsets.ISO_8859_1 ) ); - } - - @Override - public void visitFromName( COSName obj ) throws IOException - { - throw new IOException(); - } - - @Override - public void visitFromNull( COSNull obj ) throws IOException - { - output.write( "COSNull.NULL".getBytes( StandardCharsets.ISO_8859_1 )); - } - - @Override - public void visitFromStream( COSStream obj ) throws IOException - { - - } - - @Override - public void visitFromString( COSString cosString ) throws IOException - { - if (cosString.getForceHexForm()) - { - output.write( ("<" + cosString.toHexString() + ">").getBytes( StandardCharsets.ISO_8859_1 )); - } - else - { - output.write( ("(" + ESC_CHAR_STRING_PDF_FORMAT + ")").getBytes( StandardCharsets.ISO_8859_1 )); - } - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java deleted file mode 100644 index 0045d7ec796..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/UnmodifiableCOSDictionaryTest.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos; - -import java.util.Calendar; -//import org.apache.pdfbox.pdmodel.font.encoding.Encoding; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.fail; - -class UnmodifiableCOSDictionaryTest -{ - @Test - void testUnmodifiableCOSDictionary() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.clear(); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.removeItem( COSName.A); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - - COSDictionary cosDictionary = new COSDictionary(); - try - { - unmodifiableCOSDictionary.addAll(cosDictionary); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setFlag(COSName.A, 0, true); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setNeedToBeUpdated(true); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetItem() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setItem(COSName.A, COSName.A); - fail("An UnsupportedOperationException should have been thrown"); - } - catch(UnsupportedOperationException exception) - { - // nothing to do - } - -// Encoding standardEncoding = Encoding.getInstance(COSName.STANDARD_ENCODING); -// try -// { -// unmodifiableCOSDictionary.setItem(COSName.A, 
standardEncoding); -// fail("An UnsupportedOperationException should have been thrown"); -// } -// catch(UnsupportedOperationException exception) -// { -// // nothing to do -// } - - try - { - unmodifiableCOSDictionary.setItem("A", COSName.A); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - -// try -// { -// unmodifiableCOSDictionary.setItem("A", standardEncoding); -// fail("An UnsupportedOperationException should have been thrown"); -// } -// catch(UnsupportedOperationException exception) -// { -// // nothing to do -// } - } - - @Test - void testSetBoolean() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setBoolean(COSName.A, true); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setBoolean("A", true); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetName() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setName(COSName.A, "A"); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setName("A", "A"); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetDate() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - Calendar calendar = Calendar.getInstance(); - try - { - unmodifiableCOSDictionary.setDate(COSName.A, calendar); - fail("An 
UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setDate("A", calendar); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetEmbeddedDate() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - Calendar calendar = Calendar.getInstance(); - try - { - unmodifiableCOSDictionary.setEmbeddedDate(COSName.PARAMS, COSName.A, calendar); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - } - - @Test - void testSetString() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setString(COSName.A, "A"); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setString("A", "A"); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetEmbeddedString() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setEmbeddedString(COSName.PARAMS, COSName.A, "A"); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetInt() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setInt(COSName.A, 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch 
(UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setInt("A", 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetEmbeddedInt() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setEmbeddedInt(COSName.PARAMS, COSName.A, 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetLong() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setLong(COSName.A, 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setLong("A", 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - - @Test - void testSetFloat() - { - COSDictionary unmodifiableCOSDictionary = new COSDictionary().asUnmodifiableDictionary(); - try - { - unmodifiableCOSDictionary.setFloat(COSName.A, 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - - try - { - unmodifiableCOSDictionary.setFloat("A", 0); - fail("An UnsupportedOperationException should have been thrown"); - } - catch (UnsupportedOperationException exception) - { - // nothing to do - } - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java deleted file mode 100644 index a9f5c994a43..00000000000 --- 
a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/PredictorTest.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2015 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.filter; - -import org.junit.jupiter.api.Test; - -import static org.apache.pdfbox.cos.filter.Predictor.calcSetBitSeq; -import static org.apache.pdfbox.cos.filter.Predictor.getBitSeq; -import static org.junit.jupiter.api.Assertions.assertEquals; - -/** - * - * @author Tilman Hausherr - */ -class PredictorTest -{ - /** - * Test of getBitSeq method, of class Predictor. 
- */ - @Test - void testGetBitSeq() - { - assertEquals(Integer.parseInt("11111111", 2), getBitSeq(Integer.parseInt("11111111", 2), 0, 8)); - assertEquals(Integer.parseInt("00000000", 2), getBitSeq(Integer.parseInt("00000000", 2), 0, 8)); - assertEquals(Integer.parseInt("1", 2), getBitSeq(Integer.parseInt("11111111", 2), 0, 1)); - assertEquals(Integer.parseInt("0", 2), getBitSeq(Integer.parseInt("00000000", 2), 0, 1)); - assertEquals(Integer.parseInt("001", 2), getBitSeq(Integer.parseInt("00110001", 2), 0, 3)); - assertEquals(Integer.parseInt("10101010", 2), getBitSeq(Integer.parseInt("10101010", 2), 0, 8)); - assertEquals(Integer.parseInt("10", 2), getBitSeq(Integer.parseInt("10101010", 2), 0, 2)); - assertEquals(Integer.parseInt("01", 2), getBitSeq(Integer.parseInt("10101010", 2), 1, 2)); - assertEquals(Integer.parseInt("10", 2), getBitSeq(Integer.parseInt("10101010", 2), 2, 2)); - assertEquals(Integer.parseInt("101", 2), getBitSeq(Integer.parseInt("10101010", 2), 3, 3)); - assertEquals(Integer.parseInt("1010101", 2), getBitSeq(Integer.parseInt("10101010", 2), 1, 7)); - assertEquals(Integer.parseInt("01", 2), getBitSeq(Integer.parseInt("10101010", 2), 3, 2)); - assertEquals(Integer.parseInt("00110001", 2), getBitSeq(Integer.parseInt("00110001", 2), 0, 8)); - assertEquals(Integer.parseInt("10001", 2), getBitSeq(Integer.parseInt("00110001", 2), 0, 5)); - assertEquals(Integer.parseInt("0011", 2), getBitSeq(Integer.parseInt("00110001", 2), 4, 4)); - assertEquals(Integer.parseInt("110", 2), getBitSeq(Integer.parseInt("00110001", 2), 3, 3)); - assertEquals(Integer.parseInt("00", 2), getBitSeq(Integer.parseInt("00110001", 2), 6, 2)); - assertEquals(Integer.parseInt("1111", 2), getBitSeq(Integer.parseInt("11110000", 2), 4, 4)); - assertEquals(Integer.parseInt("11", 2), getBitSeq(Integer.parseInt("11110000", 2), 6, 2)); - assertEquals(Integer.parseInt("0000", 2), getBitSeq(Integer.parseInt("11110000", 2), 0, 4)); - } - - /** - * Test of calcSetBitSeq method, of class 
Predictor. - */ - @Test - void testCalcSetBitSeq() - { - assertEquals(Integer.parseInt("00000000", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 8, 0)); - assertEquals(Integer.parseInt("00000001", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 8, 1)); - assertEquals(Integer.parseInt("11111111", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 1, 1)); - assertEquals(Integer.parseInt("11111101", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 2, 1)); - assertEquals(Integer.parseInt("11111001", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 3, 1)); - assertEquals(Integer.parseInt("00000001", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 0, 2, 1)); - assertEquals(Integer.parseInt("11110001", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 4, 1)); - assertEquals(Integer.parseInt("11100011", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 1, 4, 1)); - assertEquals(Integer.parseInt("00000010", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 1, 1, 1)); - assertEquals(Integer.parseInt("11111111", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 7, 1, 1)); - assertEquals(Integer.parseInt("01111111", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 7, 1, 0)); - assertEquals(Integer.parseInt("10000000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 7, 1, 1)); - assertEquals(Integer.parseInt("00000000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 7, 1, 0)); - assertEquals(Integer.parseInt("01000000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 6, 1, 1)); - assertEquals(Integer.parseInt("00000000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 6, 1, 0)); - assertEquals(Integer.parseInt("00110000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 3, 3, 6)); - assertEquals(Integer.parseInt("01100000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 4, 3, 6)); - assertEquals(Integer.parseInt("11000000", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 5, 3, 6)); - 
assertEquals(Integer.parseInt("11111111", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 0, 8, 0xFF)); - assertEquals(Integer.parseInt("11111111", 2), calcSetBitSeq(Integer.parseInt("11111111", 2), 0, 8, 0xFF)); - assertEquals(0x7E, calcSetBitSeq(0xA5, 0, 8, 0xD9 + 0xA5)); - - // check truncation - assertEquals(Integer.parseInt("00000010", 2), calcSetBitSeq(Integer.parseInt("00000000", 2), 1, 1, 3)); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java deleted file mode 100644 index 36b6c757be0..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/TestFilters.java +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.filter; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Date; -import java.util.Random; -import org.apache.pdfbox.cos.COSDictionary; -import org.apache.pdfbox.cos.COSName; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -/** - * This will test all of the filters in the PDFBox system. - */ -class TestFilters -{ - /** - * This will test all of the filters in the system. There will be COUNT - * of deterministic tests and COUNT of non-deterministic tests, see also - * the discussion in PDFBOX-1977. - * - * @throws IOException If there is an exception while encoding. - */ - @Test - void testFilters() // throws IOException - { - final int COUNT = 10; - Random rd = new Random(123456); - // 20 tests; 10 with deterministic seeding, 10 with relatively random. 
- for (int iter = 0; iter < COUNT * 2; iter++) - { - long seed; - if (iter < COUNT) - { - // deterministic seed - seed = rd.nextLong(); - } - else - { - // non-deterministic seed - Date currentDate = new Date(); - seed = new Random(currentDate.getTime() + iter ).nextLong(); - } - boolean success = true; - try - { - final Random random = new Random(seed); - final int numBytes = 10000 + random.nextInt(20000); - byte[] original = new byte[numBytes]; - - int upto = 0; - while(upto < numBytes) - { - final int left = numBytes - upto; - if (random.nextBoolean() || left < 2) - { - // Fill w/ pseudo-random bytes: - final int end = upto + Math.min(left, 10+random.nextInt(100)); - while(upto < end) - { - original[upto++] = (byte) random.nextInt(); - } - } - else - { - // Fill w/ very predictable bytes: - final int end = upto + Math.min(left, 2+random.nextInt(10)); - final byte value = (byte) random.nextInt(4); - while(upto < end) - { - original[upto++] = value; - } - } - } - - for( Filter filter : FilterFactory.INSTANCE.getAllFilters() ) - { - // Skip filters that don't currently support roundtripping - if (filter instanceof DCTFilter || - filter instanceof CCITTFaxFilter || - filter instanceof JPXFilter || - filter instanceof JBIG2Filter) - { - continue; - } - - try - { - System.out.println( "Testing " + filter.toString() + "; seed: " + seed % 233 ); - checkEncodeDecode( filter, original ); - } - catch (IOException e) - { - System.out.println( "Error checking encoding for filter " - + filter.toString() + "; continuing" ); - success = false; - } - catch (AssertionError e) - { - System.out.println( e + "; continuing" ); - success = false; - } - } - } - finally - { - if (!success) - { - System.err.println("NOTE: test failed with seed=" + seed); - } - } - } - } - - /** - * TODO: move to pdmodel package - * - * This will test the use of identity filter to decode stream and string. - * This test threw an IOException before the correction. 
- * - * @throws IOException - */ -// @Test -// void testPDFBOX4517() throws IOException -// { -// Loader.loadPDF(new File("target/pdfs/PDFBOX-4517-cryptfilter.pdf"), -// "userpassword1234"); -// } - - /** - * This will test the LZW filter with the sequence that failed in PDFBOX-1977. - * To check that the test itself is legit, revert LZWFilter.java to rev 1571801, - * which should fail this test. - * - * @throws IOException - */ - @Test - void testPDFBOX1977() throws IOException - { - Filter lzwFilter = FilterFactory.INSTANCE.getFilter(COSName.LZW_DECODE); - InputStream in = this.getClass().getResourceAsStream("PDFBOX-1977.bin"); - byte[] byteArray = in.readAllBytes(); - checkEncodeDecode(lzwFilter, byteArray); - } - - /** - * Test simple and corner cases (128 identical, 128 identical at the end) of RLE implementation. - * 128 non identical bytes likely to be caught in random testing. - * - * @throws IOException - */ - @Test - void testRLE() throws IOException - { - Filter rleFilter = FilterFactory.INSTANCE.getFilter(COSName.RUN_LENGTH_DECODE); - byte[] input0 = new byte[0]; - checkEncodeDecode(rleFilter, input0); - byte[] input1 = { 1, 2, 3, 4, 5, (byte) 128, (byte) 140, (byte) 180, (byte) 0xFF}; - checkEncodeDecode(rleFilter, input1); - byte[] input2 = new byte[10]; - checkEncodeDecode(rleFilter, input2); - byte[] input3 = new byte[128]; - checkEncodeDecode(rleFilter, input3); - byte[] input4 = new byte[129]; - checkEncodeDecode(rleFilter, input4); - byte[] input5 = new byte[128 + 128]; - checkEncodeDecode(rleFilter, input5); - byte[] input6 = new byte[1]; - checkEncodeDecode(rleFilter, input6); - byte[] input7 = {1, 2}; - checkEncodeDecode(rleFilter, input7); - byte[] input8 = new byte[2]; - checkEncodeDecode(rleFilter, input8); - } - - @Test - void testEmptyFilterList() throws Exception - { - assertThrows(IllegalArgumentException.class, () -> - { - Filter.decode(null, new ArrayList<>(), new COSDictionary(), null, null); - }); - } - - private void 
checkEncodeDecode(Filter filter, byte[] original) throws IOException - { - ByteArrayOutputStream encoded = new ByteArrayOutputStream(); - filter.encode(new ByteArrayInputStream(original), encoded, new COSDictionary()); - ByteArrayOutputStream decoded = new ByteArrayOutputStream(); - filter.decode(new ByteArrayInputStream(encoded.toByteArray()), - decoded, new COSDictionary(), 0); -// if (filter instanceof LZWFilter) -// original = null; - assertArrayEquals(original, decoded.toByteArray(), - "Data that is encoded and then decoded through " + filter.getClass() - + " does not match the original data"); - } -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html b/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html deleted file mode 100644 index 42247d09ace..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/filter/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -These classes will be used to test the various filters that are available with PDFBox. - - diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html b/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html deleted file mode 100644 index 5e42f5a153b..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -These classes will be used to test the various COS objects that make up the core of PDFBox. - - diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java deleted file mode 100644 index 09279fbe5d7..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/StringUtilTest.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.util; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; - -class StringUtilTest -{ - @Test - void testSplitOnSpace_happyPath() - { - String[] result = StringUtil.splitOnSpace( "a b c"); - assertArrayEquals(new String[] {"a", "b", "c"}, result); - } - - @Test - void testSplitOnSpace_emptyString() - { - String[] result = StringUtil.splitOnSpace(""); - assertArrayEquals(new String[] {""}, result); - } - - @Test - void testSplitOnSpace_onlySpaces() - { - String[] result = StringUtil.splitOnSpace(" "); - assertArrayEquals(new String[] {}, result); - } - - @Test - void testTokenizeOnSpace_happyPath() - { - String[] result = StringUtil.tokenizeOnSpace("a b c"); - assertArrayEquals(new String[] {"a", " ", "b", " ", "c"}, result); - } - - @Test - void testTokenizeOnSpace_emptyString() - { - String[] result = StringUtil.tokenizeOnSpace(""); - assertArrayEquals(new String[] {""}, result); - } - - @Test - void testTokenizeOnSpace_onlySpaces() - { - String[] result = StringUtil.tokenizeOnSpace(" "); - assertArrayEquals(new String[] {" ", " ", " "}, result); - } - - @Test - void testTokenizeOnSpace_onlySpacesWithText() - { - String[] result = StringUtil.tokenizeOnSpace(" a "); - assertArrayEquals(new String[] {" ", " ", "a", " ", " "}, result); - } -} \ No newline at end of file diff --git 
a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java deleted file mode 100644 index f6bf719741e..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestDateUtil.java +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.pdfbox.cos.util; - -import java.io.IOException; -import java.text.ParsePosition; -import java.util.Calendar; -import java.util.GregorianCalendar; -import java.util.Locale; -import java.util.SimpleTimeZone; -import java.util.TimeZone; -import org.apache.pdfbox.cos.COSString; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -/** - * Test the date conversion utility. - * - * @author Ben Litchfield - * @author Fred Hansen - * - */ -class TestDateUtil -{ - private static final int MINS = 60*1000, HRS = 60*MINS; - // expect parse fail - private static final int BAD = -666; - - /** - * Test common date formats. 
- * - * @throws Exception when there is an exception - */ - @Test - void testExtract() throws Exception - { - TimeZone timezone = TimeZone.getDefault(); - TimeZone.setDefault(TimeZone.getTimeZone("UTC")); - - assertCalendarEquals( new GregorianCalendar( 2005, 4, 12 ), - DateConverter.toCalendar( "D:05/12/2005" ) ); - assertCalendarEquals( new GregorianCalendar( 2005, 4,12,15,57,16 ), - DateConverter.toCalendar( "5/12/2005 15:57:16" ) ); - - TimeZone.setDefault(timezone); - // check that new toCalendarSTATIC gives null for a null arg - assertNull(DateConverter.toCalendar((String)null)); - } - - /** - * Calendar.equals test case. - * - * @param expect the expected calendar value - * @param was the calendar value to be checked - */ - private void assertCalendarEquals(Calendar expect, Calendar was) - { - assertEquals( expect.getTimeInMillis(), was.getTimeInMillis() ); - assertEquals( expect.getTimeZone().getRawOffset(), - was.getTimeZone().getRawOffset() ); - } - - /** - * Test case for - * PDFBOX-598. - * - * @throws IOException if something went wrong. - */ - @Test - void testDateConversion() throws IOException - { - Calendar c = DateConverter.toCalendar("D:20050526205258+01'00'"); - assertEquals(2005, c.get(Calendar.YEAR)); - assertEquals(05-1, c.get(Calendar.MONTH)); - assertEquals(26, c.get(Calendar.DAY_OF_MONTH)); - assertEquals(20, c.get(Calendar.HOUR_OF_DAY)); - assertEquals(52, c.get(Calendar.MINUTE)); - assertEquals(58, c.get(Calendar.SECOND)); - assertEquals(0, c.get(Calendar.MILLISECOND)); - } - - /** - * Check toCalendarSTATIC. 
- * @param yr expected year value - * If an IOException is the expected result, yr should be null - * @param mon expected month value - * @param day expected dayofmonth value - * @param hr expected hour value - * @param min expected minute value - * @param sec expected second value - * @param offsetHours expected timezone offset in hours (-11..11) - * @param offsetMinutes expected timezone offset in minutes (0..59) - * @param orig A date to be parsed. - * @throws Exception If an unexpected error occurs. - */ - private static void checkParse(int yr, int mon, int day, - int hr, int min, int sec, int offsetHours, int offsetMinutes, - String orig) throws Exception - { - String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", - yr,mon,day,hr,min,sec,offsetHours,offsetMinutes); - String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" - + "T%02d:%02d:%02d%+03d:%02d", - yr,mon,day,hr,min,sec,offsetHours,offsetMinutes); - Calendar cal = DateConverter.toCalendar(orig); - if (cal != null) - { - assertEquals(iso8601Date, DateConverter.toISO8601(cal)); - assertEquals(pdfDate, DateConverter.toString(cal)); - } - // new toCalendarSTATIC() - cal = DateConverter.toCalendar(orig); - if (yr == BAD) - { - assertEquals(null, cal); - } - else - { - assertEquals(pdfDate, DateConverter.toString(cal)); - } - } - - /** - * Test dates in various formats. - * Years differ to make it easier to find failures. 
- * @throws Exception none expected - */ - @Test - void testDateConverter() throws Exception - { - int year = Calendar.getInstance().get(Calendar.YEAR); - checkParse(2010, 4,23, 0, 0, 0, 0, 0, "D:20100423"); - checkParse(2011, 4,23, 0, 0, 0, 0, 0, "20110423"); - checkParse(2012, 1, 1, 0, 0, 0, 0, 0, "D:2012"); - checkParse(2013, 1, 1, 0, 0, 0, 0, 0, "2013"); - - // PDFBOX-1219 - checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33+01:00 "); - - // Same with milliseconds - checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33.123+01:00"); - - // PDFBOX-465 - checkParse(2002, 5,12, 9,47, 0, 0, 0, "9:47 5/12/2002"); - // PDFBOX-465 - checkParse(2003,12,17, 2, 2, 3, 0, 0, "200312172:2:3"); - // PDFBOX-465 - checkParse(2009, 3,19,20, 1,22, 0, 0, " 20090319 200122"); - - checkParse(2014, 4, 1, 0, 0, 0, +2, 0, "20140401+0200"); - // "EEEE, MMM dd, yy", - checkParse(2115, 1,11, 0, 0, 0, 0, 0, "Friday, January 11, 2115"); - // "EEEE, MMM dd, yy", - checkParse(1915, 1,11, 0, 0, 0, 0, 0, "Monday, Jan 11, 1915"); - // "EEEE, MMM dd, yy", - checkParse(2215, 1,11, 0, 0, 0, 0, 0, "Wed, January 11, 2215"); - // "EEEE, MMM dd, yy", - checkParse(2015, 1,11, 0, 0, 0, 0, 0, " Sun, January 11, 2015 "); - checkParse(2016, 4, 1, 0, 0, 0, +4, 0, "20160401+04'00'"); - checkParse(2017, 4, 1, 0, 0, 0, +9, 0, "20170401+09'00'"); - checkParse(2017, 4, 1, 0, 0, 0, +9, 30, "20170401+09'30'"); - checkParse(2018, 4, 1, 0, 0, 0, -2, 0, "20180401-02'00'"); - checkParse(2019, 4, 1, 6, 1, 1, -11, 0, "20190401 6:1:1 -1100"); - checkParse(2020, 5,26,11,25,10, 0, 0, "26 May 2020 11:25:10"); - checkParse(2021, 5,26,11,23, 0, 0, 0, "26 May 2021 11:23"); - - // half hour timezones - checkParse(2016, 4, 1, 0, 0, 0, +4, 30, "20160401+04'30'"); - checkParse(2017, 4, 1, 0, 0, 0, +9, 30, "20170401+09'30'"); - checkParse(2018, 4, 1, 0, 0, 0, -2, 30, "20180401-02'30'"); - checkParse(2019, 4, 1, 6, 1, 1, -11, 30, "20190401 6:1:1 -1130"); - checkParse(2000, 2,29, 0, 0, 0, +11, 30, " 2000 Feb 29 GMT + 11:30"); 
- - // try dates invalid due to out of limit values - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Tuesday, May 32 2000 11:27 UCT"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "32 May 2000 11:25"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Tuesday, May 32 2000 11:25"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921301 11:25"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921232 11:25"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19921001 11:60"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "19920401 24:25"); - - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, - "20070430193647+713'00' illegal tz hr"); // PDFBOX-465 - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "nodigits"); - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "Unknown"); // PDFBOX-465 - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "333three digit year"); - - checkParse(2000, 2,29, 0, 0, 0, 0, 0, "2000 Feb 29"); // valid date - checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 GMT + 11:00"); // valid date - checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 UTC + 11:00"); // valid date - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2100 Feb 29 GMT+11"); // invalid date - checkParse(2012, 2,29, 0, 0, 0,+11, 0, "2012 Feb 29 GMT+11"); // valid date - checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2012 Feb 30 GMT+11"); // invalid date - - checkParse(1970,12,23, 0, 8, 0, 0, 0, "1970 12 23:08"); // test ambiguous date - - // cannot have P for PM - // cannot have Sat. 
instead of Sat - // EST works, but EDT does not; EST is a special kludge in Java - - // test cases for all entries on old formats list - // "E, dd MMM yyyy hh:mm:ss a" - checkParse(1971, 7, 6, 17, 22, 1, 0, 0, "Tuesday, 6 Jul 1971 5:22:1 PM"); - // "EE, MMM dd, yyyy hh:mm:ss a" - checkParse(1972, 7, 6, 17, 22, 1, 0, 0, "Thu, July 6, 1972 5:22:1 pm"); - // "MM/dd/yyyy hh:mm:ss" - checkParse(1973, 7, 6, 17, 22, 1, 0, 0, "7/6/1973 17:22:1"); - // "MM/dd/yyyy" - checkParse(1974, 7, 6, 0, 0, 0, 0, 0, "7/6/1974"); - // "yyyy-MM-dd'T'HH:mm:ss'Z'" - checkParse(1975, 7, 6, 17, 22, 1, -10, 0, "1975-7-6T17:22:1-1000"); - // "yyyy-MM-dd'T'HH:mm:ssz" - checkParse(1976, 7, 6, 17, 22, 1, -4, 0, "1976-7-6T17:22:1GMT-4"); - // "yyyy-MM-dd'T'HH:mm:ssz" - checkParse(BAD, 7, 6, 17, 22, 1, -4, 0, "2076-7-6T17:22:1EDT"); // "EDT" is not a known tz ID - // "yyyy-MM-dd'T'HH:mm:ssz" - checkParse(1960, 7, 6, 17, 22, 1, -5, 0, "1960-7-6T17:22:1EST"); // "EST" does not have a DST rule - // "EEEE, MMM dd, yyyy" - checkParse(1977, 7, 6, 0, 0, 0, 0, 0, "Wednesday, Jul 6, 1977"); - // "EEEE MMM dd, yyyy HH:mm:ss" - checkParse(1978, 7, 6, 17, 22, 1, 0, 0, "Thu Jul 6, 1978 17:22:1"); - // "EEEE MMM dd HH:mm:ss z yyyy" - checkParse(1979, 7, 6, 17, 22, 1, +8, 0, "Friday July 6 17:22:1 GMT+08:00 1979"); - // "EEEE, MMM dd, yyyy 'at' hh:mma" - checkParse(1980, 7, 6, 16, 23, 0, 0, 0, "Sun, Jul 6, 1980 at 4:23pm"); - // "EEEEEEEEEE, MMMMMMMMMMMM dd, yyyy" - checkParse(1981, 7, 6, 0, 0, 0, 0, 0, "Monday, July 6, 1981"); - // "dd MMM yyyy hh:mm:ss" - checkParse(1982, 7, 6, 17, 22, 1, 0, 0, "6 Jul 1982 17:22:1"); - // "M/dd/yyyy hh:mm:ss" - checkParse(1983, 7, 6, 17, 22, 1, 0, 0, "7/6/1983 17:22:1"); - // "MM/d/yyyy hh:mm:ss" - checkParse(1984, 7, 6, 17, 22, 1, 0, 0, "7/6/1984 17:22:01"); - // "M/dd/yyyy" - checkParse(1985, 7, 6, 0, 0, 0, 0, 0, "7/6/1985"); - // "MM/d/yyyy" - checkParse(1986, 7, 6, 0, 0, 0, 0, 0, "07/06/1986"); - // "M/d/yyyy hh:mm:ss" - checkParse(1987, 7, 6, 17, 22, 1, 0, 0, "7/6/1987 
17:22:1"); - // "M/d/yyyy" - checkParse(1988, 7, 6, 0, 0, 0, 0, 0, "7/6/1988"); - - // test ends of range of two digit years - checkParse(year-79, 1, 1, 0, 0, 0, 0, 0, "1/1/" + ((year-79)%100) - + " 00:00:00"); // "M/d/yy hh:mm:ss" - // "M/d/yy" - checkParse(year+19, 1, 1, 0, 0, 0, 0, 0, "1/1/" + ((year+19)%100)); - - // "yyyyMMdd hh:mm:ss Z" - checkParse(1991, 7, 6, 17, 7, 1, +6, 0, "19910706 17:7:1 Z+0600"); - // "yyyyMMdd hh:mm:ss" - checkParse(1992, 7, 6, 17, 7, 1, 0, 0, "19920706 17:07:01"); - // "yyyyMMdd'+00''00'''" - checkParse(1993, 7, 6, 0, 0, 0, 0, 0, "19930706+00'00'"); - // "yyyyMMdd'+01''00'''" - checkParse(1994, 7, 6, 0, 0, 0, 1, 0, "19940706+01'00'"); - // "yyyyMMdd'+02''00'''" - checkParse(1995, 7, 6, 0, 0, 0, 2, 0, "19950706+02'00'"); - // "yyyyMMdd'+03''00'''" - checkParse(1996, 7, 6, 0, 0, 0, 3, 0, "19960706+03'00'"); - // . . . - // "yyyyMMdd'-10''00'''" - checkParse(1997, 7, 6, 0, 0, 0, -10, 0, "19970706-10'00'"); - // "yyyyMMdd'-11''00'''" - checkParse(1998, 7, 6, 0, 0, 0, -11, 0, "19980706-11'00'"); - // "yyyyMMdd" - checkParse(1999, 7, 6, 0, 0, 0, 0, 0, "19990706"); - // ambiguous big-endian date - checkParse(2073,12,25, 0, 8, 0, 0, 0, "2073 12 25:08"); - - // PDFBOX-3315 GMT+12 - checkParse(2016, 4,11,16,01,15, 12, 0, "D:20160411160115+12'00'"); - } - - private static void checkToString(int yr, int mon, int day, - int hr, int min, int sec, - TimeZone tz, int offsetHours, int offsetMinutes) throws Exception - { - // construct a GregoreanCalendar from args - GregorianCalendar cal = new GregorianCalendar(tz, Locale.ENGLISH); - cal.set(yr, mon-1, day, hr, min, sec); - // create expected strings - String pdfDate = String.format(Locale.US, "D:%04d%02d%02d%02d%02d%02d%+03d'%02d'", - yr,mon,day,hr,min,sec,offsetHours, offsetMinutes); - String iso8601Date = String.format(Locale.US, "%04d-%02d-%02d" - + "T%02d:%02d:%02d%+03d:%02d", - yr,mon,day,hr,min,sec,offsetHours, offsetMinutes); - // compare outputs from toString and toISO8601 with expected 
values - assertEquals(pdfDate, DateConverter.toString(cal)); - assertEquals(iso8601Date, DateConverter.toISO8601(cal)); - } - - /** - * Test toString() and toISO8601() for various dates. - * - * @throws Exception if something went wrong. - */ - @Test - void testToString() throws Exception - { // std DST - TimeZone tzPgh = TimeZone.getTimeZone("America/New_York"); // -5 -4 - TimeZone tzBerlin = TimeZone.getTimeZone("Europe/Berlin"); // +1 +2 - TimeZone tzMaputo = TimeZone.getTimeZone("Africa/Maputo"); // +2 +2 - TimeZone tzAruba = TimeZone.getTimeZone("America/Aruba"); // -4 -4 - TimeZone tzJamaica = TimeZone.getTimeZone("America/Jamaica");// -5 -5 - TimeZone tzMcMurdo = TimeZone.getTimeZone("Antartica/McMurdo");// +12 +13 - TimeZone tzAdelaide = TimeZone.getTimeZone("Australia/Adelaide");// +9:30 +10:30 - - assertNull(DateConverter.toCalendar((COSString) null)); - assertNull(DateConverter.toCalendar((String) null)); - assertNull(DateConverter.toCalendar("D: ")); - assertNull(DateConverter.toCalendar("D:")); - - checkToString(2013, 8, 28, 3, 14, 15, tzPgh, -4, 0); - checkToString(2014, 2, 28, 3, 14, 15, tzPgh, -5, 0); - checkToString(2015, 8, 28, 3, 14, 15, tzBerlin, +2, 0); - checkToString(2016, 2, 28, 3, 14, 15, tzBerlin, +1, 0); - checkToString(2017, 8, 28, 3, 14, 15, tzAruba, -4, 0); - checkToString(2018, 1, 1, 1, 14, 15, tzJamaica, -5, 0); - checkToString(2019, 12, 31, 12, 59, 59, tzJamaica, -5, 0); - checkToString(2020, 2, 29, 0, 0, 0, tzMaputo, +2, 0); - checkToString(2015, 8, 28, 3, 14, 15, tzAdelaide, +9, 30); - checkToString(2016, 2, 28, 3, 14, 15, tzAdelaide, +10, 30); - // McMurdo has a daylightsavings rule, but it seems never to apply - for (int m = 1; m <= 12; ++m) - { - checkToString(1980 + m, m, 1, 1, 14, 15, tzMcMurdo, +0, 0); - } - } - - private static void checkParseTZ(int expect, String src) - { - GregorianCalendar dest = DateConverter.newGreg(); - DateConverter.parseTZoffset(src, dest, new ParsePosition(0)); - assertEquals(expect, 
dest.get(Calendar.ZONE_OFFSET)); - } - - /** - * Timezone testcase. - */ - @Test - void testParseTZ() - { - // 1st parameter is what to expect - checkParseTZ(0*HRS+0*MINS, "+00:00"); - checkParseTZ(0*HRS+0*MINS, "-0000"); - checkParseTZ(1*HRS+0*MINS, "+1:00"); - checkParseTZ(-(1*HRS+0*MINS), "-1:00"); - checkParseTZ(-(1*HRS+30*MINS), "-0130"); - checkParseTZ(11*HRS+59*MINS, "1159"); - checkParseTZ(12*HRS+30*MINS, "1230"); - checkParseTZ(-(12*HRS+30*MINS), "-12:30"); - checkParseTZ(0*HRS+0*MINS, "Z"); - checkParseTZ(-(8*HRS+0*MINS), "PST"); - checkParseTZ(0*HRS+0*MINS, "EDT"); // EDT does not parse - checkParseTZ(-(3*HRS+0*MINS), "GMT-0300"); - checkParseTZ(+(11*HRS+0*MINS), "GMT+11:00"); - checkParseTZ(-(6*HRS+0*MINS), "America/Chicago"); - checkParseTZ(+(3*HRS+0*MINS), "Europe/Moscow"); - checkParseTZ(+(9*HRS+30*MINS), "Australia/Adelaide"); - checkParseTZ((5*HRS+0*MINS), "0500"); - checkParseTZ((5*HRS+0*MINS), "+0500"); - checkParseTZ((11*HRS+0*MINS), "+11'00'"); - checkParseTZ(0, "Z"); - // PDFBOX-3315, PDFBOX-2420 - checkParseTZ(12*HRS+0*MINS, "+12:00"); - checkParseTZ(-(12*HRS+0*MINS), "-12:00"); - checkParseTZ(14*HRS+0*MINS, "1400"); - checkParseTZ(-(14*HRS+0*MINS), "-1400"); - } - - private static void checkFormatOffset(double off, String expect) - { - TimeZone tz = new SimpleTimeZone((int)(off*60*60*1000), "junkID"); - String got = DateConverter.formatTZoffset(tz.getRawOffset(), ":"); - assertEquals(expect, got); - } - - /** - * Timezone offset testcase. 
- */ - @Test - void testFormatTZoffset() - { - // 2nd parameter is what to expect - checkFormatOffset(-12.1, "-12:06"); - checkFormatOffset(12.1, "+12:06"); - checkFormatOffset(0, "+00:00"); - checkFormatOffset(-1, "-01:00"); - checkFormatOffset(.5, "+00:30"); - checkFormatOffset(-0.5, "-00:30"); - checkFormatOffset(.1, "+00:06"); - checkFormatOffset(-0.1, "-00:06"); - checkFormatOffset(-12, "-12:00"); - checkFormatOffset(12, "+12:00"); - checkFormatOffset(-11.5, "-11:30"); - checkFormatOffset(11.5, "+11:30"); - checkFormatOffset(11.9, "+11:54"); - checkFormatOffset(11.1, "+11:06"); - checkFormatOffset(-11.9, "-11:54"); - checkFormatOffset(-11.1, "-11:06"); - // PDFBOX-2420 - checkFormatOffset(14, "+14:00"); - checkFormatOffset(-14, "-14:00"); - } - -} diff --git a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java b/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java deleted file mode 100644 index 000fa7b3107..00000000000 --- a/pdfcos/src/test/java/org/apache/pdfbox/cos/util/TestHexUtil.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2016 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pdfbox.cos.util; - -import java.nio.charset.StandardCharsets; -import java.util.Locale; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -/** - * - * @author Michael Doswald - */ -class TestHexUtil -{ - - /** - * Test conversion from short to char[] - */ - @Test - void testGetCharsFromShortWithoutPassingInABuffer() - { - assertArrayEquals( new char[]{'0','0','0','0'}, Hex.getChars( (short)0x0000)); - assertArrayEquals (new char[]{'0','0','0','F'}, Hex.getChars((short)0x000F)); - assertArrayEquals( new char[]{'A','B','C','D'}, Hex.getChars((short)0xABCD)); - assertArrayEquals( new char[]{'B','A','B','E'}, Hex.getChars((short)0xCAFEBABE)); - } - - /** - * Check conversion from String to a char[] which contains the UTF16-BE encoded - * bytes of the string as hex digits - * - */ - @Test - void testGetCharsUTF16BE() - { - assertArrayEquals(new char[]{'0','0','6','1','0','0','6','2'}, Hex.getCharsUTF16BE("ab")); - assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助")); - } - - /** - * Test getBytes() and getString() and decodeHex() - */ - @Test - void testMisc() - { - byte[] byteSrcArray = new byte[256]; - for (int i = 0; i < 256; ++i) - { - byteSrcArray[i] = (byte) i; - - byte[] bytes = Hex.getBytes((byte) i); - assertEquals(2, bytes.length); - String s2 = String.format(Locale.US, "%02X", i); - assertArrayEquals(s2.getBytes(StandardCharsets.US_ASCII), bytes); - s2 = Hex.getString((byte) i); - assertArrayEquals(s2.getBytes(StandardCharsets.US_ASCII), bytes); - - assertArrayEquals(new byte[]{(byte) i}, Hex.decodeHex(s2)); - } - byte[] byteDstArray = Hex.getBytes(byteSrcArray); - assertEquals(byteDstArray.length, byteSrcArray.length * 2); - - String dstString = Hex.getString(byteSrcArray); - assertEquals(dstString.length(), byteSrcArray.length * 2); - - 
assertArrayEquals(dstString.getBytes(StandardCharsets.US_ASCII), byteDstArray); - - assertArrayEquals(byteSrcArray, Hex.decodeHex(dstString)); - } - -} diff --git a/pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin b/pdfcos/src/test/resources/org/apache/pdfbox/cos/filter/PDFBOX-1977.bin deleted file mode 100644 index ee9403ee5a0d413be441f7d677da63013a0ee50d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19321 zcmZ5nV_+U#(~gbCwr$%s8ryE{G`5{IZ0t0)jmBzh+x+T2?{|LeoZY?m%*-{{%sGo2 z2j~G#oG=SCK3l)sPN11| zS|UF?K>xiK0Pt^XkQ7S2IGa~`0kNtpi;$JJiPOQJ)Zu&-+e*Wtc7NFeLxQEjK0R&U zdK_`=-8Ed~^XJ#VghG`Q;R{IT_;v=v*OOny;wAb%c1<_30P=k(X+l)o9&~riL23#J zjJ3?7`yJ*1PJRCs88K>Z7m92X71V9Schi5he+GT;CFip36kohDE|Tu!YET&ZVB%B8 zu#PQVS2FcJ>iB>VkdRKTnM$H%G#LniEuw4tt~zNxRddxGw8Sz*uI)g(l)(OWYNBQ9 z6KX%|5jGtI$j7_~&ajupNh=~8jA41bb`RHLomj9hT1g`%R~SrMk!DRXDw&_Eq1UD= zc}UkPu{*Sid*%+mMrrHvBE9K8B!!%wboqf^A;TP^)>TY`rg6W?^NX+P7a~GO1 zVx+NViquh+F87PY#}m(`ao4k`Esp~Pd@caTsOY*6j9L3rd1ui`o9Vft>a50xsb6DT zO53_3Xcf^!r5VHR0vb_BOVl3t!i-tE*9J`pGt zrv+WGN+9y1~#|%B^R?ZCawa$Pp{n26K%0ENOnPNIv zT*SIdqXazTbhgM_!;lmex_gL!l!^{U{HqPtn7j{-qW$813!)a6hUzcu00DnSkk^Hl zi9et{+BQN|t(QEyZ6VY)Z!}nX&FWz_BTkH+aFF}w&ku`0DGn&~2W4$nI-%Aq!;7ll-!80 zR6T}HgJaui>6&m8acN+?WSI}0^I<-#!PT=0Ct|{PqH1QX-LicH9WT*k6IC+>>^KmD zSBg^AcF+4fh;C`K+4@8XO6uZN0_8XSgmf`mK>5RmA1wG;6wj)>F%(oaoHLzeky~B` zTnfuWC0e0oESQs}q)x@IJ?*LA`!PBiogya1mrxGKBn{VuDYlz8YfmNZ>!jS^ben+! 
z1r@#wsz|>`&t?ZY@d#6{Q@RFq)TG(HHXm6P1g07X{bT+A_5@W|xf0kJp)8aYW#al7 zlrhT3?2n&q&bs301lfYye7FX6;3{@lGK495-d7%0t2_~nDs%D47aOygeQ;mBfjRVu z8evHp*qg01;SY9#rEtm9~y9&vfauI+)qZz(9sDW zB5+kTIC3h&-Z{Y!rM<_`kiY2$;OtgZT!5sC5f!oNPpbs!jzQaoGQW$3hwt6Z@eD@E z*DgB-yTsHuVDo1r7JsHa1N_Reybi)=!|4|_JV1g2JD_(_-x9cAP0omSmIfWqcvK8U z-Z#nAVzduam|fs6O$nvfhZ3pJo|0Bzi^wfkD1%{G1eSS)Qs&_eYNnVxsa?`XM^91W zGYn*6tX0aB{CV2>&N@D5OLJ>aP7k!`plD^rcjycpX}96etOx|;JNnYOa1+%qpufG- zzbB_;8&^NvI&_-oR4E^}H5w>2bo_ZJLCgzsa+8&El^LyI8#;FZq-w7X<@MQHiAGs> zLhd{eJ+08W*r!Xj|L`m=Y``a&Z*YX`-1yRun|Qd~3KOM%u(0N*0?}B{mI#K;K-q9b*S&+x-lk!cPg1tKs^4z= zv;zeIRtpWvWC}n40z&}-6YF>31lQi2@6drC{H!+!g^Y?CROO}U?)LKZ2@zYo-9f|z zETbHa;;MF4H0@s~j;=K(1$uMCl5Ks{U^On-Lw|&h?>&aT9ka$90f4_-;GF%ts}56; zL$h3FWfVy;xY^i_qB~?{^VVR%M48xyi?i!bL5}Cq`PT=KRBd8mV8I!PcSkT)si+6) zIZYW)CPHHqd5X<`BR@LXkXLPSsgh*kATv#z~(Si>0yPkazaT&p}ue!D z9N5@b#tBr-+&x!APtscI;7?GN8no++vA}Sv;P`=R28s_M0IzU*uFK4G&wZ94z)F7y zocT#-DrQ^ghy(<(JUx0t+5@hA8<+Ltq`j63&kMQM17T=b%;){dRMRYP8BaBx9Of&x zwK~RQV3f4zERK9aa5TkpYeUPOXqGGTopx+rlJq`bAp z&PC)7@b1vi81$k7_|3=E`st>%!}HJd>`w1M!X8A`5`WX-f5Z^&k|D#Xi}#(VB*lWyG*9|C9uW(LkJI7PfI#p9|bd=^&hlU@ap zB*O&2X7$>IC4L1{1+{U6Vk4ydJe@Cpo;BDeOZM7^O0tWrLO~|f?c@)6OhHQhC5zAX z9(@WTIeDb#KQ>v(#dcMZ_py3{o+7d7)HWSV83iF&ZFJX>Ew?w-2y!~Q-GYBy={%Y2 z4j6|}tcQYpg(Vs8buP52loE=~>*dL`Q+fV+EES! 
z3%o-`-!A@n=1=ZgLrrgVJDjWSk#k3LF(pgSl7?^Ba%BQ?we979E8Ma{2n)fAuSu8f zi{Op77aYxQEIL>f)R-Y}C3&6nFIYeRL=V}&f92eepiY80S96>?ixO16?{#Vf%z0*P z9sbNvAIM$0&-qj3_sw^g{$%x^LqI@p_5QhJUD_Ez;0Vw$ZLwOf`Le>31B;cR`x^1D_34Y6_GB1iP6PYx8+2kM&sxcqae_0tTATI8wjiw2Lq=`fH8AQ#B`Z58w) z#mE4hs7WVsSnS;A6@B$y_JD%xOhK4-DNgk9H%>`N-`04wr0y2&)+yv+f=_Qr3#nr;(`b0U?C zZfUr~bPv4o-a|EVHGft9)Aj?i&j;*%w1aiirZ<<%R&Oae=49|g@pJN40Iw-bB|~gJ3^U%kN#c!89p{eg`8Ls zMKQskU*R1k5d~jrm{f5mkd#Wk&+M1?MjOq7;v~ww&m#|iA_t1q)!9gMtI?YBYp_3& z{VsnlXSwQYEyG!aFcA)#>7y4uf2g*Uo|iSq&p!sYX$dWerB2}$_8IdPEh$9;lL!OhBTeDY@S=l#jB?iPQ_^;G(Av4H6-e7@!&-ur{Br%)rccDIllOtorB)jB+`{?qkf=NQaW1y ztPi{`d@9CW(vB?AV9rKx(0NlMFU`Y;`hdY2Lm7_}EJVXg4K| znzcDavNp(tlm{RBh?#2f)(+1S0BhrVk@nXO6oeWnBA(ISDWI;mNL{b7>@Y$vGgU)> zLcDs?jV!V4^$Tx+3otR#0rr3dxQN93;cR`QbMCgw_PV=lq3=*b4)`ci!&Ts$%d%neY&eO+eUjI_2vNMcE}bJj6fQaJuz7r z%nAza03EQqf_$%^o+Q3zX6z@g-)p4Dy>e^8#klS1JYV1iyE^a=5H<8gZO{I8Iq*AdlnJDpBT7 zikR7r<%ZoQoHb~myG_Sdd*!DM%ndC!Y_avVf2sGoi2T_9-Tf#2{^)x#_|vZVQT3Bn zysWNIjA^OCT%`nT{04Bo!Ywb5%ENJZcphN5s9+DOvG{;Rj!YTejFFVs+=YdeDnYwS zwyaK*e3}!-aoKFcpTj2{o^6P`64L*Kc`n7J7)PVwew>zCO`WO$k>D`vsy+UMGXJ>b zKNB`nEJiw)N0znvmMYwaFXMmB)m7tEnsEfjDOdAdN{g zbelsQ??jenA6awiR{Wolmu;K@u^&Wu){NZEC$1gzvTx!yCIMJJO!%(&NH|mtlDu$i zzT}qW*|Fc06lG>2+SlZnG4e-|qoZ{BB5J-BWTcnA%;Z1LEfKCFApl1=T=aSjVcmFm zm*0pc%>z(^Rao86@ze21@8`8iZbl(q6KFLg>Q>P7IDb#MQ;5 z_g+Ny>8_DfLO5zi{`3>hyfL0G@Fc!(nQK^ksOa3uhg<{t1;`bUgm2tI*8;zz{0ASh z8E|MswzaN=&r!;7+9eAi9JOFjhLJ@Sn&h0@0w2e#6i4YNzFbNiTCxw$*9p8fyRbqk z!3i=aN(*N=b$6nJ2%;!;WZiI**1yXLt40i)fcG13vtIg~yGslcg#cp#iHXSewgT*gUfG`=Ln;{`Eh0M4igsv|iFBenOl)+w3c%;w9LuEY4Z>0wiBpTo65>3u)qI z3}dko2RBtBu4Fr?9hM2o?1oyI;@1gG}D&Gwi~kv>3dVcsK^(*cb) zR~#H=1?a4!Szj{o*Sw@F`=;w2z>$+iIB)#*@*P#*wFFt&go~#>%~tV>RY@SB2}-Dn_A2Df&|MKfF~@;=)va;sQC}gF6=#w@ z>JAsOg#=9&1R2y{NInZFK74bD8uj=A%HPtJeqeyWk9x60T-W&}tU(~ILWt`TD!&+T zHfXbaRQn--iNwSZSAHk*_$d<%*&vXRC=pl<)N)SKuYuVTx`}nKVcwA}IN~_uBAaaB zQ)*_M=?8VK%8&)S$z?Y26XyuL6SDZeKF7%Fr*R`Ki&4QhVH?FfPKr?MCT>5JOrFtv 
zybAfN=~6A4emScKPm|F*7E@m<`)qU2hMJtJcz#m)m92IrK7I7h_c)B)D-(}8*{kdqn{ZdTI+hFG!((FX{{L6t3#AVd4|tYDKRh6<=g|KJwu z?8xLkN)aan(_Un$Y{w+;5;R()UMO_=yzwE~kzTV9d^XR+w(K29VB&ZDCfJ?kAP-q^ z)yJE zjdaX3i(eqX@Uh=3gtucmCoj||QL2^G=eSaPj#>b4oPNIZ**UwEe+%#9D%v6TDMTgh z?YPT7(EmcCTqFg*l?AO*8OLI%zY?z>Jx;V|>|%KLv2UCPXELAJ-uco~lW>TF<6=@4 z?G9+Aa_XRBfFJtuy=C;X;z0Gs1X^n|;t7+zCNqdJpQ>{lOw_RDqpByWSS!Jl>hRR6 zM>&E8!^<|Uh9#asMd%J^>JSY15k%o5X*no_4t?KNV8cO|f_h%}lQv{hdU`rNT7)9C4Fr);x zEHX4t4Fu2f)+mFiAn{rhO?=!E;vS}96gFWJ+epvCno+G(hy#@)@a#84R_ro_Ihdpg z#x_SG2B;G-*8G*7h2?f8S#fo>ErfLY;5`Q&Mxh#Fsp6!`}cSn%TQE znBl$&U1-q3y{)STn{#yZeNS04f1s#d$o1&{8D--r#+S^#I`lChbmw5h6s!^Xs-V zocn-G9y8j!#!2IbbD)h+SWtBBq*LEoycMF;_zv#5LG{DB2?-Y06?2iEo6`i7OA3C5XG<@zZ@-hrV|wI!@kFarq30YE&UK*qA=7%1wX!lIUsEMCvAEycjhzmG4#m z{ElJ((5~-xBBCUcz~8by@0f(&lNa;zslBg&%5L(hdceaMJtCGdA?l1rk&n9f{HaqS z_-BaJiJ!)SKm_$l+o^F39coUS_c!s@_qCk~UA%*9WjaNx{q$CJ|Kt`B5c~__R^dYM zUVU;>MPcx?A>v%FuyqikQ*4M`Fo|r$f6fdcXz!_9AcE3BTOfM=2xlC)_yP4>=3U* z@XPelgyr>+A|PpUmTn~^38E1BBaCEON-#HY`+!XY0{!O&4GlCaGW&qLjG}PkHqz2G z?o3#iR?CGzuG?O+3C)wLc&UT86(;QY>P*Q^w|VQk>&d|^8!4_?M!H2<9uz(~- zqaqeh8~shQR9915!QiG!pk|vyu|+Fi{J_nk_g$Xcy|q&#>t*=cDJI-+h7rA)N#047 zw<>|mQN-dTJs;~5@&)M-gTu5HCc&|xke%zzlt90VRcc$M!LBm~TL##q-Y!Y?b~cpd zSE6yjHu80M$`CZUApzqtEa*pO_aX)ZkCmX{+u_FrXH@oJTmn#zQHOl0UZO*wQ)Fz8 zJCINx(1sp0O^Q9Zjx~s|d0R~G({G~OFJ2{D$Rh@%lcbRiRMXc$!Dgk%imtARE}~-B zK2@V)lMB3BrN?I->+=%N%9-D)j!8fR%NzBvjD)WHMym==Y-MFJBra1yN%wZO856B( zSE&ks-snXRjn>`pV;S?a{iy}t<@drFCwKXDI}4+@uePpT8#d`up0t~%7kzwNgmLWD zqyZ)5)cgqedQkHrg2YhEr!Ds%)Be1I`R_vZ?=tCc=a16w^2epWk56PaTIjInp=D;0 zA7HYFtF&l+$|u~Y{H%Vl8(TtDt@(8*8D1)oB?6T&INQWcHb&tSd9E!wo97HWWM+BM8C71j!y#kQ(`kFp<2gZl}mE-FfCv9(;qAC)cG%QMd{Gz+o!lmbXYf#m! 
zY34h^Sp6%1v9cyXAP#8E3D%O5WNi8Ogv_2=J8`~Hj55`+JK$oU)A2)5zp3gU%~;Ze zi-S%p{y^~`txyY+5o7@gzs1NXu+m8FU$(Ba*}$AkqpC&}P=C=L92sRUqnLl_!tR{v zUMuvKMhW|6B$Q}vb{s38D1(?MzD0RVD9s}F{u%k3azXnl@56CN8BrP4RgD9Q;n1YyAtgvdui7VXM1eBgk&x- zY+UkxP5fc+uW(zxU_D&0g^GD*%Pyzd`!;zp9Gek74LUUt596=_8HlKY2$bhRV*kDJ zN6P$YpvTBJG_Ip(BedN2U=_7jvNn7Kk!x3npT@#g?x7j3>twUBzo=W>{Lu7U6(ArS zPoi&Ukq&1Rba=dxQ3>p!P1|3QmQK)--cK{dI708W-wguA&kv_ z^@vH3Cc>iUtAG<%LIc^QEu)Z_%77cZ93pC{0yC8!OTtP#B=*1B)H@k;9KW&bPk>;HN7; zHhleou+Bb9S39Ya(ycN?uDw zO=vfWh1|$tE!7k6Njbty-%cBX&!M;TTJ4WlKheVA;C$2;6lcrI1Qup1tdDfxrw63s z5TySaaW;>-#B`%hKCIps4a-RhjqqAA48_lwR=Nb-$<_ zBo95*0ZhUsT~j^{lme zmK=+sG#rvQk@f8gW zw!5o1w3=0OB!0p#9oz{y=-^h*C2GxF+e` zJ7R)lea-@LGxQT%;jyo4T+mXWXBPSz{m8Ag4oxEP-*3QrbpgO0_9F2$w^baWfJUZg z)rkoU$2Ro%8_M>`dd)6Yar|}O4@TA$%D@>ve2Q1=&EYqfi$@U$dr;vl%_@`HMVs>_ zYLX8zI8uy}%6W1QAR8wT!_}gt?GgH_dLHMFlg|3+^Qlp~&2-TvE9* ziSuOKwzf^(YyqqZZ4duE^9S3%zyHO<=EcslL$`a;Lqpl%+i+BVT#aE3&M3?rOdU~w zp!F&=mC7>x=)trZ*IG8Tq+>ehP1jr4j)i$wX=^g?n7N1~hr-orE0f5k>l2G&1RFIf z`bI%Nwnn+)B?0ADTKKSJ4~UPgDt$_BCkj}SdGtjd8@bj9MC7eU>S9bpAHtoRXvcBv zd7IB)!owBRCNUm9*rDvb^NUNkq93r}6hLwTz2b+1j+9~9uGRFua6}PNdk@Of1~57R z`8h1ey566$VE?t;Pti^(_joDsqkzp&jfpF#J-KI)YCsx=p(dxYxbZgdj)|=-T(SgqB74}Hw8dTh)x1UjfD zTXCggl70L}>jT+8P?KpDal|;QJBYnp3Yx|sc)E2pICE?)iGIb-tZ=chc)J_bUzHB9=9G-m~yHd+Fh`s%-5a=44kXhgCXhvnEbEnT2b!) 
zVH@#^mZps9;crw_f#SPGuw%KbTY}s{GEY?>$X`3W()*Bl10yS8+=B;JfePIJtRenm zNJivN^LVz+76R&dT;_`jI`O1Q+2X5A%+%+W4+$xPNQYn4Y3@amVq*@lqkDW6(p%hS zXa;2~_8S%ZUoEQBrt&MOS4kK-=I(;4V8QhC(XX*@{+jgX#jl(q1T`G%2&f7#!^f5i zHq`_fgU%#&kPq$sj-A3>C(Nb<6JYX@*p!g*;y41erG9b=xTh{>Yj0L#4K=7Yy0?j0 zc%A`Pjg!7<0p%wS-y+Vz8oWC&(zpn;E8J?~IYq1PlUHkkwOO>H{EaZt2 zU7~V7>3hvoY;<@%;eJiJa3-P}m#X>PWp2njb_Dab(O5fABL51MG`LkcgOt0=>335= z7a1VaGM&k-rAyk3Nj-wTc<6z83WO1OwO~cFDyMlaAN!t;5+2RcgpQ~eiU#m#tg!Mb z`(0Pv>}7wxCt_;$)a2dX?Tn)3=H9j>SZX>?mtnSg{$*gtqO>2913+dgB%z^CNc@b2 zP+-=j98Z)kES8Wd_&hPj8%+GO$fJR)L~xLUT`(jsCAY{IVSL#n_l+tc`7(_#f{KM% z*NBYupeBcCic?>X8WWiifAe#1xSPGE@ifwVxBUOUPKZ#uzgh;M6+%Fn?wHkAYY7Tq z7NeNFt;uxCP(@iK3-44neI90RvyOpx$!EEg60? zux%+;4o>uC7cn#RKcWdrUl62JsER$PSiVraU>Rvxcmfc`g@#fUQP2lYbjN!(Jl#!8 zdI4lHRU4`??UvB*2O|NUq-tPl-F08fkt<~yoTtepcPrzer3d|&HUIzk#;0S|b;#)73?-8(1X={|_OATWw- zF1zHc5SWg|E0yaNA*XN4i977-bpqzK)z(<1zfO~0)H+$!GQQp2cWDDawr+&=exXAOql>pw}aXIr0|#_OT#e-V*)1M<%aF`#RTqr}i_<`PTQ8`dhGF zkjf=+1|Fa0R2nZ@5IXrp0ASozAWvT3yzw$y=479sO_nZKt`=39LiE=_!@%^7A#?b* z)szUCSBLiCXgg*CD4 z`KCUz@i}riRKT#@gT|k!u?f*QZuHp;bVhwI6M+PWZdKCu%zOoP34=(Zw&O9Zc_M~S zwkiooFFNVqLfje)e@oTx*=R_z1APcBzDE+vdYixU(=}6IATGjT94ViidC)ni{$jHj zgQtntp*?s9X^2d+Zxbez__XSxoyHr;P^o{kgONf=3~;#^*J#gO!}*)a(NTH>9F3cN z<}>4$q!gR@wJfJ;uo|%673{C>|8`h5le-f;32u}z)TNN%FPjn;$*i-N!~m(z98c>Y z3V1IcZUf8WFc2v8#6e4^Dx=4;C_&DkU|)^qw0hkhtjX%4N1_c7wB-L*NmrQYJ4>H& zsjwLZ$*tJtIF<4l>=2i%UhIZ42x7$2ce7svPuxlHNJ@j`ZQGXqPSOr&oTcEJfM$in zjg*q`C2;qSg=@m`RE8KZ9p8=05(D1(ly-e9bYg48SHLg-q^$!OerIPndKKq71gzGt zw~ClfqO5=tm|K!h3*OXTk!&`oA^SnY&J@4c=Ttl4U`?}oe|`Kc(*=&z%XgSi*CBlr zJ`A56R@5Rr4n6(rr25c^k2e`lVwef0a*vu>Xwkd|BEfb>_C4ZxTL|&vyPBK#DOYoW z9qC{tdK2{T64wO9duRzvU+0Ue%iPc7&15I%$lZz&b8`P-`!iKd$@+!nxf<9oTREB< zKNqKBudlJI&c~x*LqDT8*+}eOi@;#@J}ga6<5q_9#DP6c_9iyqqc z`qaI==__*jbkfC}fiVm9VR1RCH7%6k0;e0{1C5^~4*CHhjg!e4-+%BnZA?90AZufr zIj;Lllt0ty51jrT{6Prwvx-AAX%*W=GN^+6WSB?gUIooy{T7p5_t0(&Tli61zGA0Q zs1r+~2|Q;v?*n28Dyd?n%!xM-Bp{VZUc7?R80bQe#?T#{f5G^ppdLVPjouT-QK$ns 
z_U3FSh};uQ8&7_pPC)p54bChnS_K*u(%L~kr-FIN_ZNflrV4pDnh;iQ-Klx5)W*!P zg6bS}vkpAKOaa9lZEP-e^$Jw>+MK|jxm&C-yCE7+-~r)O;cNdD>HoTB-xwv~$!i61 zCfcVn9`<$bm=~hRg6Dmp9j()jC*R}sJ!Fc=a=6z5z~0BH76v1`u}~cjZ_616FcH&v zItiv=_5c;WXS(u{9|);b$x^4NwyBHSB|Z?Jd${TwVJv)hSDn!u7!cR%G5Xqq?p@xA zb$+IumM~er@SuA>tEeMAS!L;pu(s*t#?!4lthNfR5aY33hfRxUfsO0i)(f<-%@yN|^PdD~QWfiO z;*wo~3MGn|)^mS&^s7}Xsgj<^EEq8;B~VE#%PORPdbQRzn+|bk^FX0=%E7fsZRjMq zyAM{`+VUSVYpzLLpF8Q|>`X+ndN`mN~o0X=7CuMFw?#h}g}X zuD19E;@ScVav;_2%5S4IZDAd_9IJOgwUdJZUE~1sX~Ug)fa%z$eXYp+Dxj<9;%QQPmPun%S09@r98egR+}@D=#92x zv$B~proJAiJA?zom9sAwSzLJu-b&k2O*mRTN*@`MaI7qmNrXH_EFJM_m&`4doIm4w zO%n37^+2vwsrv~7!>&cj=vsDK<^LWOR=HA^miSU$> zWHLAM3C~oJXda@0*jikC8>}tbzoPz&B;U7dbhnB-tp2MwmuvK--i=zP{mR2i8gt|@ zfgL%+nGLdO8Ii}4=bm3vqpQ%mWWn(rKVb1P27WA&tRHWg6qn&|2qgXet-#MAe~d1w z(HD#h6o##aBW~VNSKs{(NixjMb?T6jD>kpZUQVDeK8|y=N;x^&;%0&O zrz`VzfbEy5_#xu6WRo&*5g*3+RMV>wFL5@8MR)xTz=o`E#eC)s6fTv>8yg>!%UAZu zV|NZHhu=BmF0IX5mCx7YGX#`MQ+KM7N-+CuIp76)8>39Gu+Tz`S{u3bF<88*>K)mlKqU~j?PCl^UtY-s>sB$ zwKM#j4>YFmS6Nr5N1WSc;B@QE=Xvf}HG}o6$K<101pGgI{!516oxj2S)!CE+4H;V0 z#z1tEds?NeJ=gy-s4}Z3P+GfT!oQxSar%A7ayt8|Zv z6=Gm?@T@CZJz(IB!+5v92hzv9JW+s zHc@CP;)H=zFKt&hZG1xXXbV%P0=FLiT#9S-F&?Y;Z80xV^?i@1zIVi{uSvmZ7a1fG zhY3VL17n<5`n%dmAZ7h~k}pm}z{qz_yyck<|1jLI>TktPea#i-n+(rjJZcu37BwZY zFM9w_adU*u&Zp;#02&EL>Czhq=DZi}IO1m}?egr%N@_v3U6o?B>!pp7|6tX}Pzb)W zQy{<>FlAq*6^}(q;>l^?TDkV>=l%BOS|xajXQ3AJNj#^bf|5Y9f^wWtms|fX7g)G2 z%GtPH_gC0{vibYwKUHK`I1LI5I!8H1R|d$=$WXbZc4U)-vw*BbFJDN|D&<*;OaxTA z!^3~$)MPYVgXjYjUy?V4qz8b;U`v1Z8-Nwwa(LqIxUh@+IbwRcwe=1*9{tpUTMgiD zA;vZI)ef4S#WX;|oxe>>lQ=P%Tb4Ee*_j;cl`|L_a^F|_rNHPHOp!5SyFqIXaIULu zK-(T<#gnY>|JV54zC-xk)*nCyzU025XT{rj$BxO$nADFly^}MnUFX4c|k#OZI$@L$b^To0T!1@e$p!UuEDK+n|v3)`m+3 zGHDiCQ3t(_1ETTk6}f*2nQOl0%s$!ehlj>z6{#u&bw`EE|A6&xlKG(+0ATZTBOCdS zQ?(V**vrB{lYgE3yB_EvMgG2#3a-TyUj>W1qw^L8}hC_wd?+@l*Ag1SgBQQuzrkIuvX@3hNM;9B6!4m%P=E^^C#rTZ(ZO5J{4+=mPWBWW|W^YRP7- z|K-R)kzDAMHAh<0OIOS1`dc{h`#$~8l#&y}g9%1pElG{-vXE5I`qeXv9@}4@NTkX6 
zoQ?{y_jSt%olH+MSM$do#q%)@;c!lW==_b`58)G73GSeCZJTpa^2o|SGMi1_%SxS3 zBYa`9qIzt+re|gIt+?t!v1<&2?8_{+o)gOqO`&cyrDqZ0#+AKroKz$_OWhK5ha|&# z9$x@NhN>`YX)-XA2EWFsBXT)@VZEWf2SRZx0X(rs}@vA13AUs>|5(80!)86E6L zTx(>u)<9$~7OQpGqLNxn)>VI`7MT8>6h z9h);^@JN02vUBmjxGs)@hta+5@Jh|60@o1KG;5gFDtpp^HdQ!N9nnW}SpMSRKV;QI zMjk$;#D3&Yo(4IqA!Up*4LHbzS3KBh&lr)BE2M{R%H;trY7w;%S05}lEXas0m$3-c ztOk@r1c-QA#;-ghakeX^UAzN-b}i_Lf3jjQ+S4=w7Il%v4w{fE%$a)9@LrZ>C<|JNH3imhP7Hww$Q=g&D!zD@*9>0nA-OWqk9!h|KS`0qpo9hC z0p+s#oo0V>=ez9mzy2U|8!lD{U^hJqkFu=!e*RhQLD4`}|9pz~tmm9>A+a4;mfMhU zje+(Xrr{!>iv$9rD=Ui}^tlchqhFTOj(j8xs}-PpJvsJ1+VS@e3 z0!jZaFSb6=d=HMt7!wYi9l5gW%K!&pazTb-t|~#zXue?cSo7@JW|WQUWEz+Z<i@t1 zf8y}RJ3zo7YK{dafH2qURnTd9dK+1ISG$NG`;$sJoIZ~K$yGpn1o~^Xys&K;70l?) z$+0oqk4xxqj@T$Ebpv}F#%&MjBzTYQ4P+E9+bmQ1*<@&!mM2$i51O7m$O5L@U5%6I zx$WE?X`u56_27UClG^ZsnhG*Y+|@!X9-}Y9PrIhM>#`j`Vzt}6xbWIIe|C!ZCCN6E zr~`knv;4WXDvkymyj9R%yjux?^3kj!iw2V)A<|v(bdRyj1o?2uq3+}v0~dPfEUuGl zr8@EVtIXf97+7$tv-LK;U3~v;`Fs1#h`*i!0tW2SON+}vbZ*>8iQ9>?x*4BCTr&C2 zqzTEUQc)&GU*qib{Cffd2*hEfDi*M$sAK6%I=4f z|5P7{mfMj_&70+}RAhf{2o%XKtMoaB3{e~E>bG`p^@RC_@F2HteS(Au{xKLN?!4Bi z7U6bxwVnMb$nq3>f5BR(<(y+{ds;#05jmc+`o4Fikryq!Aqg^BZ92~o4HcH@D1c== zfGiLxaj0+I_Uw^-onRFdY*@tQ(GxCBL?}F0(*fznR5j3vy+gVehf1I@K5F4q#=`#I zr&ScZWw^|l3&THesQ!@X?*#M1k00#^V2WhO6LD|79c&bUoYMZXt0e&eT(h3t4IubA zUwlQ&j+l<&30%D+U)bL_(6WVS%oZ=Q6GOv13DBFF*rqy-*+Kyi5UYP6a0;P9aYl7) z8ICn>7qtB~Jg{ki1}z#(8>{j9Cw#K1cUFR!3X$l7Qs=yx7TorWjo(Hd7<*Wy-f0(4 zOR&^$m=P#oV-B_nuJajL1Kj=|;@>3mpRoSt%V(NMBjxf0ZwvE{DWVbD9@xB+s2V-| z3=?-X41R{TZeve=pa@P6-nMddTP+HQ3u6ecwEOjKIP^O^#mc*?%-NM$X9c#EC*7zk z?;3m*mA0w7OFAP``j%b0a{>$hZL`Oj63fwU?8n`UMQk;l(o9cWqzlW`TGTv)f0MIS-hh^n-HL5z{|g#0Ftvwh{U~k= zaBQQzQm1Fd>eWNEIrfY$(NF_^b#Z89ut+9`0z z=<#Uo_1@+xQ~L1icI3r*I4vNp43`s8j)AR^IOU+8JCd2*_pjj+hWk)dFqjMIy|K@g zja!C_C6Q^h4<1^f`T@U63S(#Ce;Z0mq)td#QCU}N4BAD-nt7>#m&R(EmrCDNgW+GQ z^K*maFxv%oPsD6*PRO;7aTN6#_!gBOs2?m_i8b&!!m%8?X!j9QN=AIS;KOhn8z-zc 
zW(cM&T;;r~D`6Q%xwUzrotcSLQ}K~VBj;2SdbkBvNO9JRbJhDo9By`IJeDg$()FaO1UpfWhbr^cnsbU&+oi3X5XU@^U0T^nipc$peutdGiUYv z3zf^=BXOc>BVoBK1!ucKl@+6`J}lK`}Ur{WIAtF$wXS z;WR>>p|s9LNUW?e<3=9?1H3oNcs+!O*5l;LjoiD97=;V0S|A#^;>1%BaFl5&xHG~o zWp@(IU|dB`Q97aLWjZTQQU*$pm%pkZ2sC3eiBDNe8Peqt5<|3Rt|jlf&1jsT^dUo< zn0?eao%6?O83|l?2D&sty7+9vt!|dK<%J5H4m~cWSwQyspfs))%~1D`d@&CH-va{! z0|NsJ&X4F@nzG}c;=mu>arFC=5xNPmKvCf7BdaegyO$MjRw1yjM>zB}*pa;%f6FEd zP}Cbxvx+PWRs#bA0|NsC0|NsA0RaI40RaI40|NsC0|NsB0s;a80s=NNRRVHOE*)ci zqHdrNF@L^5RW#6IpB^z-9}$2`3G?fgl-IBXB}w5vtm|ifPC!VBM8_=K0!thtn|Y1# zW3rtho6ENm%{+X0Y$xc~^Vs@EnD_B&tomQ}#I1?KqhHk}^F`Gf`Wd*dqD~odb~G{T=I_ zQjt_vobmBEfbnZ;lwQyq*-p9Fh?uX2$}M2Ynk8YZOHRg&1G5K*j`ON>Jy^sN3MapV z0Q%X=`?Khf8Y4Lm8*%n4NhA#Z92^bnyvA4a!-{rBM~KE97KB~KLmnpp;YLx24({vo zGXTiGiIJ$I-dC6{m{^&P`s9GHrNtbHVziyJ9t|@)HOBS0^PL2rR*>-48IoCs?R(iZ z2@~aFVUt0t?LBK39m2v8bWnBP11FGP4{SN`@jW)RJq5A)CJ;aFAz>by$HTr8{~dzq zTMEbH!l@+fP_Xk)dB+eUbQ9nJ))1BYe50?gi3p(_!xb63N_urNrTdZ><1LF z7u>^2U1^A{hSvHdk-I{~`~d+00dQQT#mGo3Eaj~3=p~_$43<6(2Tv=tULJIJn`571 zggXFfkl;#YmQHD5$12|6jiQwbW4Kxu8nZ(zK1CDYjGuC2Z(32fAzA&V2yFk2D6zF- z*)|>b^nc@*|50F&r|jJi%R8Ps(iyTv{uXG=2zvp8=!>QcoH#24wK&qp33Wg+=2!MG z1ab0({pCc<`vu^)vOcz;kum>5Ga}l)851e~ Mg4Cy&J9oI8cArN$#{d8T