@@ -23,44 +23,129 @@ You should have received a copy of the GNU Affero General Public License
2323using System ;
2424using System . Collections . Generic ;
2525using System . IO ;
26+ using iText . Commons . Actions . Contexts ;
27+ using iText . Commons . Utils ;
2628using iText . IO . Image ;
27- using iText . IO . Util ;
2829using iText . Kernel . Colors ;
2930using iText . Kernel . Font ;
3031using iText . Kernel . Geom ;
32+ using iText . Kernel . Pdf ;
33+ using iText . Pdfa ;
3134using iText . Pdfocr . Helpers ;
35+ using iText . Pdfocr . Logs ;
3236using iText . Test ;
3337using iText . Test . Attributes ;
3438
3539namespace iText . Pdfocr {
3640 public class ApiTest : ExtendedITextTest {
/// <summary>
/// Output location for PDF files written by the tests in this class.
/// Note that the value has no trailing directory separator.
/// </summary>
public static readonly String DESTINATION_FOLDER =
    NUnit.Framework.TestContext.CurrentContext.TestDirectory + "/test/itext/pdfocr";

/// <summary>Runs once before the tests of this fixture and creates or clears the destination folder.</summary>
[NUnit.Framework.OneTimeSetUp]
public static void BeforeClass() => CreateOrClearDestinationFolder(DESTINATION_FOLDER);
48+
/// <summary>
/// Runs <c>OcrPdfCreator.CreatePdf</c> on the default test image and checks that the content
/// stream of page 1 contains the expected hex-encoded string.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfWithFileTest() {
    const string expectedHexText = "<00190014001c001400150014>";

    var creatorProperties = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    var creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);

    // Arguments are prepared in the same order in which the original call evaluated them.
    var images = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath()));
    var writer = PdfHelper.GetPdfWriter();
    var documentProperties = new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo());

    using (PdfDocument pdf = creator.CreatePdf(images, writer, documentProperties)) {
        byte[] rawContent = pdf.GetPage(1).GetContentBytes();
        String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(rawContent, System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(pageContent.Contains(expectedHexText));
    }
}
61+
/// <summary>
/// Writes a PDF file for the default test image via <c>OcrPdfCreator.CreatePdfFile</c>, reopens
/// it and checks that the content stream of page 1 contains the expected hex-encoded string.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfFileWithFileTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would produce
    // ".../pdfocrcreatePdfFileWithFileTest.pdf" beside the folder instead of a file inside it.
    // System.IO.Path is fully qualified because iText.Kernel.Geom is imported as well.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetMetaInfo(new ApiTest.DummyMetaInfo());
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output));
    // Read the written file back to inspect the first page's content stream.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(
            pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
    }
}
75+
/// <summary>
/// Runs <c>OcrPdfCreator.CreatePdfA</c> on the default test image with an RGB output intent and
/// checks both the content stream of page 1 and that the returned document is a
/// <c>PdfADocument</c>.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAWithFileTest() {
    const string expectedHexText = "<00190014001c001400150014>";

    var creatorProperties = new OcrPdfCreatorProperties()
        .SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    var creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);

    // Arguments are prepared in the same order in which the original call evaluated them.
    var images = JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath()));
    var writer = PdfHelper.GetPdfWriter();
    var documentProperties = new DocumentProperties().SetEventCountingMetaInfo(new ApiTest.DummyMetaInfo());
    var outputIntent = PdfHelper.GetRGBPdfOutputIntent();

    using (PdfDocument pdf = creator.CreatePdfA(images, writer, documentProperties, outputIntent)) {
        byte[] rawContent = pdf.GetPage(1).GetContentBytes();
        String pageContent = iText.Commons.Utils.JavaUtil.GetStringForBytes(rawContent, System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(pageContent.Contains(expectedHexText));
        NUnit.Framework.Assert.IsTrue(pdf is PdfADocument);
    }
}
5090
/// <summary>
/// Writes a PDF/A file for the default test image via <c>OcrPdfCreator.CreatePdfAFile</c>,
/// reopens it and checks the content stream of page 1 as well as the conformance level
/// reported by the reader (expected to match <c>PDF_A_3U</c> in conformance and part).
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would produce
    // ".../pdfocrcreatePdfAFileWithFileTest.pdf" beside the folder instead of a file inside it.
    // System.IO.Path is fully qualified because iText.Kernel.Geom is imported as well.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties()
        .SetMetaInfo(new ApiTest.DummyMetaInfo())
        .SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output),
        PdfHelper.GetRGBPdfOutputIntent());
    // Read the written file back to inspect its content and conformance level.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(
            pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}
108+
/// <summary>
/// Same scenario as the PDF/A file test, but the creator properties carry no meta info:
/// writes a PDF/A file via <c>OcrPdfCreator.CreatePdfAFile</c>, reopens it and checks the
/// content stream of page 1 and the conformance level reported by the reader.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileNoMetaTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would produce
    // ".../pdfocrcreatePdfAFileWithFileNoMetaTest.pdf" beside the folder instead of inside it.
    // System.IO.Path is fully qualified because iText.Kernel.Geom is imported as well.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileNoMetaTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    OcrPdfCreator pdfCreator = new OcrPdfCreator(new CustomOcrEngine(), props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output),
        PdfHelper.GetRGBPdfOutputIntent());
    // Read the written file back to inspect its content and conformance level.
    using (PdfDocument pdf = new PdfDocument(new PdfReader(output))) {
        String contentBytes = iText.Commons.Utils.JavaUtil.GetStringForBytes(
            pdf.GetPage(1).GetContentBytes(), System.Text.Encoding.UTF8);
        NUnit.Framework.Assert.IsTrue(contentBytes.Contains("<00190014001c001400150014>"));
        PdfAConformanceLevel cl = pdf.GetReader().GetPdfAConformanceLevel();
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetConformance(), cl.GetConformance());
        NUnit.Framework.Assert.AreEqual(PdfAConformanceLevel.PDF_A_3U.GetPart(), cl.GetPart());
    }
}
125+
/// <summary>
/// Writes a PDF/A file using a <c>CustomProductAwareOcrEngine</c> and checks that the engine
/// reports its <c>GetMetaInfoContainer</c> hook as triggered.
/// </summary>
[NUnit.Framework.Test]
public virtual void CreatePdfAFileWithFileProductAwareEngineTest() {
    // DESTINATION_FOLDER has no trailing separator, so plain concatenation would produce a
    // file named "pdfocrcreatePdfAFileWithFileProductAwareEngineTest.pdf" beside the folder.
    // System.IO.Path is fully qualified because iText.Kernel.Geom is imported as well.
    String output = System.IO.Path.Combine(DESTINATION_FOLDER, "createPdfAFileWithFileProductAwareEngineTest.pdf");
    OcrPdfCreatorProperties props = new OcrPdfCreatorProperties().SetPdfLang("en-US");
    CustomProductAwareOcrEngine ocrEngine = new CustomProductAwareOcrEngine();
    OcrPdfCreator pdfCreator = new OcrPdfCreator(ocrEngine, props);
    pdfCreator.CreatePdfAFile(
        JavaCollectionsUtil.SingletonList<FileInfo>(new FileInfo(PdfHelper.GetDefaultImagePath())),
        new FileInfo(output),
        PdfHelper.GetRGBPdfOutputIntent());
    NUnit.Framework.Assert.IsTrue(ocrEngine.IsGetMetaInfoContainerTriggered());
}
136+
/// <summary>
/// Checks that OCR of the default image yields a single-entry result map, and that a
/// manually built <c>TextInfo</c> added under a second page key can be read back.
/// </summary>
[NUnit.Framework.Test]
public virtual void TestTextInfo() {
    var imageFile = new FileInfo(PdfHelper.GetDefaultImagePath());
    IDictionary<int, IList<TextInfo>> ocrResult = new CustomOcrEngine().DoImageOcr(imageFile);
    NUnit.Framework.Assert.AreEqual(1, ocrResult.Count);

    // Add a hand-made entry under an extra page key.
    const int extraPage = 2;
    TextInfo extraText = new TextInfo();
    extraText.SetText("text");
    extraText.SetBboxRect(new Rectangle(204.0f, 158.0f, 538.0f, 136.0f));
    ocrResult.Put(extraPage, JavaCollectionsUtil.SingletonList<TextInfo>(extraText));

    NUnit.Framework.Assert.AreEqual(2, ocrResult.Count);
    NUnit.Framework.Assert.AreEqual(extraText.GetText(), ocrResult.Get(extraPage)[0].GetText());
}
65150
66151 [ LogMessage ( PdfOcrLogMessageConstant . COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER , Count = 7 ) ]
@@ -113,18 +198,7 @@ public virtual ImageData ApplyRotation(ImageData imageData) {
113198 }
114199 }
115200
116- [ LogMessage ( PdfOcrLogMessageConstant . COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER , Count = 7 ) ]
117- [ NUnit . Framework . Test ]
118- public virtual void TestThaiImageWithNotDefGlyphsDeprecationMode ( ) {
119- String testName = "testThaiImageWithNotdefGlyphs" ;
120- String path = PdfHelper . GetThaiImagePath ( ) ;
121- String pdfPath = PdfHelper . GetTargetDirectory ( ) + testName + ".pdf" ;
122- PdfHelper . CreatePdf ( pdfPath , new FileInfo ( path ) , new OcrPdfCreatorProperties ( ) . SetTextColor ( DeviceRgb . BLACK
123- ) , true ) ;
124- ExtractionStrategy strategy = PdfHelper . GetExtractionStrategy ( pdfPath ) ;
125- PdfFont font = strategy . GetPdfFont ( ) ;
126- String fontName = font . GetFontProgram ( ) . GetFontNames ( ) . GetFontName ( ) ;
127- NUnit . Framework . Assert . IsTrue ( fontName . Contains ( "LiberationSans" ) ) ;
/// <summary>
/// Member-less <c>IMetaInfo</c> implementation; the tests in this class pass instances of it
/// to <c>SetMetaInfo</c> and <c>SetEventCountingMetaInfo</c>.
/// </summary>
private class DummyMetaInfo : IMetaInfo
{
}
129203 }
130204}
0 commit comments