From 26a6df601521afca6ef421b60dd0883a72bbdbcf Mon Sep 17 00:00:00 2001 From: Houfu Ang Date: Wed, 19 Dec 2018 14:22:58 +0800 Subject: [PATCH] Implement page_nums = False When page_nums is False, to_text simply runs pdftotext on the whole file, reads the resulting text file, and outputs the text. Tested on my own files and it works as expected. --- xpdf_python/wrapper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/xpdf_python/wrapper.py b/xpdf_python/wrapper.py index 792e73a..7debbdc 100644 --- a/xpdf_python/wrapper.py +++ b/xpdf_python/wrapper.py @@ -62,8 +62,13 @@ def to_text(file_loc, page_nums = True): text += '***Page {}*** {}'.format(actual, t) file.close() else: - # TO BE IMPLEMENTED - pass + # Calls xpdf + subprocess.call(['pdftotext', full_file_loc]) + # Opens file saved to disk + saved_file = full_file_loc.replace('.pdf', '.txt') + file = open(saved_file, 'r', encoding="ISO-8859-1") + text = file.read() + file.close() # Remove file saved to disk os.remove(saved_file)