From 1543718750674791f6a475b1bd84cb22b8652584 Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Thu, 25 Sep 2025 17:31:31 +0100 Subject: [PATCH 1/6] tests/test_general.py: test_gitinfo(): fix for mupdf without git information. --- tests/test_general.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_general.py b/tests/test_general.py index 1d7b76aa3..c6086aef2 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1989,7 +1989,9 @@ def test_gitinfo(): print(f'{pymupdf.pymupdf_git_branch=}') print(f'{pymupdf.pymupdf_git_sha=}') print(f'{pymupdf.pymupdf_version=}') - print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}') + print(f'{pymupdf.pymupdf_git_diff=}') + if pymupdf.pymupdf_git_diff: + print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}') def test_4392(): @@ -2086,7 +2088,7 @@ def test_4590(): def test_4702(): if os.environ.get('PYODIDE_ROOT'): - # util.download() uses subrocess. + # util.download() uses subprocess. print('test_4702(): not running on Pyodide - cannot run child processes.') return From b03c3ac848c0f0c89de24ade0f36203aa05b0c41 Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Mon, 29 Sep 2025 18:18:51 +0100 Subject: [PATCH 2/6] pipcl.py: fix for package names containing '-'. fixed doctests. --- pipcl.py | 134 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 38 deletions(-) diff --git a/pipcl.py b/pipcl.py index c533c73ee..78f99d6df 100644 --- a/pipcl.py +++ b/pipcl.py @@ -2,19 +2,32 @@ Python packaging operations, including PEP-517 support, for use by a `setup.py` script. -The intention is to take care of as many packaging details as possible so that -setup.py contains only project-specific information, while also giving as much -flexibility as possible. 
+Overview: -For example we provide a function `build_extension()` that can be used to build -a SWIG extension, but we also give access to the located compiler/linker so -that a `setup.py` script can take over the details itself. + The intention is to take care of as many packaging details as possible so + that setup.py contains only project-specific information, while also giving + as much flexibility as possible. -Run doctests with: `python -m doctest pipcl.py` + For example we provide a function `build_extension()` that can be used + to build a SWIG extension, but we also give access to the located + compiler/linker so that a `setup.py` script can take over the details + itself. -For Graal we require that PIPCL_GRAAL_PYTHON is set to non-graal Python (we -build for non-graal except with Graal Python's include paths and library -directory). +Doctests: + Doctest strings are provided in some comments. + + Test in the usual way with: + python -m doctest pipcl.py + + Test specific functions/classes with: + python pipcl.py --doctest run_if ... + + If no functions or classes are specified, this tests everything. + +Graal: + For Graal we require that PIPCL_GRAAL_PYTHON is set to non-graal Python (we + build for non-graal except with Graal Python's include paths and library + directory). ''' import base64 @@ -532,6 +545,12 @@ def assert_str_or_multi( v): assert_str_or_multi( requires_external) assert_str_or_multi( project_url) assert_str_or_multi( provides_extra) + + assert re.match('^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\\Z', name, re.IGNORECASE), ( + f'Invalid package name' + f' (https://packaging.python.org/en/latest/specifications/name-normalization/)' + f': {name!r}' + ) # https://packaging.python.org/en/latest/specifications/core-metadata/. 
assert re.match('([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', name, re.IGNORECASE), \ @@ -761,7 +780,7 @@ def build_sdist(self, else: items = self.fn_sdist() - prefix = f'{_normalise(self.name)}-{self.version}' + prefix = f'{_normalise2(self.name)}-{self.version}' os.makedirs(sdist_directory, exist_ok=True) tarpath = f'{sdist_directory}/{prefix}.tar.gz' log2(f'Creating sdist: {tarpath}') @@ -833,9 +852,11 @@ def tag_python(self): Get two-digit python version, e.g. 'cp3.8' for python-3.8.6. ''' if self.tag_python_: - return self.tag_python_ + ret = self.tag_python_ else: - return 'cp' + ''.join(platform.python_version().split('.')[:2]) + ret = 'cp' + ''.join(platform.python_version().split('.')[:2]) + assert '-' not in ret + return ret def tag_abi(self): ''' @@ -891,10 +912,13 @@ def tag_platform(self): ret = ret2 log0( f'tag_platform(): returning {ret=}.') + assert '-' not in ret return ret def wheel_name(self): - return f'{_normalise(self.name)}-{self.version}-{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}.whl' + ret = f'{_normalise2(self.name)}-{self.version}-{self.tag_python()}-{self.tag_abi()}-{self.tag_platform()}.whl' + assert ret.count('-') == 4, f'Expected 4 dash characters in {ret=}.' 
+ return ret def wheel_name_match(self, wheel): ''' @@ -923,7 +947,7 @@ def wheel_name_match(self, wheel): log2(f'py_limited_api; {tag_python=} compatible with {self.tag_python()=}.') py_limited_api_compatible = True - log2(f'{_normalise(self.name) == name=}') + log2(f'{_normalise2(self.name) == name=}') log2(f'{self.version == version=}') log2(f'{self.tag_python() == tag_python=} {self.tag_python()=} {tag_python=}') log2(f'{py_limited_api_compatible=}') @@ -932,7 +956,7 @@ def wheel_name_match(self, wheel): log2(f'{self.tag_platform()=}') log2(f'{tag_platform.split(".")=}') ret = (1 - and _normalise(self.name) == name + and _normalise2(self.name) == name and self.version == version and (self.tag_python() == tag_python or py_limited_api_compatible) and self.tag_abi() == tag_abi @@ -1059,7 +1083,7 @@ def _argv_dist_info(self, root): it writes to a slightly different directory. ''' if root is None: - root = f'{self.name}-{self.version}.dist-info' + root = f'{normalise2(self.name)}-{self.version}.dist-info' self._write_info(f'{root}/METADATA') if self.license: with open( f'{root}/COPYING', 'w') as f: @@ -1347,7 +1371,7 @@ def __str__(self): ) def _dist_info_dir( self): - return f'{_normalise(self.name)}-{self.version}.dist-info' + return f'{_normalise2(self.name)}-{self.version}.dist-info' def _metainfo(self): ''' @@ -1487,7 +1511,7 @@ def _fromto(self, p): to_ = f'{self._dist_info_dir()}/{to_[ len(prefix):]}' prefix = '$data/' if to_.startswith( prefix): - to_ = f'{self.name}-{self.version}.data/{to_[ len(prefix):]}' + to_ = f'{_normalise2(self.name)}-{self.version}.data/{to_[ len(prefix):]}' if isinstance(from_, str): from_, _ = self._path_relative_to_root( from_, assert_within_root=False) to_ = self._path_relative_to_root(to_) @@ -2569,7 +2593,7 @@ def _cpu_name(): return f'x{32 if sys.maxsize == 2**31 - 1 else 64}' -def run_if( command, out, *prerequisites): +def run_if( command, out, *prerequisites, caller=1): ''' Runs a command only if the output file is not up 
to date. @@ -2599,21 +2623,26 @@ def run_if( command, out, *prerequisites): ... os.remove( out) >>> if os.path.exists( f'{out}.cmd'): ... os.remove( f'{out}.cmd') - >>> run_if( f'touch {out}', out) + >>> run_if( f'touch {out}', out, caller=0) pipcl.py:run_if(): Running command because: File does not exist: 'run_if_test_out' pipcl.py:run_if(): Running: touch run_if_test_out True If we repeat, the output file will be up to date so the command is not run: - >>> run_if( f'touch {out}', out) + >>> run_if( f'touch {out}', out, caller=0) pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out' If we change the command, the command is run: - >>> run_if( f'touch {out}', out) - pipcl.py:run_if(): Running command because: Command has changed - pipcl.py:run_if(): Running: touch run_if_test_out + >>> run_if( f'touch {out};', out, caller=0) + pipcl.py:run_if(): Running command because: Command has changed: + pipcl.py:run_if(): @@ -1,2 +1,2 @@ + pipcl.py:run_if(): touch + pipcl.py:run_if(): -run_if_test_out + pipcl.py:run_if(): +run_if_test_out; + pipcl.py:run_if(): + pipcl.py:run_if(): Running: touch run_if_test_out; True If we add a prerequisite that is newer than the output, the command is run: @@ -2622,15 +2651,20 @@ def run_if( command, out, *prerequisites): >>> prerequisite = 'run_if_test_prerequisite' >>> run( f'touch {prerequisite}', caller=0) pipcl.py:run(): Running: touch run_if_test_prerequisite - >>> run_if( f'touch {out}', out, prerequisite) - pipcl.py:run_if(): Running command because: Prerequisite is new: 'run_if_test_prerequisite' + >>> run_if( f'touch {out}', out, prerequisite, caller=0) + pipcl.py:run_if(): Running command because: Command has changed: + pipcl.py:run_if(): @@ -1,2 +1,2 @@ + pipcl.py:run_if(): touch + pipcl.py:run_if(): -run_if_test_out; + pipcl.py:run_if(): +run_if_test_out + pipcl.py:run_if(): pipcl.py:run_if(): Running: touch run_if_test_out True If we repeat, the output will be newer than the prerequisite, so the command 
is not run: - >>> run_if( f'touch {out}', out, prerequisite) + >>> run_if( f'touch {out}', out, prerequisite, caller=0) pipcl.py:run_if(): Not running command because up to date: 'run_if_test_out' ''' doit = False @@ -2687,9 +2721,9 @@ def _make_prerequisites(p): for p in prerequisites: prerequisites_all += _make_prerequisites( p) if 0: - log2( 'prerequisites_all:') + log2( 'prerequisites_all:', caller=caller+1) for i in prerequisites_all: - log2( f' {i!r}') + log2( f' {i!r}', caller=caller+1) pre_mtime = 0 pre_path = None for prerequisite in prerequisites_all: @@ -2715,16 +2749,16 @@ def _make_prerequisites(p): os.remove( cmd_path) except Exception: pass - log1( f'Running command because: {doit}', caller=2) + log1( f'Running command because: {doit}', caller=caller+1) - run( command, caller=2) + run( command, caller=caller+1) # Write the command we ran, into `cmd_path`. with open( cmd_path, 'w') as f: f.write( command) return True else: - log1( f'Not running command because up to date: {out!r}', caller=2) + log1( f'Not running command because up to date: {out!r}', caller=caller+1) if 0: log2( f'out_mtime={time.ctime(out_mtime)} pre_mtime={time.ctime(pre_mtime)}.' @@ -2796,6 +2830,11 @@ def _normalise(name): return re.sub(r"[-_.]+", "-", name).lower() +def _normalise2(name): + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/ + return _normalise(name).replace('-', '_') + + def _assert_version_pep_440(version): assert re.match( r'^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$', @@ -2848,19 +2887,30 @@ def _log(text, level, caller): print(f'{filename}:{fr.function}(): {line}', file=sys.stdout, flush=1) -def relpath(path, start=None): +def relpath(path, start=None, allow_up=True): ''' A safe alternative to os.path.relpath(), avoiding an exception on Windows if the drive needs to change - in this case we use os.path.abspath(). 
+ + Args: + path: + Path to be processed. + start: + Start directory or current directory if None. + allow_up: + If false we return absolute path if `path` is not within `start`. ''' if windows(): try: - return os.path.relpath(path, start) + ret = os.path.relpath(path, start) except ValueError: # os.path.relpath() fails if trying to change drives. - return os.path.abspath(path) + ret = os.path.abspath(path) else: - return os.path.relpath(path, start) + ret = os.path.relpath(path, start) + if not allow_up and ret.startswith('../') or ret.startswith('..\\'): + ret = os.path.abspath(path) + return ret def _so_suffix(use_so_versioning=True): @@ -3218,7 +3268,15 @@ def venv_run(args, path, recreate=True, clean=False): # graal_legacy_python_config is true. # includes, ldflags = sysconfig_python_flags() - if sys.argv[1:] == ['--graal-legacy-python-config', '--includes']: + if sys.argv[1] == '--doctest': + import doctest + if sys.argv[2:]: + for f in sys.argv[2:]: + ff = globals()[f] + doctest.run_docstring_examples(ff, globals()) + else: + doctest.testmod(None) + elif sys.argv[1:] == ['--graal-legacy-python-config', '--includes']: print(includes) elif sys.argv[1:] == ['--graal-legacy-python-config', '--ldflags']: print(ldflags) From a090e379bb74ac0f4648da597a18e5dc4f71e80f Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Mon, 29 Sep 2025 18:19:50 +0100 Subject: [PATCH 3/6] scripts/test.py: fixed typo in comment. --- scripts/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.py b/scripts/test.py index 2ea847a03..861fd6e8c 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -293,7 +293,7 @@ Use specified prefix when running pytest, must be one of: gdb helgrind - vagrind + valgrind -v venv is: From 09cf191c761c56de12a78b28958380eaab7f2a71 Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Mon, 29 Sep 2025 18:18:26 +0100 Subject: [PATCH 4/6] tests/: added test_4712() and test_4712m(). Crash with "corrupted double-linked list". 
Both tests are disabled for now because they segv. --- tests/resources/test_4712_a.pdf | Bin 0 -> 2922 bytes tests/resources/test_4712_b.pdf | Bin 0 -> 3328 bytes tests/test_general.py | 63 ++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 tests/resources/test_4712_a.pdf create mode 100644 tests/resources/test_4712_b.pdf diff --git a/tests/resources/test_4712_a.pdf b/tests/resources/test_4712_a.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7a7fd49fc19d6a348e318bf63d0dbdbdde9cd80a GIT binary patch literal 2922 zcmY!laBzFQvuf-)r zsfoE<6?3$o1T`=*+a_D^6}59qOKMCxY`IJ!BXLziV$PC)DO}=wZJU`Mu5q$B9dM9= zqfjz4C39W+s)U@JW$6nibgN}$C9FzcGNJjXoU-tQJ)N2!^Csvet8Be-#%agQ)0!;j zu9X{yY%mGt@N}M$ydZ(^OSuPEjEk~@p>62FfM7)xZoOw+222dkOfBRKGY8u$x60^CIZPvT^0SiB5cC-90`IpVXEq zR66bInRq41nEPRd-f{tp==)~_4qoz6RN+nRYizQ-@K{l^K(M#r$d?e-WZ%{aIy+`) zAL3w8>#+#J;vwXk55I@VPjwyCOm!}ZRQJ5pLJ5nPkn3Dpd5NkSDForAXEU*S3At9r z@1?xTne89PuITw zN#(680qI6~wW9jfdM&1X6ki!hB$nbA=g+i^jg73tFU`MpeP82K^Iy!mdKc#1O4Fym zeH*xzx#4>J`(MZT!H52TYf6rsHe=md31<>$v3cWPFyP{=Q zI$VkeZ$73?9&DuL%LB3rddQCO?u>};%kk4U`x_6Li&w5*%`D6W+!rn{pIlxz8>p>2 zGuPjJw`B=V20ouV`-{xi+!}R%_@p%S`*-2hpZgn{o*-|ZJmvrKc7rRm`>m<3?+xx! 
z24?0DUOV5mR$b9DCq+u};LnG&$%BKmjEN}d;_W+P=Uda8k8*1Y-rt)3N=y4oQGsx2 zE^M%6%tb*b|6g1@(qm%qJ{Rkm|KLI_&4mrNjJYW2=IyyCkCQ^pJ1PE=j+AT*sWcTf z)-t9df6<4umG%jlcCoRW4K#8zCmL)SVggT(o@QFXIZkqV0Ncu8 z^Bl^GmL6h}gdPWudNDV&MV^Mh1)Ix&9t$!Z2U~;BVh)R(AA6C&d9kmJLkBqSfQ#j2 zV$RYH*qVx|)+_B}isl$I=RujjftRC#?1qNhkDnB#Ll2Yhkh7 zrs_^o3%0|AK2G8qbu0R@VV5gU08JQF1sudY;5IMl6-8czD%`MPUSg-SeZgeT=8TH` zIE>kAF7~q-D7LXk#dE!QYYu@c5pU~U#MxvQ<&cQL=;)|b#?Hv5cA8bm*c&>pvp2O5|4>2_>k9&6<+LO ltrXF93p)M6;zqR-)UG1d7N$iE%c(2uLl`tSKmTF~{0sG!(@OvV literal 0 HcmV?d00001 diff --git a/tests/test_general.py b/tests/test_general.py index c6086aef2..aaa23dd94 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -2117,3 +2117,66 @@ def test_4702(): _ = document.xref_object(xref) wt = pymupdf.TOOLS.mupdf_warnings() assert wt == 'repairing PDF document' + + +def test_4712(): + ''' + Crash with "corrupted double-linked list + ''' + if 1: + print(f'test_4712(): Not running because known to fail.') + return + path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf') + path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf') + doc1 = pymupdf.open(path_a) + for i in range(6): + doc1.load_page(i).get_pixmap() + doc2 = pymupdf.open(path_b) + for i in range(6): + doc2.load_page(i).get_pixmap() + + +def test_4712m(): + if 1: + print(f'test_4712b(): Not running because known to fail.') + return + + path_a = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_a.pdf') + path_b = os.path.normpath(f'{__file__}/../../tests/resources/test_4712_b.pdf') + + mupdf = pymupdf.mupdf + def get_pixmap(page): + displaylist = mupdf.fz_new_display_list_from_page(page) + rect = mupdf.fz_bound_display_list(displaylist) + irect = mupdf.fz_round_rect(rect) + pixmap = mupdf.fz_new_pixmap_with_bbox( + mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB), + irect, + mupdf.FzSeparations(), + 0, # alpha + ) + mupdf.fz_clear_pixmap_with_value(pixmap, 0xFF) + matrix = mupdf.FzMatrix() + device = 
mupdf.fz_new_draw_device(matrix, pixmap) + mupdf.fz_run_display_list( + displaylist, + device, + mupdf.FzMatrix(), + mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE), + mupdf.FzCookie(), + ) + mupdf.fz_close_device(device) + + def process_document(document): + for i in range(6): + print(f' {i=}', flush=1) + page = mupdf.fz_load_page(document, i) + get_pixmap(page) + + print(f'Processing {path_a=}', flush=1) + document_a = mupdf.fz_open_document(path_a) + process_document(document_a) + + print(f'Processing {path_b=}', flush=1) + document_b = mupdf.fz_open_document(path_b) + process_document(document_b) From fd368e06a04706153bdc1968fdd212cfd25a5920 Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Fri, 3 Oct 2025 18:13:47 +0100 Subject: [PATCH 5/6] setup.py: update default mupdf to 1.26.10. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c2731b1e0..5eb36678e 100755 --- a/setup.py +++ b/setup.py @@ -1269,7 +1269,7 @@ def sdist(): # PyMuPDF version. version_p = '1.26.5' -version_mupdf = '1.26.7' +version_mupdf = '1.26.10' # PyMuPDFb version. This is the PyMuPDF version whose PyMuPDFb wheels we will # (re)use if generating separate PyMuPDFb wheels. Though as of PyMuPDF-1.24.11 From 025c7af38a5fd8118215123f23ac6d1cfc326c3d Mon Sep 17 00:00:00 2001 From: Julian Smith Date: Fri, 3 Oct 2025 17:04:54 +0100 Subject: [PATCH 6/6] changes.txt: updated to match all changes since 1.26.4 release. Also mark update to mupdf-1.26.10. --- changes.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/changes.txt b/changes.txt index d06386a9a..790150b3d 100644 --- a/changes.txt +++ b/changes.txt @@ -4,16 +4,25 @@ Change Log **Changes in version 1.26.5** +* Use MuPDF-1.26.10. 
+ * Fixed issues: + * **Fixed** `2883 <https://github.com/pymupdf/PyMuPDF/issues/2883>`_: Improve the Python type annotations for fitz_new + * **Fixed** `4507 <https://github.com/pymupdf/PyMuPDF/issues/4507>`_: Bugs in pyodide + * **Fixed** `4613 <https://github.com/pymupdf/PyMuPDF/issues/4613>`_: Thai and number blocks are not auto-scaled and get wrong hyphen when using in insert_htmlbox + * **Fixed** `4700 <https://github.com/pymupdf/PyMuPDF/issues/4700>`_: pymupdf.open() processes .zip file without raising + * **Fixed** `4716 <https://github.com/pymupdf/PyMuPDF/issues/4716>`_: Problems with unreadable characters + * Other: - * Partially address `2883 <https://github.com/pymupdf/PyMuPDF/issues/2883>`_: Improve the Python type annotations for fitz_new - We now define all class methods explicitly instead of with dynamic assignment. + * We now define all class methods explicitly instead of with dynamic assignment; this improves type hints. * Removed `pymupdf.utils.Shape` class, was duplicate of `pymupdf.Shape`. * Allow use of cibuildwheel to build and test on Pyodide. + * Fixed various Pyodide bugs. * In documentation, added section about Linux wheels and glibc compatibility. + * Improved documentation of pymupdf.open()'s arg. * Retrospectively mark `4544 <https://github.com/pymupdf/PyMuPDF/issues/4544>`_ as fixed in 1.26.4.