From 638e587422ccc08c4ca4af204fc19cbe5688d0bf Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Thu, 25 Sep 2025 20:43:08 +0100
Subject: [PATCH 1/4] gh-101828: Fix shift_jis_2004 encoder consuming null char in pair

---
 Lib/test/test_codecencodings_jp.py            | 41 +++++++++++++++++++
 ...-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst |  2 +
 Modules/cjkcodecs/_codecs_jp.c                | 13 +++++-
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst

diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index 94378d124f7485..48d9f73db99398 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -106,6 +106,27 @@ class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
         b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
 
+    def test_null_terminator(self):
+        # see gh-101828
+        cases = (
+            "バルーンフルーツ",
+            "ライフアップキノコ",
+            "テスト",
+            "'Tis but a scratch!"
+        )
+        for case in cases:
+            with self.subTest(case=case):
+                encode_w_null = (case + "\0").encode(self.encoding)
+                encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
+                self.assertTrue(encode_w_null.endswith(b'\x00'))
+                self.assertEqual(encode_w_null, encode_plus_null)
+
+                encode_w_null_2 = encode_w_null + encode_w_null
+                encode_plus_null_2 = encode_plus_null + encode_plus_null
+                self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+                self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
+
 class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'shift_jisx0213'
     tstring = multibytecodec_support.load_teststring('shift_jisx0213')
@@ -121,6 +142,26 @@ class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
         "\xab\u211c\xbb = \u2329\u1234\u232a",
         b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
+    def test_null_terminator(self):
+        # see gh-101828
+        cases = (
+            "バルーンフルーツ",
+            "ライフアップキノコ",
+            "テスト",
+            "'Tis but a scratch!"
+        )
+        for case in cases:
+            with self.subTest(case=case):
+                encode_w_null = (case + "\0").encode(self.encoding)
+                encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
+                self.assertTrue(encode_w_null.endswith(b'\x00'))
+                self.assertEqual(encode_w_null, encode_plus_null)
+
+                encode_w_null_2 = encode_w_null + encode_w_null
+                encode_plus_null_2 = encode_plus_null + encode_plus_null
+                self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+                self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
new file mode 100644
index 00000000000000..942bc61698b4d3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
@@ -0,0 +1,2 @@
+Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null char
+as it was treated as part of a multi-character sequence.
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index f7127487aa5f59..04b88a04305b51 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -611,8 +611,19 @@ ENCODER(shift_jis_2004)
                             if (code == DBCINV)
                                 return 1;
                             }
-                            else
+                            else if (ch2 != 0) {
                                 insize = 2;
+                            }
+                            else {
+                                /* Don't consume null char as part of pair */
+                                code = find_pairencmap(
+                                    (ucs2_t)c, 0,
+                                    jisx0213_pair_encmap,
+                                    JISX0213_ENCPAIRS);
+                                if (code == DBCINV) {
+                                    return 1;
+                                }
+                            }
                         }
                     }
                 }

From c565d52194bb3eef64a8f2b436e9fad0df85125a Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stanulbrych@gmail.com>
Date: Thu, 25 Sep 2025 20:45:52 +0100
Subject: [PATCH 2/4] Tidy blank lines in tests and pluralise NEWS entry

---
 Lib/test/test_codecencodings_jp.py                            | 3 +--
 .../Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst    | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index 48d9f73db99398..0cdfa0d13eb4a7 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -126,7 +126,6 @@ def test_null_terminator(self):
                 self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
                 self.assertEqual(encode_w_null_2, encode_plus_null_2)
 
-
 class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'shift_jisx0213'
     tstring = multibytecodec_support.load_teststring('shift_jisx0213')
@@ -142,6 +141,7 @@ class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
         "\xab\u211c\xbb = \u2329\u1234\u232a",
         b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
+
     def test_null_terminator(self):
         # see gh-101828
         cases = (
@@ -162,6 +162,5 @@ def test_null_terminator(self):
                 self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
                 self.assertEqual(encode_w_null_2, encode_plus_null_2)
 
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
index 942bc61698b4d3..9fe961f731493c 100644
--- a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
+++ b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
@@ -1,2 +1,2 @@
-Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null char
-as it was treated as part of a multi-character sequence.
+Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null chars
+as they were treated as part of multi-character sequences.

From adb384bf978df054882c9c0428472f1e93f83ec3 Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Mon, 6 Oct 2025 17:12:34 +0100
Subject: [PATCH 3/4] Address Inada review + refactor test location

---
 Lib/test/multibytecodec_support.py            | 16 ++++++++
 Lib/test/test_codecencodings_jp.py            | 40 -------------------
 ...-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst |  3 +-
 Modules/cjkcodecs/_codecs_jp.c                | 16 +++-----
 4 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/Lib/test/multibytecodec_support.py b/Lib/test/multibytecodec_support.py
index dbf0cc428e3ff6..205ac8bfc8698d 100644
--- a/Lib/test/multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -282,6 +282,22 @@ def test_incrementalencoder_del_segfault(self):
         with self.assertRaises(AttributeError):
             del e.errors
 
+    def test_null_terminator(self):
+        # see gh-101828
+        if any(enc in self.encoding for enc in ('shift', 'euc_jis')):
+            text = "バルーンフルーツ"
+        else:
+            text = "Spam"
+        encode_w_null = (text + "\0").encode(self.encoding)
+        encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
+        self.assertTrue(encode_w_null.endswith(b'\x00'))
+        self.assertEqual(encode_w_null, encode_plus_null)
+
+        encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)
+        encode_plus_null_2 = encode_plus_null + encode_plus_null
+        self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
+        self.assertEqual(encode_w_null_2, encode_plus_null_2)
+
 
 class TestBase_Mapping(unittest.TestCase):
     pass_enctest = []
diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index 0cdfa0d13eb4a7..94378d124f7485 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -106,26 +106,6 @@ class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
         b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
 
-    def test_null_terminator(self):
-        # see gh-101828
-        cases = (
-            "バルーンフルーツ",
-            "ライフアップキノコ",
-            "テスト",
-            "'Tis but a scratch!"
-        )
-        for case in cases:
-            with self.subTest(case=case):
-                encode_w_null = (case + "\0").encode(self.encoding)
-                encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
-                self.assertTrue(encode_w_null.endswith(b'\x00'))
-                self.assertEqual(encode_w_null, encode_plus_null)
-
-                encode_w_null_2 = encode_w_null + encode_w_null
-                encode_plus_null_2 = encode_plus_null + encode_plus_null
-                self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
-                self.assertEqual(encode_w_null_2, encode_plus_null_2)
-
 class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'shift_jisx0213'
     tstring = multibytecodec_support.load_teststring('shift_jisx0213')
@@ -142,25 +122,5 @@ class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
         b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
 
-    def test_null_terminator(self):
-        # see gh-101828
-        cases = (
-            "バルーンフルーツ",
-            "ライフアップキノコ",
-            "テスト",
-            "'Tis but a scratch!"
-        )
-        for case in cases:
-            with self.subTest(case=case):
-                encode_w_null = (case + "\0").encode(self.encoding)
-                encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
-                self.assertTrue(encode_w_null.endswith(b'\x00'))
-                self.assertEqual(encode_w_null, encode_plus_null)
-
-                encode_w_null_2 = encode_w_null + encode_w_null
-                encode_plus_null_2 = encode_plus_null + encode_plus_null
-                self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
-                self.assertEqual(encode_w_null_2, encode_plus_null_2)
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
index 9fe961f731493c..1d100180c072ec 100644
--- a/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
+++ b/Misc/NEWS.d/next/Library/2025-09-25-20-16-10.gh-issue-101828.yTxJlJ.rst
@@ -1,2 +1,3 @@
-Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null chars
+Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
+``'euc_jis_2004'`` codecs truncating null chars
 as they were treated as part of multi-character sequences.
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
index 04b88a04305b51..cd77888d5514b8 100644
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
                                 JISX0213_ENCPAIRS);
                             if (code == DBCINV)
                                 return 1;
-                        } else
+                        }
+                        else if (c2 != 0) {
+                            /* Don't consume null char as part of pair */
                             insize = 2;
+                        }
                     }
                 }
             }
@@ -612,17 +615,8 @@ ENCODER(shift_jis_2004)
                                 return 1;
                             }
                             else if (ch2 != 0) {
-                                insize = 2;
-                            }
-                            else {
                                 /* Don't consume null char as part of pair */
-                                code = find_pairencmap(
-                                    (ucs2_t)c, 0,
-                                    jisx0213_pair_encmap,
-                                    JISX0213_ENCPAIRS);
-                                if (code == DBCINV) {
-                                    return 1;
-                                }
+                                insize = 2;
                             }
                         }
                     }

From d9f910caaa6765383b3031f298a64406f49f9e5e Mon Sep 17 00:00:00 2001
From: Stan Ulbrych <stan@ulbrych.org>
Date: Tue, 7 Oct 2025 17:37:18 +0100
Subject: [PATCH 4/4] Guard null char in iso2022 jisx0213 encoder; probe test text

---
 Lib/test/multibytecodec_support.py  |  9 +++++----
 Modules/cjkcodecs/_codecs_iso2022.c | 11 +++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Lib/test/multibytecodec_support.py b/Lib/test/multibytecodec_support.py
index 205ac8bfc8698d..6b4c57d0b4bad7 100644
--- a/Lib/test/multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -284,10 +284,11 @@ def test_incrementalencoder_del_segfault(self):
 
     def test_null_terminator(self):
         # see gh-101828
-        if any(enc in self.encoding for enc in ('shift', 'euc_jis')):
-            text = "バルーンフルーツ"
-        else:
-            text = "Spam"
+        text = "フルーツ"
+        try:
+            text.encode(self.encoding)
+        except UnicodeEncodeError:
+            text = "Python is cool"
         encode_w_null = (text + "\0").encode(self.encoding)
         encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)
         self.assertTrue(encode_w_null.endswith(b'\x00'))
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index ef6faeb71274e1..b1984df2695b17 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -802,10 +802,13 @@ jisx0213_encoder(const MultibyteCodec *codec, const Py_UCS4 *data,
         return coded;
 
     case 2: /* second character of unicode pair */
-        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
-                                jisx0213_pair_encmap, JISX0213_ENCPAIRS);
-        if (coded != DBCINV)
-            return coded;
+        if (data[1] != 0) { /* Don't consume null char as part of pair */
+            coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+                                    jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+            if (coded != DBCINV) {
+                return coded;
+            }
+        }
         _Py_FALLTHROUGH;
 
     case -1: /* flush unterminated */