From 7706b33e18b277b8bf45c5c4484133bb6ca2a086 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 16:54:42 +0200 Subject: [PATCH 1/4] Fix base64-url parsing errors in email headers --- Lib/email/_encoded_words.py | 9 +++++++-- Lib/email/base64mime.py | 11 +++++++---- Lib/test/test_email/test__encoded_words.py | 6 ++++++ Lib/test/test_email/test_email.py | 3 +++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 6795a606de037e..7c2dfb360f95ad 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -113,9 +113,14 @@ def decode_b(encoded): # The non-alphabet characters are ignored as far as padding # goes, but we don't know how many there are. So try without adding # padding to see if it works. + # + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode + # both the urlsafe and regular alphabets. + try: return ( - base64.b64decode(encoded, validate=False), + base64.urlsafe_b64decode(encoded), [errors.InvalidBase64CharactersDefect()], ) except binascii.Error: @@ -123,7 +128,7 @@ def decode_b(encoded): # is ignored). try: return ( - base64.b64decode(encoded + b'==', validate=False), + base64.urlsafe_b64decode(encoded + b'=='), [errors.InvalidBase64CharactersDefect(), errors.InvalidBase64PaddingDefect()], ) diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index a5a3f737a97b51..3363a5bc45f2d4 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -34,8 +34,8 @@ ] -from base64 import b64encode -from binascii import b2a_base64, a2b_base64 +from base64 import b64encode, urlsafe_b64decode +from binascii import b2a_base64 CRLF = '\r\n' NL = '\n' @@ -102,12 +102,15 @@ def decode(string): base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high level email.header class for that functionality. """ + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode both + # the urlsafe and regular alphabets. if not string: return bytes() elif isinstance(string, str): - return a2b_base64(string.encode('raw-unicode-escape')) + return urlsafe_b64decode(string.encode('raw-unicode-escape')) else: - return a2b_base64(string) + return urlsafe_b64decode(string) # For convenience and backwards compatibility w/ standard base64 module diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 1713962f94caef..e547f372adb5c3 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -38,6 +38,12 @@ def test_missing_padding(self): # 2 missing padding characters self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect]) + def test_urlsafe_alphabet(self): + self._test( + b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', + b'Anmeldung Netzanschluss S\xfcdring3p.jpg', + [errors.InvalidBase64CharactersDefect]) + def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b8116d073a2670..16b156dc7fc07f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4663,6 +4663,9 @@ def test_decode(self): eq = self.assertEqual eq(base64mime.decode(''), b'') eq(base64mime.decode('aGVsbG8='), b'hello') + eq(base64mime.decode( + 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), + b'Anmeldung Netzanschluss S\xfcdring3p.jpg') def test_encode(self): eq = self.assertEqual From b79dce66038179af62b640af6dfb416727c69626 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 16:59:24 +0200 Subject: [PATCH 2/4] Add news entry --- .../next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst new file mode 100644 index 00000000000000..d23e7eb2bd4c0e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst @@ -0,0 +1,2 @@ +Accept urlsafe base64 in email headers, as those are sometimes created by +email clients. From 81cff0ab4c4040cf3f31886c9dfbb0a0c3004b87 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 17:37:41 +0200 Subject: [PATCH 3/4] Add tests for mixed base64 alphabets --- Lib/test/test_email/test__encoded_words.py | 3 +++ Lib/test/test_email/test_email.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index e547f372adb5c3..890c4fb6cbd51e 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -43,6 +43,9 @@ def test_urlsafe_alphabet(self): b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', b'Anmeldung Netzanschluss S\xfcdring3p.jpg', [errors.InvalidBase64CharactersDefect]) + # Mix of 2 base64 alphabets + self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect]) + def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 16b156dc7fc07f..6914f5e7a0cdba 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4666,6 +4666,8 @@ def test_decode(self): eq(base64mime.decode( 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), b'Anmeldung Netzanschluss S\xfcdring3p.jpg') + # Mix of 2 base64 alphabets + eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????') def test_encode(self): eq = self.assertEqual From 6161551bf3a35c03786662f3735eee4241de1fff Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 20:28:51 +0200 Subject: [PATCH 4/4] Address review comments --- Lib/test/test_email/test__encoded_words.py | 9 ++++----- Lib/test/test_email/test_email.py | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 890c4fb6cbd51e..2686448565bbbe 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -40,13 +40,12 @@ def test_missing_padding(self): def test_urlsafe_alphabet(self): self._test( - b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', - b'Anmeldung Netzanschluss S\xfcdring3p.jpg', - [errors.InvalidBase64CharactersDefect]) - # Mix of 2 base64 alphabets + b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', + b'Anmeldung Netzanschluss S\xfcdring3p.jpg', + [errors.InvalidBase64CharactersDefect]) + # mix of different base64 alphabets self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect]) - def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 6914f5e7a0cdba..03381ea1b72b2f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4664,9 +4664,9 @@ def test_decode(self): eq(base64mime.decode(''), b'') eq(base64mime.decode('aGVsbG8='), b'hello') eq(base64mime.decode( - 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), - b'Anmeldung Netzanschluss S\xfcdring3p.jpg') - # Mix of 2 base64 alphabets + 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), + b'Anmeldung Netzanschluss S\xfcdring3p.jpg') + # mix of different base64 alphabets eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????') def test_encode(self):