2525import codecs
2626import datetime
2727from email import message_from_file
28- from email .header import Header , decode_header
28+ from email .header import decode_header , make_header
2929from email .utils import parsedate_tz , mktime_tz
3030from fnmatch import fnmatch
31- from functools import reduce
3231import logging
33- import operator
3432import re
3533import sys
3634
3937from django .contrib .auth .models import User
4038from django .utils .log import AdminEmailHandler
4139from django .utils import six
42- from django .utils .six .moves import map
4340
4441from patchwork .models import (Patch , Project , Person , Comment , State ,
4542 DelegationRule , get_default_initial_patch_state )
@@ -63,19 +60,84 @@ def normalise_space(str):
6360 return whitespace_re .sub (' ' , str ).strip ()
6461
6562
def sanitise_header(header_contents, header_name=None):
    """Clean an individual mail header.

    Given a header with header_contents, optionally labelled
    header_name, decode it with decode_header, sanitise it to make
    sure it decodes correctly and contains no invalid characters,
    then encode the result with make_header().

    Returns the email.header.Header built by make_header().
    """

    # We have some Py2/Py3 issues here.
    #
    # Firstly, the email parser (before we get here)
    #  Python 3: headers with weird chars are email.header.Header
    #            class, others as str
    #  Python 2: every header is an str
    #
    # Secondly, the behaviour of decode_header:
    #  Python 3: weird headers are labelled as unknown-8bit
    #  Python 2: weird headers are not labelled differently
    #
    # Lastly, making matters worse, in Python2, unknown-8bit doesn't
    # seem to be supported as an input to make_header, so not only do
    # we have to detect dodgy headers, we have to fix them ourselves.
    #
    # We solve this by catching any decoding errors, and then manually
    # handling any interesting headers.

    value = decode_header(header_contents)
    try:
        return make_header(value,
                           header_name=header_name,
                           continuation_ws='\t')
    except (UnicodeDecodeError, LookupError):
        # Either at least one of the parts cannot be decoded
        # (UnicodeDecodeError), or the coding hint names a charset we
        # have no codec for (LookupError). Fix it somehow.
        #
        # We get here under Py2 when there's non-7-bit chars in header,
        # or under Py2 or Py3 where decoding with the coding hint fails.
        pass

    # sys.version_info is used rather than six so this helper depends
    # only on the standard library.
    is_py3 = sys.version_info[0] >= 3

    new_value = []
    for (part, _coding) in value:
        # We have random bytes that aren't properly coded.
        # If we had a coding hint, it failed to help.
        if is_py3:
            # python3 - force coding to unknown-8bit
            new_value.append((part, 'unknown-8bit'))
        else:
            # python2 - no support in make_header for unknown-8bit
            # We should do unknown-8bit coding ourselves.
            # For now, we're just going to replace any dubious
            # chars with ?.
            #
            # TODO: replace it with a proper QP unknown-8bit codec.
            ascii_only = (part.decode('ascii', errors='replace')
                          .encode('ascii', errors='replace'))
            new_value.append((ascii_only, None))

    return make_header(new_value,
                       header_name=header_name,
                       continuation_ws='\t')
126+
127+
def clean_header(header):
    """Decode a (possibly non-ascii) header into normalised text.

    The header is first passed through sanitise_header(); the result is
    then converted to the native text type (unicode on Py2, str on
    Py3), which gives the decoded, un-wrapped header, and finally run
    through normalise_space().
    """
    # six.text_type is unicode on Py2 and str on Py3.
    decoded = six.text_type(sanitise_header(header))
    return normalise_space(decoded)
79141
80142
81143def find_project_by_id (list_id ):
@@ -168,10 +230,13 @@ def mail_date(mail):
168230
169231
def mail_headers(mail):
    """Return all headers of mail as one newline-separated string.

    Each header value is passed through sanitise_header() (labelled
    with its header name) and rendered as 'Name: encoded-value'.
    """
    lines = []
    for name, raw_value in mail.items():
        clean = sanitise_header(raw_value, header_name=name)
        lines.append('%s: %s' % (name, clean.encode()))

    return '\n'.join(lines)
175240
176241
177242def find_pull_request (content ):
0 commit comments