Skip to content

Commit e5ef16e

Browse files
committed
fix: Reconfigure the encoding of standard input according to the --encoding option, closes #1038
1 parent 12be2ff commit e5ef16e

File tree

14 files changed

+55
-51
lines changed

14 files changed

+55
-51
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Unreleased
1010
* :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
1111
* :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places (the maximum precision).
1212
* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
13+
* fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable.
1314
* fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`.
1415
* fix: No longer errors if a NUL byte occurs in an input file.
1516
* Add Python 3.12 support.

csvkit/cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,14 @@ def _init_common_parser(self):
233233
'-V', '--version', action='version', version='%(prog)s 1.2.0',
234234
help='Display version information and exit.')
235235

236-
def _open_input_file(self, path):
236+
def _open_input_file(self, path, opened=False):
237237
"""
238238
Open the input file specified on the command line.
239239
"""
240240
if not path or path == '-':
241+
# Reconfigure stdin only before it is first read; otherwise Python raises "UnsupportedOperation: It is not possible to set the encoding or newline of stream after the first read".
242+
if not opened:
243+
sys.stdin.reconfigure(encoding=self.args.encoding)
241244
f = sys.stdin
242245
else:
243246
extension = splitext(path)[1]

csvkit/utilities/csvstack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def main(self):
108108
output.writerow(headers)
109109

110110
for i, path in enumerate(self.args.input_paths):
111-
f = self._open_input_file(path)
111+
f = self._open_input_file(path, opened=True)
112112
file_is_stdin = path == '-'
113113

114114
if has_groups:

tests/test_convert/test_fixed.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from io import StringIO
1+
import io
22

33
from csvkit.convert import fixed
44
from csvkit.utilities.in2csv import In2CSV
@@ -23,7 +23,7 @@ def test_fixed_skip_lines(self):
2323
self.assertEqual(f.read(), output)
2424

2525
def test_fixed_no_inference(self):
26-
input_file = StringIO(' 1 2 3')
26+
input_file = io.BytesIO(b' 1 2 3')
2727

2828
with stdin_as_string(input_file):
2929
self.assertLines(['--no-inference', '-f', 'fixed', '--schema',
@@ -36,7 +36,7 @@ def test_fixed_no_inference(self):
3636

3737
def test_fixed_streaming(self):
3838
with open('examples/testfixed') as f, open('examples/testfixed_schema.csv') as schema:
39-
output_file = StringIO()
39+
output_file = io.StringIO()
4040
fixed.fixed2csv(f, schema, output=output_file)
4141
output = output_file.getvalue()
4242
output_file.close()
@@ -91,7 +91,7 @@ def test_schematic_line_parser(self):
9191
bar,6,2
9292
baz,8,5"""
9393

94-
f = StringIO(schema)
94+
f = io.StringIO(schema)
9595
parser = fixed.FixedWidthRowParser(f)
9696
f.close()
9797

tests/test_utilities/test_csvclean.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvclean import CSVClean, launch_new_instance
@@ -17,7 +17,7 @@ def tearDown(self):
1717

1818
def assertCleaned(self, basename, output_lines, error_lines, additional_args=[]):
1919
args = [f'examples/{basename}.csv'] + additional_args
20-
output_file = StringIO()
20+
output_file = io.StringIO()
2121

2222
utility = CSVClean(args, output_file)
2323
utility.run()

tests/test_utilities/test_csvformat.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvformat import CSVFormat, launch_new_instance
@@ -54,7 +54,7 @@ def test_tab_delimiter(self):
5454
])
5555

5656
def test_quotechar(self):
57-
input_file = StringIO('a,b,c\n1*2,3,4\n')
57+
input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
5858

5959
with stdin_as_string(input_file):
6060
self.assertLines(['-Q', '*'], [
@@ -65,7 +65,7 @@ def test_quotechar(self):
6565
input_file.close()
6666

6767
def test_doublequote(self):
68-
input_file = StringIO('a\n"a ""quoted"" string"')
68+
input_file = io.BytesIO(b'a\n"a ""quoted"" string"')
6969

7070
with stdin_as_string(input_file):
7171
self.assertLines(['-P', '#', '-B'], [
@@ -76,7 +76,7 @@ def test_doublequote(self):
7676
input_file.close()
7777

7878
def test_escapechar(self):
79-
input_file = StringIO('a,b,c\n1"2,3,4\n')
79+
input_file = io.BytesIO(b'a,b,c\n1"2,3,4\n')
8080

8181
with stdin_as_string(input_file):
8282
self.assertLines(['-P', '#', '-U', '3'], [

tests/test_utilities/test_csvjson.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import json
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvjson import CSVJSON, launch_new_instance
@@ -58,7 +58,7 @@ def test_keying(self):
5858
self.assertDictEqual(js, {'True': {'a': True, 'c': 3.0, 'b': 2.0}})
5959

6060
def test_duplicate_keys(self):
61-
output_file = StringIO()
61+
output_file = io.StringIO()
6262
utility = CSVJSON(['-k', 'a', 'examples/dummy3.csv'], output_file)
6363
self.assertRaisesRegex(ValueError,
6464
'Value True is not unique in the key column.',

tests/test_utilities/test_csvlook.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvlook import CSVLook, launch_new_instance
@@ -127,7 +127,7 @@ def test_max_column_width(self):
127127
])
128128

129129
def test_stdin(self):
130-
input_file = StringIO('a,b,c\n1,2,3\n4,5,6\n')
130+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n4,5,6\n')
131131

132132
with stdin_as_string(input_file):
133133
self.assertLines([], [

tests/test_utilities/test_csvsort.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvsort import CSVSort, launch_new_instance
@@ -78,7 +78,7 @@ def test_sort_t_and_nulls(self):
7878
self.assertEqual(test_order, new_order)
7979

8080
def test_stdin(self):
81-
input_file = StringIO('a,b,c\n4,5,6\n1,2,3\n')
81+
input_file = io.BytesIO(b'a,b,c\n4,5,6\n1,2,3\n')
8282

8383
with stdin_as_string(input_file):
8484
self.assertLines([], [

tests/test_utilities/test_csvsql.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from textwrap import dedent
55
from unittest.mock import patch
66

@@ -108,7 +108,7 @@ def test_linenumbers(self):
108108
''')) # noqa: W291
109109

110110
def test_stdin(self):
111-
input_file = StringIO('a,b,c\n4,2,3\n')
111+
input_file = io.BytesIO(b'a,b,c\n4,2,3\n')
112112

113113
with stdin_as_string(input_file):
114114
sql = self.get_output(['--tables', 'foo'])
@@ -124,7 +124,7 @@ def test_stdin(self):
124124
input_file.close()
125125

126126
def test_stdin_and_filename(self):
127-
input_file = StringIO("a,b,c\n1,2,3\n")
127+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
128128

129129
with stdin_as_string(input_file):
130130
sql = self.get_output(['-', 'examples/dummy.csv'])
@@ -135,7 +135,7 @@ def test_stdin_and_filename(self):
135135
input_file.close()
136136

137137
def test_query(self):
138-
input_file = StringIO("a,b,c\n1,2,3\n")
138+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
139139

140140
with stdin_as_string(input_file):
141141
sql = self.get_output(['--query', 'SELECT m.usda_id, avg(i.sepal_length) AS mean_sepal_length FROM iris '
@@ -150,7 +150,7 @@ def test_query(self):
150150
input_file.close()
151151

152152
def test_query_empty(self):
153-
input_file = StringIO()
153+
input_file = io.BytesIO()
154154

155155
with stdin_as_string(input_file):
156156
output = self.get_output(['--query', 'SELECT 1'])
@@ -185,14 +185,14 @@ def test_before_after_insert(self):
185185
'SELECT 1; CREATE TABLE foobar (date DATE)', '--after-insert',
186186
'INSERT INTO dummy VALUES (0, 5, 6)'])
187187

188-
output_file = StringIO()
188+
output_file = io.StringIO()
189189
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM foobar'], output_file)
190190
utility.run()
191191
output = output_file.getvalue()
192192
output_file.close()
193193
self.assertEqual(output, 'date\n')
194194

195-
output_file = StringIO()
195+
output_file = io.StringIO()
196196
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM dummy'], output_file)
197197
utility.run()
198198
output = output_file.getvalue()

0 commit comments

Comments
 (0)