Skip to content

Conversation

khemkaran10
Copy link
Contributor

Before Fix:

>>> pd.read_csv(
...     io.StringIO("1,2,3"),
...     names=["col1", "col2", "col3"],
...     engine="pyarrow",
... )
---------------------------------------------------------------------------
ArrowInvalid     
...
ParserError: CSV parse error: Empty CSV file or block: cannot infer number of columns

After Fix:

>>> pd.read_csv(
...     io.StringIO("1,2,3"),
...     names=["col1", "col2", "col3"],
...     engine="pyarrow",
... )
---------------------------------------------------------------------------
    col1  col2  col3
0     1     2     3

@Alvaro-Kothe
Copy link
Contributor

There are some tests that are skipping pyarrow because of this. Can you try removing the skips?

@skip_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
    """Read one header-less row of float literals and compare with float().

    See gh-9565.
    """
    raw = "45e-1,4.5,45.,inf,-inf"
    # Python's own float() on each comma-separated field gives the reference row.
    row = [float(field) for field in raw.split(",")]
    expected = DataFrame([row])
    result = all_parsers.read_csv(StringIO(raw), header=None)
    tm.assert_frame_equal(result, expected)

@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
def test_int64_uint64_range(all_parsers, val):
    """Read a single header-less value at an int64/uint64 limit.

    The values sit right inside the int64-uint64 range, so the parsed
    frame is expected to equal ``DataFrame([val])`` (i.e. an integer).
    """
    text = str(val)
    result = all_parsers.read_csv(StringIO(text), header=None)
    tm.assert_frame_equal(result, DataFrame([val]))
@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range(all_parsers, val, request):
    """Read a single header-less value just beyond the int64/uint64 limits.

    The values fall just outside the int64-uint64 range, so they should be
    parsed as object; the parsed frame is compared with ``DataFrame([val])``.
    """
    if all_parsers.engine == "python":
        # Mark the python-engine case as an expected failure before reading.
        request.applymarker(
            pytest.mark.xfail(reason="TODO: Python engine reads bigint as string")
        )
    text = str(val)
    result = all_parsers.read_csv(StringIO(text), header=None)
    tm.assert_frame_equal(result, DataFrame([val]))
@skip_pyarrow  # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
    "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
def test_outside_int64_uint64_range_follow_str(all_parsers, val):
    """Read an out-of-range integer followed by a text row, without a header.

    Both rows are expected back as strings: ``str(val)`` and ``"abc"``.
    """
    payload = f"{val}\nabc"
    expected_rows = [str(val), "abc"]
    result = all_parsers.read_csv(StringIO(payload), header=None)
    tm.assert_frame_equal(result, DataFrame(expected_rows))

@khemkaran10 khemkaran10 marked this pull request as draft October 10, 2025 11:56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

BUG: read_csv with pyarrow engine cannot handle single-line CSV files
2 participants