diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst index faa61cf4bd3bc..03244c808ad03 100644 --- a/doc/source/whatsnew/v2.3.2.rst +++ b/doc/source/whatsnew/v2.3.2.rst @@ -22,7 +22,10 @@ become the default string dtype in pandas 3.0. See Bug fixes ^^^^^^^^^ -- +- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the + "string" type in the JSON Table Schema for :class:`StringDtype` columns + (:issue:`61889`) + .. --------------------------------------------------------------------------- .. _whatsnew_232.contributors: diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 7879be18b52c9..3a98189ac1a96 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -90,8 +90,6 @@ def as_json_table_type(x: DtypeObj) -> str: return "datetime" elif lib.is_np_dtype(x, "m"): return "duration" - elif isinstance(x, ExtensionDtype): - return "any" elif is_string_dtype(x): return "string" else: @@ -197,7 +195,7 @@ def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype: """ typ = field["type"] if typ == "string": - return "object" + return field.get("extDtype", None) elif typ == "integer": return field.get("extDtype", "int64") elif typ == "number": diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 7936982e4a055..ee9f9a45d8343 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -70,7 +70,7 @@ def test_build_table_schema(self, df_schema, using_infer_string): "primaryKey": ["idx"], } if using_infer_string: - expected["fields"][2] = {"name": "B", "type": "any", "extDtype": "str"} + expected["fields"][2] = {"name": "B", "type": "string", "extDtype": "str"} assert result == expected result = build_table_schema(df_schema) assert "pandas_version" in result @@ -120,10 +120,10 @@ def test_multiindex(self, df_schema, using_infer_string): if using_infer_string: expected["fields"][0] = { "name": "level_0", - "type": "any", + "type": "string", "extDtype": "str", } - expected["fields"][3] = {"name": "B", "type": "any", "extDtype": "str"} + expected["fields"][3] = {"name": "B", "type": "string", "extDtype": "str"} assert result == expected df.index.names = ["idx0", None] @@ -303,7 +303,7 @@ def test_to_json(self, df_table, using_infer_string): ] if using_infer_string: - fields[2] = {"name": "B", "type": "any", "extDtype": "str"} + fields[2] = {"name": "B", "type": "string", "extDtype": "str"} schema = {"fields": fields, "primaryKey": ["idx"]} data = [ @@ -547,7 +547,7 @@ def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered): }, CategoricalDtype(categories=["a", "b", "c"], ordered=True), ), - ({"type": "string"}, "object"), + ({"type": "string"}, None), ], ) def test_convert_json_field_to_pandas_type(self, inp, exp): diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index 12ae24b064c9d..2b775a43aa321 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -50,7 +50,7 @@ def test_build_table_schema(self): {"name": "index", "type": "integer"}, {"name": "A", "type": "any", "extDtype": "DateDtype"}, {"name": "B", "type": "number", "extDtype": "decimal"}, - {"name": "C", "type": "any", "extDtype": "string"}, + {"name": "C", "type": "string", "extDtype": "string"}, {"name": "D", "type": "integer", "extDtype": "Int64"}, ], "primaryKey": ["index"], @@ -80,10 +80,10 @@ def test_as_json_table_type_ext_decimal_dtype(self): @pytest.mark.parametrize("box", [lambda x: x, Series]) def test_as_json_table_type_ext_string_array_dtype(self, box): string_data = box(array(["pandas"], dtype="string")) - assert as_json_table_type(string_data.dtype) == "any" + assert as_json_table_type(string_data.dtype) == "string" def test_as_json_table_type_ext_string_dtype(self): - assert as_json_table_type(StringDtype()) == "any" + assert as_json_table_type(StringDtype()) == "string" @pytest.mark.parametrize("box", [lambda x: x, Series]) def test_as_json_table_type_ext_integer_array_dtype(self, box): @@ -176,7 +176,7 @@ def test_build_string_series(self, sa): fields = [ {"name": "id", "type": "integer"}, - {"name": "a", "type": "any", "extDtype": "string"}, + {"name": "a", "type": "string", "extDtype": "string"}, ] schema = {"fields": fields, "primaryKey": ["id"]} @@ -235,7 +235,7 @@ def test_to_json(self, da, dc, sa, ia): OrderedDict({"name": "idx", "type": "integer"}), OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}), OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}), - OrderedDict({"name": "C", "type": "any", "extDtype": "string"}), + OrderedDict({"name": "C", "type": "string", "extDtype": "string"}), OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}), ]