From 2f20091d0c11466a1e1dcde847b4ce1f449015c0 Mon Sep 17 00:00:00 2001
From: anzarhw <71497005+anzarhw@users.noreply.github.com>
Date: Sun, 5 Oct 2025 17:50:24 +0530
Subject: [PATCH] add json connector

---
Review notes (this section is ignored by `git am`):
- JSONReader.value: the list branch tested `filepath.startswith(...)`, but `filepath` is
  only bound in the single-path branch, so a missing file inside a list raised NameError
  instead of yielding None. Each path is now resolved and checked on its own, and
  test_execute_invalid_path_multiple_json_reader covers that case.
- The `sep` argument of JSONReader is accepted but never used; it is now documented as such.
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy of this patch.
  Confirm the context lines of enums.py / flow_cli.py and the expected messages in
  test_execute_invalid_single_json_reader / test_execute_invalid_multiple_json_reader
  against the real tree before applying.
- NOTE(review): no `__init__.py` is touched although the tests import JSONReader from
  `onecode`; confirm the element is exported.
- The `index` blob ids of json_reader.py and test_json_reader.py predate this revision.

 onecode/base/enums.py                   |   3 +
 onecode/elements/input/json_reader.py   | 156 +++++++++
 tests/data/flow_1/data/input_test.json  |   5 +
 tests/unit/elements/test_json_reader.py | 409 ++++++++++++++++++++++++
 tests/utils/flow_cli.py                 |  18 ++
 5 files changed, 591 insertions(+)
 create mode 100644 onecode/elements/input/json_reader.py
 create mode 100644 tests/data/flow_1/data/input_test.json
 create mode 100644 tests/unit/elements/test_json_reader.py

diff --git a/onecode/base/enums.py b/onecode/base/enums.py
index 29165a0..0b98eb4 100644
--- a/onecode/base/enums.py
+++ b/onecode/base/enums.py
@@ -122,9 +122,12 @@ class FileFilter(tuple):
     * `PYTHON`:octicons-arrow-both-24: `("Python", ".py")`
     * `IMAGE` :octicons-arrow-both-24: `("Image", ".jpg .png .jpeg")`
     * `ZIP` :octicons-arrow-both-24: `("ZIP", ".zip .gz .tar.gz .7z")`
+    * `JSON` :octicons-arrow-both-24: `("JSON", ".json")`
+
     """
 
     CSV = ("CSV", ".csv") # noqa: E-221
     PYTHON = ("Python", ".py") # noqa: E-221
     IMAGE = ("Image", ".jpg .png .jpeg") # noqa: E-221
     ZIP = ("ZIP", ".zip .gz .tar.gz .7z") # noqa: E-221
+    JSON = ("JSON", ".json") # noqa: E-221
diff --git a/onecode/elements/input/json_reader.py b/onecode/elements/input/json_reader.py
new file mode 100644
index 0000000..0cfed68
--- /dev/null
+++ b/onecode/elements/input/json_reader.py
@@ -0,0 +1,156 @@
+# SPDX-FileCopyrightText: 2023-2024 DeepLime
+# SPDX-License-Identifier: MIT
+
+import os
+from typing import Any, Dict, List, Optional, Union
+
+import pandas as pd
+
+from ...base.decorator import check_type
+from ...base.project import Project
+from ..input_element import InputElement
+
+
+class JSONReader(InputElement):
+    @check_type
+    def __init__(
+        self,
+        key: str,
+        value: Optional[Union[str, List[str]]],
+        label: Optional[str] = None,
+        count: Optional[Union[int, str]] = None,
+        optional: Union[bool, str] = False,
+        hide_when_disabled: bool = False,
+        tags: Optional[List[str]] = None,
+        sep: Optional[str] = None,
+        **kwargs: Any
+    ):
+        """
+        A JSON-file reader returning a Pandas DataFrame.
+
+        Args:
+            key: ID of the element. It must be unique as it is the key used to store data in
+                Project(), otherwise it will lead to conflicts at runtime in execution mode.
+                The key will be transformed into snake case and slugified to avoid
+                any special character or whitespace. Note that an ID cannot start with `_`. Try to
+                choose a key that is meaningful for your context (see examples projects).
+            value: Path to the JSON file. The JSON file must exist.
+            label: Label to display on top of the table.
+            count: Placeholder, ignore until we activate this feature.
+            optional: Specify whether the `value` may be None.
+            hide_when_disabled: Placeholder, ignore until we activate this feature.
+            tags: Optional meta-data information about the expected file. This information is only
+                used by the `Mode.EXTRACT_ALL` when dumping attributes to JSON.
+            sep: Accepted but currently ignored: it is not forwarded to the JSON reader.
+            **kwargs: Extra user meta-data to attach to the element. Argument names cannot overwrite
+                existing attributes or methods name such as `_validate`, `_value`, etc.
+
+        Raises:
+            ValueError: if the `key` is empty or starts with `_`.
+            AttributeError: if one of the `kwargs` conflicts with an existing attribute or method.
+
+        !!! example
+            ```py
+            import pandas as pd
+            from onecode import json_reader, Mode, Project
+
+            Project().mode = Mode.EXECUTE
+            widget = json_reader(
+                key="JSONReader",
+                value="/path/to/file.json",
+                label="My JSON Reader",
+                tags=['JSON'],
+            )
+
+            pd.testing.assert_frame_equal(widget, pd.read_json("/path/to/file.json"))
+            ```
+
+        """
+        super().__init__(
+            key,
+            value,
+            label,
+            count,
+            optional,
+            hide_when_disabled,
+            tags=tags,
+            json_options={
+                "read_options": {},
+                "convert_options": {},
+            },
+            **kwargs
+        )
+
+    @staticmethod
+    def metadata(value: str) -> Dict:
+        """
+        Returns the metadata associated to the given JSON file.
+
+        Args:
+            value: Path to the JSON file, as accepted by `pd.read_json`.
+
+        Returns:
+            A metadata dictionary for the JSON path provided:
+            ```py
+            {
+                "columns": df.columns.to_list(),
+                "stats": df.describe().to_dict()
+            }
+            ```
+
+        """
+        df = pd.read_json(value)
+
+        meta = {
+            "columns": df.columns.to_list(),
+            "stats": df.describe().to_dict()
+        }
+
+        return meta
+
+    @property
+    def _value_type(self) -> type:
+        """
+        Get the JSONReader value type: Pandas DataFrame `pd.DataFrame`.
+
+        """
+        return pd.DataFrame
+
+    @property
+    def value(self) -> Optional[Union[pd.DataFrame, List[Optional[pd.DataFrame]]]]:
+        """
+        Returns:
+            The Pandas DataFrame loaded from the provided file path, otherwise None if the
+                file does not exist. When a list of paths is provided, a list with one entry
+                per path is returned, each entry being either a DataFrame or None.
+
+        """
+        def _load(path: str) -> Optional[pd.DataFrame]:
+            # Resolve every path individually so that the availability check applies to the
+            # very file about to be read.
+            filepath = Project().get_input_path(path)
+            if os.path.exists(filepath) or filepath.startswith('https://'):
+                return pd.read_json(filepath)
+
+            return None
+
+        if type(self._value) is str:
+            return _load(self._value)
+
+        if type(self._value) is list and all(type(v) is str for v in self._value):
+            return [_load(val) for val in self._value]
+
+        return None
+
+    @check_type
+    def _validate(
+        self,
+        value: pd.DataFrame
+    ) -> None:
+        """
+        Raises:
+            ValueError: if the DataFrame is empty.
+
+        """
+        if value.empty:
+            raise ValueError(f"[{self.key}] Empty dataframe")
diff --git a/tests/data/flow_1/data/input_test.json b/tests/data/flow_1/data/input_test.json
new file mode 100644
index 0000000..1516af5
--- /dev/null
+++ b/tests/data/flow_1/data/input_test.json
@@ -0,0 +1,5 @@
+[
+    { "x": 1.2, "y": 3.5, "z": 6.3 },
+    { "x": 4.3, "y": 5.6, "z": 8.1 },
+    { "x": 3.1, "y": 6.6, "z": 5.3 }
+]
diff --git a/tests/unit/elements/test_json_reader.py b/tests/unit/elements/test_json_reader.py
new file mode 100644
index 0000000..d80c3be
--- /dev/null
+++ b/tests/unit/elements/test_json_reader.py
@@ -0,0 +1,409 @@
+import os
+import shutil
+
+import pandas as pd
+import pytest
+
+from onecode import Mode, Project, JSONReader
+from tests.utils.flow_cli import (
+    _clean_flow,
+    _generate_json_file,
+    _generate_flow_name
+)
+
+
+def test_console_json_reader():
+    Project().mode = Mode.CONSOLE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True,
+        testdata="data"
+    )
+
+    assert type(widget()) == JSONReader
+    assert widget.testdata == "data"
+    assert widget.kind == "JSONReader"
+    assert widget.hide_when_disabled is False
+
+
+def test_execute_single_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json')
+
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=json_file
+    )
+
+    pd.testing.assert_frame_equal(widget(), pd.read_json(json_file))
+    assert widget.key == "JSONReader"
+    assert widget.label == "JSONReader"
+    assert widget._label == "JSONReader"
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_execute_multiple_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file_1 = _generate_json_file(folder_path, 'test1.json')
+    json_file_2 = _generate_json_file(folder_path, 'test2.json')
+
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=[json_file_1, json_file_2],
+        count=2
+    )
+
+    value = widget()
+    pd.testing.assert_frame_equal(value[0], pd.read_json(json_file_1))
+    pd.testing.assert_frame_equal(value[1], pd.read_json(json_file_2))
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_execute_optional_json_reader():
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True
+    )
+
+    assert widget() is None
+
+
+def test_execute_invalid_path_json_reader():
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value="nofile.json"
+    )
+
+    assert widget.value is None
+
+    with pytest.raises(ValueError) as excinfo:
+        widget()
+
+    assert "[JSONReader] Value is required: None provided" == str(excinfo.value)
+
+
+def test_execute_invalid_path_multiple_json_reader():
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=["nofile1.json", "nofile2.json"],
+        count=2
+    )
+
+    assert widget.value == [None, None]
+
+
+def test_execute_invalid_single_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json')
+
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=json_file,
+        count=1
+    )
+    with pytest.raises(TypeError) as excinfo:
+        widget()
+
+    assert """Invalid value A B C
+0 0 1 2
+1 3 4 5, expected: list()""" == str(excinfo.value)
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_execute_invalid_multiple_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file_1 = _generate_json_file(folder_path, 'test1.json')
+    json_file_2 = _generate_json_file(folder_path, 'test2.json')
+
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=[json_file_1, json_file_2],
+        count=None
+    )
+
+    with pytest.raises(TypeError) as excinfo:
+        widget()
+
+    assert """Invalid value type for [ A B C
+0 0 1 2
+1 3 4 5, A B C
+0 0 1 2
+1 3 4 5], expected: """ == str(excinfo.value)
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_execute_invalid_optional_json_reader():
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=False
+    )
+
+    with pytest.raises(ValueError) as excinfo:
+        widget()
+
+    assert "[JSONReader] Value is required: None provided" == str(excinfo.value)
+
+
+def test_build_gui_json_reader():
+    Project().mode = Mode.BUILD_GUI
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=["/path/to/file.json"],
+        label="My JSONReader",
+        optional="$x$",
+        count=2,
+        tags=["json"]
+    )
+
+    assert widget() == ('JSONReader', {
+        "key": "JSONReader",
+        "kind": "JSONReader",
+        "value": ["/path/to/file.json"],
+        "label": "My JSONReader",
+        "disabled": '$x$',
+        "optional": True,
+        "count": 2,
+        "tags": ["json"],
+        "json_options": {
+            "read_options": {},
+            "convert_options": {},
+        },
+        'metadata': True,
+        'depends_on': ['x']
+    })
+
+
+def test_extract_all_json_reader():
+    Project().mode = Mode.EXTRACT_ALL
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=["/path/to/file.json"],
+        label="My JSONReader",
+        optional="$x$",
+        count=2,
+        tags=["json"]
+    )
+
+    assert widget() == ('JSONReader', {
+        "key": "JSONReader",
+        "kind": "JSONReader",
+        "value": ["/path/to/file.json"],
+        "label": "My JSONReader",
+        "disabled": '$x$',
+        "optional": True,
+        "count": 2,
+        "tags": ["json"],
+        "json_options": {
+            "read_options": {},
+            "convert_options": {}
+        }
+    })
+
+
+def test_extract_all_json_reader_with_data():
+    Project().mode = Mode.EXTRACT_ALL
+    Project().data = {
+        "JSONReader": "/other_file.json"
+    }
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=["/path/to/file.json"],
+        label="My JSONReader",
+        optional="$x$",
+        count=2,
+        tags=["json"]
+    )
+
+    assert widget() == ('JSONReader', {
+        "key": "JSONReader",
+        "kind": "JSONReader",
+        "value": "/other_file.json",
+        "label": "My JSONReader",
+        "disabled": '$x$',
+        "optional": True,
+        "count": 2,
+        "tags": ["json"],
+        "json_options": {
+            "read_options": {},
+            "convert_options": {}
+        }
+    })
+
+
+def test_load_then_execute_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json')
+
+    Project().mode = Mode.LOAD_THEN_EXECUTE
+    Project().data = {
+        "JSONReader": json_file
+    }
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True
+    )
+
+    pd.testing.assert_frame_equal(widget(), pd.read_json(json_file))
+    assert widget.key == "JSONReader"
+    assert widget.label == "JSONReader"
+    assert widget._label == "JSONReader"
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_load_then_execute_json_reader_no_key():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json')
+
+    Project().mode = Mode.LOAD_THEN_EXECUTE
+    Project().data = {
+        "no_JSONReader": json_file
+    }
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True
+    )
+
+    assert widget() is None
+    assert widget.key == "JSONReader"
+    assert widget.label == "JSONReader"
+    assert widget._label == "JSONReader"
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_empty_json_reader():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json', empty=True)
+
+    Project().mode = Mode.EXECUTE
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=json_file
+    )
+
+    with pytest.raises(ValueError) as excinfo:
+        widget()
+
+    assert "[JSONReader] Empty dataframe" == str(excinfo.value)
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_json_reader_metadata():
+    _, folder, _ = _generate_flow_name()
+    tmp = _clean_flow(folder)
+    folder_path = os.path.join(tmp, folder)
+
+    json_file = _generate_json_file(folder_path, 'test.json')
+    metadata = JSONReader.metadata(json_file)
+
+    assert list(metadata.keys()) == ["columns", "stats"]
+    assert metadata["columns"] == ["A", "B", "C"]
+    assert isinstance(metadata["stats"], dict)
+
+    try:
+        shutil.rmtree(folder_path)
+    except Exception:
+        pass
+
+
+def test_json_reader_dependencies():
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True
+    )
+
+    assert widget.dependencies() == []
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional="len($df1$) > 1",
+    )
+
+    assert set(widget.dependencies()) == {"df1"}
+
+    widget = JSONReader(
+        key="JSONReader",
+        value=None,
+        optional=True,
+        count="len($df1$)"
+    )
+
+    assert set(widget.dependencies()) == {"df1"}
diff --git a/tests/utils/flow_cli.py b/tests/utils/flow_cli.py
index 592fca2..f83b031 100644
--- a/tests/utils/flow_cli.py
+++ b/tests/utils/flow_cli.py
@@ -2,6 +2,7 @@
 import shutil
 import tempfile
 import uuid
+import json
 
 from slugify import slugify
 
@@ -40,3 +41,20 @@ def _generate_csv_file(
         f.write("3,4,5\n")
 
     return out_file
+
+
+def _generate_json_file(flow: str, to_file: str, empty: bool = False) -> str:
+    out_file = os.path.join(flow, 'data', to_file)
+    os.makedirs(os.path.dirname(out_file), exist_ok=True)
+
+    data = []
+    if not empty:
+        data = [
+            {"A": 0, "B": 1, "C": 2},
+            {"A": 3, "B": 4, "C": 5}
+        ]
+
+    with open(out_file, 'w') as f:
+        json.dump(data, f, indent=4)
+
+    return out_file