From c3a864246b682f34377d72dec7795953a1684d6a Mon Sep 17 00:00:00 2001 From: Roman Legonkov Date: Sat, 21 Dec 2024 22:21:22 +0300 Subject: [PATCH 1/5] if column.model not provided in visual app then set default model name --- rectools/visuals/visual_app.py | 6 ++++-- tests/visuals/test_visual_app.py | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/rectools/visuals/visual_app.py b/rectools/visuals/visual_app.py index 2de67abf..8035d773 100644 --- a/rectools/visuals/visual_app.py +++ b/rectools/visuals/visual_app.py @@ -29,6 +29,7 @@ MIN_WIDTH_LIMIT = 10 REQUEST_NAMES_COL = "request_name" REQUEST_IDS_COL = "request_id" +DEFAULT_MODEL_NAME = "model1" VisualAppT = tp.TypeVar("VisualAppT", bound="VisualAppBase") @@ -71,7 +72,8 @@ def from_raw( ---------- reco : tp.Union[pd.DataFrame, TablesDict] Recommendations from different models in a form of a pd.DataFrame or a dict. - In DataFrame form model names must be specified in `Columns.Model` column. In dict form + In DataFrame form model names must be specified in `Columns.Model` column. + If not, `Columns.Model` column will be created with default value ``model1``. In dict form model names are supposed to be dict keys. 
item_data : pd.DataFrame Data for items that is used for visualisation in both interactions and recommendations @@ -100,7 +102,7 @@ def from_raw( if isinstance(reco, pd.DataFrame): if Columns.Model not in reco.columns: - raise KeyError("Missing `{Columns.Model}` column in `reco` DataFrame") + reco[Columns.Model] = DEFAULT_MODEL_NAME reco = cls._df_to_tables_dict(reco, Columns.Model) cls._check_columns_present_in_reco(reco=reco, id_col=id_col) diff --git a/tests/visuals/test_visual_app.py b/tests/visuals/test_visual_app.py index ab724f7f..34c0819e 100644 --- a/tests/visuals/test_visual_app.py +++ b/tests/visuals/test_visual_app.py @@ -51,7 +51,7 @@ INTERACTIONS = pd.DataFrame({Columns.User: [1, 1, 2], Columns.Item: [3, 7, 8]}) SELECTED_REQUESTS_U2I: tp.Dict[tp.Hashable, tp.Hashable] = {"user_one": 1, "user_three": 3} SELECTED_REQUESTS_I2I: tp.Dict[tp.Hashable, tp.Hashable] = {"item_three": 3} - +DEFAULT_MODEL_NAME = "model1" def check_data_storages_equal(one: AppDataStorage, two: AppDataStorage) -> None: assert one.id_col == two.id_col @@ -229,17 +229,17 @@ def test_missing_columns_validation(self) -> None: ) # Missing `Columns.Model` in reco pd.DataFrame - with pytest.raises(KeyError): - incorrect_reco = pd.DataFrame( - {Columns.User: [1, 2, 3, 4], Columns.Item: [3, 4, 3, 4], Columns.Score: [0.99, 0.9, 0.5, 0.5]} - ) - AppDataStorage.from_raw( - reco=incorrect_reco, - item_data=ITEM_DATA, - interactions=INTERACTIONS, - is_u2i=True, - selected_requests=SELECTED_REQUESTS_U2I, - ) + incorrect_reco = pd.DataFrame( + {Columns.User: [1, 2, 3, 4], Columns.Item: [3, 4, 3, 4], Columns.Score: [0.99, 0.9, 0.5, 0.5]} + ) + ads = AppDataStorage.from_raw( + reco=incorrect_reco, + item_data=ITEM_DATA, + interactions=INTERACTIONS, + is_u2i=True, + selected_requests=SELECTED_REQUESTS_U2I, + ) + assert "model1" in ads.model_names def test_incorrect_interactions_for_reco_case(self) -> None: From 424ef0d81df7f8e4f9a419b9c2c556ba78326563 Mon Sep 17 00:00:00 2001 From: Roman Legonkov 
Date: Sat, 21 Dec 2024 22:36:21 +0300 Subject: [PATCH 2/5] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac9a48cf..f4514fe5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- If `Columns.Model` is not provided in `VisualApp`, then a default model name is added to the dataframe ([#118](https://github.com/MobileTeleSystems/RecTools/pull/235)) +- Changed tests for `VisualApp` missing columns ## [0.9.0] - 11.12.2024 From 1af5f3a1a1d7c04a7e4f1a951e760d5462425d81 Mon Sep 17 00:00:00 2001 From: Roman Legonkov Date: Sat, 21 Dec 2024 22:40:15 +0300 Subject: [PATCH 3/5] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4514fe5..2bff1ce8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- If `Columns.Model` is not provided in `VisualApp`, then a default model name is added to the dataframe ([#118](https://github.com/MobileTeleSystems/RecTools/pull/235)) +- If `Columns.Model` is not provided in `VisualApp`, then a default model name is added to the dataframe ([#235](https://github.com/MobileTeleSystems/RecTools/pull/235)) - Changed tests for `VisualApp` missing columns ## [0.9.0] - 11.12.2024 From 3dc45d6382e4fbc674396b0d812ac81a9e9c3e51 Mon Sep 17 00:00:00 2001 From: Roman Legonkov Date: Tue, 24 Dec 2024 21:40:29 +0300 Subject: [PATCH 4/5] review changes --- CHANGELOG.md | 1 - rectools/visuals/visual_app.py | 2 +- tests/visuals/test_visual_app.py | 32 +++++++++++++++++++++++++------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 2bff1ce8..af5ce099 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - If `Columns.Model` is not provided in `VisualApp`, then a default model name is added to the dataframe ([#235](https://github.com/MobileTeleSystems/RecTools/pull/235)) -- Changed tests for `VisualApp` missing columns ## [0.9.0] - 11.12.2024 diff --git a/rectools/visuals/visual_app.py b/rectools/visuals/visual_app.py index 8035d773..a2a6d89d 100644 --- a/rectools/visuals/visual_app.py +++ b/rectools/visuals/visual_app.py @@ -29,7 +29,7 @@ MIN_WIDTH_LIMIT = 10 REQUEST_NAMES_COL = "request_name" REQUEST_IDS_COL = "request_id" -DEFAULT_MODEL_NAME = "model1" +DEFAULT_MODEL_NAME = "model" VisualAppT = tp.TypeVar("VisualAppT", bound="VisualAppBase") diff --git a/tests/visuals/test_visual_app.py b/tests/visuals/test_visual_app.py index 34c0819e..abdf6bc7 100644 --- a/tests/visuals/test_visual_app.py +++ b/tests/visuals/test_visual_app.py @@ -21,7 +21,12 @@ import pytest from rectools import Columns, ExternalId -from rectools.visuals.visual_app import AppDataStorage, ItemToItemVisualApp, StorageFiles, TablesDict, VisualApp +from rectools.visuals.visual_app import (AppDataStorage, + ItemToItemVisualApp, + StorageFiles, + TablesDict, + VisualApp, + DEFAULT_MODEL_NAME) RECO_U2I: TablesDict = { "model1": pd.DataFrame( @@ -51,7 +56,6 @@ INTERACTIONS = pd.DataFrame({Columns.User: [1, 1, 2], Columns.Item: [3, 7, 8]}) SELECTED_REQUESTS_U2I: tp.Dict[tp.Hashable, tp.Hashable] = {"user_one": 1, "user_three": 3} SELECTED_REQUESTS_I2I: tp.Dict[tp.Hashable, tp.Hashable] = {"item_three": 3} -DEFAULT_MODEL_NAME = "model1" def check_data_storages_equal(one: AppDataStorage, two: AppDataStorage) -> None: assert one.id_col == two.id_col @@ -183,7 +187,6 @@ def test_empty_selected_requests(self, selected_requests: tp.Optional[tp.Dict[tp assert "random_2" in ads.selected_requests def 
test_missing_columns_validation(self) -> None: - # Missing `Columns.User` for u2i with pytest.raises(KeyError): incorrect_u2i_reco: TablesDict = { @@ -228,18 +231,33 @@ def test_missing_columns_validation(self) -> None: selected_requests=SELECTED_REQUESTS_U2I, ) - # Missing `Columns.Model` in reco pd.DataFrame - incorrect_reco = pd.DataFrame( + def test_successful_path_with_missing_model(self) -> None: + # Missing `Columns.Model` + reco_without_model = pd.DataFrame( {Columns.User: [1, 2, 3, 4], Columns.Item: [3, 4, 3, 4], Columns.Score: [0.99, 0.9, 0.5, 0.5]} ) ads = AppDataStorage.from_raw( - reco=incorrect_reco, + reco=reco_without_model, item_data=ITEM_DATA, interactions=INTERACTIONS, is_u2i=True, selected_requests=SELECTED_REQUESTS_U2I, ) - assert "model1" in ads.model_names + expected_grouped_reco = { + "model": { + "user_one": pd.DataFrame( + {Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.99]} + ), + "user_three": pd.DataFrame( + {Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.5]} + ) + } + } + assert expected_grouped_reco.keys() == ads.grouped_reco.keys() + for model_name, model_reco in expected_grouped_reco.items(): + assert model_reco.keys() == ads.grouped_reco[model_name].keys() + for user_name, user_reco in model_reco.items(): + pd.testing.assert_frame_equal(user_reco, ads.grouped_reco[model_name][user_name]) def test_incorrect_interactions_for_reco_case(self) -> None: From c9814c94d97b7652b2407364b3f81572527d6cf8 Mon Sep 17 00:00:00 2001 From: Roman Legonkov Date: Fri, 10 Jan 2025 12:30:02 +0300 Subject: [PATCH 5/5] fixed imports --- tests/visuals/test_visual_app.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/visuals/test_visual_app.py b/tests/visuals/test_visual_app.py index abdf6bc7..eb05ada3 100644 --- a/tests/visuals/test_visual_app.py +++ b/tests/visuals/test_visual_app.py @@ -21,12 +21,14 @@ import pytest from rectools import Columns, ExternalId -from 
rectools.visuals.visual_app import (AppDataStorage, - ItemToItemVisualApp, - StorageFiles, - TablesDict, - VisualApp, - DEFAULT_MODEL_NAME) +from rectools.visuals.visual_app import ( + DEFAULT_MODEL_NAME, + AppDataStorage, + ItemToItemVisualApp, + StorageFiles, + TablesDict, + VisualApp, +) RECO_U2I: TablesDict = { "model1": pd.DataFrame( @@ -57,6 +59,7 @@ SELECTED_REQUESTS_U2I: tp.Dict[tp.Hashable, tp.Hashable] = {"user_one": 1, "user_three": 3} SELECTED_REQUESTS_I2I: tp.Dict[tp.Hashable, tp.Hashable] = {"item_three": 3} + def check_data_storages_equal(one: AppDataStorage, two: AppDataStorage) -> None: assert one.id_col == two.id_col assert one.is_u2i == two.is_u2i @@ -244,13 +247,9 @@ def test_successful_path_with_missing_model(self) -> None: selected_requests=SELECTED_REQUESTS_U2I, ) expected_grouped_reco = { - "model": { - "user_one": pd.DataFrame( - {Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.99]} - ), - "user_three": pd.DataFrame( - {Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.5]} - ) + DEFAULT_MODEL_NAME: { + "user_one": pd.DataFrame({Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.99]}), + "user_three": pd.DataFrame({Columns.Item: [3], "feature_1": ["one"], Columns.Score: [0.5]}), } } assert expected_grouped_reco.keys() == ads.grouped_reco.keys()