From 757fc7e81fb0be8e1b95b47e52ed0eef7c9db77e Mon Sep 17 00:00:00 2001 From: ScottFB Date: Fri, 8 Aug 2025 15:10:41 -0700 Subject: [PATCH 01/26] allow hiding of Mean, Median, and SD columns --- gt_extras/summary.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index b2cd42a1..84dfa714 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -29,7 +29,9 @@ FONT_SIZE_RATIO = 0.2 # height_px / 5 -def gt_plt_summary(df: IntoDataFrame, title: str | None = None) -> GT: +def gt_plt_summary( + df: IntoDataFrame, title: str | None = None, hide_stats: bool = False +) -> GT: """ Create a comprehensive data summary table with visualizations. @@ -119,7 +121,10 @@ def gt_plt_summary(df: IntoDataFrame, title: str | None = None) -> GT: summary table. Keep in mind that sometimes pandas or polars have differing behaviors with datatypes, especially when null values are present. """ + breakpoint() summary_df = _create_summary_df(df) + if hide_stats: + summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -139,13 +144,10 @@ def gt_plt_summary(df: IntoDataFrame, title: str | None = None) -> GT: gt = ( GT(summary_df) .tab_header(title=title, subtitle=subtitle) - # handle missing - .sub_missing(columns=["Mean", "Median", "SD"]) # Add visuals .fmt(_make_icon_html, columns="Type") # Format numerics .fmt_percent(columns="Missing", decimals=1) - .fmt_number(columns=["Mean", "Median", "SD"], rows=numeric_cols) .tab_style( style=style.text(weight="bold"), locations=loc.body(columns="Column"), @@ -154,6 +156,14 @@ def gt_plt_summary(df: IntoDataFrame, title: str | None = None) -> GT: .cols_align(align="center", columns="Plot Overview") ) + if not hide_stats: + gt = ( + # handle missing + gt.sub_missing(columns=["Mean", "Median", "SD"]).fmt_number( + columns=["Mean", "Median", "SD"], rows=numeric_cols + ) + ) + gt = 
gt_theme_espn(gt) for i, col_name in enumerate(nw_summary_df.get_column("Column")): From 9923eb765205e464b8d678db279fc68638b7e800 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Fri, 8 Aug 2025 22:41:34 -0700 Subject: [PATCH 02/26] Add mode calculation to summary table Introduces a 'Mode' column to the summary table in gt_plt_summary, with logic to handle cases with no singular mode or multiple modes. Updates function arguments to allow hiding descriptive stats and mode, and adjusts .gitignore to exclude VSCode launch configuration. --- .gitignore | 1 + gt_extras/summary.py | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index d347d5e6..259785d5 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,4 @@ sandbox.* possible_feats.* compare_themes.py compare_tables.html +.vscode/launch.json diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 84dfa714..10677d16 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -30,7 +30,10 @@ def gt_plt_summary( - df: IntoDataFrame, title: str | None = None, hide_stats: bool = False + df: IntoDataFrame, + title: str | None = None, + hide_desc_stats: bool = False, + add_mode: bool = False, ) -> GT: """ Create a comprehensive data summary table with visualizations. @@ -121,10 +124,9 @@ def gt_plt_summary( summary table. Keep in mind that sometimes pandas or polars have differing behaviors with datatypes, especially when null values are present. 
""" - breakpoint() summary_df = _create_summary_df(df) - if hide_stats: - summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) + if hide_desc_stats: + summary_df = summary_df.drop(columns=["Mean", "Median", "SD", "Mode"]) # type: ignore nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -156,11 +158,14 @@ def gt_plt_summary( .cols_align(align="center", columns="Plot Overview") ) - if not hide_stats: + if not hide_desc_stats: gt = ( # handle missing - gt.sub_missing(columns=["Mean", "Median", "SD"]).fmt_number( - columns=["Mean", "Median", "SD"], rows=numeric_cols + gt.sub_missing( + columns=["Mean", "Median", "SD", "Mode"] + ).fmt_number( # Mode? + columns=["Mean", "Median", "SD"], + rows=numeric_cols, # Mode? ) ) @@ -200,6 +205,7 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: "Mean": [], "Median": [], "SD": [], + "Mode": [], } for col_name in nw_df.columns: @@ -208,6 +214,7 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: mean_val = None median_val = None std_val = None + mode_val = None clean_col = _clean_series(col, col.dtype.is_numeric()) @@ -222,6 +229,15 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: mean_val = clean_col.mean() median_val = clean_col.median() std_val = clean_col.std() + mode_val = clean_col.mode() + # If lengths are the same there's no mode, likely due to continuous data input. 
+ if len(mode_val) == len(clean_col): + mode_val = "No Singular Mode" + # Limiting the number of modes displayed to two at maximum + elif len(mode_val) > 2: + mode_val = "Greater than 2 Modes" + else: + mode_val = ", ".join(str(i) for i in mode_val.to_list()) elif col.dtype == nw.String: col_type = "string" @@ -243,6 +259,7 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: summary_data["Mean"].append(mean_val) summary_data["Median"].append(median_val) summary_data["SD"].append(std_val) + summary_data["Mode"].append(mode_val) summary_nw_df = nw.from_dict(summary_data, backend=nw_df.implementation) return summary_nw_df.to_native() From 6b90ffdf1b53ef74579f742fdccd3d3e032655e7 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 16:27:54 -0700 Subject: [PATCH 03/26] Refactor summary stats column handling Mode column is now conditionally dropped based on the add_mode flag, and descriptive stats columns are managed more flexibly. This improves clarity and control over which summary statistics are displayed. --- gt_extras/summary.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 10677d16..c02348ee 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -125,8 +125,12 @@ def gt_plt_summary( datatypes, especially when null values are present. """ summary_df = _create_summary_df(df) + + if not add_mode: + summary_df = summary_df.drop(columns=["Mode"]) # type: ignore + if hide_desc_stats: - summary_df = summary_df.drop(columns=["Mean", "Median", "SD", "Mode"]) # type: ignore + summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) # type: ignore nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -162,10 +166,10 @@ def gt_plt_summary( gt = ( # handle missing gt.sub_missing( - columns=["Mean", "Median", "SD", "Mode"] - ).fmt_number( # Mode? 
+ columns=["Mean", "Median", "SD"] + (["Mode"] if add_mode else []) + ).fmt_number( columns=["Mean", "Median", "SD"], - rows=numeric_cols, # Mode? + rows=numeric_cols, ) ) From a01e0830965b964f3cfec6e97f621885ffc2971f Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 16:32:55 -0700 Subject: [PATCH 04/26] Descriptions of new boolean variables --- gt_extras/summary.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index c02348ee..64f56989 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -55,6 +55,12 @@ def gt_plt_summary( title Optional title for the summary table. If `None`, defaults to "Summary Table". + hide_desc_stats + Boolean that allows the hiding of the Mean, Median, and SD columns. Defaults to 'False'. + + add_mode + Boolean that allows the addition of a Mode column. Defaults to 'False'. + Returns ------- GT From cbdf59e05046fb85511da61e06606f3e9a0219a4 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 17:02:53 -0700 Subject: [PATCH 05/26] Add interactivity option to summary plots Introduces an 'interactivity' parameter to summary plot functions, allowing toggling of interactive features such as hover tooltips and CSS. All relevant plotting and SVG generation functions now accept and respect this parameter, enabling more flexible plot rendering. NOTE: The code that creates the tooltips still runs, it just isn't appended to the SVG Element. --- gt_extras/summary.py | 75 ++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 64f56989..abf8f59e 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -34,6 +34,7 @@ def gt_plt_summary( title: str | None = None, hide_desc_stats: bool = False, add_mode: bool = False, + interactivity: bool = False, ) -> GT: """ Create a comprehensive data summary table with visualizations. 
@@ -194,6 +195,7 @@ def gt_plt_summary( nw_series=vals, col_type=col_type, plot_id=plot_id, + interactivity=interactivity, ), columns="Plot Overview", rows=i, @@ -302,6 +304,7 @@ def _make_summary_plot( nw_series: nw.Series, col_type: str, plot_id: str, + interactivity: bool = False, ) -> str: if len(nw_series) == 0: return "
" @@ -309,18 +312,22 @@ def _make_summary_plot( clean_list = nw_series.to_native().to_list() if col_type == "string": - return _plot_categorical(clean_list, plot_id=plot_id) + return _plot_categorical( + clean_list, plot_id=plot_id, interactivity=interactivity + ) elif col_type == "numeric": - return _plot_numeric(clean_list, plot_id=plot_id) + return _plot_numeric(clean_list, plot_id=plot_id, interactivity=interactivity) elif col_type == "datetime": - return _plot_datetime(clean_list, plot_id=plot_id) + return _plot_datetime(clean_list, plot_id=plot_id, interactivity=interactivity) elif col_type == "boolean": - return _plot_boolean(clean_list, plot_id=plot_id) + return _plot_boolean(clean_list, plot_id=plot_id, interactivity=interactivity) else: return "
" -def _plot_categorical(data: list[str], plot_id: str) -> str: +def _plot_categorical( + data: list[str], plot_id: str, interactivity: bool = False +) -> str: category_counts = {} for item in data: if item in category_counts: @@ -348,12 +355,13 @@ def _plot_categorical(data: list[str], plot_id: str) -> str: category for category, _ in sorted_categories ], # maybe leave combined with counts? counts=counts, + interactivity=interactivity, ) return svg.as_str() -def _plot_boolean(data: list[bool], plot_id: str) -> str: +def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = False) -> str: true_count = sum(data) false_count = len(data) - true_count total_count = len(data) @@ -399,6 +407,7 @@ def _make_categories_bar_svg( categories: list[str], counts: list[int], opacities: list[float] | None = None, + interactivity: bool = False, ) -> SVG: plot_width_px = width_px * PLOT_WIDTH_RATIO plot_height_px = height_px * PLOT_HEIGHT_RATIO @@ -412,15 +421,18 @@ def _make_categories_bar_svg( max_opacity = 1.0 min_opacity = 0.2 - hover_css = _generate_hover_css( - num_elements=len(proportions), - bar_highlight_style="opacity: 0.4;", - tooltip_class="category-tooltip", - use_hover_areas=False, - plot_id=plot_id, - ) + if interactivity: + hover_css = _generate_hover_css( + num_elements=len(proportions), + bar_highlight_style="opacity: 0.4;", + tooltip_class="category-tooltip", + use_hover_areas=False, + plot_id=plot_id, + ) - elements: list[Element] = [Style(text=hover_css)] + elements: list[Element] = [Style(text=hover_css)] + else: + elements: list[Element] = [] for i, (proportion, category, count) in enumerate( zip(proportions, categories, counts) @@ -503,13 +515,16 @@ def _make_categories_bar_svg( ), ], ) - elements.append(tooltip) + if interactivity: + elements.append(tooltip) x_loc += section_width return SVG(height=height_px, width=width_px, elements=elements) -def _plot_numeric(data: list[float] | list[int], plot_id: str) -> str: +def _plot_numeric( + data: 
list[float] | list[int], plot_id: str, interactivity: bool = False +) -> str: data_min, data_max = min(data), max(data) data_range = data_max - data_min @@ -566,8 +581,7 @@ def _plot_numeric(data: list[float] | list[int], plot_id: str) -> str: def _plot_datetime( - dates: list[datetime], - plot_id: str, + dates: list[datetime], plot_id: str, interactivity: bool = False ) -> str: date_timestamps = [x.timestamp() for x in dates] data_min, data_max = min(date_timestamps), max(date_timestamps) @@ -639,6 +653,7 @@ def _make_histogram_svg( data_max: str, counts: list[float], bin_edges: list[str], + interactivity: bool = False, ) -> SVG: max_count = max(counts) normalized_counts = [c / max_count for c in counts] if max_count > 0 else counts @@ -663,14 +678,6 @@ def _make_histogram_svg( f"stroke: white; stroke-width: {line_stroke_width}; fill-opacity: 0.6;" ) - hover_css = _generate_hover_css( - num_elements=len(counts), - bar_highlight_style=bar_highlight_style, - tooltip_class="tooltip", - use_hover_areas=True, - plot_id=plot_id, - ) - # Calculate text positioning to avoid overflow min_text_width = len(data_min) * font_size_px * 0.6 max_text_width = len(data_max) * font_size_px * 0.6 @@ -688,9 +695,6 @@ def _make_histogram_svg( ) elements: list[Element] = [ - Style( - text=hover_css, - ), # Bottom line Line( x1=0, @@ -727,6 +731,16 @@ def _make_histogram_svg( ), ] + if interactivity: + hover_css = _generate_hover_css( + num_elements=len(counts), + bar_highlight_style=bar_highlight_style, + tooltip_class="tooltip", + use_hover_areas=True, + plot_id=plot_id, + ) + elements.append(Style(text=hover_css)) + # Make each bar, with an accompanying tooltip for i, (count, normalized_count) in enumerate(zip(counts, normalized_counts)): bar_height = normalized_count / 1 * max_bar_height_px @@ -812,7 +826,8 @@ def _make_histogram_svg( # Insert bars at beginning, tooltips at end elements.insert(0, bar) elements.insert(0, hover_area) - elements.append(tooltip) + if interactivity: + 
elements.append(tooltip) x_loc += bin_width_px return SVG(height=height_px, width=width_px, elements=elements) From 497e2496ea43d8f7a2bad29bceb21589bae048c5 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 18:07:15 -0700 Subject: [PATCH 06/26] Add customizable color mapping to gt_plt_summary Introduces a color_mapping parameter to gt_plt_summary, allowing users to override the default color scheme. A helper function change_color_mapping updates the global COLOR_MAPPING when user overrides are provided. --- gt_extras/summary.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index abf8f59e..a48ca41c 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -29,12 +29,19 @@ FONT_SIZE_RATIO = 0.2 # height_px / 5 +def change_color_mapping(user_overrides: dict[str, str] | None = None) -> None: + global COLOR_MAPPING # declare that you want to modify the global variable + if user_overrides: + COLOR_MAPPING.update(user_overrides) + + def gt_plt_summary( df: IntoDataFrame, title: str | None = None, hide_desc_stats: bool = False, add_mode: bool = False, - interactivity: bool = False, + interactivity: bool = False, # TODO Bug if Interactivity is False with Sp500 set + color_mapping: dict | None = None, ) -> GT: """ Create a comprehensive data summary table with visualizations. @@ -62,6 +69,23 @@ def gt_plt_summary( add_mode Boolean that allows the addition of a Mode column. Defaults to 'False'. + color_mapping + List detailing the color scheme for the 5 possible data types. If the list doesn't modify + all 5 data types, then the default color mapping is used for the unaltered types. + Examples: + { + "string": "#4e79a7", + "numeric": "#f18e2c", + "datetime": "#73a657", + "boolean": "#a65773", + "other": "black", + } + + { + "string": "purple", + "numeric": "pink", + } + Returns ------- GT @@ -131,6 +155,9 @@ def gt_plt_summary( summary table. 
Keep in mind that sometimes pandas or polars have differing behaviors with datatypes, especially when null values are present. """ + if color_mapping: + change_color_mapping(color_mapping) + summary_df = _create_summary_df(df) if not add_mode: From 5ee9f46d2da4850b36853d202427adb404fccec4 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 18:55:48 -0700 Subject: [PATCH 07/26] Refactor summary stats and enable plot interactivity Refactored summary DataFrame creation to conditionally include descriptive statistics and mode based on parameters. Enabled interactivity by default in category bar and histogram SVG plots. Commented out redundant DataFrame column drops in gt_plt_summary. --- gt_extras/summary.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index a48ca41c..409a6f53 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -158,13 +158,15 @@ def gt_plt_summary( if color_mapping: change_color_mapping(color_mapping) - summary_df = _create_summary_df(df) + summary_df = _create_summary_df( + df, hide_desc_stats=hide_desc_stats, add_mode=add_mode + ) - if not add_mode: - summary_df = summary_df.drop(columns=["Mode"]) # type: ignore + # if not add_mode: + # summary_df = summary_df.drop(columns=["Mode"]) # type: ignore - if hide_desc_stats: - summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) # type: ignore + # if hide_desc_stats: + # summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) # type: ignore nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -233,7 +235,9 @@ def gt_plt_summary( ############### Helpers for gt_plt_summary ############### -def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: +def _create_summary_df( + df: IntoDataFrameT, hide_desc_stats: bool = False, add_mode: bool = False +) -> IntoDataFrameT: nw_df = nw.from_native(df, eager_only=True) # Should I be concerned about this? 
summary_data = { @@ -241,10 +245,10 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: "Column": [], "Plot Overview": [], "Missing": [], - "Mean": [], - "Median": [], - "SD": [], - "Mode": [], + # "Mean": [], + # "Median": [], + # "SD": [], + # "Mode": [], } for col_name in nw_df.columns: @@ -295,10 +299,12 @@ def _create_summary_df(df: IntoDataFrameT) -> IntoDataFrameT: summary_data["Column"].append(col_name) summary_data["Plot Overview"].append(None) summary_data["Missing"].append(missing_ratio) - summary_data["Mean"].append(mean_val) - summary_data["Median"].append(median_val) - summary_data["SD"].append(std_val) - summary_data["Mode"].append(mode_val) + if not hide_desc_stats: + summary_data.setdefault("Mean", []).append(mean_val) + summary_data.setdefault("Median", []).append(median_val) + summary_data.setdefault("SD", []).append(std_val) + if not hide_desc_stats and add_mode: + summary_data.setdefault("Mode", []).append(mode_val) summary_nw_df = nw.from_dict(summary_data, backend=nw_df.implementation) return summary_nw_df.to_native() @@ -434,7 +440,7 @@ def _make_categories_bar_svg( categories: list[str], counts: list[int], opacities: list[float] | None = None, - interactivity: bool = False, + interactivity: bool = True, ) -> SVG: plot_width_px = width_px * PLOT_WIDTH_RATIO plot_height_px = height_px * PLOT_HEIGHT_RATIO @@ -665,6 +671,7 @@ def _plot_datetime( data_min=str(datetime.fromtimestamp(data_min, tz=timezone.utc).date()), counts=counts, bin_edges=bin_edges, + interactivity=interactivity, ) return svg.as_str() @@ -680,7 +687,7 @@ def _make_histogram_svg( data_max: str, counts: list[float], bin_edges: list[str], - interactivity: bool = False, + interactivity: bool = True, ) -> SVG: max_count = max(counts) normalized_counts = [c / max_count for c in counts] if max_count > 0 else counts From fffc965c4ece02d11b2aada30aa42278d9e37cfa Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 19:18:39 -0700 Subject: [PATCH 08/26] Enable 
interactivity by default in summary plots. Added interactivity to boolean and numeric plots. Changed the default value of the 'interactivity' parameter to True in all summary plot functions to ensure interactive plots are generated by default. Also --- gt_extras/summary.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 409a6f53..c3b3aa0a 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -40,7 +40,7 @@ def gt_plt_summary( title: str | None = None, hide_desc_stats: bool = False, add_mode: bool = False, - interactivity: bool = False, # TODO Bug if Interactivity is False with Sp500 set + interactivity: bool = True, color_mapping: dict | None = None, ) -> GT: """ @@ -337,7 +337,7 @@ def _make_summary_plot( nw_series: nw.Series, col_type: str, plot_id: str, - interactivity: bool = False, + interactivity: bool = True, ) -> str: if len(nw_series) == 0: return "
" @@ -358,9 +358,7 @@ def _make_summary_plot( return "
" -def _plot_categorical( - data: list[str], plot_id: str, interactivity: bool = False -) -> str: +def _plot_categorical(data: list[str], plot_id: str, interactivity: bool = True) -> str: category_counts = {} for item in data: if item in category_counts: @@ -394,7 +392,7 @@ def _plot_categorical( return svg.as_str() -def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = False) -> str: +def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = True) -> str: true_count = sum(data) false_count = len(data) - true_count total_count = len(data) @@ -426,6 +424,7 @@ def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = False) - categories=categories, counts=counts, opacities=opacities, + interactivity=interactivity, ) return svg.as_str() @@ -556,7 +555,7 @@ def _make_categories_bar_svg( def _plot_numeric( - data: list[float] | list[int], plot_id: str, interactivity: bool = False + data: list[float] | list[int], plot_id: str, interactivity: bool = True ) -> str: data_min, data_max = min(data), max(data) data_range = data_max - data_min @@ -608,13 +607,14 @@ def _plot_numeric( data_min=str(round(data_min, 2)), counts=counts, bin_edges=bin_edges, + interactivity=interactivity, ) return svg.as_str() def _plot_datetime( - dates: list[datetime], plot_id: str, interactivity: bool = False + dates: list[datetime], plot_id: str, interactivity: bool = True ) -> str: date_timestamps = [x.timestamp() for x in dates] data_min, data_max = min(date_timestamps), max(date_timestamps) From c8ea289788e8cc5f5fa9d3ffcb3e5a01bb9ae558 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 21:16:13 -0700 Subject: [PATCH 09/26] Add tests for change_color_mapping function Added unit tests to verify that change_color_mapping correctly updates COLOR_MAPPING, handles None and empty dict inputs, and preserves existing keys. This improves test coverage for color mapping customization. 
--- gt_extras/tests/test_summary.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index 816d3bcd..69cd0de5 100644 --- a/gt_extras/tests/test_summary.py +++ b/gt_extras/tests/test_summary.py @@ -6,7 +6,7 @@ import pytest from great_tables import GT -from gt_extras.summary import gt_plt_summary +from gt_extras.summary import COLOR_MAPPING, change_color_mapping, gt_plt_summary from gt_extras.tests.conftest import assert_rendered_body @@ -443,6 +443,26 @@ def test_gt_plt_summary_column_order_preserved(DataFrame): assert z_pos < a_pos < m_pos +def test_change_color_mapping_updates(): + overrides = {"string": "#00FF00"} + change_color_mapping(overrides) + assert COLOR_MAPPING.get("string") == "#00FF00" + # Existing keys should remain too + assert COLOR_MAPPING.get("numeric") == "#f18e2c" + + +def test_change_color_mapping_with_none(): + old_mapping = COLOR_MAPPING.copy() + change_color_mapping(None) + assert COLOR_MAPPING == old_mapping + + +def test_change_color_mapping_with_empty_dict(): + old_mapping = COLOR_MAPPING.copy() + change_color_mapping({}) + assert COLOR_MAPPING == old_mapping + + # TODO: time # def test_gt_plt_summary_datetime_with_time(): # df = pd.DataFrame( From ac1cff6f9ed88510010cec4db7c4b223bd405756 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 21:16:36 -0700 Subject: [PATCH 10/26] Fix column existence checks for summary formatting Adds checks to ensure that missing value handling and number formatting are only applied to columns present in the DataFrame. This prevents errors in Polars when columns like 'Mean', 'Median', 'SD', or 'Mode' are absent. 
--- gt_extras/summary.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index c3b3aa0a..4d222407 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -198,13 +198,22 @@ def gt_plt_summary( .cols_align(align="center", columns="Plot Overview") ) + # Polars has strict column checking, so can't perform .sub_missing or .fmt_number etc on columns + # that aren't present in a df. Therefore, we need to check if those columns are present prior to + # handling missing and formatting. + cols_to_check = ["Mean", "Median", "SD"] + (["Mode"] if add_mode else []) + existing_desc_cols = [col for col in cols_to_check if col in nw_summary_df.columns] + + # Mode stays as a string object, so we don't include in here. + columns_to_format_as_number = [ + col for col in ["Mean", "Median", "SD"] if col in nw_summary_df.columns + ] + if not hide_desc_stats: gt = ( # handle missing - gt.sub_missing( - columns=["Mean", "Median", "SD"] + (["Mode"] if add_mode else []) - ).fmt_number( - columns=["Mean", "Median", "SD"], + gt.sub_missing(columns=existing_desc_cols).fmt_number( + columns=columns_to_format_as_number, rows=numeric_cols, ) ) From a12833a1571e647b27ae5d690c0b7dc15694dc80 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 21:20:34 -0700 Subject: [PATCH 11/26] Remove .vscode/launch.json from .gitignore The .vscode/launch.json entry was deleted from .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 259785d5..d347d5e6 100644 --- a/.gitignore +++ b/.gitignore @@ -124,4 +124,3 @@ sandbox.* possible_feats.* compare_themes.py compare_tables.html -.vscode/launch.json From d4e26aa7ec8cf759fd2a3fbeaa44fb43befddd24 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 21:32:20 -0700 Subject: [PATCH 12/26] Refactor color mapping override in gt_plt_summary Removed the change_color_mapping function and integrated color mapping 
overrides directly into gt_plt_summary via the new_color_mapping parameter. Updated related docstrings and cleaned up unused code and tests for change_color_mapping. --- gt_extras/summary.py | 27 +++++++-------------------- gt_extras/tests/test_summary.py | 22 +--------------------- 2 files changed, 8 insertions(+), 41 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 4d222407..8044d7a2 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -29,19 +29,13 @@ FONT_SIZE_RATIO = 0.2 # height_px / 5 -def change_color_mapping(user_overrides: dict[str, str] | None = None) -> None: - global COLOR_MAPPING # declare that you want to modify the global variable - if user_overrides: - COLOR_MAPPING.update(user_overrides) - - def gt_plt_summary( df: IntoDataFrame, title: str | None = None, hide_desc_stats: bool = False, add_mode: bool = False, interactivity: bool = True, - color_mapping: dict | None = None, + new_color_mapping: dict | None = None, ) -> GT: """ Create a comprehensive data summary table with visualizations. @@ -69,7 +63,7 @@ def gt_plt_summary( add_mode Boolean that allows the addition of a Mode column. Defaults to 'False'. - color_mapping + new_color_mapping List detailing the color scheme for the 5 possible data types. If the list doesn't modify all 5 data types, then the default color mapping is used for the unaltered types. Examples: @@ -155,19 +149,14 @@ def gt_plt_summary( summary table. Keep in mind that sometimes pandas or polars have differing behaviors with datatypes, especially when null values are present. 
""" - if color_mapping: - change_color_mapping(color_mapping) + if new_color_mapping: + global COLOR_MAPPING + COLOR_MAPPING.update(new_color_mapping) summary_df = _create_summary_df( df, hide_desc_stats=hide_desc_stats, add_mode=add_mode ) - # if not add_mode: - # summary_df = summary_df.drop(columns=["Mode"]) # type: ignore - - # if hide_desc_stats: - # summary_df = summary_df.drop(columns=["Mean", "Median", "SD"]) # type: ignore - nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -254,10 +243,6 @@ def _create_summary_df( "Column": [], "Plot Overview": [], "Missing": [], - # "Mean": [], - # "Median": [], - # "SD": [], - # "Mode": [], } for col_name in nw_df.columns: @@ -288,6 +273,7 @@ def _create_summary_df( # Limiting the number of modes displayed to two at maximum elif len(mode_val) > 2: mode_val = "Greater than 2 Modes" + # Converting to string, then listing together else: mode_val = ", ".join(str(i) for i in mode_val.to_list()) @@ -308,6 +294,7 @@ def _create_summary_df( summary_data["Column"].append(col_name) summary_data["Plot Overview"].append(None) summary_data["Missing"].append(missing_ratio) + # setdefault adds the column if it's not present if not hide_desc_stats: summary_data.setdefault("Mean", []).append(mean_val) summary_data.setdefault("Median", []).append(median_val) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index 69cd0de5..816d3bcd 100644 --- a/gt_extras/tests/test_summary.py +++ b/gt_extras/tests/test_summary.py @@ -6,7 +6,7 @@ import pytest from great_tables import GT -from gt_extras.summary import COLOR_MAPPING, change_color_mapping, gt_plt_summary +from gt_extras.summary import gt_plt_summary from gt_extras.tests.conftest import assert_rendered_body @@ -443,26 +443,6 @@ def test_gt_plt_summary_column_order_preserved(DataFrame): assert z_pos < a_pos < m_pos -def test_change_color_mapping_updates(): - overrides = {"string": "#00FF00"} - change_color_mapping(overrides) - assert 
COLOR_MAPPING.get("string") == "#00FF00" - # Existing keys should remain too - assert COLOR_MAPPING.get("numeric") == "#f18e2c" - - -def test_change_color_mapping_with_none(): - old_mapping = COLOR_MAPPING.copy() - change_color_mapping(None) - assert COLOR_MAPPING == old_mapping - - -def test_change_color_mapping_with_empty_dict(): - old_mapping = COLOR_MAPPING.copy() - change_color_mapping({}) - assert COLOR_MAPPING == old_mapping - - # TODO: time # def test_gt_plt_summary_datetime_with_time(): # df = pd.DataFrame( From cf2355850e658b1c65c8b948dfee21f01cd2322a Mon Sep 17 00:00:00 2001 From: ScottFB Date: Mon, 11 Aug 2025 21:50:16 -0700 Subject: [PATCH 13/26] Document interactivity parameter in gt_plt_summary Added documentation for the 'interactivity' parameter in the gt_plt_summary function, explaining its purpose for toggling interactive features in Plot Overview column graphs. --- gt_extras/summary.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 8044d7a2..3b30f469 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -63,6 +63,10 @@ def gt_plt_summary( add_mode Boolean that allows the addition of a Mode column. Defaults to 'False'. + interactivity + Boolean that toggles interactivity in Plot Overview column graphs. Interactivity refers to + hovering css and tooltips code applied to the graphs. + new_color_mapping List detailing the color scheme for the 5 possible data types. If the list doesn't modify all 5 data types, then the default color mapping is used for the unaltered types. From e332955cf327ab6a51dc94c7f180d98a10acdbb9 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Tue, 12 Aug 2025 19:56:16 -0700 Subject: [PATCH 14/26] Rename hide_desc_stats to show_desc_stats in summary Replaces the 'hide_desc_stats' parameter with 'show_desc_stats' in gt_plt_summary and related functions for clarity. Updates logic and docstrings to reflect the new parameter name and default value. 
--- gt_extras/summary.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 3b30f469..4e14768f 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -32,7 +32,7 @@ def gt_plt_summary( df: IntoDataFrame, title: str | None = None, - hide_desc_stats: bool = False, + show_desc_stats: bool = True, add_mode: bool = False, interactivity: bool = True, new_color_mapping: dict | None = None, @@ -57,7 +57,7 @@ def gt_plt_summary( title Optional title for the summary table. If `None`, defaults to "Summary Table". - hide_desc_stats + show_desc_stats Boolean that allows the hiding of the Mean, Median, and SD columns. Defaults to 'False'. add_mode @@ -158,7 +158,7 @@ def gt_plt_summary( COLOR_MAPPING.update(new_color_mapping) summary_df = _create_summary_df( - df, hide_desc_stats=hide_desc_stats, add_mode=add_mode + df, show_desc_stats=show_desc_stats, add_mode=add_mode ) nw_df = nw.from_native(df, eager_only=True) @@ -202,7 +202,7 @@ def gt_plt_summary( col for col in ["Mean", "Median", "SD"] if col in nw_summary_df.columns ] - if not hide_desc_stats: + if show_desc_stats: gt = ( # handle missing gt.sub_missing(columns=existing_desc_cols).fmt_number( @@ -238,7 +238,7 @@ def gt_plt_summary( def _create_summary_df( - df: IntoDataFrameT, hide_desc_stats: bool = False, add_mode: bool = False + df: IntoDataFrameT, show_desc_stats: bool = True, add_mode: bool = False ) -> IntoDataFrameT: nw_df = nw.from_native(df, eager_only=True) # Should I be concerned about this? 
@@ -299,11 +299,11 @@ def _create_summary_df( summary_data["Plot Overview"].append(None) summary_data["Missing"].append(missing_ratio) # setdefault adds the column if it's not present - if not hide_desc_stats: + if show_desc_stats: summary_data.setdefault("Mean", []).append(mean_val) summary_data.setdefault("Median", []).append(median_val) summary_data.setdefault("SD", []).append(std_val) - if not hide_desc_stats and add_mode: + if show_desc_stats and add_mode: summary_data.setdefault("Mode", []).append(mode_val) summary_nw_df = nw.from_dict(summary_data, backend=nw_df.implementation) From 116257ef48001483a2e595922b275bf870b80d52 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Tue, 12 Aug 2025 20:15:27 -0700 Subject: [PATCH 15/26] new_color_mapping example to main docstring Improves the docstring for gt_plt_summary by clarifying the new_color_mapping parameter as a dictionary mapping data types to hex color codes, and adds a new example demonstrating custom color mapping with ocean swell data. Also corrects default value descriptions for show_desc_stats and add_mode. --- gt_extras/summary.py | 56 ++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 4e14768f..b81082c4 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -58,31 +58,18 @@ def gt_plt_summary( Optional title for the summary table. If `None`, defaults to "Summary Table". show_desc_stats - Boolean that allows the hiding of the Mean, Median, and SD columns. Defaults to 'False'. + Boolean that allows the hiding of the Mean, Median, and SD columns. Defaults to `False`. add_mode - Boolean that allows the addition of a Mode column. Defaults to 'False'. + Boolean that allows the addition of a Mode column. Defaults to `False`. interactivity Boolean that toggles interactivity in Plot Overview column graphs. Interactivity refers to hovering css and tooltips code applied to the graphs. 
new_color_mapping - List detailing the color scheme for the 5 possible data types. If the list doesn't modify - all 5 data types, then the default color mapping is used for the unaltered types. - Examples: - { - "string": "#4e79a7", - "numeric": "#f18e2c", - "datetime": "#73a657", - "boolean": "#a65773", - "other": "black", - } - - { - "string": "purple", - "numeric": "pink", - } + A dictionary that maps data types (string, numeric, datetime, boolean, and other) to their + corresponding color codes in hexadecimal format. Returns ------- @@ -147,6 +134,41 @@ def gt_plt_summary( gte.gt_plt_summary(df) ``` + And lastly, an example showing ocean swell data with changes to the default color mapping: + ```{python} + import polars as pl + from great_tables import GT + import gt_extras as gte + from datetime import datetime + + df = pl.DataFrame({ + "Date": [ + datetime(2024, 7, 1, 6, 0), + datetime(2024, 7, 1, 12, 0), + datetime(2024, 7, 2, 6, 0), + datetime(2024, 7, 2, 12, 0), + datetime(2024, 7, 3, 6, 0), + datetime(2024, 7, 3, 12, 0), + datetime(2024, 7, 4, 6, 0), + datetime(2024, 7, 4, 12, 0), + datetime(2024, 7, 5, 6, 0), + ], + "Height_m": [1.2, 1.5, 2.1, 2.4, 1.8, None, 2.7, 3.0, 2.5], + "Period_s": [10, 12, 14, 15, 11, 9, 16, None, 13], + "Direction_deg": [210, 215, 220, 225, 205, 200, 230, 240, 235], + "WindSpeed_kts": [5, 7, 10, 12, 6, 4, 8, 11, None], + "Breaking": [True, True, True, False, True, False, True, True, True] + }) + + color_mapping = { + "date": "blue", + "numeric": "lightblue", + "boolean": "lightgreen", + } + + gte.gt_plt_summary(df, new_color_mapping=color_mapping) + ``` + Note --------- The datatype (dtype) of each column in your dataframe will determine the classified type in the From bf3726bc1847e489b75c8d9dce5d84ce6e40f6d7 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Tue, 12 Aug 2025 20:31:39 -0700 Subject: [PATCH 16/26] Show category tooltips and bars only with interactivity Moved the creation and insertion of visual bars and tooltips inside 
the interactivity check in _make_categories_bar_svg. This ensures that these SVG elements are only added when interactivity is enabled, preventing unnecessary elements in static mode. --- gt_extras/summary.py | 96 ++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index b81082c4..452ab680 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -518,60 +518,60 @@ def _make_categories_bar_svg( fill_opacity=opacity, stroke="transparent", ) - elements.insert(1, visual_bar) + if interactivity: + elements.insert(1, visual_bar) - section_center_x = x_loc + section_width / 2 + section_center_x = x_loc + section_width / 2 - row_label = "row" if count == 1 else "rows" - text_top = f"{count:.0f} {row_label}" - text_bottom = f'"{category}"' - - # Estimate text width - max_text_width = max( - len(text_top) * font_size_px * 0.6, - len(text_bottom) * font_size_px * 0.6, - ) + row_label = "row" if count == 1 else "rows" + text_top = f"{count:.0f} {row_label}" + text_bottom = f'"{category}"' - tooltip_x = _calculate_text_position( - center_x=section_center_x, - text_width=max_text_width, - svg_width=width_px, - margin=5, - ) + # Estimate text width + max_text_width = max( + len(text_top) * font_size_px * 0.6, + len(text_bottom) * font_size_px * 0.6, + ) - # Use plot_id in tooltip ID and class - tooltip_id = f"{plot_id}-tooltip-{i}" - tooltip_class = f"{plot_id}-category-tooltip" + tooltip_x = _calculate_text_position( + center_x=section_center_x, + text_width=max_text_width, + svg_width=width_px, + margin=5, + ) - tooltip = G( - id=tooltip_id, - class_=[tooltip_class], - elements=[ - Text( - text=text_top, - x=tooltip_x, - y=font_size_px * 1.25, - fill="black", - font_size=font_size_px, - dominant_baseline="hanging", - text_anchor="middle", - font_weight="bold", - ), - Text( - text=text_bottom, - x=tooltip_x, - y=font_size_px * 2.5, - fill="black", - font_size=font_size_px, 
- dominant_baseline="hanging", - text_anchor="middle", - font_weight="bold", - ), - ], - ) - if interactivity: + # Use plot_id in tooltip ID and class + tooltip_id = f"{plot_id}-tooltip-{i}" + tooltip_class = f"{plot_id}-category-tooltip" + + tooltip = G( + id=tooltip_id, + class_=[tooltip_class], + elements=[ + Text( + text=text_top, + x=tooltip_x, + y=font_size_px * 1.25, + fill="black", + font_size=font_size_px, + dominant_baseline="hanging", + text_anchor="middle", + font_weight="bold", + ), + Text( + text=text_bottom, + x=tooltip_x, + y=font_size_px * 2.5, + fill="black", + font_size=font_size_px, + dominant_baseline="hanging", + text_anchor="middle", + font_weight="bold", + ), + ], + ) elements.append(tooltip) - x_loc += section_width + x_loc += section_width return SVG(height=height_px, width=width_px, elements=elements) From efe98575e92687d9690195d820ab6b151aee6c2e Mon Sep 17 00:00:00 2001 From: ScottFB Date: Tue, 12 Aug 2025 20:51:55 -0700 Subject: [PATCH 17/26] Fix tooltip rendering logic in SVG bar and histogram Adjusts the placement and conditional rendering of tooltips and visual bars in the _make_categories_bar_svg and _make_histogram_svg functions. Tooltips are now only created and appended when interactivity is enabled, preventing unnecessary elements in non-interactive SVGs. 
--- gt_extras/summary.py | 62 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 452ab680..7401c48d 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -518,9 +518,9 @@ def _make_categories_bar_svg( fill_opacity=opacity, stroke="transparent", ) - if interactivity: - elements.insert(1, visual_bar) + elements.insert(1, visual_bar) + if interactivity: section_center_x = x_loc + section_width / 2 row_label = "row" if count == 1 else "rows" @@ -571,7 +571,7 @@ def _make_categories_bar_svg( ], ) elements.append(tooltip) - x_loc += section_width + x_loc += section_width return SVG(height=height_px, width=width_px, elements=elements) @@ -840,32 +840,34 @@ def _make_histogram_svg( hover_area_id = f"{plot_id}-hover-area-{i}" hover_area_class = f"{plot_id}-hover-area" - tooltip = G( - id=tooltip_id, - class_=[tooltip_class], - elements=[ - Text( - text=text_top, - x=x_loc_tooltip, - y=font_size_px * 0.25, - fill="black", - font_size=font_size_px, - dominant_baseline="hanging", - text_anchor="middle", - font_weight="bold", - ), - Text( - text=text_bottom, - x=x_loc_tooltip, - y=font_size_px * 1.5, - fill="black", - font_size=font_size_px, - dominant_baseline="hanging", - text_anchor="middle", - font_weight="bold", - ), - ], - ) + if interactivity: + tooltip = G( + id=tooltip_id, + class_=[tooltip_class], + elements=[ + Text( + text=text_top, + x=x_loc_tooltip, + y=font_size_px * 0.25, + fill="black", + font_size=font_size_px, + dominant_baseline="hanging", + text_anchor="middle", + font_weight="bold", + ), + Text( + text=text_bottom, + x=x_loc_tooltip, + y=font_size_px * 1.5, + fill="black", + font_size=font_size_px, + dominant_baseline="hanging", + text_anchor="middle", + font_weight="bold", + ), + ], + ) + elements.append(tooltip) # Add invisible hover area that covers bar + tooltip space hover_area = Rect( @@ -882,8 +884,6 @@ def _make_histogram_svg( # 
Insert bars at beginning, tooltips at end elements.insert(0, bar) elements.insert(0, hover_area) - if interactivity: - elements.append(tooltip) x_loc += bin_width_px return SVG(height=height_px, width=width_px, elements=elements) From 56beea5d960776ea135ccbc0a9707ab4ef1ac573 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Tue, 12 Aug 2025 21:28:55 -0700 Subject: [PATCH 18/26] Struggling with the test here. Could use some advice --- gt_extras/tests/test_summary.py | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index 816d3bcd..c59de443 100644 --- a/gt_extras/tests/test_summary.py +++ b/gt_extras/tests/test_summary.py @@ -30,6 +30,58 @@ def test_gt_plt_summary_snap(snapshot): assert_rendered_body(snapshot(name="pd_and_pl"), gt=res) +def test_gt_plt_summary_additional_parameters_snap(snapshot): + for DataFrame in [pd.DataFrame, pl.DataFrame]: + df = DataFrame( + { + "numeric": [1.5, 2.2, 3.3, None, 5.1], + "modes_test": [1, 2, 3, 4, 5], + "string": ["A", "B", "A", "C", None], + "boolean": [True, False, True, False, False], + "datetime": [ + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 2, tzinfo=timezone.utc), + datetime(2024, 1, 3, tzinfo=timezone.utc), + None, + datetime(2024, 1, 5, tzinfo=timezone.utc), + ], + } + ) + res = gt_plt_summary( + df, + show_desc_stats=True, + add_mode=True, + interactivity=False, + new_color_mapping={"string": "purple", "numeric": "green"}, + ) + assert_rendered_body(snapshot(name="pd_and_pl_optional_parameters"), gt=res) + + +def test_gt_plt_summary_interactivity_snap(snapshot): + for DataFrame in [pd.DataFrame, pl.DataFrame]: + df = DataFrame( + { + "numeric": [1.5, 2.2, 3.3, None, 5.1], + "string": ["A", "B", "A", "C", None], + "boolean": [True, False, True, False, False], + "datetime": [ + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 2, tzinfo=timezone.utc), + datetime(2024, 1, 3, 
tzinfo=timezone.utc), + None, + datetime(2024, 1, 5, tzinfo=timezone.utc), + ], + } + ) + res = gt_plt_summary( + df, + show_desc_stats=True, + add_mode=True, + interactivity=True, + ) + assert_rendered_body(snapshot(name="pd_and_pl_optional_parameters"), gt=res) + + @pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) def test_gt_plt_summary_basic(DataFrame): df = DataFrame( From 9a5a6f2478e3979b57c2a30b0a420bfcf48d68d4 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Wed, 13 Aug 2025 17:15:02 -0700 Subject: [PATCH 19/26] remove unnecessary defaults --- gt_extras/summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 7401c48d..890fd373 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -58,10 +58,10 @@ def gt_plt_summary( Optional title for the summary table. If `None`, defaults to "Summary Table". show_desc_stats - Boolean that allows the hiding of the Mean, Median, and SD columns. Defaults to `False`. + Boolean that allows the hiding of the Mean, Median, and SD columns. add_mode - Boolean that allows the addition of a Mode column. Defaults to `False`. + Boolean that allows the addition of a Mode column. interactivity Boolean that toggles interactivity in Plot Overview column graphs. 
Interactivity refers to From 8460d713a2c7df084ee12f2663e1b4b98d7e088f Mon Sep 17 00:00:00 2001 From: ScottFB Date: Wed, 13 Aug 2025 18:32:58 -0700 Subject: [PATCH 20/26] sorted numbers in mode column --- gt_extras/summary.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index 890fd373..b2adef5c 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -301,7 +301,8 @@ def _create_summary_df( mode_val = "Greater than 2 Modes" # Converting to string, then listing together else: - mode_val = ", ".join(str(i) for i in mode_val.to_list()) + mode_val = sorted(mode_val.to_list()) # sorts from least to greatest + mode_val = ", ".join(str(i) for i in mode_val) elif col.dtype == nw.String: col_type = "string" From 77a764015a46f6a1d32604fd98c04a3ec289b53d Mon Sep 17 00:00:00 2001 From: ScottFB Date: Wed, 13 Aug 2025 18:33:15 -0700 Subject: [PATCH 21/26] Add tests for multiple modes in gt_plt_summary Introduces tests to verify gt_plt_summary correctly handles cases with two modes and more than two modes in both pandas and polars DataFrames. 
--- gt_extras/tests/test_summary.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index c59de443..6a12caf2 100644 --- a/gt_extras/tests/test_summary.py +++ b/gt_extras/tests/test_summary.py @@ -82,6 +82,26 @@ def test_gt_plt_summary_interactivity_snap(snapshot): assert_rendered_body(snapshot(name="pd_and_pl_optional_parameters"), gt=res) +@pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) +def test_gt_plt_summary_two_modes(DataFrame): + df = DataFrame({"numeric": [1, 1, 2, 2, 3]}) + + result = gt_plt_summary(df, add_mode=True) + html = result.as_raw_html() + + assert "1, 2" in html + + +@pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) +def test_gt_plt_summary_greater_than_two_modes(DataFrame): + df = DataFrame({"numeric": [1, 1, 2, 2, 3, 4, 4]}) + + result = gt_plt_summary(df, add_mode=True) + html = result.as_raw_html() + + assert "Greater than 2 Modes" in html + + @pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) def test_gt_plt_summary_basic(DataFrame): df = DataFrame( From e97fb22f504f2d90c966922eb1f7ad6f902bda40 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Wed, 13 Aug 2025 21:45:49 -0700 Subject: [PATCH 22/26] Update summary tests to check for exact HTML output Changed assertions in test_gt_plt_summary_two_modes and test_gt_plt_summary_greater_than_two_modes to verify the exact HTML table cell output instead of just substring matches. This ensures the tests are more robust and accurately reflect the rendered HTML structure. 
--- gt_extras/tests/test_summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index 6a12caf2..38ff022b 100644 --- a/gt_extras/tests/test_summary.py +++ b/gt_extras/tests/test_summary.py @@ -89,7 +89,7 @@ def test_gt_plt_summary_two_modes(DataFrame): result = gt_plt_summary(df, add_mode=True) html = result.as_raw_html() - assert "1, 2" in html + assert '1, 2' in html @pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) @@ -99,7 +99,7 @@ def test_gt_plt_summary_greater_than_two_modes(DataFrame): result = gt_plt_summary(df, add_mode=True) html = result.as_raw_html() - assert "Greater than 2 Modes" in html + assert 'Greater than 2 Modes' in html @pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) From e3d54d8de0cb8fbbcbc299a236d57654a183c5b2 Mon Sep 17 00:00:00 2001 From: ScottFB Date: Wed, 13 Aug 2025 21:46:07 -0700 Subject: [PATCH 23/26] Align 'Mode' column to right in summary table Adds right alignment to the 'Mode' column in the summary table when add_mode is enabled --- gt_extras/summary.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index b2adef5c..ba44a643 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -232,6 +232,8 @@ def gt_plt_summary( rows=numeric_cols, ) ) + if add_mode: + gt = gt.cols_align(align="right", columns="Mode") gt = gt_theme_espn(gt) From ea5543dcf354d074474178614de78e2db94809ef Mon Sep 17 00:00:00 2001 From: Jules <54960783+juleswg23@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:26:33 -0400 Subject: [PATCH 24/26] replace snap test with parameterized test --- gt_extras/tests/test_summary.py | 38 +++++++++++++-------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/gt_extras/tests/test_summary.py b/gt_extras/tests/test_summary.py index 38ff022b..d4adb136 100644 --- a/gt_extras/tests/test_summary.py +++ 
b/gt_extras/tests/test_summary.py @@ -57,29 +57,21 @@ def test_gt_plt_summary_additional_parameters_snap(snapshot): assert_rendered_body(snapshot(name="pd_and_pl_optional_parameters"), gt=res) -def test_gt_plt_summary_interactivity_snap(snapshot): - for DataFrame in [pd.DataFrame, pl.DataFrame]: - df = DataFrame( - { - "numeric": [1.5, 2.2, 3.3, None, 5.1], - "string": ["A", "B", "A", "C", None], - "boolean": [True, False, True, False, False], - "datetime": [ - datetime(2024, 1, 1, tzinfo=timezone.utc), - datetime(2024, 1, 2, tzinfo=timezone.utc), - datetime(2024, 1, 3, tzinfo=timezone.utc), - None, - datetime(2024, 1, 5, tzinfo=timezone.utc), - ], - } - ) - res = gt_plt_summary( - df, - show_desc_stats=True, - add_mode=True, - interactivity=True, - ) - assert_rendered_body(snapshot(name="pd_and_pl_optional_parameters"), gt=res) +@pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) +def test_gt_plt_summary_no_interactivity(DataFrame): + df = DataFrame( + { + "numeric": [1.5, 2.2, 3.3, None, 5.1], + "string": ["A", "B", "A", "C", None], + } + ) + + result = gt_plt_summary(df, interactivity=False) + html = result.as_raw_html() + + assert "opacity: 0;" not in html + assert ":hover" not in html + assert "transition:" not in html @pytest.mark.parametrize("DataFrame", [pd.DataFrame, pl.DataFrame]) From 53d89fd6390f66e3ae6acd19206685cac8a422fc Mon Sep 17 00:00:00 2001 From: Jules <54960783+juleswg23@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:48:43 -0400 Subject: [PATCH 25/26] propogate color mapping --- gt_extras/summary.py | 82 +++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/gt_extras/summary.py b/gt_extras/summary.py index ba44a643..29d31a0a 100644 --- a/gt_extras/summary.py +++ b/gt_extras/summary.py @@ -175,14 +175,14 @@ def gt_plt_summary( summary table. Keep in mind that sometimes pandas or polars have differing behaviors with datatypes, especially when null values are present. 
""" - if new_color_mapping: - global COLOR_MAPPING - COLOR_MAPPING.update(new_color_mapping) - summary_df = _create_summary_df( df, show_desc_stats=show_desc_stats, add_mode=add_mode ) + color_mapping = COLOR_MAPPING.copy() + if new_color_mapping is not None: + color_mapping.update(new_color_mapping) + nw_df = nw.from_native(df, eager_only=True) dim_df = nw_df.shape @@ -202,7 +202,7 @@ def gt_plt_summary( GT(summary_df) .tab_header(title=title, subtitle=subtitle) # Add visuals - .fmt(_make_icon_html, columns="Type") + .fmt(lambda dtype: _make_icon_html(dtype, color_mapping), columns="Type") # Format numerics .fmt_percent(columns="Missing", decimals=1) .tab_style( @@ -250,6 +250,7 @@ def gt_plt_summary( nw_series=vals, col_type=col_type, plot_id=plot_id, + color_mapping=color_mapping, interactivity=interactivity, ), columns="Plot Overview", @@ -335,22 +336,22 @@ def _create_summary_df( return summary_nw_df.to_native() -def _make_icon_html(dtype: str) -> str: +def _make_icon_html(dtype: str, color_mapping: dict[str, str]) -> str: if dtype == "string": fa_name = "list" - color = COLOR_MAPPING["string"] + color = color_mapping["string"] elif dtype == "numeric": fa_name = "signal" - color = COLOR_MAPPING["numeric"] + color = color_mapping["numeric"] elif dtype == "datetime": fa_name = "clock" - color = COLOR_MAPPING["datetime"] + color = color_mapping["datetime"] elif dtype == "boolean": fa_name = "check" - color = COLOR_MAPPING["boolean"] + color = color_mapping["boolean"] else: fa_name = "question" - color = COLOR_MAPPING["other"] + color = color_mapping["other"] icon = icon_svg(name=fa_name, fill=color, width=f"{20}px", a11y="sem") @@ -362,6 +363,7 @@ def _make_summary_plot( nw_series: nw.Series, col_type: str, plot_id: str, + color_mapping: dict[str, str], interactivity: bool = True, ) -> str: if len(nw_series) == 0: @@ -371,19 +373,42 @@ def _make_summary_plot( if col_type == "string": return _plot_categorical( - clean_list, plot_id=plot_id, 
interactivity=interactivity + clean_list, + plot_id=plot_id, + interactivity=interactivity, + color_mapping=color_mapping, ) elif col_type == "numeric": - return _plot_numeric(clean_list, plot_id=plot_id, interactivity=interactivity) + return _plot_numeric( + clean_list, + plot_id=plot_id, + interactivity=interactivity, + color_mapping=color_mapping, + ) elif col_type == "datetime": - return _plot_datetime(clean_list, plot_id=plot_id, interactivity=interactivity) + return _plot_datetime( + clean_list, + plot_id=plot_id, + interactivity=interactivity, + color_mapping=color_mapping, + ) elif col_type == "boolean": - return _plot_boolean(clean_list, plot_id=plot_id, interactivity=interactivity) + return _plot_boolean( + clean_list, + plot_id=plot_id, + interactivity=interactivity, + color_mapping=color_mapping, + ) else: return "
" -def _plot_categorical(data: list[str], plot_id: str, interactivity: bool = True) -> str: +def _plot_categorical( + data: list[str], + plot_id: str, + color_mapping: dict[str, str], + interactivity: bool = True, +) -> str: category_counts = {} for item in data: if item in category_counts: @@ -404,7 +429,7 @@ def _plot_categorical(data: list[str], plot_id: str, interactivity: bool = True) svg = _make_categories_bar_svg( width_px=DEFAULT_WIDTH_PX, height_px=DEFAULT_HEIGHT_PX, - fill=COLOR_MAPPING["string"], + fill=color_mapping["string"], plot_id=plot_id, proportions=proportions, categories=[ @@ -417,7 +442,12 @@ def _plot_categorical(data: list[str], plot_id: str, interactivity: bool = True) return svg.as_str() -def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = True) -> str: +def _plot_boolean( + data: list[bool], + plot_id: str, + color_mapping: dict[str, str], + interactivity: bool = True, +) -> str: true_count = sum(data) false_count = len(data) - true_count total_count = len(data) @@ -443,7 +473,7 @@ def _plot_boolean(data: list[bool], plot_id: str, interactivity: bool = True) -> svg = _make_categories_bar_svg( width_px=DEFAULT_WIDTH_PX, height_px=DEFAULT_HEIGHT_PX, - fill=COLOR_MAPPING["boolean"], + fill=color_mapping["boolean"], plot_id=plot_id, proportions=proportions, categories=categories, @@ -580,7 +610,10 @@ def _make_categories_bar_svg( def _plot_numeric( - data: list[float] | list[int], plot_id: str, interactivity: bool = True + data: list[float] | list[int], + plot_id: str, + color_mapping: dict[str, str], + interactivity: bool = True, ) -> str: data_min, data_max = min(data), max(data) data_range = data_max - data_min @@ -625,7 +658,7 @@ def _plot_numeric( svg = _make_histogram_svg( width_px=DEFAULT_WIDTH_PX, height_px=DEFAULT_HEIGHT_PX, - fill=COLOR_MAPPING["numeric"], + fill=color_mapping["numeric"], plot_id=plot_id, normalized_mean=normalized_mean, data_max=str(round(data_max, 2)), @@ -639,7 +672,10 @@ def _plot_numeric( def 
_plot_datetime( - dates: list[datetime], plot_id: str, interactivity: bool = True + dates: list[datetime], + plot_id: str, + color_mapping: dict[str, str], + interactivity: bool = True, ) -> str: date_timestamps = [x.timestamp() for x in dates] data_min, data_max = min(date_timestamps), max(date_timestamps) @@ -689,7 +725,7 @@ def _plot_datetime( svg = _make_histogram_svg( width_px=DEFAULT_WIDTH_PX, height_px=DEFAULT_HEIGHT_PX, - fill=COLOR_MAPPING["datetime"], + fill=color_mapping["datetime"], plot_id=plot_id, normalized_mean=normalized_mean, data_max=str(datetime.fromtimestamp(data_max, tz=timezone.utc).date()), From a04fe48f2e91e21c00f32764385daa2224ed0419 Mon Sep 17 00:00:00 2001 From: Jules <54960783+juleswg23@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:48:51 -0400 Subject: [PATCH 26/26] run make-update --- .../tests/__snapshots__/test_summary.ambr | 69 +++++++++++++++++-- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/gt_extras/tests/__snapshots__/test_summary.ambr b/gt_extras/tests/__snapshots__/test_summary.ambr index 63d91650..692eded2 100644 --- a/gt_extras/tests/__snapshots__/test_summary.ambr +++ b/gt_extras/tests/__snapshots__/test_summary.ambr @@ -1,4 +1,65 @@ # serializer version: 1 +# name: test_gt_plt_summary_additional_parameters_snap[pd_and_pl_optional_parameters] + ''' + + + signal + + numeric + 1.55.1 + 20.0% + 3.02 + 2.75 + 1.57 + No Singular Mode + + + signal + + modes_test + 15 + 0.0% + 3.00 + 3.00 + 1.58 + No Singular Mode + + + List + + string + + 20.0% + — + — + — + — + + + Check + + boolean + + 0.0% + 0.40 + — + — + — + + + Clock + + datetime + 2024-01-012024-01-05 + 20.0% + — + — + — + — + + + ''' +# --- # name: test_gt_plt_summary_snap[pd_and_pl] ''' @@ -6,7 +67,7 @@ signal numeric - 2 rows[1.5 to 3.3]2 rows[3.3 to 5.1] 20.0% 3.02 2.75 @@ -71,7 +132,7 @@ Clock datetime - 2 rows[2024-01-01 to 2024-01-03]2 rows[2024-01-03 to 2024-01-05] 20.0% — —