Skip to content

Commit 9b65262

Browse files
authored
Data Explorer: Override numpy.histogram bin edges when all the values are constant to return lower edge == upper edge (#8611)
This addresses the Python side of #8095.
1 parent e466009 commit 9b65262

File tree

2 files changed

+53
-3
lines changed

2 files changed

+53
-3
lines changed

extensions/positron-python/python_files/posit/positron/data_explorer.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,16 @@ def _get_histogram_numpy(data, num_bins, method="fd", *, to_numpy=False):
20322032

20332033
if need_recompute:
20342034
bin_counts, bin_edges = np.histogram(data, **hist_params)
2035+
2036+
# Special case: if we have a single bin, check if all values are the same
2037+
# If so, override the bin edges to be the same value instead of value +/- 0.5
2038+
if len(bin_counts) == 1 and len(data) > 0:
2039+
# Check if all non-null values are the same
2040+
unique_values = np.unique(data)
2041+
if len(unique_values) == 1:
2042+
# All values are the same, set bin edges to [value, value]
2043+
bin_edges = np.array([unique_values[0], unique_values[0]])
2044+
20352045
return bin_counts, bin_edges
20362046

20372047

extensions/positron-python/python_files/posit/positron/tests/test_data_explorer.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2978,23 +2978,23 @@ def test_pandas_polars_profile_histogram(dxf: DataExplorerFixture):
29782978
(
29792979
_get_histogram(5, method="sturges"),
29802980
{
2981-
"bin_edges": ["0.5000", "1.50"],
2981+
"bin_edges": ["1.00", "1.00"],
29822982
"bin_counts": [11],
29832983
"quantiles": [],
29842984
},
29852985
),
29862986
(
29872987
_get_histogram(5, method="freedman_diaconis"),
29882988
{
2989-
"bin_edges": ["0.5000", "1.50"],
2989+
"bin_edges": ["1.00", "1.00"],
29902990
"bin_counts": [11],
29912991
"quantiles": [],
29922992
},
29932993
),
29942994
(
29952995
_get_histogram(5, method="scott"),
29962996
{
2997-
"bin_edges": ["0.5000", "1.50"],
2997+
"bin_edges": ["1.00", "1.00"],
29982998
"bin_counts": [11],
29992999
"quantiles": [],
30003000
},
@@ -3101,6 +3101,46 @@ def test_profile_histogram_windows_int32_bug():
31013101
assert (result == expected).all()
31023102

31033103

3104+
def test_histogram_single_value_special_case():
    # When every input value is identical, the histogram must consist of a
    # single bin whose lower and upper edges both equal that value.
    from ..data_explorer import _get_histogram_numpy

    # Each entry: (input array, binning method, repeated value, element count)
    constant_cases = [
        (np.array([5, 5, 5, 5, 5]), "sturges", 5, 5),
        (np.array([3.14, 3.14, 3.14, 3.14]), "fd", 3.14, 4),
        (np.array([42]), "scott", 42, 1),
    ]
    for values, method, constant, size in constant_cases:
        counts, edges = _get_histogram_numpy(values, 10, method=method)
        # One bin holding every element
        assert len(counts) == 1
        assert counts[0] == size
        # Both edges sit exactly on the repeated value
        assert len(edges) == 2
        assert edges[0] == constant
        assert edges[1] == constant

    # Input with several distinct values must still be binned normally
    varied = np.array([1, 2, 3, 4, 5])
    counts, edges = _get_histogram_numpy(varied, 5, method="fixed")
    assert len(counts) == 5
    assert len(edges) == 6
    # The outermost edges must differ when the data is not constant
    assert edges[0] != edges[-1]
3142+
3143+
31043144
# ----------------------------------------------------------------------
31053145
# polars backend functionality tests
31063146

0 commit comments

Comments
 (0)