From 60120a7f1627331aee407e268fed342f30083852 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 3 Jul 2025 11:15:29 -0400 Subject: [PATCH 01/30] renaming class --- src/celldega/viz/__init__.py | 8 ++++---- src/celldega/viz/widget.py | 30 +----------------------------- 2 files changed, 5 insertions(+), 33 deletions(-) diff --git a/src/celldega/viz/__init__.py b/src/celldega/viz/__init__.py index 717e7db4..7172b2ec 100644 --- a/src/celldega/viz/__init__.py +++ b/src/celldega/viz/__init__.py @@ -5,16 +5,16 @@ from ipywidgets import HBox, Layout, jslink from .local_server import get_local_server -from .widget import Landscape, Matrix +from .widget import Landscape, Clustergram def landscape_matrix(landscape, mat, width="600px", height="700px"): """ - Display a `Landscape` widget and a `Matrix` widget side by side. + Display a `Landscape` widget and a `Clustergram` widget side by side. Args: landscape (Landscape): A `Landscape` widget. - mat (Matrix): A `Matrix` widget. + cgm (Clustergram): A `Clustergram` widget. width (str): The width of the widgets. height (str): The height of the widgets. @@ -34,4 +34,4 @@ def landscape_matrix(landscape, mat, width="600px", height="700px"): return HBox([landscape, mat]) -__all__ = ["Landscape", "Matrix", "get_local_server", "landscape_matrix"] +__all__ = ["Landscape", "Clustergram", "get_local_server", "landscape_matrix"] diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index e3e13af2..45341612 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -90,7 +90,7 @@ def update_cell_clusters(self, new_clusters): self.cell_clusters = new_clusters -class Matrix(anywidget.AnyWidget): +class Clustergram(anywidget.AnyWidget): """ A widget for interactive visualization of a hierarchically clustered matrix. @@ -119,31 +119,3 @@ class Matrix(anywidget.AnyWidget): width = traitlets.Int(600).tag(sync=True) height = traitlets.Int(600).tag(sync=True) click_info = traitlets.Dict({}).tag(sync=True) - - -class MatrixNew(anywidget.AnyWidget): - """ - A new matrix widget for enhanced visualization capabilities. - - Attributes: - component (str): The name of the component. - network (dict): The network dictionary. - click_info (dict): The click_info dictionary. - width (int): Width of the widget. - height (int): Height of the widget. - value (int): The value traitlet. - - Returns: - MatrixNew: An enhanced widget for matrix visualization. - """ - - _esm = Path(__file__).parent / "../static" / "widget.js" - _css = Path(__file__).parent / "../static" / "widget.css" - value = traitlets.Int(0).tag(sync=True) - component = traitlets.Unicode("MatrixNew").tag(sync=True) - - network = traitlets.Dict({}).tag(sync=True) - width = traitlets.Int(600).tag(sync=True) - height = traitlets.Int(600).tag(sync=True) - - click_info = traitlets.Dict({}).tag(sync=True) From 1a311b30a4c9e5c742daca0102b20fc208aa5518 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 3 Jul 2025 16:20:53 -0400 Subject: [PATCH 02/30] clustergram registry --- src/celldega/__init__.py | 4 ++-- src/celldega/clust/matrix.py | 34 ++++++++++++++++++++++++++++++ src/celldega/viz/__init__.py | 4 ++-- src/celldega/viz/widget.py | 40 ++++++++++++++++++++++++++++-------- 4 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/celldega/__init__.py b/src/celldega/__init__.py index 8af4101f..52486ba2 100644 --- a/src/celldega/__init__.py +++ b/src/celldega/__init__.py @@ -5,7 +5,7 @@ from celldega.nbhd import alpha_shape from celldega.pre import landscape from celldega.qc import qc_segmentation -from celldega.viz import Landscape, Matrix +from celldega.viz import Clustergram, Landscape warnings.filterwarnings("ignore", category=FutureWarning) @@ -16,8 +16,8 @@ __version__ = "unknown" __all__ = [ + "Clustergram", "Landscape", - "Matrix", "alpha_shape", "clust", "landscape", diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index 8718929e..4bec506b 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -11,6 +11,7 @@ from anndata import AnnData import numpy as np +import hashlib import pandas as pd from scipy.cluster.hierarchy import dendrogram, linkage from scipy.spatial.distance import pdist @@ -49,6 +50,28 @@ _distance_cache = weakref.WeakKeyDictionary() _ranking_cache = weakref.WeakKeyDictionary() +def quick_hash_data(data: pd.DataFrame | AnnData, max_rows=100, max_cols=100) -> str: + try: + if isinstance(data, pd.DataFrame): + df = data.select_dtypes(include=[np.number]) # drop object/string columns + row_means = df.mean(axis=1).values[:max_rows] + col_means = df.mean(axis=0).values[:max_cols] + elif isinstance(data, AnnData): + import scipy.sparse + x = data.X + if scipy.sparse.issparse(x): + x = x.toarray() + x = np.asarray(x, dtype=np.float32) + row_means = x.mean(axis=1)[:max_rows] + col_means = x.mean(axis=0)[:max_cols] + else: + return f"cgm_{id(data)}" + + sig = np.concatenate([row_means, col_means]) + sig_bytes = sig.astype(np.float32).tobytes() + return f"cgm_{hashlib.md5(sig_bytes).hexdigest()[:12]}" + except Exception: + return f"cgm_{id(data)}" class Matrix: """ @@ -85,6 +108,7 @@ def __init__( disable_processing: bool = True, # Visualization parameters global_colors: dict[str, str] | pd.DataFrame | None = None, + name: str | None = None, ): """ Create Matrix with automatic processing unless disabled. @@ -134,6 +158,13 @@ def __init__( # Visualization structure self.viz: dict[str, Any] = DEFAULT_VIZ.copy() + # if name is None, generate a quick hash-based name from the data content + if name is None: + # Generate a quick hash-based name from the data content + self._data_hash_name = quick_hash_data(data) + else: + self._data_hash_name = name + # Load data and optionally apply processing if data is not None: # Step 1: Always load data @@ -852,6 +883,9 @@ def _viz_json(self, dendro: bool = True, links: bool = False) -> None: """Generate visualization JSON structure.""" dat, viz = self.dat, self.viz + # add name + viz["name"] = self._data_hash_name + viz["linkage"] = { axis: dat["node_info"][axis]["Y"].tolist() for axis in (Axis.ROW.value, Axis.COL.value) } diff --git a/src/celldega/viz/__init__.py b/src/celldega/viz/__init__.py index 7172b2ec..eb701285 100644 --- a/src/celldega/viz/__init__.py +++ b/src/celldega/viz/__init__.py @@ -5,7 +5,7 @@ from ipywidgets import HBox, Layout, jslink from .local_server import get_local_server -from .widget import Landscape, Clustergram +from .widget import Clustergram, Landscape def landscape_matrix(landscape, mat, width="600px", height="700px"): @@ -34,4 +34,4 @@ def landscape_matrix(landscape, mat, width="600px", height="700px"): return HBox([landscape, mat]) -__all__ = ["Landscape", "Clustergram", "get_local_server", "landscape_matrix"] +__all__ = ["Clustergram", "Landscape", "get_local_server", "landscape_matrix"] diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 45341612..8535b1d3 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -7,6 +7,8 @@ import anywidget import traitlets +_clustergram_registry = {} # maps names to widget instances + class Landscape(anywidget.AnyWidget): """ @@ -94,28 +96,50 @@ class Clustergram(anywidget.AnyWidget): """ A widget for interactive visualization of a hierarchically clustered matrix. + Automatically replaces older widgets with the same name to prevent notebook bloat. + Args: value (int): The value traitlet. component (str): The component traitlet. network (dict): The network traitlet. click_info (dict): The click_info traitlet. - - Attributes: - component (str): The name of the component. - network (dict): The network dictionary. - click_info (dict): The click_info dictionary. + name (str): Optional name for this widget instance. Returns: - Matrix: A widget for visualizing a hierarchically clustered matrix. + Clustergram: A widget for visualizing a hierarchically clustered matrix. """ _esm = Path(__file__).parent / "../static" / "widget.js" _css = Path(__file__).parent / "../static" / "widget.css" + value = traitlets.Int(0).tag(sync=True) component = traitlets.Unicode("Matrix").tag(sync=True) - network = traitlets.Dict({}).tag(sync=True) - width = traitlets.Int(600).tag(sync=True) height = traitlets.Int(600).tag(sync=True) click_info = traitlets.Dict({}).tag(sync=True) + + def __init__(self, **kwargs): + + # set name from network.name + if "network" in kwargs and "name" not in kwargs: + name = kwargs["network"].get("name", None) + + # Close any previously registered widget with the same name + old_widget = _clustergram_registry.get(name) + + print(_clustergram_registry.keys()) + + if old_widget: + try: + old_widget.close() + except Exception: + pass + + # Pass name into traitlets + kwargs["name"] = name + + super().__init__(**kwargs) + + # Store new widget + _clustergram_registry[name] = self \ No newline at end of file From 5ba6e97f40dcf299e0e35764222a5873a3156185 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 3 Jul 2025 16:32:05 -0400 Subject: [PATCH 03/30] linting fixes --- src/celldega/clust/matrix.py | 6 +----- src/celldega/viz/widget.py | 6 +++--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index 4bec506b..28098bdb 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -1,9 +1,6 @@ -""" -Optimized Matrix class with improved maintainability and time/space complexity. -""" - from __future__ import annotations +import hashlib import json from typing import Any import warnings @@ -11,7 +8,6 @@ from anndata import AnnData import numpy as np -import hashlib import pandas as pd from scipy.cluster.hierarchy import dendrogram, linkage from scipy.spatial.distance import pdist diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 8535b1d3..48a88d0f 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -2,11 +2,13 @@ Widget module for interactive visualization components. """ +from contextlib import suppress from pathlib import Path import anywidget import traitlets + _clustergram_registry = {} # maps names to widget instances @@ -131,10 +133,8 @@ def __init__(self, **kwargs): print(_clustergram_registry.keys()) if old_widget: - try: + with suppress(Exception): old_widget.close() - except Exception: - pass # Pass name into traitlets kwargs["name"] = name From 212318a403d184d22d033967f44bbf2b36b78769 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 3 Jul 2025 16:49:33 -0400 Subject: [PATCH 04/30] removed print statement --- src/celldega/viz/widget.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 48a88d0f..c4268b79 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -130,8 +130,6 @@ def __init__(self, **kwargs): # Close any previously registered widget with the same name old_widget = _clustergram_registry.get(name) - print(_clustergram_registry.keys()) - if old_widget: with suppress(Exception): old_widget.close() From 4fe6b4adb27ccc2a48d657c97ff31ccf9b0cb8ef Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 3 Jul 2025 16:56:29 -0400 Subject: [PATCH 05/30] removed name logic --- src/celldega/viz/widget.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index c4268b79..6644b99b 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -105,7 +105,6 @@ class Clustergram(anywidget.AnyWidget): component (str): The component traitlet. network (dict): The network traitlet. click_info (dict): The click_info traitlet. - name (str): Optional name for this widget instance. Returns: Clustergram: A widget for visualizing a hierarchically clustered matrix. @@ -124,7 +123,7 @@ class Clustergram(anywidget.AnyWidget): def __init__(self, **kwargs): # set name from network.name - if "network" in kwargs and "name" not in kwargs: + if "network" in kwargs: name = kwargs["network"].get("name", None) # Close any previously registered widget with the same name From e7e91150bdab6286a261beb45b67f666a89da4e3 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 11:53:59 -0400 Subject: [PATCH 06/30] Add Parquet export support for Clustergram widget (#111) * manually bringing in parquet processing * big int * fixing bug with parquet matrix processing * fix index parsing bug * removed print and console logs * added mat as kwarg * Update src/celldega/viz/widget.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update js/read_parquet/network_from_parquet.js Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * implemnting astype change --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- js/read_parquet/network_from_parquet.js | 51 ++ js/widget.js | 17 +- notebooks/Single_Category_Matrix.ipynb | 1120 +++++++++++++++++++++++ src/celldega/clust/matrix.py | 58 ++ src/celldega/viz/widget.py | 40 +- 5 files changed, 1274 insertions(+), 12 deletions(-) create mode 100644 js/read_parquet/network_from_parquet.js create mode 100644 notebooks/Single_Category_Matrix.ipynb diff --git a/js/read_parquet/network_from_parquet.js b/js/read_parquet/network_from_parquet.js new file mode 100644 index 00000000..4b0a5d24 --- /dev/null +++ b/js/read_parquet/network_from_parquet.js @@ -0,0 +1,51 @@ +import { arrayBufferToArrowTable } from './arrayBufferToArrowTable'; + +function tableToObjects(table) { + const cols = table.schema.fields.map((f) => ({ + name: f.name, + data: table.getChild(f.name).toArray(), + })); + return Array.from({ length: table.numRows }, (_, i) => + Object.fromEntries(cols.map((col) => [col.name, col.data[i]])) + ); +} + +function tableToMatrix(table) { + let colNames = table.schema.fields.map((f) => f.name); + if (colNames[0] === 'row' || colNames[0] === 'index') { + colNames = colNames.slice(1); + } + const cols = colNames.map((n) => table.getChild(n).toArray()); + const mat = []; + for (let r = 0; r < table.numRows; r++) { + const row = cols.map((c) => c[r]); + mat.push(Array.from(row)); + } + return mat; +} + +export const networkFromParquet = async ( + meta, + matBytes, + rowNodesBytes, + colNodesBytes, + rowLinkBytes, + colLinkBytes + ) => { + const matTable = await arrayBufferToArrowTable(matBytes.buffer); + const rowNodesTable = await arrayBufferToArrowTable(rowNodesBytes.buffer); + const colNodesTable = await arrayBufferToArrowTable(colNodesBytes.buffer); + const rowLinkTable = await arrayBufferToArrowTable(rowLinkBytes.buffer); + const colLinkTable = await arrayBufferToArrowTable(colLinkBytes.buffer); + + const network = { ...meta }; + network.mat = tableToMatrix(matTable); + network.row_nodes = tableToObjects(rowNodesTable); + network.col_nodes = tableToObjects(colNodesTable); + network.linkage = { + row: tableToMatrix(rowLinkTable), + col: tableToMatrix(colLinkTable), + }; + + return network; + }; \ No newline at end of file diff --git a/js/widget.js b/js/widget.js index 597d8015..348bb944 100644 --- a/js/widget.js +++ b/js/widget.js @@ -1,4 +1,5 @@ import './widget.css'; +import { networkFromParquet } from './read_parquet/network_from_parquet'; import { handleAsyncError, handleValidationWarning, @@ -117,15 +118,29 @@ const render_landscape = async ({ model, el }) => { }; const render_matrix_new = async ({ model, el }) => { - const network = model.get('network'); + let network = model.get('network'); const width = model.get('width'); const height = model.get('height'); + const matBytes = model.get('mat_parquet'); + if (matBytes && matBytes.byteLength > 0) { + + network = await networkFromParquet( + model.get('network_meta'), + matBytes, + model.get('row_nodes_parquet'), + model.get('col_nodes_parquet'), + model.get('row_linkage_parquet'), + model.get('col_linkage_parquet') + ); + } + matrix_viz(model, el, network, width, height); }; // Main render function - no export keyword function render({ model, el }) { + try { const componentType = model.get('component'); diff --git a/notebooks/Single_Category_Matrix.ipynb b/notebooks/Single_Category_Matrix.ipynb new file mode 100644 index 00000000..4f971309 --- /dev/null +++ b/notebooks/Single_Category_Matrix.ipynb @@ -0,0 +1,1120 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "72af410d-a468-47a3-8270-97720fdf60cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "env: ANYWIDGET_HMR=1\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%env ANYWIDGET_HMR=1" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6f917896-9329-4ad7-8c27-924a7890e32a", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import celldega as dega" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2aa7a572-5fa7-4c00-8cd4-855b66bd0411", + "metadata": {}, + "outputs": [], + "source": [ + "from ipywidgets import Widget\n", + "Widget.close_all()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1624a711-8758-4c86-9aee-3f39d42441ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1000, 500)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# generate random matrix\n", + "num_cols = 500\n", + "num_rows = 1000\n", + "\n", + "np.random.seed(seed=100)\n", + "mat = np.random.rand(num_rows, num_cols)\n", + "\n", + "# make row and col labels\n", + "rows = range(num_rows)\n", + "cols = range(num_cols)\n", + "rows = [str(i) for i in rows]\n", + "cols = [str(i) for i in cols]\n", + "\n", + "# make dataframe\n", + "df = pd.DataFrame(data=mat, columns=cols, index=rows)\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b2fbedca-566b-4ce1-880a-d1044a4478f5", + "metadata": {}, + "outputs": [], + "source": [ + "meta_col = pd.DataFrame(index=df.columns.tolist())\n", + "top_cols = df.sum(axis=0).sort_values(ascending=False).index.tolist()[:5]\n", + "meta_col['type'] = 'low'\n", + "meta_col['experiment'] = 'a'\n", + "meta_col['test'] = 'something'\n", + "meta_col.loc[top_cols, 'type'] = 'high'\n", + "meta_col.loc['0', 'experiment'] = 'b'\n", + "meta_col.loc['1', 'experiment'] = 'b'\n", + "meta_col.loc['2', 'experiment'] = 'b'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1298800a-7a6b-4023-9c63-e5e3acfc8e06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typeexperimenttest
0lowbsomething
1lowbsomething
2lowbsomething
3lowasomething
4lowasomething
\n", + "
" + ], + "text/plain": [ + " type experiment test\n", + "0 low b something\n", + "1 low b something\n", + "2 low b something\n", + "3 low a something\n", + "4 low a something" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meta_col.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "10d11372-b4a8-4d9c-bcfe-c6bc1ca737ca", + "metadata": {}, + "outputs": [], + "source": [ + "meta_row = pd.DataFrame(index=df.index.tolist())\n", + "top_rows = df.sum(axis=1).sort_values(ascending=False).index.tolist()[:5]\n", + "meta_row['type'] = 'low'\n", + "meta_row.loc['0', 'type'] = 'high'\n", + "meta_row.loc['1', 'type'] = 'high'\n", + "meta_row.loc['2', 'type'] = 'very-high'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9bc8c1db-1856-4043-84c9-77f7895b371c", + "metadata": {}, + "outputs": [], + "source": [ + "df_colors = pd.DataFrame()\n", + "df_colors.loc['low', 'color'] = 'blue'\n", + "df_colors.loc['high', 'color'] = 'black'\n", + "df_colors.loc['very-high', 'color'] = 'yellow'\n", + "# df_colors.loc['a', 'color'] = 'orange'\n", + "# df_colors.loc['b', 'color'] = 'purple'" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7082eb7b-19d6-4138-aa43-31f0d2040445", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typeexperimenttest
0lowbsomething
1lowbsomething
2lowbsomething
3lowasomething
4lowasomething
............
495lowasomething
496lowasomething
497lowasomething
498lowasomething
499highasomething
\n", + "

500 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " type experiment test\n", + "0 low b something\n", + "1 low b something\n", + "2 low b something\n", + "3 low a something\n", + "4 low a something\n", + ".. ... ... ...\n", + "495 low a something\n", + "496 low a something\n", + "497 low a something\n", + "498 low a something\n", + "499 high a something\n", + "\n", + "[500 rows x 3 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meta_col" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "dd623659-512e-469e-a1ae-68c4e784fbdc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
type
0high
1high
2very-high
3low
4low
......
995low
996low
997low
998low
999low
\n", + "

1000 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " type\n", + "0 high\n", + "1 high\n", + "2 very-high\n", + "3 low\n", + "4 low\n", + ".. ...\n", + "995 low\n", + "996 low\n", + "997 low\n", + "998 low\n", + "999 low\n", + "\n", + "[1000 rows x 1 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meta_row" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c3130fa6-d77d-4e38-b91d-d3f4bb2c462b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...490491492493494495496497498499
00.5434050.2783690.4245180.8447760.0047190.1215690.6707490.8258530.1367070.575093...0.7571580.6861720.8563930.2308780.5193380.3433330.6587730.2827900.5024660.303278
10.5525290.1611270.5685490.4857100.1275200.5436920.2004910.6701610.5581120.232378...0.6165220.9475790.9064720.9650940.3376210.6564030.2914560.1508690.0369320.597964
20.0277320.3822370.9532510.2221990.3051260.8193200.5784700.0276170.9543880.312139...0.2790620.6528830.3829980.4599170.1170760.8204980.0265540.7377060.8583150.744456
30.1065100.4109000.3370360.0998370.9230920.8301820.5537840.9794810.8232150.477880...0.6308910.4789550.1407950.3291100.7756160.6330310.1052470.9808980.3647980.778565
40.2594440.0230030.6544070.9692160.6989520.0320540.0032950.1223100.7298920.743647...0.5140560.9271850.9585490.6372250.8104250.2000900.9190830.4783020.6359120.418574
..................................................................
9950.4523510.1072720.4026130.5610700.2110270.2669750.9368880.5491720.7055220.299131...0.4643190.7763640.2477120.4859190.6946550.4003010.7896910.4179200.9246770.217532
9960.8303610.9225380.8726610.9339650.4679940.2166360.1523800.2525260.2111260.731267...0.9313730.1359470.5127730.5593360.2535750.5252370.2066480.2638760.8274280.828747
9970.1113210.4000650.9548790.0820370.1843620.6114590.5573530.1809280.8291280.470532...0.4339400.7311240.4379030.6749010.3856840.9159620.1333510.3686540.6977460.226066
9980.1052830.0711390.6267550.9680890.4193720.4353460.1172110.7530260.1570660.632154...0.7288390.9905280.9347440.2083940.8322310.7143530.4137640.9398620.0652000.521385
9990.4168040.8764200.0588890.7758550.2445100.6227390.7786530.2896940.5519550.663487...0.8025160.2386750.4234080.2200760.8950980.0469240.1691850.7552740.1457080.578691
\n", + "

1000 rows × 500 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 0.543405 0.278369 0.424518 0.844776 0.004719 0.121569 0.670749 \n", + "1 0.552529 0.161127 0.568549 0.485710 0.127520 0.543692 0.200491 \n", + "2 0.027732 0.382237 0.953251 0.222199 0.305126 0.819320 0.578470 \n", + "3 0.106510 0.410900 0.337036 0.099837 0.923092 0.830182 0.553784 \n", + "4 0.259444 0.023003 0.654407 0.969216 0.698952 0.032054 0.003295 \n", + ".. ... ... ... ... ... ... ... \n", + "995 0.452351 0.107272 0.402613 0.561070 0.211027 0.266975 0.936888 \n", + "996 0.830361 0.922538 0.872661 0.933965 0.467994 0.216636 0.152380 \n", + "997 0.111321 0.400065 0.954879 0.082037 0.184362 0.611459 0.557353 \n", + "998 0.105283 0.071139 0.626755 0.968089 0.419372 0.435346 0.117211 \n", + "999 0.416804 0.876420 0.058889 0.775855 0.244510 0.622739 0.778653 \n", + "\n", + " 7 8 9 ... 490 491 492 \\\n", + "0 0.825853 0.136707 0.575093 ... 0.757158 0.686172 0.856393 \n", + "1 0.670161 0.558112 0.232378 ... 0.616522 0.947579 0.906472 \n", + "2 0.027617 0.954388 0.312139 ... 0.279062 0.652883 0.382998 \n", + "3 0.979481 0.823215 0.477880 ... 0.630891 0.478955 0.140795 \n", + "4 0.122310 0.729892 0.743647 ... 0.514056 0.927185 0.958549 \n", + ".. ... ... ... ... ... ... ... \n", + "995 0.549172 0.705522 0.299131 ... 0.464319 0.776364 0.247712 \n", + "996 0.252526 0.211126 0.731267 ... 0.931373 0.135947 0.512773 \n", + "997 0.180928 0.829128 0.470532 ... 0.433940 0.731124 0.437903 \n", + "998 0.753026 0.157066 0.632154 ... 0.728839 0.990528 0.934744 \n", + "999 0.289694 0.551955 0.663487 ... 0.802516 0.238675 0.423408 \n", + "\n", + " 493 494 495 496 497 498 499 \n", + "0 0.230878 0.519338 0.343333 0.658773 0.282790 0.502466 0.303278 \n", + "1 0.965094 0.337621 0.656403 0.291456 0.150869 0.036932 0.597964 \n", + "2 0.459917 0.117076 0.820498 0.026554 0.737706 0.858315 0.744456 \n", + "3 0.329110 0.775616 0.633031 0.105247 0.980898 0.364798 0.778565 \n", + "4 0.637225 0.810425 0.200090 0.919083 0.478302 0.635912 0.418574 \n", + ".. ... ... ... ... ... ... ... \n", + "995 0.485919 0.694655 0.400301 0.789691 0.417920 0.924677 0.217532 \n", + "996 0.559336 0.253575 0.525237 0.206648 0.263876 0.827428 0.828747 \n", + "997 0.674901 0.385684 0.915962 0.133351 0.368654 0.697746 0.226066 \n", + "998 0.208394 0.832231 0.714353 0.413764 0.939862 0.065200 0.521385 \n", + "999 0.220076 0.895098 0.046924 0.169185 0.755274 0.145708 0.578691 \n", + "\n", + "[1000 rows x 500 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f6001290-0ad7-4f64-a472-a963ec13cc2f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
typeexperimenttest
0lowbsomething
1lowbsomething
2lowbsomething
3lowasomething
4lowasomething
\n", + "
" + ], + "text/plain": [ + " type experiment test\n", + "0 low b something\n", + "1 low b something\n", + "2 low b something\n", + "3 low a something\n", + "4 low a something" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meta_col.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "ef93ca7f-289b-474f-9328-0fbd33ec0507", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
color
lowblue
highblack
very-highyellow
\n", + "
" + ], + "text/plain": [ + " color\n", + "low blue\n", + "high black\n", + "very-high yellow" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_colors.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "03477f41-abb4-4b56-953d-874904748b93", + "metadata": {}, + "outputs": [], + "source": [ + "# mat.export_viz_parquet()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bd8bc2bc-e6e3-4bf1-bb73-70784337a86d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4fcb181b618b4ec1ac6d299425a6bd1f", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "Clustergram(height=500, network_meta={'linkage': {}, 'cat_colors': {'row': {}, 'col': {}}, 'matrix_colors': {'…" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mat = dega.clust.Matrix(df, meta_col=meta_col, meta_row=meta_row, name='parquet') # , global_colors=df_colors)\n", + "mat.clust()\n", + "cgm_1 = dega.viz.Clustergram(matrix=mat, width=500, height=500)\n", + "cgm_1" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5949a850-0db1-46fe-a4ac-26468e4df852", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3104048790b74688a0d1a8cac5335e2f", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "Clustergram(height=500, network={'row_nodes': [{'name': '0', 'ini': 1000, 'clust': 232, 'rank': 369, 'rankvar'…" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mat = dega.clust.Matrix(df, meta_col=meta_col, meta_row=meta_row, name='json') # , global_colors=df_colors)\n", + "mat.clust()\n", + "# cgm_1 = dega.viz.Clustergram(network=mat.viz, width=500, height=500)\n", + "cgm_2 = dega.viz.Clustergram(network=mat.viz, width=500, height=500)\n", + "cgm_2" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3a2e33ea-987f-4368-81b3-ad56bd86f73d", + "metadata": {}, + "outputs": [], + "source": [ + "cgm_1.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c291e09f-6eb0-4c6f-a2e8-ab56e0482f5a", + "metadata": {}, + "outputs": [], + "source": [ + "# cgm_2.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81f9fe79-afbf-4916-b38f-144b694a069f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6573a85a-325e-465c-8f31-1e24957a342d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c55cdd00-6163-4506-a5fe-e2edb7bdb416", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "af182fb04d104151b26cd27ebcf97ac5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": {} + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index 28098bdb..3969b691 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -590,6 +590,64 @@ def export_viz_json_string(self) -> str: def export_viz_to_widget(self, which_viz: str = "viz") -> str: """Export visualization for widget.""" return self.export_viz_json_string() + + def export_viz_parquet(self) -> dict[str, bytes]: + """Export visualization using Parquet encoded tables.""" + if not self._clustered: + warnings.warn( + "Matrix not clustered. Call clust() first.", + UserWarning, + stacklevel=2, + ) + + import io + import pyarrow as pa + import pyarrow.parquet as pq + + def _to_bytes(df: pd.DataFrame) -> bytes: + # Build a dtype mapping for all applicable columns + dtype_map = {} + for col in df.select_dtypes(include=["int64"]).columns: + dtype_map[col] = "int32" + for col in df.select_dtypes(include=["float64"]).columns: + dtype_map[col] = "float32" + + # Perform a single bulk cast + df_casted = df.astype(dtype_map, copy=False) + + # Serialize to Parquet + buf = io.BytesIO() + pq.write_table(pa.Table.from_pandas(df_casted), buf, compression="zstd") + return buf.getvalue() + + + viz = self.viz + + mat_df = pd.DataFrame( + self.dat["mat"], + index=self.dat["nodes"][Axis.ROW.value], + columns=self.dat["nodes"][Axis.COL.value], + ).reset_index(names="row") + + row_nodes_df = pd.DataFrame(viz.get("row_nodes", [])) + col_nodes_df = pd.DataFrame(viz.get("col_nodes", [])) + row_link_df = pd.DataFrame(viz.get("linkage", {}).get(Axis.ROW.value, [])) + col_link_df = pd.DataFrame(viz.get("linkage", {}).get(Axis.COL.value, [])) + + meta_json = viz.copy() + meta_json.pop("mat", None) + meta_json.pop("row_nodes", None) + meta_json.pop("col_nodes", None) + meta_json["linkage"] = {} + + return { + "mat": _to_bytes(mat_df), + "row_nodes": _to_bytes(row_nodes_df), + "col_nodes": _to_bytes(col_nodes_df), + "row_linkage": _to_bytes(row_link_df), + "col_linkage": _to_bytes(col_link_df), + "meta": meta_json, + } def add_category(self, axis: AxisInput, name: str, data: pd.Series) -> None: """ diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 6644b99b..482e93b6 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -116,27 +116,45 @@ class Clustergram(anywidget.AnyWidget): value = traitlets.Int(0).tag(sync=True) component = traitlets.Unicode("Matrix").tag(sync=True) network = traitlets.Dict({}).tag(sync=True) + network_meta = traitlets.Dict({}).tag(sync=True) width = traitlets.Int(600).tag(sync=True) height = traitlets.Int(600).tag(sync=True) click_info = traitlets.Dict({}).tag(sync=True) def __init__(self, **kwargs): + pq_data = kwargs.pop("parquet_data", None) + + # Allow fallback via a 'matrix' kwarg + if pq_data is None: + matrix = kwargs.pop("matrix", None) + if matrix is not None: + pq_data = matrix.export_viz_parquet() + elif "network" not in kwargs: + raise ValueError( + "You must pass either `network`, `parquet_data`, or `matrix` (for fallback). If both `network` and `matrix` are provided, `matrix` will be prioritized." + ) + + # Infer name from pq_data or network + name = kwargs.get("network", {}).get("name", None) + if pq_data is not None: + meta = pq_data.get("meta", {}) + name = meta.get("name", name) + kwargs.setdefault("network_meta", meta) + + parquet_traits = { + "mat_parquet": traitlets.Bytes(pq_data.get("mat", b"")).tag(sync=True), + "row_nodes_parquet": traitlets.Bytes(pq_data.get("row_nodes", b"")).tag(sync=True), + "col_nodes_parquet": traitlets.Bytes(pq_data.get("col_nodes", b"")).tag(sync=True), + "row_linkage_parquet": traitlets.Bytes(pq_data.get("row_linkage", b"")).tag(sync=True), + "col_linkage_parquet": traitlets.Bytes(pq_data.get("col_linkage", b"")).tag(sync=True), + } + self.add_traits(**parquet_traits) - # set name from network.name - if "network" in kwargs: - name = kwargs["network"].get("name", None) - - # Close any previously registered widget with the same name old_widget = _clustergram_registry.get(name) - if old_widget: with suppress(Exception): old_widget.close() - # Pass name into traitlets kwargs["name"] = name - super().__init__(**kwargs) - - # Store new widget - _clustergram_registry[name] = self \ No newline at end of file + _clustergram_registry[name] = self From 5be64f0084acc55eb3d1551163826daf31dfc529 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:12:35 -0400 Subject: [PATCH 07/30] Update src/celldega/clust/matrix.py more efficient hash for sparse data Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/celldega/clust/matrix.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index 3969b691..df9abc3b 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -56,10 +56,12 @@ def quick_hash_data(data: pd.DataFrame | AnnData, max_rows=100, max_cols=100) -> import scipy.sparse x = data.X if scipy.sparse.issparse(x): - x = x.toarray() - x = np.asarray(x, dtype=np.float32) - row_means = x.mean(axis=1)[:max_rows] - col_means = x.mean(axis=0)[:max_cols] + row_means = x.mean(axis=1).A1[:max_rows] # Use sparse matrix operations + col_means = x.mean(axis=0).A1[:max_cols] # Use sparse matrix operations + else: + x = np.asarray(x, dtype=np.float32) + row_means = x.mean(axis=1)[:max_rows] + col_means = x.mean(axis=0)[:max_cols] else: return f"cgm_{id(data)}" From 9f91403cdd419ce1cde891d6f3ed00462d9a8c77 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:17:54 -0400 Subject: [PATCH 08/30] ruff fix --- notebooks/Single_Category_Matrix.ipynb | 14 +++++++------- src/celldega/clust/matrix.py | 1 + tests/unit/test_qc/test_qc_module.py | 6 ++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/notebooks/Single_Category_Matrix.ipynb b/notebooks/Single_Category_Matrix.ipynb index 4f971309..e6b4f517 100644 --- a/notebooks/Single_Category_Matrix.ipynb +++ b/notebooks/Single_Category_Matrix.ipynb @@ -987,7 +987,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4fcb181b618b4ec1ac6d299425a6bd1f", + "model_id": "57a5dd690fd547ce83f92eb3859152aa", "version_major": 2, "version_minor": 1 }, @@ -1009,14 +1009,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 34, "id": "5949a850-0db1-46fe-a4ac-26468e4df852", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3104048790b74688a0d1a8cac5335e2f", + "model_id": "8719440e4a5b4a80aac382e171faa412", "version_major": 2, "version_minor": 1 }, @@ -1024,7 +1024,7 @@ "Clustergram(height=500, network={'row_nodes': [{'name': '0', 'ini': 1000, 'clust': 232, 'rank': 369, 'rankvar'…" ] }, - "execution_count": 16, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1049,12 +1049,12 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 37, "id": "c291e09f-6eb0-4c6f-a2e8-ab56e0482f5a", "metadata": {}, "outputs": [], "source": [ - "# cgm_2.close()" + "cgm_2.close()" ] }, { @@ -1103,7 +1103,7 @@ "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { - "af182fb04d104151b26cd27ebcf97ac5": { + "6150e5bb64a04b91b7697968a0fda2c1": { "model_module": "@jupyter-widgets/base", "model_module_version": "2.0.0", "model_name": "LayoutModel", diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index df9abc3b..b60ce7b7 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -603,6 +603,7 @@ def export_viz_parquet(self) -> dict[str, bytes]: ) import io + import pyarrow as pa import pyarrow.parquet as pq diff --git a/tests/unit/test_qc/test_qc_module.py b/tests/unit/test_qc/test_qc_module.py index 33857a02..05473eec 100644 --- a/tests/unit/test_qc/test_qc_module.py +++ b/tests/unit/test_qc/test_qc_module.py @@ -1,9 +1,11 @@ """ Test celldega.qc module. """ -import pytest -import sys from pathlib import Path +import sys + +import pytest + sys.path.insert(0, str(Path(__file__).parents[3] / "src")) From d08be3b64ee8bc79c163dfc9e0408ac8e9a10a1e Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:29:03 -0400 Subject: [PATCH 09/30] docs: add parquet_data usage (#117) --- docs/overview/usage.md | 27 ++++++++++++++++++++++++++- docs/python/viz/api.md | 4 ++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/overview/usage.md b/docs/overview/usage.md index ce436a04..72299219 100644 --- a/docs/overview/usage.md +++ b/docs/overview/usage.md @@ -1,3 +1,28 @@ # Usage -** coming soon ** \ No newline at end of file +This section provides a minimal example of displaying a clustered matrix with Celldega. + +## Clustergram `parquet_data` + +The `Clustergram` widget accepts a `parquet_data` argument that contains the network encoded as Parquet tables. Using this approach avoids transferring large JSON structures to the browser. You can obtain this dictionary from a [`Matrix`](../python/clust/api.md#celldega.clust.matrix.Matrix.export_viz_parquet) instance using `Matrix.export_viz_parquet()`. + +```python +import celldega as dega +import pandas as pd + +# Load expression data +df = pd.read_parquet("df_sig.parquet") + +# Create and cluster the matrix +mat = dega.clust.Matrix(df, name="demo") +mat.clust() + +# Export to Parquet-encoded bytes +pq_data = mat.export_viz_parquet() + +# Initialize widget with parquet_data +cgm = dega.viz.Clustergram(parquet_data=pq_data, width=500, height=500) +cgm +``` + +`Clustergram` also accepts `network` or `matrix` arguments, but `parquet_data` is recommended for large datasets. diff --git a/docs/python/viz/api.md b/docs/python/viz/api.md index 4ea1745c..0a3da160 100644 --- a/docs/python/viz/api.md +++ b/docs/python/viz/api.md @@ -2,5 +2,9 @@ ## Widget Classes +The `Clustergram` widget accepts a `parquet_data` argument for efficient +initialization. Use [`Matrix.export_viz_parquet`](../clust/api.md#celldega.clust.matrix.Matrix.export_viz_parquet) +to generate this data from a clustered matrix. + ::: celldega.viz From dc1435d7efaf9f844132c49cd1112f9b7b4d46fc Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:29:55 -0400 Subject: [PATCH 10/30] Add Clustergram parquet widget tests (#116) --- tests/unit/test_viz/test_widget.py | 56 ++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/unit/test_viz/test_widget.py diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py new file mode 100644 index 00000000..308bdcaf --- /dev/null +++ b/tests/unit/test_viz/test_widget.py @@ -0,0 +1,56 @@ +"""Tests for Clustergram widget with Parquet input.""" + +import numpy as np +import pandas as pd +import pytest + +try: + from celldega.clust import Matrix + from celldega.viz import Clustergram +except Exception as e: # pragma: no cover - if deps missing skip + pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) + + +def make_simple_matrix() -> Matrix: + np.random.seed(0) + df = pd.DataFrame(np.random.rand(4, 5)) + mat = Matrix(df, disable_processing=True) + mat.cluster() + return mat + + +def test_export_viz_parquet_returns_bytes() -> None: + mat = make_simple_matrix() + pq = mat.export_viz_parquet() + + expected_keys = { + "mat", + "row_nodes", + "col_nodes", + "row_linkage", + "col_linkage", + "meta", + } + + assert set(pq) == expected_keys + for key in expected_keys - {"meta"}: + assert isinstance(pq[key], (bytes, bytearray)) + assert pq[key] # non-empty + assert isinstance(pq["meta"], dict) + + +def test_clustergram_initializes_with_parquet() -> None: + mat = make_simple_matrix() + pq = mat.export_viz_parquet() + + widget = Clustergram(parquet_data=pq) + + assert widget.network_meta == pq["meta"] + for attr in [ + "mat_parquet", + "row_nodes_parquet", + "col_nodes_parquet", + "row_linkage_parquet", + "col_linkage_parquet", + ]: + assert getattr(widget, attr) == pq[attr] From 7c979314ca66b88badb3cadf76fbfb96e94488c0 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:45:50 -0400 Subject: [PATCH 11/30] upadted test --- tests/unit/test_viz/test_widget.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py index 308bdcaf..dc760a5e 100644 --- a/tests/unit/test_viz/test_widget.py +++ b/tests/unit/test_viz/test_widget.py @@ -43,14 +43,18 @@ def test_clustergram_initializes_with_parquet() -> None: mat = make_simple_matrix() pq = mat.export_viz_parquet() - widget = Clustergram(parquet_data=pq) + widget = Clustergram(matrix=mat) + # Confirm meta is set correctly assert widget.network_meta == pq["meta"] - for attr in [ - "mat_parquet", - "row_nodes_parquet", - "col_nodes_parquet", - "row_linkage_parquet", - "col_linkage_parquet", + + # Confirm dynamic parquet attributes exist and match expected values + for attr, key in [ + ("mat_parquet", "mat"), + ("row_nodes_parquet", "row_nodes"), + ("col_nodes_parquet", "col_nodes"), + ("row_linkage_parquet", "row_linkage"), + ("col_linkage_parquet", "col_linkage"), ]: - assert getattr(widget, attr) == pq[attr] + assert hasattr(widget, attr), f"Missing attribute: {attr}" + assert getattr(widget, attr) == pq[key], f"Attribute {attr} does not match expected parquet value" From eb4a6a82f59b62f3953bbd9f72160358d0a39a20 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:49:34 -0400 Subject: [PATCH 12/30] fixed test lint --- tests/unit/test_viz/test_widget.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_viz/test_widget.py b/tests/unit/test_viz/test_widget.py index dc760a5e..32f8cffd 100644 --- a/tests/unit/test_viz/test_widget.py +++ b/tests/unit/test_viz/test_widget.py @@ -4,6 +4,7 @@ import pandas as pd import pytest + try: from celldega.clust import Matrix from celldega.viz import Clustergram @@ -34,7 +35,7 @@ def test_export_viz_parquet_returns_bytes() -> None: assert set(pq) == expected_keys for key in expected_keys - {"meta"}: - assert isinstance(pq[key], (bytes, bytearray)) + assert isinstance(pq[key], bytes | bytearray) assert pq[key] # non-empty assert isinstance(pq["meta"], dict) From 118883b309f032939838f4ca2784fb5cc3dddb88 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 12:55:26 -0400 Subject: [PATCH 13/30] ruff format --- src/celldega/clust/constants.py | 41 ++++++++++++++++++++++++++++----- src/celldega/clust/matrix.py | 13 +++++++---- src/celldega/viz/widget.py | 8 +++++-- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/src/celldega/clust/constants.py b/src/celldega/clust/constants.py index 48de283f..3bc99fc2 100644 --- a/src/celldega/clust/constants.py +++ b/src/celldega/clust/constants.py @@ -88,12 +88,41 @@ class CacheLevel(Enum): LinkageType = Literal["average", "single", "complete", "ward"] _COLOR_PALETTE = [ - "#393b79", "#aec7e8", "#ff7f0e", "#ffbb78", "#98df8a", "#bcbd22", - "#404040", "#ff9896", "#c5b0d5", "#8c5648", "#1f77b4", "#5254a3", - "#FFDB58", "#c49c94", "#e377c2", "#7f7f7f", "#2ca02c", "#9467bd", - "#dbdb8d", "#17becf", "#637939", "#6b6ecf", "#9c9ede", "#d62728", - "#8ca252", "#8c6d31", "#bd9e39", "#e7cb94", "#843c39", "#ad494a", - "#d6616b", "#7b4173", "#a55194", "#ce6dbd", "#de9ed6" + "#393b79", + "#aec7e8", + "#ff7f0e", + "#ffbb78", + "#98df8a", + "#bcbd22", + "#404040", + "#ff9896", + "#c5b0d5", + "#8c5648", + "#1f77b4", + "#5254a3", + "#FFDB58", + "#c49c94", + "#e377c2", + "#7f7f7f", + "#2ca02c", + "#9467bd", + "#dbdb8d", + "#17becf", + "#637939", + "#6b6ecf", + "#9c9ede", + "#d62728", + "#8ca252", + "#8c6d31", + "#bd9e39", + "#e7cb94", + "#843c39", + "#ad494a", + "#d6616b", + "#7b4173", + "#a55194", + "#ce6dbd", + "#de9ed6", ] diff --git a/src/celldega/clust/matrix.py b/src/celldega/clust/matrix.py index b60ce7b7..a239bbd9 100644 --- a/src/celldega/clust/matrix.py +++ b/src/celldega/clust/matrix.py @@ -46,6 +46,7 @@ _distance_cache = weakref.WeakKeyDictionary() _ranking_cache = weakref.WeakKeyDictionary() + def quick_hash_data(data: pd.DataFrame | AnnData, max_rows=100, max_cols=100) -> str: try: if isinstance(data, pd.DataFrame): @@ -54,6 +55,7 @@ def quick_hash_data(data: pd.DataFrame | AnnData, max_rows=100, max_cols=100) -> col_means = df.mean(axis=0).values[:max_cols] elif isinstance(data, AnnData): import scipy.sparse + x = data.X if scipy.sparse.issparse(x): row_means = x.mean(axis=1).A1[:max_rows] # Use sparse matrix operations @@ -71,6 +73,7 @@ def quick_hash_data(data: pd.DataFrame | AnnData, max_rows=100, max_cols=100) -> except Exception: return f"cgm_{id(data)}" + class Matrix: """ High-performance matrix class for single-cell genomics data processing. @@ -592,7 +595,7 @@ def export_viz_json_string(self) -> str: def export_viz_to_widget(self, which_viz: str = "viz") -> str: """Export visualization for widget.""" return self.export_viz_json_string() - + def export_viz_parquet(self) -> dict[str, bytes]: """Export visualization using Parquet encoded tables.""" if not self._clustered: @@ -623,7 +626,6 @@ def _to_bytes(df: pd.DataFrame) -> bytes: pq.write_table(pa.Table.from_pandas(df_casted), buf, compression="zstd") return buf.getvalue() - viz = self.viz mat_df = pd.DataFrame( @@ -650,7 +652,7 @@ def _to_bytes(df: pd.DataFrame) -> bytes: "row_linkage": _to_bytes(row_link_df), "col_linkage": _to_bytes(col_link_df), "meta": meta_json, - } + } def add_category(self, axis: AxisInput, name: str, data: pd.Series) -> None: """ @@ -734,7 +736,9 @@ def add_cats(self, axis: AxisInput, cat_data: dict[str, Any]) -> None: if self._clustered: self.make_viz() - def set_global_cat_colors(self, color_mapping: dict[str, str] | pd.DataFrame | None = None) -> None: + def set_global_cat_colors( + self, color_mapping: dict[str, str] | pd.DataFrame | None = None + ) -> None: """ Set global category color mapping that applies across all categories. @@ -775,7 +779,6 @@ def set_global_cat_colors(self, color_mapping: dict[str, str] | pd.DataFrame | N self.viz["global_cat_colors"].update(color_mapping) - def set_matrix_colors(self, pos: str = "red", neg: str = "blue") -> None: """ Set matrix color scheme for positive and negative values. diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 482e93b6..50292555 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -145,8 +145,12 @@ def __init__(self, **kwargs): "mat_parquet": traitlets.Bytes(pq_data.get("mat", b"")).tag(sync=True), "row_nodes_parquet": traitlets.Bytes(pq_data.get("row_nodes", b"")).tag(sync=True), "col_nodes_parquet": traitlets.Bytes(pq_data.get("col_nodes", b"")).tag(sync=True), - "row_linkage_parquet": traitlets.Bytes(pq_data.get("row_linkage", b"")).tag(sync=True), - "col_linkage_parquet": traitlets.Bytes(pq_data.get("col_linkage", b"")).tag(sync=True), + "row_linkage_parquet": traitlets.Bytes(pq_data.get("row_linkage", b"")).tag( + sync=True + ), + "col_linkage_parquet": traitlets.Bytes(pq_data.get("col_linkage", b"")).tag( + sync=True + ), } self.add_traits(**parquet_traits) From 5b144d4a3383c5524267079a925ca4329d22cde2 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 13:00:59 -0400 Subject: [PATCH 14/30] format JS --- js/deck-gl/matrix/cat_layers.js | 1 - js/matrix/cat_data.js | 2 -- js/matrix/set_constants.js | 8 ++--- js/read_parquet/network_from_parquet.js | 48 ++++++++++++------------- js/widget.js | 4 +-- 5 files changed, 29 insertions(+), 34 deletions(-) diff --git a/js/deck-gl/matrix/cat_layers.js b/js/deck-gl/matrix/cat_layers.js index 418ff203..62f4d79a 100644 --- a/js/deck-gl/matrix/cat_layers.js +++ b/js/deck-gl/matrix/cat_layers.js @@ -3,7 +3,6 @@ import * as d3 from 'd3'; import { CustomMatrixLayer } from './custom_matrix_layer'; export const ini_row_cat_layer = (viz_state) => { - const transitions = { getPosition: { duration: viz_state.animate.duration, diff --git a/js/matrix/cat_data.js b/js/matrix/cat_data.js index 282e22b3..668c787f 100644 --- a/js/matrix/cat_data.js +++ b/js/matrix/cat_data.js @@ -9,7 +9,6 @@ const colorToRgba = (colorStr, alpha = 255) => { }; const set_cat_data = (network, viz_state, axis) => { - const isRow = axis === 'row'; const nodes = isRow ? network.row_nodes : network.col_nodes; const num_cats = isRow @@ -28,7 +27,6 @@ const set_cat_data = (network, viz_state, axis) => { const cat_data = nodes .flatMap((node, node_index) => { return Array.from({ length: num_cats }).map((_, cat_index) => { - const cat_name = `cat-${cat_index}`; const inst_cat = node[cat_name]; diff --git a/js/matrix/set_constants.js b/js/matrix/set_constants.js index 31ecfaee..76b80e41 100644 --- a/js/matrix/set_constants.js +++ b/js/matrix/set_constants.js @@ -39,8 +39,10 @@ export const set_mat_constants = ( viz_state.viz.row_cat_offset = 9; viz_state.viz.col_cat_offset = 9; - viz_state.viz.mat_width = width - (viz_state.viz.row_cat_offset * viz_state.cats.num_cats.row); - viz_state.viz.mat_height = height - (viz_state.viz.col_cat_offset * viz_state.cats.num_cats.col); + viz_state.viz.mat_width = + width - viz_state.viz.row_cat_offset * viz_state.cats.num_cats.row; + viz_state.viz.mat_height = + height - viz_state.viz.col_cat_offset * viz_state.cats.num_cats.col; viz_state.mat = {}; viz_state.mat.num_rows = network.mat.length; @@ -70,8 +72,6 @@ export const set_mat_constants = ( viz_state.viz.row_cat_width = 8; viz_state.viz.col_cat_height = 8; - - // move rows labels left viz_state.viz.label_row_x = 15; // 15 diff --git a/js/read_parquet/network_from_parquet.js b/js/read_parquet/network_from_parquet.js index 4b0a5d24..b846dcd2 100644 --- a/js/read_parquet/network_from_parquet.js +++ b/js/read_parquet/network_from_parquet.js @@ -25,27 +25,27 @@ function tableToMatrix(table) { } export const networkFromParquet = async ( - meta, - matBytes, - rowNodesBytes, - colNodesBytes, - rowLinkBytes, - colLinkBytes - ) => { - const matTable = await arrayBufferToArrowTable(matBytes.buffer); - const rowNodesTable = await arrayBufferToArrowTable(rowNodesBytes.buffer); - const colNodesTable = await arrayBufferToArrowTable(colNodesBytes.buffer); - const rowLinkTable = await arrayBufferToArrowTable(rowLinkBytes.buffer); - const colLinkTable = await arrayBufferToArrowTable(colLinkBytes.buffer); - - const network = { ...meta }; - network.mat = tableToMatrix(matTable); - network.row_nodes = tableToObjects(rowNodesTable); - network.col_nodes = tableToObjects(colNodesTable); - network.linkage = { - row: tableToMatrix(rowLinkTable), - col: tableToMatrix(colLinkTable), - }; - - return network; - }; \ No newline at end of file + meta, + matBytes, + rowNodesBytes, + colNodesBytes, + rowLinkBytes, + colLinkBytes +) => { + const matTable = await arrayBufferToArrowTable(matBytes.buffer); + const rowNodesTable = await arrayBufferToArrowTable(rowNodesBytes.buffer); + const colNodesTable = await arrayBufferToArrowTable(colNodesBytes.buffer); + const rowLinkTable = await arrayBufferToArrowTable(rowLinkBytes.buffer); + const colLinkTable = await arrayBufferToArrowTable(colLinkBytes.buffer); + + const network = { ...meta }; + network.mat = tableToMatrix(matTable); + network.row_nodes = tableToObjects(rowNodesTable); + network.col_nodes = tableToObjects(colNodesTable); + network.linkage = { + row: tableToMatrix(rowLinkTable), + col: tableToMatrix(colLinkTable), + }; + + return network; +}; diff --git a/js/widget.js b/js/widget.js index 348bb944..b85f79a3 100644 --- a/js/widget.js +++ b/js/widget.js @@ -124,7 +124,6 @@ const render_matrix_new = async ({ model, el }) => { const matBytes = model.get('mat_parquet'); if (matBytes && matBytes.byteLength > 0) { - network = await networkFromParquet( model.get('network_meta'), matBytes, @@ -133,14 +132,13 @@ const render_matrix_new = async ({ model, el }) => { model.get('row_linkage_parquet'), model.get('col_linkage_parquet') ); - } + } matrix_viz(model, el, network, width, height); }; // Main render function - no export keyword function render({ model, el }) { - try { const componentType = model.get('component'); From b10fe2a21e867360ca9043bfe62f38a078e54f9e Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Mon, 7 Jul 2025 21:34:21 -0400 Subject: [PATCH 15/30] Refine Landscape metadata handling --- js/read_parquet/objects_from_parquet.js | 24 +++++++++ js/widget.js | 22 +++++++-- src/celldega/viz/widget.py | 66 ++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 js/read_parquet/objects_from_parquet.js diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js new file mode 100644 index 00000000..137f59b9 --- /dev/null +++ b/js/read_parquet/objects_from_parquet.js @@ -0,0 +1,24 @@ +import { arrayBufferToArrowTable } from './arrayBufferToArrowTable'; + +// Convert a Parquet-encoded ArrayBuffer to a dictionary mapping the +// first column to the remaining column values. If only two columns +// are present the value will be a single item, otherwise an array. +export const objectsFromParquet = async (bytes) => { + const table = await arrayBufferToArrowTable(bytes.buffer); + const fields = table.schema.fields.map((f) => f.name); + if (fields.length < 2) { + return {}; + } + const keyCol = table.getChild(fields[0]).toArray(); + const valueCols = fields.slice(1).map((n) => table.getChild(n).toArray()); + + const result = {}; + for (let i = 0; i < table.numRows; i++) { + if (valueCols.length === 1) { + result[keyCol[i]] = valueCols[0][i]; + } else { + result[keyCol[i]] = valueCols.map((col) => col[i]); + } + } + return result; +}; diff --git a/js/widget.js b/js/widget.js index b85f79a3..1b0aa46a 100644 --- a/js/widget.js +++ b/js/widget.js @@ -1,5 +1,6 @@ import './widget.css'; import { networkFromParquet } from './read_parquet/network_from_parquet'; +import { objectsFromParquet } from './read_parquet/objects_from_parquet'; import { handleAsyncError, handleValidationWarning, @@ -21,9 +22,24 @@ const render_landscape_ist = async ({ model, el }) => { const dataset_name = model.get('dataset_name'); const width = model.get('width'); const height = model.get('height'); - const meta_cell = model.get('meta_cell'); - const meta_cluster = model.get('meta_cluster'); - const umap = model.get('umap'); + let meta_cell = model.get('meta_cell'); + let meta_cluster = model.get('meta_cluster'); + let umap = model.get('umap'); + + const metaCellBytes = model.get('meta_cell_parquet'); + if (metaCellBytes && metaCellBytes.byteLength > 0) { + meta_cell = await objectsFromParquet(metaCellBytes); + } + + const metaClusterBytes = model.get('meta_cluster_parquet'); + if (metaClusterBytes && metaClusterBytes.byteLength > 0) { + meta_cluster = await objectsFromParquet(metaClusterBytes); + } + + const umapBytes = model.get('umap_parquet'); + if (umapBytes && umapBytes.byteLength > 0) { + umap = await objectsFromParquet(umapBytes); + } const landscape_state = model.get('landscape_state'); const segmentation = model.get('segmentation'); diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 50292555..fe815ce7 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -6,6 +6,7 @@ from pathlib import Path import anywidget +import pandas as pd import traitlets @@ -59,9 +60,7 @@ class Landscape(anywidget.AnyWidget): region = traitlets.Dict({}).tag(sync=True) nbhd = traitlets.Dict({}).tag(sync=True) - meta_cell = traitlets.Dict({}).tag(sync=True) meta_cluster = traitlets.Dict({}).tag(sync=True) - umap = traitlets.Dict({}).tag(sync=True) landscape_state = traitlets.Unicode("spatial").tag(sync=True) update_trigger = traitlets.Dict().tag(sync=True) @@ -72,6 +71,69 @@ class Landscape(anywidget.AnyWidget): width = traitlets.Int(0).tag(sync=True) height = traitlets.Int(800).tag(sync=True) + def __init__(self, **kwargs): + adata = kwargs.pop("adata", None) + pq_meta_cell = kwargs.pop("meta_cell_parquet", None) + pq_meta_cluster = kwargs.pop("meta_cluster_parquet", None) + pq_umap = kwargs.pop("umap_parquet", None) + + meta_cell_df = kwargs.pop("meta_cell", None) + meta_cluster = kwargs.get("meta_cluster") + umap_df = kwargs.pop("umap", None) + + def _df_to_bytes(df): + import io + + import pyarrow as pa + import pyarrow.parquet as pq + + buf = io.BytesIO() + pq.write_table(pa.Table.from_pandas(df), buf, compression="zstd") + return buf.getvalue() + + if adata is not None: + + meta_cell_df = adata.obs.copy() + meta_cell_df.reset_index(inplace=True) + pq_meta_cell = _df_to_bytes(meta_cell_df) + + if "X_umap" in adata.obsm: + umap_df = pd.DataFrame( + adata.obsm["X_umap"], index=adata.obs.index + ).reset_index() + pq_umap = _df_to_bytes(umap_df) + + if isinstance(meta_cell_df, pd.DataFrame): + pq_meta_cell = _df_to_bytes(meta_cell_df.reset_index()) + + if isinstance(meta_cluster, pd.DataFrame): + pq_meta_cluster = _df_to_bytes(meta_cluster.reset_index()) + kwargs.pop("meta_cluster") + + if isinstance(umap_df, pd.DataFrame): + pq_umap = _df_to_bytes(umap_df.reset_index()) + + parquet_traits = {} + if pq_meta_cell is not None: + parquet_traits["meta_cell_parquet"] = traitlets.Bytes(pq_meta_cell).tag( + sync=True + ) + if pq_meta_cluster is not None: + parquet_traits["meta_cluster_parquet"] = traitlets.Bytes( + pq_meta_cluster + ).tag(sync=True) + if pq_umap is not None: + parquet_traits["umap_parquet"] = traitlets.Bytes(pq_umap).tag(sync=True) + + if parquet_traits: + self.add_traits(**parquet_traits) + + super().__init__(**kwargs) + + # store DataFrames locally without syncing to the frontend + self.meta_cell = meta_cell_df + self.umap = umap_df + def trigger_update(self, new_value): """ Update the update_trigger traitlet with a new value. From cdc5fbf31afb24d3a39260305a73b4d06b0c1dc5 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Thu, 10 Jul 2025 13:34:03 -0400 Subject: [PATCH 16/30] feat(viz): support AnnData in Landscape (#131) --- README.md | 17 ++++++++++++----- src/celldega/viz/widget.py | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 93143b57..3ae8e394 100644 --- a/README.md +++ b/README.md @@ -31,13 +31,20 @@ apt update && apt install -y libvips libvips-tools libvips-dev base_url = 'https://raw.githubusercontent.com/broadinstitute/celldega_Xenium_Prime_Human_Skin_FFPE_outs/main/Xenium_Prime_Human_Skin_FFPE_outs' landscape_ist = dega.viz.Landscape( - technology='Xenium', - ini_zoom = -4.5, + technology="Xenium", + ini_zoom=-4.5, ini_x=6000, ini_y=8000, - base_url = base_url, - height = 700, - width= 600 + base_url=base_url, + height=700, + width=600, +) + +# Alternatively pass an AnnData object to auto-populate cell metadata +# including "leiden" clusters, colors and UMAP coordinates. +landscape_from_adata = dega.viz.Landscape( + base_url=base_url, + AnnData=adata, ) file_path = 'https://raw.githubusercontent.com/broadinstitute/celldega_Xenium_Prime_Human_Skin_FFPE_outs/main/Xenium_Prime_Human_Skin_FFPE_outs/df_sig.parquet' diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index fe815ce7..0b2b45e4 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -8,11 +8,18 @@ import anywidget import pandas as pd import traitlets +import colorsys _clustergram_registry = {} # maps names to widget instances +def _hsv_to_hex(h: float) -> str: + """Convert HSV color to hex string.""" + r, g, b = colorsys.hsv_to_rgb(h, 0.65, 0.9) + return f"#{int(r * 255):02x}{int(g * 255):02x}{int(b * 255):02x}" + + class Landscape(anywidget.AnyWidget): """ A widget for interactive visualization of spatial omics data. This widget @@ -24,8 +31,13 @@ class Landscape(anywidget.AnyWidget): ini_zoom (float): The initial zoom level of the view. token (str): The token traitlet. base_url (str): The base URL for the widget. + AnnData (AnnData, optional): AnnData object to derive metadata from. dataset_name (str, optional): The name of the dataset to visualize. This will show up in the user interface bar. + The AnnData input automatically extracts cell attributes (e.g., ``leiden`` + clusters), the corresponding colors (or derives them when missing), and any + available UMAP coordinates. + Attributes: component (str): The name of the component. technology (str): The technology used. @@ -72,7 +84,7 @@ class Landscape(anywidget.AnyWidget): height = traitlets.Int(800).tag(sync=True) def __init__(self, **kwargs): - adata = kwargs.pop("adata", None) + adata = kwargs.pop("adata", None) or kwargs.pop("AnnData", None) pq_meta_cell = kwargs.pop("meta_cell_parquet", None) pq_meta_cluster = kwargs.pop("meta_cluster_parquet", None) pq_umap = kwargs.pop("umap_parquet", None) @@ -80,6 +92,7 @@ def __init__(self, **kwargs): meta_cell_df = kwargs.pop("meta_cell", None) meta_cluster = kwargs.get("meta_cluster") umap_df = kwargs.pop("umap", None) + meta_cluster_df = None def _df_to_bytes(df): import io @@ -97,6 +110,23 @@ def _df_to_bytes(df): meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) + if "leiden" in adata.obs.columns: + cluster_counts = adata.obs["leiden"].value_counts().sort_index() + colors = adata.uns.get("leiden_colors") + if colors is None: + n = len(cluster_counts) + colors = [ + _hsv_to_hex(i / max(n, 1)) for i in range(n) + ] + meta_cluster_df = pd.DataFrame( + { + "color": list(colors)[: len(cluster_counts)], + "count": cluster_counts.values, + }, + index=cluster_counts.index, + ) + pq_meta_cluster = _df_to_bytes(meta_cluster_df) + if "X_umap" in adata.obsm: umap_df = pd.DataFrame( adata.obsm["X_umap"], index=adata.obs.index @@ -109,6 +139,7 @@ def _df_to_bytes(df): if isinstance(meta_cluster, pd.DataFrame): pq_meta_cluster = _df_to_bytes(meta_cluster.reset_index()) kwargs.pop("meta_cluster") + meta_cluster_df = meta_cluster if isinstance(umap_df, pd.DataFrame): pq_umap = _df_to_bytes(umap_df.reset_index()) @@ -133,7 +164,9 @@ def _df_to_bytes(df): # store DataFrames locally without syncing to the frontend self.meta_cell = meta_cell_df self.umap = umap_df - + if meta_cluster_df is not None: + self.meta_cluster_df = meta_cluster_df + def trigger_update(self, new_value): """ Update the update_trigger traitlet with a new value. From 33bf1dc34e594c2d6c8b8648a33843d191755991 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Tue, 15 Jul 2025 13:44:09 -0400 Subject: [PATCH 17/30] adding print statement --- src/celldega/viz/widget.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index de9c87f6..ac9dccbd 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -110,6 +110,8 @@ def _df_to_bytes(df): if adata is not None: + print('found AnnData object, extracting metadata') + meta_cell_df = adata.obs.copy() meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) @@ -170,7 +172,7 @@ def _df_to_bytes(df): self.umap = umap_df if meta_cluster_df is not None: self.meta_cluster_df = meta_cluster_df - + def trigger_update(self, new_value): """ Update the update_trigger traitlet with a new value. From 5c1e911bda206c9b0aa7c08d7ab95eb6e8d7a531 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Tue, 15 Jul 2025 22:49:07 -0400 Subject: [PATCH 18/30] getting single-cell metadata to transfer --- js/deck-gl/layers/cell_layer.js | 5 +++++ js/viz/landscape_ist.js | 4 ++++ js/widget.js | 4 ++-- src/celldega/viz/widget.py | 3 ++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index 600d5547..d96ff8b1 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -107,9 +107,14 @@ export const ini_cell_layer = async (base_url, viz_state) => { set_cell_name_to_index_map(viz_state.cats); if (viz_state.cats.has_meta_cell) { + console.log('has_meta_cell', viz_state.cats.has_meta_cell); + console.log('meta_cell', viz_state.cats.meta_cell); + console.log('viz_state.cats.cell_names_array', viz_state.cats.cell_names_array); viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map( (name) => viz_state.cats.meta_cell[name] ); + + console.log('cell_cats', viz_state.cats.cell_cats); } else { // default clustering diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index 37317c3b..121c2be7 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -85,6 +85,10 @@ export const landscape_ist = async ( creds = {}, view_change_custom_callback = null ) => { + + console.log('meta_cell', meta_cell[0]); + + console.log('here') if (width === 0) { width = '100%'; } diff --git a/js/widget.js b/js/widget.js index 82e828a6..1a6e9d36 100644 --- a/js/widget.js +++ b/js/widget.js @@ -59,8 +59,8 @@ const render_landscape_ist = async ({ model, el }) => { width, height, meta_cell, - meta_cluster, - umap, + {}, // meta_cluster, + {}, // umap, landscape_state, segmentation, creds diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 0555d095..25038973 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -114,7 +114,7 @@ def _df_to_bytes(df): print('found AnnData object, extracting metadata') meta_cell_df = adata.obs.copy() - meta_cell_df.reset_index(inplace=True) + # meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) if "leiden" in adata.obs.columns: @@ -164,6 +164,7 @@ def _df_to_bytes(df): parquet_traits["umap_parquet"] = traitlets.Bytes(pq_umap).tag(sync=True) if parquet_traits: + print(parquet_traits) self.add_traits(**parquet_traits) super().__init__(**kwargs) From eb0df34803f92443e521071b34bce3d88874ab0f Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 10:00:11 -0400 Subject: [PATCH 19/30] making column for meta_cluster --- src/celldega/viz/widget.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 25038973..b143a5e1 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -127,6 +127,7 @@ def _df_to_bytes(df): ] meta_cluster_df = pd.DataFrame( { + "name": cluster_counts.index, "color": list(colors)[: len(cluster_counts)], "count": cluster_counts.values, }, From 28eb7203b7e1bfbbccc23d4ca5d5ae3f8560da4e Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 10:03:33 -0400 Subject: [PATCH 20/30] cleaning console logs --- js/deck-gl/layers/cell_layer.js | 8 ++++---- js/viz/landscape_ist.js | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index d96ff8b1..f83e1f6b 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -107,14 +107,14 @@ export const ini_cell_layer = async (base_url, viz_state) => { set_cell_name_to_index_map(viz_state.cats); if (viz_state.cats.has_meta_cell) { - console.log('has_meta_cell', viz_state.cats.has_meta_cell); - console.log('meta_cell', viz_state.cats.meta_cell); - console.log('viz_state.cats.cell_names_array', viz_state.cats.cell_names_array); + // console.log('has_meta_cell', viz_state.cats.has_meta_cell); + // console.log('meta_cell', viz_state.cats.meta_cell); + // console.log('viz_state.cats.cell_names_array', viz_state.cats.cell_names_array); viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map( (name) => viz_state.cats.meta_cell[name] ); - console.log('cell_cats', viz_state.cats.cell_cats); + // console.log('cell_cats', viz_state.cats.cell_cats); } else { // default clustering diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index 121c2be7..2f526a6f 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -86,9 +86,6 @@ export const landscape_ist = async ( view_change_custom_callback = null ) => { - console.log('meta_cell', meta_cell[0]); - - console.log('here') if (width === 0) { width = '100%'; } From 070fe1111cc9cb84c42d5707c7d8dc8c202ebc4e Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 10:59:05 -0400 Subject: [PATCH 21/30] working on parsing parquet to objects --- js/read_parquet/objects_from_parquet.js | 37 +++++++++++++++++-------- js/widget.js | 10 ++++--- src/celldega/viz/widget.py | 1 - 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js index 137f59b9..1f3e3c96 100644 --- a/js/read_parquet/objects_from_parquet.js +++ b/js/read_parquet/objects_from_parquet.js @@ -1,24 +1,39 @@ import { arrayBufferToArrowTable } from './arrayBufferToArrowTable'; -// Convert a Parquet-encoded ArrayBuffer to a dictionary mapping the -// first column to the remaining column values. If only two columns -// are present the value will be a single item, otherwise an array. -export const objectsFromParquet = async (bytes) => { +/** + * Converts a Parquet-encoded ArrayBuffer into an object using the DataFrame index as key. + * + * Works whether the index is named or not (e.g. "__index_level_0__"). + * + * @param {ArrayBuffer} bytes - The buffer to decode. + * @returns {Promise} - Object mapping index → [values] or single value. + */ +export const objects_from_parquet = async (bytes) => { const table = await arrayBufferToArrowTable(bytes.buffer); const fields = table.schema.fields.map((f) => f.name); - if (fields.length < 2) { - return {}; - } - const keyCol = table.getChild(fields[0]).toArray(); - const valueCols = fields.slice(1).map((n) => table.getChild(n).toArray()); + + if (fields.length < 2) return {}; + + console.log('fields', fields[1,:]) + + // Check if the index is explicitly preserved + const indexField = fields.find((f) => + f === '__index_level_0__' || !f.match(/^[a-zA-Z_]/) // conservative fallback + ) || fields[0]; // fallback to first field if no index column is clearly marked + + const keyCol = table.getChild(indexField).toArray(); + const valueFields = fields.filter((f) => f !== indexField); + const valueCols = valueFields.map((f) => table.getChild(f).toArray()); const result = {}; for (let i = 0; i < table.numRows; i++) { + const key = String(keyCol[i]); if (valueCols.length === 1) { - result[keyCol[i]] = valueCols[0][i]; + result[key] = valueCols[0][i]; } else { - result[keyCol[i]] = valueCols.map((col) => col[i]); + result[key] = valueCols.map((col) => col[i]); } } + return result; }; diff --git a/js/widget.js b/js/widget.js index 1a6e9d36..8949c095 100644 --- a/js/widget.js +++ b/js/widget.js @@ -1,7 +1,7 @@ import './widget.css'; import { networkFromParquet } from './read_parquet/network_from_parquet'; -import { objectsFromParquet } from './read_parquet/objects_from_parquet'; +import { objects_from_parquet } from './read_parquet/objects_from_parquet'; import { handleAsyncError, handleValidationWarning, @@ -30,17 +30,19 @@ const render_landscape_ist = async ({ model, el }) => { const metaCellBytes = model.get('meta_cell_parquet'); if (metaCellBytes && metaCellBytes.byteLength > 0) { - meta_cell = await objectsFromParquet(metaCellBytes); + meta_cell = await objects_from_parquet(metaCellBytes); } const metaClusterBytes = model.get('meta_cluster_parquet'); if (metaClusterBytes && metaClusterBytes.byteLength > 0) { - meta_cluster = await objectsFromParquet(metaClusterBytes); + meta_cluster = await objects_from_parquet(metaClusterBytes); + + console.log('meta_cluster', meta_cluster); } const umapBytes = model.get('umap_parquet'); if (umapBytes && umapBytes.byteLength > 0) { - umap = await objectsFromParquet(umapBytes); + umap = await objects_from_parquet(umapBytes); } const landscape_state = model.get('landscape_state'); const segmentation = model.get('segmentation'); diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index b143a5e1..25038973 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -127,7 +127,6 @@ def _df_to_bytes(df): ] meta_cluster_df = pd.DataFrame( { - "name": cluster_counts.index, "color": list(colors)[: len(cluster_counts)], "count": cluster_counts.values, }, From 1d86e4e0d6c7f83f1ab2cb6584a719caf4dba19f Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 14:41:28 -0400 Subject: [PATCH 22/30] meta_cell working --- js/deck-gl/layers/cell_layer.js | 29 ++++++++++++++++++++----- js/read_parquet/objects_from_parquet.js | 11 +++------- js/viz/landscape_ist.js | 7 ++++++ js/widget.js | 15 +++++++++---- 4 files changed, 44 insertions(+), 18 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index f83e1f6b..b4a7e535 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -107,12 +107,29 @@ export const ini_cell_layer = async (base_url, viz_state) => { set_cell_name_to_index_map(viz_state.cats); if (viz_state.cats.has_meta_cell) { - // console.log('has_meta_cell', viz_state.cats.has_meta_cell); - // console.log('meta_cell', viz_state.cats.meta_cell); - // console.log('viz_state.cats.cell_names_array', viz_state.cats.cell_names_array); - viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map( - (name) => viz_state.cats.meta_cell[name] - ); + // viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map( + // (name) => { + + // let ini_cat = viz_state.cats.meta_cell[name] + // let inst_cat + // console.log('ini_cat', ini_cat); + + // // if ini_cat is defined + // if (ini_cat !== undefined) { + // inst_cat = ini_cat[0] + // } else { + // inst_cat = 'unknown'; + // } + + // return inst_cat + // } + // ); + + viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map((name) => { + const attrs = viz_state.cats.meta_cell[name]; + return attrs?.[0] ?? 'N.A.'; + }); + // console.log('cell_cats', viz_state.cats.cell_cats); } else { diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js index 1f3e3c96..486bfe7a 100644 --- a/js/read_parquet/objects_from_parquet.js +++ b/js/read_parquet/objects_from_parquet.js @@ -14,8 +14,6 @@ export const objects_from_parquet = async (bytes) => { if (fields.length < 2) return {}; - console.log('fields', fields[1,:]) - // Check if the index is explicitly preserved const indexField = fields.find((f) => f === '__index_level_0__' || !f.match(/^[a-zA-Z_]/) // conservative fallback @@ -28,12 +26,9 @@ export const objects_from_parquet = async (bytes) => { const result = {}; for (let i = 0; i < table.numRows; i++) { const key = String(keyCol[i]); - if (valueCols.length === 1) { - result[key] = valueCols[0][i]; - } else { - result[key] = valueCols.map((col) => col[i]); - } + result[key] = valueCols.map((col) => col[i]); } - return result; + + return {result, fields}; }; diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index 2f526a6f..f87be26d 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -78,8 +78,11 @@ export const landscape_ist = async ( width = 0, height = 800, meta_cell = {}, + meta_cell_attr = [], meta_cluster = {}, + // meta_cluster_attr = [], umap = {}, + // umap_attr = [], landscape_state = 'spatial', segmentation = 'default', creds = {}, @@ -90,6 +93,8 @@ export const landscape_ist = async ( width = '100%'; } + console.log('meta_cluster_attr', meta_cell_attr); + const viz_state = {}; viz_state.obs_store = create_obs_store(); @@ -221,6 +226,8 @@ export const landscape_ist = async ( } viz_state.cats.meta_cell = meta_cell; + // console.log('viz_state.cats.meta_cell', viz_state.cats.meta_cell); + if (Object.keys(meta_cluster).length === 0) { viz_state.cats.has_meta_cluster = false; } else { diff --git a/js/widget.js b/js/widget.js index 8949c095..ab477ff6 100644 --- a/js/widget.js +++ b/js/widget.js @@ -28,21 +28,27 @@ const render_landscape_ist = async ({ model, el }) => { let meta_cluster = model.get('meta_cluster'); let umap = model.get('umap'); + let meta_cell_data; + let meta_cluster_data; + let umap_data; + const metaCellBytes = model.get('meta_cell_parquet'); if (metaCellBytes && metaCellBytes.byteLength > 0) { - meta_cell = await objects_from_parquet(metaCellBytes); + console.log('here') + meta_cell_data = await objects_from_parquet(metaCellBytes); + console.log('meta_cell_data', meta_cell_data); } const metaClusterBytes = model.get('meta_cluster_parquet'); if (metaClusterBytes && metaClusterBytes.byteLength > 0) { - meta_cluster = await objects_from_parquet(metaClusterBytes); + meta_cluster_data = await objects_from_parquet(metaClusterBytes); console.log('meta_cluster', meta_cluster); } const umapBytes = model.get('umap_parquet'); if (umapBytes && umapBytes.byteLength > 0) { - umap = await objects_from_parquet(umapBytes); + umap_data = await objects_from_parquet(umapBytes); } const landscape_state = model.get('landscape_state'); const segmentation = model.get('segmentation'); @@ -60,7 +66,8 @@ const render_landscape_ist = async ({ model, el }) => { 0.25, width, height, - meta_cell, + meta_cell_data.result, + meta_cell_data.fields, {}, // meta_cluster, {}, // umap, landscape_state, From ff5db2b5ed5dd1a7a82a4239512eb97443ba81a7 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 14:45:05 -0400 Subject: [PATCH 23/30] parsing meta_cell_attr --- js/read_parquet/objects_from_parquet.js | 4 +++- js/viz/landscape_ist.js | 2 +- js/widget.js | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js index 486bfe7a..5732e7f0 100644 --- a/js/read_parquet/objects_from_parquet.js +++ b/js/read_parquet/objects_from_parquet.js @@ -29,6 +29,8 @@ export const objects_from_parquet = async (bytes) => { result[key] = valueCols.map((col) => col[i]); } + // find the attribute fields (drop index field) + const attr = fields.filter((f) => f !== indexField); - return {result, fields}; + return {result, attr}; }; diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index f87be26d..fbed0ca6 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -93,7 +93,7 @@ export const landscape_ist = async ( width = '100%'; } - console.log('meta_cluster_attr', meta_cell_attr); + console.log('meta_cell_attr', meta_cell_attr); const viz_state = {}; diff --git a/js/widget.js b/js/widget.js index ab477ff6..b0850373 100644 --- a/js/widget.js +++ b/js/widget.js @@ -67,7 +67,7 @@ const render_landscape_ist = async ({ model, el }) => { width, height, meta_cell_data.result, - meta_cell_data.fields, + meta_cell_data.attr, {}, // meta_cluster, {}, // umap, landscape_state, From 78baffc441ff67bc9eafa7214c242f56748a7f6d Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 14:48:34 -0400 Subject: [PATCH 24/30] setting up multiple attr support for cells --- js/deck-gl/layers/cell_layer.js | 22 +++++----------------- js/viz/landscape_ist.js | 2 ++ 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index b4a7e535..2888948f 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -107,27 +107,15 @@ export const ini_cell_layer = async (base_url, viz_state) => { set_cell_name_to_index_map(viz_state.cats); if (viz_state.cats.has_meta_cell) { - // viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map( - // (name) => { - // let ini_cat = viz_state.cats.meta_cell[name] - // let inst_cat - // console.log('ini_cat', ini_cat); - - // // if ini_cat is defined - // if (ini_cat !== undefined) { - // inst_cat = ini_cat[0] - // } else { - // inst_cat = 'unknown'; - // } - - // return inst_cat - // } - // ); + // look up the index of the inst_cell_attr in the meta_cell_attr array + const inst_index = viz_state.cats.meta_cell_attr.indexOf( + viz_state.cats.inst_cell_attr + ); viz_state.cats.cell_cats = viz_state.cats.cell_names_array.map((name) => { const attrs = viz_state.cats.meta_cell[name]; - return attrs?.[0] ?? 'N.A.'; + return attrs?.[inst_index] ?? 'N.A.'; }); diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index fbed0ca6..f943f971 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -225,6 +225,8 @@ export const landscape_ist = async ( viz_state.cats.has_meta_cell = true; } viz_state.cats.meta_cell = meta_cell; + viz_state.cats.meta_cell_attr = meta_cell_attr; + viz_state.cats.inst_cell_attr = meta_cell_attr[0] || 'N.A.'; // console.log('viz_state.cats.meta_cell', viz_state.cats.meta_cell); From df9780f6e7c7f05e48927df4aec7eee6488b816c Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 15:12:24 -0400 Subject: [PATCH 25/30] getting multiple attr working for meta_clust --- js/deck-gl/layers/cell_layer.js | 3 --- js/global_variables/meta_cluster.js | 9 +++++++- js/read_parquet/objects_from_parquet.js | 30 ++++++++++++------------- js/viz/landscape_ist.js | 13 +++++++---- js/widget.js | 14 ++++++------ src/celldega/viz/widget.py | 2 ++ 6 files changed, 41 insertions(+), 30 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index 2888948f..f0e880cd 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -118,10 +118,7 @@ export const ini_cell_layer = async (base_url, viz_state) => { return attrs?.[inst_index] ?? 'N.A.'; }); - - // console.log('cell_cats', viz_state.cats.cell_cats); } else { - // default clustering const cluster_arrow_table = await get_arrow_table( `${base_url}/cell_clusters${viz_state.seg.version && viz_state.seg.version !== 'default' ? `_${viz_state.seg.version}` : ''}/cluster.parquet`, diff --git a/js/global_variables/meta_cluster.js b/js/global_variables/meta_cluster.js index 8d1b921c..56964476 100644 --- a/js/global_variables/meta_cluster.js +++ b/js/global_variables/meta_cluster.js @@ -31,8 +31,15 @@ export const set_cluster_metadata = async (viz_state) => { if (viz_state.cats.has_meta_cluster) { // loop through the keys of meta_cluster and assemble a dictionary of colors use a map or something functional for (const cluster_name in viz_state.cats.meta_cluster) { + + console.log('cluster_name', cluster_name); + // console.log('color', viz_state.cats.meta_cluster[cluster_name]['color']); + + // find the index of color in viz_state.cats.meta_cluster_attr + const color_index = viz_state.cats.meta_cluster_attr.indexOf('color'); + viz_state.cats.color_dict_cluster[cluster_name] = hexToRgb( - viz_state.cats.meta_cluster[cluster_name]['color'] + viz_state.cats.meta_cluster[cluster_name][color_index] || '#000000' ); } diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js index 5732e7f0..f1d45f1b 100644 --- a/js/read_parquet/objects_from_parquet.js +++ b/js/read_parquet/objects_from_parquet.js @@ -1,26 +1,25 @@ import { arrayBufferToArrowTable } from './arrayBufferToArrowTable'; /** - * Converts a Parquet-encoded ArrayBuffer into an object using the DataFrame index as key. + * Converts a Parquet-encoded ArrayBuffer into an object using the specified key field. * - * Works whether the index is named or not (e.g. "__index_level_0__"). - * - * @param {ArrayBuffer} bytes - The buffer to decode. - * @returns {Promise} - Object mapping index → [values] or single value. + * @param {ArrayBuffer} bytes - The Parquet bytes. + * @param {string} keyField - The name of the field to use as the key. + * @returns {Promise<{ result: Object, attr: string[] }>} */ -export const objects_from_parquet = async (bytes) => { +export const objects_from_parquet = async (bytes, keyField = '__index_level_0__') => { const table = await arrayBufferToArrowTable(bytes.buffer); const fields = table.schema.fields.map((f) => f.name); if (fields.length < 2) return {}; - // Check if the index is explicitly preserved - const indexField = fields.find((f) => - f === '__index_level_0__' || !f.match(/^[a-zA-Z_]/) // conservative fallback - ) || fields[0]; // fallback to first field if no index column is clearly marked + if (!fields.includes(keyField)) { + throw new Error(`Key field "${keyField}" not found in Parquet fields: ${fields.join(', ')}`); + } + - const keyCol = table.getChild(indexField).toArray(); - const valueFields = fields.filter((f) => f !== indexField); + const keyCol = table.getChild(keyField).toArray(); + const valueFields = fields.filter((f) => f !== keyField); const valueCols = valueFields.map((f) => table.getChild(f).toArray()); const result = {}; @@ -29,8 +28,9 @@ export const objects_from_parquet = async (bytes) => { result[key] = valueCols.map((col) => col[i]); } - // find the attribute fields (drop index field) - const attr = fields.filter((f) => f !== indexField); + console.log('fields', fields); + console.log('keyField', keyField); + console.log('valueFields', valueFields); - return {result, attr}; + return { result, attr: valueFields }; }; diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index f943f971..03537613 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -80,6 +80,7 @@ export const landscape_ist = async ( meta_cell = {}, meta_cell_attr = [], meta_cluster = {}, + meta_cluster_attr = [], // meta_cluster_attr = [], umap = {}, // umap_attr = [], @@ -89,12 +90,15 @@ export const landscape_ist = async ( view_change_custom_callback = null ) => { + console.log('landscape_ist') + + console.log('meta_cell', meta_cell); + console.log('meta_cluster', meta_cluster); + if (width === 0) { width = '100%'; } - console.log('meta_cell_attr', meta_cell_attr); - const viz_state = {}; viz_state.obs_store = create_obs_store(); @@ -218,7 +222,6 @@ export const landscape_ist = async ( viz_state.cats.cluster_counts = []; viz_state.cats.polygon_cell_names = []; - // check if meta_cell is an empty object if (Object.keys(meta_cell).length === 0) { viz_state.cats.has_meta_cell = false; } else { @@ -228,7 +231,7 @@ export const landscape_ist = async ( viz_state.cats.meta_cell_attr = meta_cell_attr; viz_state.cats.inst_cell_attr = meta_cell_attr[0] || 'N.A.'; - // console.log('viz_state.cats.meta_cell', viz_state.cats.meta_cell); + console.log('meta_cluster', meta_cluster); if (Object.keys(meta_cluster).length === 0) { viz_state.cats.has_meta_cluster = false; @@ -236,6 +239,8 @@ export const landscape_ist = async ( viz_state.cats.has_meta_cluster = true; } viz_state.cats.meta_cluster = meta_cluster; + viz_state.cats.meta_cluster_attr = meta_cluster_attr; + viz_state.cats.inst_cluster_attr = meta_cluster_attr[0] || 'N.A.'; viz_state.umap = {}; if (Object.keys(umap).length === 0) { diff --git a/js/widget.js b/js/widget.js index b0850373..9e48a13d 100644 --- a/js/widget.js +++ b/js/widget.js @@ -34,22 +34,20 @@ const render_landscape_ist = async ({ model, el }) => { const metaCellBytes = model.get('meta_cell_parquet'); if (metaCellBytes && metaCellBytes.byteLength > 0) { - console.log('here') - meta_cell_data = await objects_from_parquet(metaCellBytes); - console.log('meta_cell_data', meta_cell_data); + meta_cell_data = await objects_from_parquet(metaCellBytes, 'cell_id'); } const metaClusterBytes = model.get('meta_cluster_parquet'); if (metaClusterBytes && metaClusterBytes.byteLength > 0) { - meta_cluster_data = await objects_from_parquet(metaClusterBytes); - + meta_cluster_data = await objects_from_parquet(metaClusterBytes, 'leiden'); console.log('meta_cluster', meta_cluster); } const umapBytes = model.get('umap_parquet'); if (umapBytes && umapBytes.byteLength > 0) { - umap_data = await objects_from_parquet(umapBytes); + // umap_data = await objects_from_parquet(umapBytes); } + const landscape_state = model.get('landscape_state'); const segmentation = model.get('segmentation'); @@ -68,7 +66,9 @@ const render_landscape_ist = async ({ model, el }) => { height, meta_cell_data.result, meta_cell_data.attr, - {}, // meta_cluster, + // {}, // meta_cluster, + meta_cluster_data.result, + meta_cluster_data.attr, {}, // umap, landscape_state, segmentation, diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 25038973..e269356a 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -132,6 +132,8 @@ def _df_to_bytes(df): }, index=cluster_counts.index, ) + + print('meta_cluster_df', meta_cluster_df) pq_meta_cluster = _df_to_bytes(meta_cluster_df) if "X_umap" in adata.obsm: From 7acbd43b2a292fe4d0cdbd7e0eb2dd46f9b049d6 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 15:50:06 -0400 Subject: [PATCH 26/30] meta_cluster and meta_cell starting to work --- js/global_variables/meta_cluster.js | 29 ++++++++++++++++++++++++----- src/celldega/viz/widget.py | 5 +---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/js/global_variables/meta_cluster.js b/js/global_variables/meta_cluster.js index 56964476..77f63eaa 100644 --- a/js/global_variables/meta_cluster.js +++ b/js/global_variables/meta_cluster.js @@ -1,3 +1,4 @@ +import { count } from 'd3'; import { get_arrow_table } from '../read_parquet/get_arrow_table'; import { hexToRgb } from '../utils/hexToRgb'; @@ -29,14 +30,14 @@ export const update_meta_cluster = (cats, new_meta_cluster) => { export const set_cluster_metadata = async (viz_state) => { if (viz_state.cats.has_meta_cluster) { + + // find the index of color in viz_state.cats.meta_cluster_attr + const color_index = viz_state.cats.meta_cluster_attr.indexOf('color'); + // loop through the keys of meta_cluster and assemble a dictionary of colors use a map or something functional for (const cluster_name in viz_state.cats.meta_cluster) { - console.log('cluster_name', cluster_name); - // console.log('color', viz_state.cats.meta_cluster[cluster_name]['color']); - // find the index of color in viz_state.cats.meta_cluster_attr - const color_index = viz_state.cats.meta_cluster_attr.indexOf('color'); viz_state.cats.color_dict_cluster[cluster_name] = hexToRgb( viz_state.cats.meta_cluster[cluster_name][color_index] || '#000000' @@ -44,12 +45,30 @@ export const set_cluster_metadata = async (viz_state) => { } // loop through the keys and assembe cluster_counts + + // find the index of count in viz_state.cats.meta_cluster_attr + const count_index = viz_state.cats.meta_cluster_attr.indexOf('count'); + + // for (const cluster_name in viz_state.cats.meta_cluster) { + // viz_state.cats.cluster_counts.push({ + // name: cluster_name, + // value: viz_state.cats.meta_cluster[cluster_name][count_index] + // }); + // } + for (const cluster_name in viz_state.cats.meta_cluster) { + const count_index = viz_state.cats.meta_cluster_attr.indexOf('count'); + + const raw = viz_state.cats.meta_cluster[cluster_name][count_index]; + const value = raw !== undefined ? Number(raw) : 0; + viz_state.cats.cluster_counts.push({ name: cluster_name, - value: viz_state.cats.meta_cluster[cluster_name]['count'], + value, }); } + + } else { let meta_cell_url; diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index e269356a..9729f9d6 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -105,14 +105,13 @@ def _df_to_bytes(df): import pyarrow as pa import pyarrow.parquet as pq + df.columns = df.columns.map(str) buf = io.BytesIO() pq.write_table(pa.Table.from_pandas(df), buf, compression="zstd") return buf.getvalue() if adata is not None: - print('found AnnData object, extracting metadata') - meta_cell_df = adata.obs.copy() # meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) @@ -133,7 +132,6 @@ def _df_to_bytes(df): index=cluster_counts.index, ) - print('meta_cluster_df', meta_cluster_df) pq_meta_cluster = _df_to_bytes(meta_cluster_df) if "X_umap" in adata.obsm: @@ -166,7 +164,6 @@ def _df_to_bytes(df): parquet_traits["umap_parquet"] = traitlets.Bytes(pq_umap).tag(sync=True) if parquet_traits: - print(parquet_traits) self.add_traits(**parquet_traits) super().__init__(**kwargs) From c9e57b3729f38e10c782ef71a881c07ce1924bb6 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 15:56:27 -0400 Subject: [PATCH 27/30] working on cell_attr --- src/celldega/viz/widget.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 9729f9d6..1a43842c 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -83,6 +83,11 @@ class Landscape(anywidget.AnyWidget): update_trigger = traitlets.Dict().tag(sync=True) cell_clusters = traitlets.Dict({}).tag(sync=True) + # make a traitlet for cell_attr a list that will have the AnnData obs columns + # cell_attr = traitlets.List(['leiden']).tag(sync=True) + cell_attr = traitlets.List(trait=traitlets.Unicode(), default_value=["leiden"]).tag(sync=True) + + segmentation = traitlets.Unicode("default").tag(sync=True) width = traitlets.Int(0).tag(sync=True) @@ -98,6 +103,9 @@ def __init__(self, **kwargs): meta_cluster = kwargs.get("meta_cluster") umap_df = kwargs.pop("umap", None) meta_cluster_df = None + # cell_attr = kwargs.pop("cell_attr", "leiden") + cell_attr = kwargs.pop("cell_attr", ["leiden"]) + def _df_to_bytes(df): import io @@ -112,7 +120,7 @@ def _df_to_bytes(df): if adata is not None: - meta_cell_df = adata.obs.copy() + meta_cell_df = adata.obs[cell_attr].copy() # meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) From 799c516c5ac5635e8c862678c42655b370248685 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 16:39:28 -0400 Subject: [PATCH 28/30] format --- js/deck-gl/layers/cell_layer.js | 3 --- js/global_variables/meta_cluster.js | 6 ------ js/read_parquet/objects_from_parquet.js | 14 +++++++------- js/viz/landscape_ist.js | 8 -------- js/widget.js | 9 ++++----- src/celldega/viz/widget.py | 24 +++++------------------- 6 files changed, 16 insertions(+), 48 deletions(-) diff --git a/js/deck-gl/layers/cell_layer.js b/js/deck-gl/layers/cell_layer.js index f0e880cd..f99ecb26 100644 --- a/js/deck-gl/layers/cell_layer.js +++ b/js/deck-gl/layers/cell_layer.js @@ -107,7 +107,6 @@ export const ini_cell_layer = async (base_url, viz_state) => { set_cell_name_to_index_map(viz_state.cats); if (viz_state.cats.has_meta_cell) { - // look up the index of the inst_cell_attr in the meta_cell_attr array const inst_index = viz_state.cats.meta_cell_attr.indexOf( viz_state.cats.inst_cell_attr @@ -117,9 +116,7 @@ export const ini_cell_layer = async (base_url, viz_state) => { const attrs = viz_state.cats.meta_cell[name]; return attrs?.[inst_index] ?? 'N.A.'; }); - } else { - const cluster_arrow_table = await get_arrow_table( `${base_url}/cell_clusters${viz_state.seg.version && viz_state.seg.version !== 'default' ? `_${viz_state.seg.version}` : ''}/cluster.parquet`, options.fetch, diff --git a/js/global_variables/meta_cluster.js b/js/global_variables/meta_cluster.js index 77f63eaa..5f71d2a2 100644 --- a/js/global_variables/meta_cluster.js +++ b/js/global_variables/meta_cluster.js @@ -30,15 +30,11 @@ export const update_meta_cluster = (cats, new_meta_cluster) => { export const set_cluster_metadata = async (viz_state) => { if (viz_state.cats.has_meta_cluster) { - // find the index of color in viz_state.cats.meta_cluster_attr const color_index = viz_state.cats.meta_cluster_attr.indexOf('color'); // loop through the keys of meta_cluster and assemble a dictionary of colors use a map or something functional for (const cluster_name in viz_state.cats.meta_cluster) { - - - viz_state.cats.color_dict_cluster[cluster_name] = hexToRgb( viz_state.cats.meta_cluster[cluster_name][color_index] || '#000000' ); @@ -67,8 +63,6 @@ export const set_cluster_metadata = async (viz_state) => { value, }); } - - } else { let meta_cell_url; diff --git a/js/read_parquet/objects_from_parquet.js b/js/read_parquet/objects_from_parquet.js index f1d45f1b..58abbf98 100644 --- a/js/read_parquet/objects_from_parquet.js +++ b/js/read_parquet/objects_from_parquet.js @@ -7,17 +7,21 @@ import { arrayBufferToArrowTable } from './arrayBufferToArrowTable'; * @param {string} keyField - The name of the field to use as the key. * @returns {Promise<{ result: Object, attr: string[] }>} */ -export const objects_from_parquet = async (bytes, keyField = '__index_level_0__') => { +export const objects_from_parquet = async ( + bytes, + keyField = '__index_level_0__' +) => { const table = await arrayBufferToArrowTable(bytes.buffer); const fields = table.schema.fields.map((f) => f.name); if (fields.length < 2) return {}; if (!fields.includes(keyField)) { - throw new Error(`Key field "${keyField}" not found in Parquet fields: ${fields.join(', ')}`); + throw new Error( + `Key field "${keyField}" not found in Parquet fields: ${fields.join(', ')}` + ); } - const keyCol = table.getChild(keyField).toArray(); const valueFields = fields.filter((f) => f !== keyField); const valueCols = valueFields.map((f) => table.getChild(f).toArray()); @@ -28,9 +32,5 @@ export const objects_from_parquet = async (bytes, keyField = '__index_level_0__' result[key] = valueCols.map((col) => col[i]); } - console.log('fields', fields); - console.log('keyField', keyField); - console.log('valueFields', valueFields); - return { result, attr: valueFields }; }; diff --git a/js/viz/landscape_ist.js b/js/viz/landscape_ist.js index 03537613..aaf80530 100644 --- a/js/viz/landscape_ist.js +++ b/js/viz/landscape_ist.js @@ -89,12 +89,6 @@ export const landscape_ist = async ( creds = {}, view_change_custom_callback = null ) => { - - console.log('landscape_ist') - - console.log('meta_cell', meta_cell); - console.log('meta_cluster', meta_cluster); - if (width === 0) { width = '100%'; } @@ -231,8 +225,6 @@ export const landscape_ist = async ( viz_state.cats.meta_cell_attr = meta_cell_attr; viz_state.cats.inst_cell_attr = meta_cell_attr[0] || 'N.A.'; - console.log('meta_cluster', meta_cluster); - if (Object.keys(meta_cluster).length === 0) { viz_state.cats.has_meta_cluster = false; } else { diff --git a/js/widget.js b/js/widget.js index 9e48a13d..9b6f6872 100644 --- a/js/widget.js +++ b/js/widget.js @@ -24,13 +24,13 @@ const render_landscape_ist = async ({ model, el }) => { const dataset_name = model.get('dataset_name'); const width = model.get('width'); const height = model.get('height'); - let meta_cell = model.get('meta_cell'); - let meta_cluster = model.get('meta_cluster'); - let umap = model.get('umap'); + // let meta_cell = model.get('meta_cell'); + // let meta_cluster = model.get('meta_cluster'); + // let umap = model.get('umap'); let meta_cell_data; let meta_cluster_data; - let umap_data; + // let umap_data; const metaCellBytes = model.get('meta_cell_parquet'); if (metaCellBytes && metaCellBytes.byteLength > 0) { @@ -40,7 +40,6 @@ const render_landscape_ist = async ({ model, el }) => { const metaClusterBytes = model.get('meta_cluster_parquet'); if (metaClusterBytes && metaClusterBytes.byteLength > 0) { meta_cluster_data = await objects_from_parquet(metaClusterBytes, 'leiden'); - console.log('meta_cluster', meta_cluster); } const umapBytes = model.get('umap_parquet'); diff --git a/src/celldega/viz/widget.py b/src/celldega/viz/widget.py index 1a43842c..5494e27d 100644 --- a/src/celldega/viz/widget.py +++ b/src/celldega/viz/widget.py @@ -13,6 +13,7 @@ _clustergram_registry = {} # maps names to widget instances +_enrich_registry = {} # maps names to widget instances def _hsv_to_hex(h: float) -> str: @@ -21,10 +22,6 @@ def _hsv_to_hex(h: float) -> str: return f"#{int(r * 255):02x}{int(g * 255):02x}{int(b * 255):02x}" -_clustergram_registry = {} # maps names to widget instances -_enrich_registry = {} # maps names to widget instances - - class Landscape(anywidget.AnyWidget): """ A widget for interactive visualization of spatial omics data. This widget @@ -87,7 +84,6 @@ class Landscape(anywidget.AnyWidget): # cell_attr = traitlets.List(['leiden']).tag(sync=True) cell_attr = traitlets.List(trait=traitlets.Unicode(), default_value=["leiden"]).tag(sync=True) - segmentation = traitlets.Unicode("default").tag(sync=True) width = traitlets.Int(0).tag(sync=True) @@ -106,7 +102,6 @@ def __init__(self, **kwargs): # cell_attr = kwargs.pop("cell_attr", "leiden") cell_attr = kwargs.pop("cell_attr", ["leiden"]) - def _df_to_bytes(df): import io @@ -119,7 +114,6 @@ def _df_to_bytes(df): return buf.getvalue() if adata is not None: - meta_cell_df = adata.obs[cell_attr].copy() # meta_cell_df.reset_index(inplace=True) pq_meta_cell = _df_to_bytes(meta_cell_df) @@ -129,9 +123,7 @@ def _df_to_bytes(df): colors = adata.uns.get("leiden_colors") if colors is None: n = len(cluster_counts) - colors = [ - _hsv_to_hex(i / max(n, 1)) for i in range(n) - ] + colors = [_hsv_to_hex(i / max(n, 1)) for i in range(n)] meta_cluster_df = pd.DataFrame( { "color": list(colors)[: len(cluster_counts)], @@ -143,9 +135,7 @@ def _df_to_bytes(df): pq_meta_cluster = _df_to_bytes(meta_cluster_df) if "X_umap" in adata.obsm: - umap_df = pd.DataFrame( - adata.obsm["X_umap"], index=adata.obs.index - ).reset_index() + umap_df = pd.DataFrame(adata.obsm["X_umap"], index=adata.obs.index).reset_index() pq_umap = _df_to_bytes(umap_df) if isinstance(meta_cell_df, pd.DataFrame): @@ -161,13 +151,9 @@ def _df_to_bytes(df): parquet_traits = {} if pq_meta_cell is not None: - parquet_traits["meta_cell_parquet"] = traitlets.Bytes(pq_meta_cell).tag( - sync=True - ) + parquet_traits["meta_cell_parquet"] = traitlets.Bytes(pq_meta_cell).tag(sync=True) if pq_meta_cluster is not None: - parquet_traits["meta_cluster_parquet"] = traitlets.Bytes( - pq_meta_cluster - ).tag(sync=True) + parquet_traits["meta_cluster_parquet"] = traitlets.Bytes(pq_meta_cluster).tag(sync=True) if pq_umap is not None: parquet_traits["umap_parquet"] = traitlets.Bytes(pq_umap).tag(sync=True) From 570eea08b195b1fa976e599c5b749167e9c3a03c Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 16:42:47 -0400 Subject: [PATCH 29/30] cleaning --- js/global_variables/meta_cluster.js | 11 ----------- js/widget.js | 3 --- 2 files changed, 14 deletions(-) diff --git a/js/global_variables/meta_cluster.js b/js/global_variables/meta_cluster.js index 5f71d2a2..f7eb8731 100644 --- a/js/global_variables/meta_cluster.js +++ b/js/global_variables/meta_cluster.js @@ -1,4 +1,3 @@ -import { count } from 'd3'; import { get_arrow_table } from '../read_parquet/get_arrow_table'; import { hexToRgb } from '../utils/hexToRgb'; @@ -40,20 +39,10 @@ export const set_cluster_metadata = async (viz_state) => { ); } - // loop through the keys and assembe cluster_counts - // find the index of count in viz_state.cats.meta_cluster_attr const count_index = viz_state.cats.meta_cluster_attr.indexOf('count'); - // for (const cluster_name in viz_state.cats.meta_cluster) { - // viz_state.cats.cluster_counts.push({ - // name: cluster_name, - // value: viz_state.cats.meta_cluster[cluster_name][count_index] - // }); - // } - for (const cluster_name in viz_state.cats.meta_cluster) { - const count_index = viz_state.cats.meta_cluster_attr.indexOf('count'); const raw = viz_state.cats.meta_cluster[cluster_name][count_index]; const value = raw !== undefined ? Number(raw) : 0; diff --git a/js/widget.js b/js/widget.js index 9b6f6872..4776dbb8 100644 --- a/js/widget.js +++ b/js/widget.js @@ -24,9 +24,6 @@ const render_landscape_ist = async ({ model, el }) => { const dataset_name = model.get('dataset_name'); const width = model.get('width'); const height = model.get('height'); - // let meta_cell = model.get('meta_cell'); - // let meta_cluster = model.get('meta_cluster'); - // let umap = model.get('umap'); let meta_cell_data; let meta_cluster_data; From e997a4f41f7cc5a441444ae10d1d81b237f6f6e0 Mon Sep 17 00:00:00 2001 From: Nicolas Fernandez Date: Wed, 16 Jul 2025 16:51:51 -0400 Subject: [PATCH 30/30] test: add AnnData widget tests --- tests/unit/test_viz/test_landscape_anndata.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/unit/test_viz/test_landscape_anndata.py diff --git a/tests/unit/test_viz/test_landscape_anndata.py b/tests/unit/test_viz/test_landscape_anndata.py new file mode 100644 index 00000000..5233f437 --- /dev/null +++ b/tests/unit/test_viz/test_landscape_anndata.py @@ -0,0 +1,59 @@ +"""Tests for Landscape widget initialization with AnnData.""" + +import io +import numpy as np +import pandas as pd +import pytest +from anndata import AnnData + +try: + from celldega.viz import Landscape +except Exception as e: # pragma: no cover - skip if deps missing + pytest.skip(f"celldega modules unavailable: {e}", allow_module_level=True) + + +def make_simple_anndata() -> AnnData: + """Create a small AnnData object for testing.""" + np.random.seed(0) + X = np.random.rand(5, 3) + obs = pd.DataFrame({"leiden": pd.Categorical(["0", "1", "0", "1", "0"])}) + obs.index = [f"cell{i}" for i in range(5)] + var = pd.DataFrame(index=[f"gene{i}" for i in range(3)]) + adata = AnnData(X=X, obs=obs, var=var) + adata.obsm["X_umap"] = np.random.rand(5, 2) + adata.uns["leiden_colors"] = ["#ff0000", "#00ff00"] + return adata + + +def test_landscape_initializes_with_anndata() -> None: + """Landscape should accept AnnData and expose parquet traitlets.""" + adata = make_simple_anndata() + widget = Landscape(base_url="https://example.com", AnnData=adata) + + assert hasattr(widget, "meta_cell_parquet") + assert hasattr(widget, "meta_cluster_parquet") + assert hasattr(widget, "umap_parquet") + + meta_cell = pd.read_parquet(io.BytesIO(widget.meta_cell_parquet)) + meta_cluster = pd.read_parquet(io.BytesIO(widget.meta_cluster_parquet)) + umap_df = pd.read_parquet(io.BytesIO(widget.umap_parquet)) + + pd.testing.assert_frame_equal( + meta_cell, + adata.obs[["leiden"]].reset_index(), + ) + + cluster_counts = adata.obs["leiden"].value_counts().sort_index() + expected_cluster = pd.DataFrame( + { + "color": adata.uns["leiden_colors"][: len(cluster_counts)], + "count": cluster_counts.values, + }, + index=cluster_counts.index, + ).reset_index() + pd.testing.assert_frame_equal(meta_cluster, expected_cluster) + + pd.testing.assert_frame_equal( + umap_df, + pd.DataFrame(adata.obsm["X_umap"], index=adata.obs.index).reset_index(), + )