diff --git a/src/spatialdata_io/readers/visium.py b/src/spatialdata_io/readers/visium.py index 22a75855..e4d5a5cb 100644 --- a/src/spatialdata_io/readers/visium.py +++ b/src/spatialdata_io/readers/visium.py @@ -4,9 +4,10 @@ import os import re from collections.abc import Mapping +from enum import Enum from pathlib import Path from types import MappingProxyType -from typing import Any +from typing import Any, Tuple import numpy as np import pandas as pd @@ -277,3 +278,194 @@ def _read_image(image_file: Path, imread_kwargs: dict[str, Any]) -> Any: else: raise ValueError(f"Image shape {im.shape} is not supported.") return image + + +def get_sdata_res(sdata: SpatialData): + """ + Retrieve the image resolution from the Visium SpatialData object. + + This function extracts the shape (resolution) of the highest resolution image (scale0) + from the Visium SpatialData object. The shape is returned as the number of channels (c), + height (y), and width (x) in pixels. + + Parameters + ---------- + sdata : SpatialData + A SpatialData object containing images and spatial data, with image resolutions stored + in a multi-scale format. + + Returns + ------- + shape : tuple + A tuple containing the image shape in the format (channels, height, width): + - c: Number of image channels (typically RGB). + - y: Image height in pixels. + - x: Image width in pixels. + """ + + image_name = list(sdata.images.keys())[0] + dimensions = sdata.images[image_name]["scale0"].dims + shape = dimensions["c"], dimensions["y"], dimensions["x"] + return shape + + +class SpotPacking(Enum): + """Types of ST spots disposition, + for Orange Crate Packing see: + https://kb.10xgenomics.com/hc/en-us/articles/360041426992-Where-can-I-find-the-Space-Ranger-barcode-whitelist-and-their-coordinates-on-the-slide + """ + + ORANGE_CRATE_PACKING = 0 + GRID_PACKING = 1 + + +def find_pixel_size_visium( + my_df: pd.DataFrame, inter_spot_dist: float = 100.0, packing: SpotPacking = SpotPacking.ORANGE_CRATE_PACKING +) -> tuple[float, int]: + """Estimate the pixel size of an image in um/px given a dataframe containing the spot coordinates in that image + + Args: + my_df (pd.DataFrame): dataframe containing the coordinates of each spot in an image, it must contain the following columns: + ['pxl_row_in_fullres', 'pxl_col_in_fullres', 'array_col', 'array_row'] + inter_spot_dist (float, optional): the distance in um between two spots on the same row. Defaults to 100.. + packing (SpotPacking, optional): disposition of the spots on the slide. Defaults to SpotPacking.ORANGE_CRATE_PACKING. + + Raises: + Exception: if cannot find two spots on the same row + + Returns: + Tuple[float, int]: approximation of the pixel size in um/px and over how many spots that pixel size was estimated + """ + + def _cart_dist(start_spot, end_spot): + """cartesian distance in pixel between two spots""" + d = np.sqrt( + (start_spot["pxl_col_in_fullres"] - end_spot["pxl_col_in_fullres"]) ** 2 + + (start_spot["pxl_row_in_fullres"] - end_spot["pxl_row_in_fullres"]) ** 2 + ) + return d + + df = my_df.copy() + + max_dist_col = 0 + approx_nb = 0 + best_approx = 0 + df = df.sort_values("array_row") + for _, row in df.iterrows(): + y = row["array_col"] + x = row["array_row"] + if len(df[df["array_row"] == x]) > 1: + b = df[df["array_row"] == x]["array_col"].idxmax() + start_spot = row + end_spot = df.loc[b] + dist_px = _cart_dist(start_spot, end_spot) + + div = 1 if packing == SpotPacking.GRID_PACKING else 2 + dist_col = abs(df.loc[b, "array_col"] - y) // div + + approx_nb += 1 + + if dist_col > max_dist_col: + max_dist_col = dist_col + best_approx = inter_spot_dist / (dist_px / dist_col) + if approx_nb > 3: + break + + if approx_nb == 0: + raise Exception("Pixel size estimation failed. Couldn't find two spots on the same row") + + return best_approx, max_dist_col + + +def create_df_coord_visium(data: SpatialData): + """ + Create a DataFrame with coordinates and array indices from Visium SpatialData. + + This function processes the spatial shapes and table data from a Visium SpatialData object + to generate a DataFrame containing pixel coordinates (row and column) for each spot + on the tissue image at full resolution. It also includes the corresponding array row and column + indices from the data tables. + + Parameters + ---------- + data : SpatialData + A SpatialData object containing Visium spatial information, including shapes and table data + (spot coordinates and array indices). + + Returns + ------- + df_coord : pandas.DataFrame + A DataFrame with the following columns: + - 'pxl_row_in_fullres': Pixel row coordinates in full-resolution tissue image. + - 'pxl_col_in_fullres': Pixel column coordinates in full-resolution tissue image. + - 'array_row': Row index of the spot in the Visium array. + - 'array_col': Column index of the spot in the Visium array. + """ + tissue_name = list(data.shapes.keys())[0] + shapes_df = data.shapes[tissue_name] + shapes_df["pxl_col_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.x) + shapes_df["pxl_row_in_fullres"] = shapes_df.geometry.apply(lambda geom: geom.y) + + shapes_df["array_row"] = list(data.tables["table"].obs["array_row"]) + shapes_df["array_col"] = list(data.tables["table"].obs["array_col"]) + + # Now, you have the necessary DataFrame in the correct format: + df_coord = shapes_df[["pxl_row_in_fullres", "pxl_col_in_fullres", "array_row", "array_col"]] + return df_coord + + +def calculate_pixel_size_from_visium( + path: str, + dataset_id: str, + counts_file: str, + fullres_image_file: str, + tissue_positions_file: str, + scalefactors_file: str, + inter_spot_dist: float = 100.0, +) -> SpatialData: + """ + Main function to load data into a spatialdata class and calculate scale0 image shape and pixel size. + + Parameters + ---------- + path : str + Path to the directory containing the data. + dataset_id : str + ID of the dataset to use. + counts_file : str + Path to the filtered feature barcode matrix (counts file). + fullres_image_file : str + Path to the full-resolution image file (usually tissue_hires_image.png). + tissue_positions_file : str + Path to the tissue positions file (usually tissue_positions_list.csv). + scalefactors_file : str + Path to the scalefactors file (usually scalefactors_json.json). + inter_spot_dist : float, optional + Distance between 2 spots in a visium field. Default value = 100um. + + Returns + ------- + visium_sdata : SpatialData + SpatialData object that includes both image shape and pixel size stored in tables['table'].uns + """ + + # Load the SpatialData object using the visium function. 6 files are expected to be passed from Visium raw data + visium_sdata = visium( + path=path, + dataset_id=dataset_id, + counts_file=counts_file, + fullres_image_file=fullres_image_file, + tissue_positions_file=tissue_positions_file, + scalefactors_file=scalefactors_file, + ) + + df_coord = create_df_coord_visium(visium_sdata) + + pixel_size, _ = find_pixel_size_visium(df_coord, inter_spot_dist) + + image_shape = get_sdata_res(visium_sdata) + + visium_sdata.tables["table"].uns["image_shape"] = image_shape + visium_sdata.tables["table"].uns["pixel_size"] = pixel_size + + return visium_sdata