Skip to content

ts.pca gives IndexError for unsimplified tree sequence #3365

@hyanwong

Description

@hyanwong

In a tree sequence that has census nodes from different populations, it appears as if I can't run ts.pca, even if the census nodes are not samples. At least, I think that's why the code below errors out:

import msprime
import collections
import numpy as np

from matplotlib import pyplot as plt

# Make a tree seq by unioning different populations together

seq_length = 1e6
recent_generations = 75
rho = 1e-7
tree = "((A:200,B:200):100,(C:100,D:100):200);"
initial_size = collections.defaultdict(lambda: 500)
initial_size.update({"A": 1000, "B": 2000, "C": 1000, "D": 1000})
samples = collections.defaultdict(int)
samples.update({"A": 10, "B": 10, "C": 10, "D": 10})

ts_store = []
for name, Ne in initial_size.items():
    demography = msprime.Demography()
    demography.add_population(name=name, initial_size=Ne)
    ts_store.append(msprime.sim_ancestry(
        samples[name],
        demography=demography,
        recombination_rate=rho,
        sequence_length=seq_length,
        end_time=recent_generations,
        random_seed=123,
    ))
tables = ts_store[0].dump_tables()
for ts in ts_store[1:]:
    tables.union(ts.dump_tables(), node_mapping=np.full(ts.num_nodes, tskit.NULL))

ts_initial = tables.tree_sequence()
demography = msprime.Demography.from_species_tree(tree, initial_size)
ts = msprime.sim_ancestry(initial_state= ts_initial, recombination_rate=rho, sequence_length=seq_length, demography=demography, random_seed=12)

# Actual test

ts.pca(2) # fails
ts.simplify().pca(2)  # Works

Edit: This fails (on my machine, PLR) with:

Traceback (most recent call last):
  File "/home/peter/debug/tskit/pca/census_nodes.py", line 41, in <module>
    ts.pca(2) # fails
    ~~~~~~^^^
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9387, in pca
    U[i], D[i], Q[i], E[i] = _rand_svd(
                             ~~~~~~~~~^
        operator=_G,
        ^^^^^^^^^^^^
    ...<4 lines>...
        range_sketch=range_sketch[i],
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9329, in _rand_svd
    Q = _rand_pow_range_finder(
        operator,
    ...<4 lines>...
        Q=range_sketch,
    )
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9313, in _rand_pow_range_finder
    Q = operator(Q)
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9368, in _G
    high = _f_high(
        arr=x,
    ...<3 lines>...
        windows=windows[i : i + 2],
    )
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9084, in _genetic_relatedness_vector_node
    x = self._expand_indices(x, indices)
  File "/home/peter/projects/tskit-dev/tsvenv/lib/python3.13/site-packages/tskit/trees.py", line 9071, in _expand_indices
    y[indices] = x
    ~^^^^^^^^^
IndexError: index 153 is out of bounds for axis 0 with size 80

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions