Skip to content

Commit dd3750c

Browse files
authored
Merge pull request #56 from XingerTang/main
Release v0.0.3 - Fix #51, - Fix #52, - Fix #53 - Update installation instructions in README and documentation - Add accuracy tests
2 parents b2d59c2 + c0e53a3 commit dd3750c

26 files changed

+12570
-71
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
run: python3 -m build
3737

3838
- name: Install AlphaImpute2
39-
run: pip install dist/alphaimpute2-0.0.2-py3-none-any.whl
39+
run: pip install dist/alphaimpute2-0.0.3-py3-none-any.whl
4040

4141
- name: Install pytest
4242
run: |

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ __pycache__/
99
build/
1010
example/outputs
1111
.env
12-
tests/functional_tests/outputs
12+
tests/*_tests/outputs
13+
tests/accuracy_tests/accu_report.txt

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ AlphaImpute2 is available on [PyPI](https://pypi.org/project/AlphaImpute2/):
1515

1616
pip install AlphaImpute2
1717

18-
Availability
19-
============
18+
Example
19+
=======
2020

21-
The AlphaImpute2.zip file contains a python3 wheel file, a manual, and an example dataset.
21+
An example dataset and corresponding script are available in the ``example/`` folder.
2222

2323
Conditions of use
2424
=================

conftest.py

Lines changed: 187 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,198 @@
1+
import pytest
2+
import operator
13
import os
24
import shutil
5+
import numpy as np
6+
7+
8+
@pytest.fixture(scope="session")
9+
def get_params():
10+
param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
11+
with open(param_file, "r") as file:
12+
sim_params = [line.strip().split() for line in file]
13+
14+
params = {}
15+
for param_name, param_value in sim_params:
16+
params[param_name] = float(param_value)
17+
18+
return params
319

420

521
def pytest_configure(config):
622
"""
7-
Prepare path and report file for functional tests
23+
Prepare path and report file for accuracy tests and functional tests
824
"""
25+
accu_output_path = os.path.join("tests", "accuracy_tests", "outputs")
26+
if os.path.exists(accu_output_path):
27+
shutil.rmtree(accu_output_path)
28+
os.mkdir(accu_output_path)
29+
930
func_output_path = os.path.join("tests", "functional_tests", "outputs")
1031
if os.path.exists(func_output_path):
1132
shutil.rmtree(func_output_path)
1233
os.mkdir(func_output_path)
34+
35+
report_path = os.path.join("tests", "accuracy_tests", "accu_report.txt")
36+
f = open(report_path, "w")
37+
f.close()
38+
39+
40+
@pytest.hookimpl(hookwrapper=True)
41+
def pytest_runtest_makereport():
42+
out = yield
43+
report = out.get_result()
44+
if (
45+
report.nodeid[:37] == "tests/accuracy_tests/run_accu_test.py"
46+
and report.when == "call"
47+
):
48+
stdout = report.sections
49+
if "combined" in report.nodeid or "ped_only" in report.nodeid:
50+
num_file = 3
51+
else:
52+
num_file = 2
53+
accu = stdout[-1][-1].split("\n")[-(2 + 3 * num_file) :]
54+
name = accu[0].split()[-1]
55+
with open("tests/accuracy_tests/accu_report.txt", "a") as file:
56+
for i in range(num_file):
57+
assessed_file = accu[i * 3 + 1].split()[-1]
58+
file.write(name + " " + assessed_file + " " + accu[i * 3 + 2] + "\n")
59+
file.write(name + " " + assessed_file + " " + accu[i * 3 + 3] + "\n")
60+
61+
62+
@pytest.hookimpl()
63+
def pytest_terminal_summary(terminalreporter):
64+
param_file = os.path.join("tests", "accuracy_tests", "simulation_parameters.txt")
65+
with open(param_file, "r") as file:
66+
sim_params = [line.strip().split() for line in file]
67+
68+
params = {}
69+
for param_name, param_value in sim_params:
70+
params[param_name] = float(param_value)
71+
72+
nGen = int(params["nGen"])
73+
74+
file_types = [
75+
"genotypes",
76+
"haplotypes",
77+
"segregation",
78+
]
79+
columns = (
80+
"Test Name",
81+
"File Name",
82+
"Accu Type",
83+
"Population Accu",
84+
"Gen1 Accu",
85+
"Gen2 Accu",
86+
"Gen3 Accu",
87+
"Gen4 Accu",
88+
"Gen5 Accu",
89+
)
90+
dt = {"names": columns, "formats": ("U76", "U28", "U25") + ("f4",) * (nGen + 1)}
91+
accu = np.loadtxt("tests/accuracy_tests/accu_report.txt", encoding=None, dtype=dt)
92+
93+
mkr_accu = list(
94+
filter(
95+
lambda x: x["Accu Type"] == "Marker_accuracies",
96+
accu,
97+
)
98+
)
99+
ind_accu = list(
100+
filter(
101+
lambda x: x["Accu Type"] == "Individual_accuracies",
102+
accu,
103+
)
104+
)
105+
106+
mkr_accu_file = {}
107+
ind_accu_file = {}
108+
for file_type in file_types:
109+
mkr_accu_file[file_type] = list(
110+
filter(
111+
lambda x: x["File Name"] == file_type,
112+
mkr_accu,
113+
)
114+
)
115+
ind_accu_file[file_type] = list(
116+
filter(
117+
lambda x: x["File Name"] == file_type,
118+
ind_accu,
119+
)
120+
)
121+
122+
test_names = list(
123+
map(
124+
lambda x: x["Test Name"],
125+
filter(
126+
lambda x: x["File Name"] == file_types[0],
127+
mkr_accu,
128+
),
129+
)
130+
)
131+
test_nums = {}
132+
count = 0
133+
for test_name in test_names:
134+
count += 1
135+
test_nums[test_name] = count
136+
137+
sorted_mkr_accu_file = {}
138+
sorted_ind_accu_file = {}
139+
for file_type in file_types:
140+
sorted_mkr_accu_file[file_type] = sorted(
141+
mkr_accu_file[file_type],
142+
key=operator.itemgetter(*columns[3:]),
143+
reverse=True,
144+
)
145+
sorted_ind_accu_file[file_type] = sorted(
146+
ind_accu_file[file_type],
147+
key=operator.itemgetter(*columns[3:]),
148+
reverse=True,
149+
)
150+
151+
format_first_row = "{:<20} " * (nGen + 2) + "{:<35} "
152+
format_row = "{:<20.0f} " + "{:<20.3f} " * (nGen + 1) + "{:<35} "
153+
out_columns = (
154+
"Test Num",
155+
"Population Accu",
156+
"Gen1 Accu",
157+
"Gen2 Accu",
158+
"Gen3 Accu",
159+
"Gen4 Accu",
160+
"Gen5 Accu",
161+
"Test Name",
162+
)
163+
164+
def write_report(accu_type, sorted=False):
165+
if not sorted:
166+
terminalreporter.write_sep("=", accu_type + " Accuracy")
167+
if accu_type == "Marker":
168+
reports = mkr_accu_file
169+
else:
170+
reports = ind_accu_file
171+
else:
172+
terminalreporter.write_sep("=", accu_type + " Accuracy (Order by accuracy)")
173+
if accu_type == "Marker":
174+
reports = sorted_mkr_accu_file
175+
else:
176+
reports = sorted_ind_accu_file
177+
178+
for file_type in file_types:
179+
terminalreporter.write_sep("-", file_type)
180+
terminalreporter.write_line(format_first_row.format(*out_columns))
181+
for test in reports[file_type]:
182+
terminalreporter.write_line(
183+
format_row.format(
184+
test_nums[test["Test Name"]],
185+
test["Population Accu"],
186+
test["Gen1 Accu"],
187+
test["Gen2 Accu"],
188+
test["Gen3 Accu"],
189+
test["Gen4 Accu"],
190+
test["Gen5 Accu"],
191+
test["Test Name"],
192+
)
193+
)
194+
195+
write_report("Marker", sorted=False)
196+
write_report("Individual", sorted=False)
197+
write_report("Marker", sorted=True)
198+
write_report("Individual", sorted=True)

docs/source/index.rst

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,17 @@ AlphaImpute2 is program to perform imputation in a range of animal and plant spe
2222

2323
2424

25-
Availability
25+
Installation
2626
------------
2727

28-
AlphaImpute2 is available from the `AlphaGenes <http://www.alphagenes.roslin.ed.ac.uk/software-packages/AlphaImpute2/>`_ website. The download files contains a python wheel file along with this documentation and an example.
28+
AlphaImpute2 is available from the `PyPI <https://pypi.org/project/AlphaImpute2/>`_ website.
29+
30+
You can install the latest release with
31+
32+
.. code-block:: bash
33+
pip install AlphaImpute2
34+
35+
The source code is available at `GitHub <https://github.com/AlphaGenes/AlphaImpute2>`_.
2936

3037
Conditions of use
3138
-----------------
@@ -68,29 +75,20 @@ Input Arguments
6875
::
6976

7077
Input arguments:
71-
-bfile [BFILE ...] A file in plink (binary) format. Only stable on Linux).
7278
-genotypes [GENOTYPES ...]
7379
A file in AlphaGenes format.
74-
-reference [REFERENCE ...]
75-
A haplotype reference panel in AlphaGenes format.
76-
-seqfile [SEQFILE ...]
77-
A sequence data file.
7880
-pedigree [PEDIGREE ...]
7981
A pedigree file in AlphaGenes format.
80-
-phasefile [PHASEFILE ...]
81-
A phase file in AlphaGenes format.
8282
-startsnp STARTSNP The first marker to consider. The first marker in the file is marker '1'. Default: 1.
8383
-stopsnp STOPSNP The last marker to consider. Default: all markers considered.
8484
-seed SEED A random seed to use for debugging.
8585

8686
AlphaImpute2 requires a genotype file and an optional pedigree file to run the analysis.
8787

88-
AlphaImpute2 supports binary plink files, ``-bfile``, genotype files in the AlphaGenesFormat, ``-genotypes``. A pedigree file may be supplied using the ``-pedigree`` option.
88+
A pedigree file may be supplied using the ``-pedigree`` option.
8989

9090
Use the ``-startsnp`` and ``-stopsnp`` comands to run the analysis only on a subset of markers.
9191

92-
Binary plink files require the package ``alphaplinkpython``. This can be installed via ``pip`` but is only stable for Linux.
93-
9492
Imputation arguments:
9593
------------------------
9694
::
@@ -221,28 +219,6 @@ Example: ::
221219
id3 id1 id2
222220
id4 id1 id2
223221

224-
Phase file
225-
-----------
226-
227-
The phase file gives the phased haplotypes (either 0 or 1) for each individual in two lines. For individuals where we can determine the haplotype of origin, the first line will provide information on the paternal haplotype, and the second line will provide information on the maternal haplotype.
228-
229-
Example: ::
230-
231-
id1 0 1 9 0 # Maternal haplotype
232-
id1 0 1 9 0 # Paternal haplotype
233-
id2 1 1 1 0
234-
id2 0 0 0 1
235-
id3 1 0 1 0
236-
id3 1 0 1 0
237-
id4 0 1 0 0
238-
id4 0 1 1 0
239-
240-
241-
Binary plink file
242-
-----------------
243-
244-
AlphaImpute2 supports the use of binary plink files using the package ``AlphaPlinkPython``. AlphaImpute2 will use the pedigree supplied by the ``.fam`` file if a pedigree file is not supplied. Otherwise the pedigree file will be used and the ``.fam`` file will be ignored.
245-
246222

247223
Output file formats
248224
~~~~~~~~~~~~~~~~~~~

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "AlphaImpute2"
7-
version = "0.0.2"
7+
version = "0.0.3"
88
authors = [
99
{ name="Andrew Whalen", email="[email protected]" },
1010
]

src/alphaimpute2/Imputation/Heuristic_Peeling.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,21 +206,36 @@ def call_genotypes(ind, final_cutoff, error_rate):
206206
else:
207207
nLoci = len(ind.genotypes)
208208

209-
penetrance = np.full((4, nLoci), 1, dtype=np.float32)
210-
ind.peeling_view.setValueFromGenotypes(penetrance, error_rate)
209+
genotypeProbabilities = np.full((4, nLoci), 1, dtype=np.float32)
210+
ind.peeling_view.setValueFromGenotypes(genotypeProbabilities, error_rate)
211211

212212
if ind.sire is not None and ind.dam is not None:
213213
# Re-calculate parent genotypes with all sources of information.
214214
ind.sire.peeling_view.setGenotypesAll(ind.sire.peeling_view.currentCutoff)
215215
ind.dam.peeling_view.setGenotypesAll(ind.dam.peeling_view.currentCutoff)
216216

217217
# Calculate anterior from the parents.
218+
219+
genotypeProbabilities = ind.peeling_view.genotypeProbabilities
218220
anterior = getAnterior(
219221
ind.peeling_view, ind.sire.peeling_view, ind.dam.peeling_view
220222
)
221-
penetrance *= anterior # Add the penetrance with the anterior. Normalization will happen within the function.
223+
genotypeProbabilities *= anterior # Add the penetrance with the anterior. Normalization will happen within the function.
224+
225+
ind.peeling_view.setGenotypesFromGenotypeProbabilities(
226+
genotypeProbabilities, final_cutoff
227+
)
222228

223-
ind.peeling_view.setGenotypesFromGenotypeProbabilities(penetrance, final_cutoff)
229+
if final_cutoff < 0.5:
230+
nLoci = len(ind.genotypes)
231+
for i in range(nLoci):
232+
if ind.genotypes[i] == 1:
233+
if ind.haplotypes[0][i] + ind.haplotypes[1][i] != 1:
234+
# correct the genotype-haplotype mismatch
235+
phase_probs = ind.peeling_view.genotypeProbabilities[1:3, i]
236+
phase = np.argmax(phase_probs)
237+
ind.haplotypes[0][i] = phase
238+
ind.haplotypes[1][i] = 1 - phase
224239

225240

226241
@time_func("Core peeling cycles")

0 commit comments

Comments
 (0)