Skip to content

Commit f304b2b

Browse files
author
Evgenii Osipov
committed
Added protparam script to calculate protein length, Mw, pI and aa content
Fix. Correcting for PR comments Correcting for PR comments 2. Test added
1 parent 6108ee4 commit f304b2b

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed

scripts/protparam.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from pymol import cmd
2+
from io import StringIO
3+
4+
try:
5+
from Bio.SeqUtils.ProtParam import ProteinAnalysis
6+
from Bio import SeqIO
7+
from Bio.Seq import Seq
8+
except ModuleNotFoundError:
9+
# Note that Bio package might be missing from Pymol 2 installation!
10+
print("Oops! Protparam: Biopython is missing!\n If you want to install it, run protparam_dependencies_install command")
11+
12+
13+
@cmd.extend
14+
def protparam(selection='enabled', bychain=0):
15+
'''
16+
DESCRIPTION:
17+
Given selection, calculates common protein properties, like Mw, pI, length and aminoacid content.
18+
By default, combines all chains of each object into the single sequence.
19+
20+
USAGE:
21+
protparam selection, [bychain]
22+
23+
DEPENDENCIES:
24+
biopython
25+
'''
26+
#TODO: add pretty output suitable for copy-pasting
27+
for entry in cmd.get_object_list(selection):
28+
sequence_obj = cmd.get_fastastr(f"({selection}) and {entry}")
29+
fasta_io = StringIO(sequence_obj)
30+
sequences = list(SeqIO.parse(fasta_io, "fasta"))
31+
sequences = [s.seq for s in sequences]
32+
if not bychain:
33+
#by default combine all chains into single sequence
34+
sequences = [Seq('').join(sequences)]
35+
for sequence in sequences:
36+
sequence = str(sequence).replace('?','').strip()
37+
analysis = ProteinAnalysis(sequence)
38+
counts_aa = analysis.count_amino_acids() #Dict is useful when only specific residues should be reported
39+
print(f"Protein name: {entry}")
40+
print(f"Sequence: {sequence}")
41+
print(f"\nProtein length: {analysis.length} aa")
42+
print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
43+
print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
44+
print(f"Count of aminoacids: {counts_aa}\n\n")
45+
46+
@cmd.extend
47+
def protparam_dependencies_install():
48+
import sys
49+
import subprocess
50+
try:
51+
subprocess.check_call([sys.executable, "-m", "pip", "install", 'biopython'])
52+
print(f"Successfully installed biopython! Reload Protparam plugin or restart PyMOL.")
53+
except subprocess.CalledProcessError as e:
54+
print(f"Failed to install biopython: {e}")
55+
56+
def test_protparam(capsys):
57+
cmd.reinitialize()
58+
cmd.fab("A// ACD B// EFG", "m1")
59+
cmd.fab("HIKL", "m2")
60+
cmd.alter("resn CYS", "resn='UNK'")
61+
protparam()
62+
captured = capsys.readouterr()
63+
assert "Protein name: m1" in captured.out
64+
assert "Protein name: m2" in captured.out
65+
assert "Sequence: ADEFG\n" in captured.out
66+
assert "Sequence: HIKL\n" in captured.out
67+
assert "Protein length: 2 aa" not in captured.out
68+
assert "Protein length: 3 aa" not in captured.out
69+
assert "Protein length: 4 aa" in captured.out
70+
assert "Protein length: 5 aa" in captured.out
71+
assert "Count of aminoacids: {'A': 1," in captured.out
72+
protparam(bychain=1)
73+
captured = capsys.readouterr()
74+
assert "Protein name: m1" in captured.out
75+
assert "Protein name: m2" in captured.out
76+
assert "Sequence: AD\n" in captured.out
77+
assert "Sequence: EFG\n" in captured.out
78+
assert "Protein length: 2 aa" in captured.out
79+
assert "Protein length: 3 aa" in captured.out
80+
assert "Protein length: 4 aa" in captured.out
81+
assert "Protein length: 5 aa" not in captured.out
82+
assert "Molecular Weight: 204.2 Da" in captured.out
83+
protparam("resn LYS")
84+
captured = capsys.readouterr()
85+
assert "Protein name: m1" not in captured.out
86+
assert "Protein name: m2" in captured.out
87+
assert "Protein length: 1 aa" in captured.out
88+
assert "Isoelectric point: 8.75" in captured.out
89+
protparam("resn TRP")
90+
captured = capsys.readouterr()
91+
assert captured.out == ""
92+
assert captured.err == ""

0 commit comments

Comments
 (0)