1+ from  pymol  import  cmd 
2+ from  io  import  StringIO 
3+ 
4+ try :
5+     from  Bio .SeqUtils .ProtParam  import  ProteinAnalysis 
6+     from  Bio  import  SeqIO 
7+     from  Bio .Seq  import  Seq 
8+ except  ModuleNotFoundError :
9+     # Note that Bio package might be missing from Pymol 2 installation! 
10+     print ("Oops! Protparam: Biopython is missing!\n  If you want to install it, run protparam_dependencies_install command" )
11+ 
12+ 
@cmd.extend
def protparam(selection='enabled', bychain=0):
    '''
    DESCRIPTION:
    Given selection, calculates common protein properties, like Mw, pI, length and aminoacid content.
    By default, combines all chains of each object into the single sequence.

    USAGE:
    protparam selection, [bychain]

    ARGUMENTS:
    selection = string: atom selection to analyse {default: enabled}
    bychain = 0/1: if 1, report every FASTA record (chain) separately instead
              of joining all records of an object into one sequence {default: 0}

    NOTES:
    Results are printed, nothing is returned.
    Sequences that are empty after removing '?' placeholders are skipped.

    DEPENDENCIES:
    biopython
    '''
    # Arguments typed on the PyMOL command line are passed in as strings and
    # the string '0' is truthy, so convert before using the flag in a test.
    bychain = int(bychain)

    #TODO: add pretty output suitable for copy-pasting
    for entry in cmd.get_object_list(selection):
        # Restrict the user's selection to the current object.
        fasta_text = cmd.get_fastastr(f"({selection}) and {entry}")
        sequences = [record.seq for record in SeqIO.parse(StringIO(fasta_text), "fasta")]
        if not bychain:
            # by default combine all chains into single sequence
            sequences = [Seq('').join(sequences)]
        for sequence in sequences:
            # '?' placeholders (e.g. produced for residues renamed to UNK) are
            # dropped so they do not reach ProteinAnalysis.
            sequence = str(sequence).replace('?', '').strip()
            if not sequence:
                # Nothing meaningful can be computed for an empty sequence.
                continue
            analysis = ProteinAnalysis(sequence)
            # Dict is useful when only specific residues should be reported
            counts_aa = analysis.count_amino_acids()
            print(f"Protein name: {entry}")
            print(f"Sequence: {sequence}")
            print(f"\nProtein length: {analysis.length} aa")
            print(f"Molecular Weight: {analysis.molecular_weight():.1f} Da")
            print(f"Isoelectric point: {analysis.isoelectric_point():.2f}")
            print(f"Count of aminoacids: {counts_aa}\n\n")
45+ 
@cmd.extend
def protparam_dependencies_install():
    '''
    DESCRIPTION:
    Installs biopython by running "pip install biopython" with the Python
    interpreter given by sys.executable, and prints whether it succeeded.

    USAGE:
    protparam_dependencies_install
    '''
    import subprocess
    import sys

    try:
        # Run pip as a module of the current interpreter; check_call raises
        # CalledProcessError when pip exits with a non-zero status.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "biopython"])
    except subprocess.CalledProcessError as e:
        print(f"Failed to install biopython: {e}")
    else:
        print("Successfully installed biopython! Reload Protparam plugin or restart PyMOL.")
55+ 
def test_protparam(capsys):
    '''
    pytest check of the text printed by protparam.

    Builds two peptide objects (m1 with chains A and B, m2 with one chain),
    renames the cysteine to UNK and inspects the captured stdout for the
    default (combined) mode, the per-chain mode, a selection that matches a
    single residue and a selection that matches nothing.
    '''
    cmd.reinitialize()
    cmd.fab("A// ACD B// EFG", "m1")
    cmd.fab("HIKL", "m2")
    # The renamed residue is expected to be absent from the reported sequences.
    cmd.alter("resn CYS", "resn='UNK'")

    # Default mode: chains of each object are joined into one sequence.
    protparam()
    captured = capsys.readouterr()
    assert "Protein name: m1" in captured.out
    assert "Protein name: m2" in captured.out
    assert "Sequence: ADEFG\n" in captured.out
    assert "Sequence: HIKL\n" in captured.out
    assert "Protein length: 2 aa" not in captured.out
    assert "Protein length: 3 aa" not in captured.out
    assert "Protein length: 4 aa" in captured.out
    assert "Protein length: 5 aa" in captured.out
    assert "Count of aminoacids: {'A': 1," in captured.out

    # Per-chain mode: every chain is reported on its own.
    protparam(bychain=1)
    captured = capsys.readouterr()
    assert "Protein name: m1" in captured.out
    assert "Protein name: m2" in captured.out
    assert "Sequence: AD\n" in captured.out
    assert "Sequence: EFG\n" in captured.out
    assert "Protein length: 2 aa" in captured.out
    assert "Protein length: 3 aa" in captured.out
    assert "Protein length: 4 aa" in captured.out
    assert "Protein length: 5 aa" not in captured.out
    assert "Molecular Weight: 204.2 Da" in captured.out

    # A selection that matches a single residue in m2 only.
    protparam("resn LYS")
    captured = capsys.readouterr()
    assert "Protein name: m1" not in captured.out
    assert "Protein name: m2" in captured.out
    assert "Protein length: 1 aa" in captured.out
    assert "Isoelectric point: 8.75" in captured.out

    # A selection that matches nothing must print nothing at all.
    protparam("resn TRP")
    captured = capsys.readouterr()
    assert captured.out == ""
    assert captured.err == ""
0 commit comments