Skip to content

Commit 0e24543

Browse files
author
Tobias Hofmann
committed
all main scripts running in python3
1 parent 6c64e83 commit 0e24543

14 files changed

+153
-128
lines changed

build/lib/secapr/_version.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,11 @@
88

99
version_json = '''
1010
{
11-
"date": "2020-09-02T23:47:50+0200",
11+
"date": "2020-09-03T08:40:33+0200",
1212
"dirty": true,
1313
"error": null,
14-
"full-revisionid": "d0c7a485eca91ca359c58fe38fb3f53d53f6851d",
15-
"version": "1.1.12+41.gd0c7a48.dirty"
14+
"full-revisionid": "6c64e8334194adde121523290caf950c97360619",
15+
"version": "1.1.12+43.g6c64e83.dirty"
1616
}
1717
''' # END VERSION_JSON
1818

build/lib/secapr/assemble_reads.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def add_arguments(parser):
3939
)
4040
parser.add_argument(
4141
'--assembler',
42-
choices=["spades","abyss","trinity"],
42+
choices=["spades","abyss"],# trinity support discontinued
4343
default="spades",
4444
help="""The assembler to use (default = spades)."""
4545
)
@@ -57,7 +57,7 @@ def add_arguments(parser):
5757
parser.add_argument(
5858
'--max_memory',
5959
type=str,
60-
help='Set the maximum memory to be used during assembly in GB (only available for Spades and Trinity). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'
60+
help='Set the maximum memory to be used during assembly in GB (only available for Spades). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'
6161
)
6262
parser.add_argument(
6363
'--single_reads',

build/lib/secapr/locus_selection.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
import pandas as pd
1111
import pickle
1212
from Bio import SeqIO
13-
from .utils import CompletePath
14-
13+
from secapr.utils import CompletePath
14+
from secapr.helpers import CreateDir
1515

1616
# Get arguments
1717
def add_arguments(parser):
@@ -25,7 +25,7 @@ def add_arguments(parser):
2525
parser.add_argument(
2626
'--output',
2727
required=True,
28-
action=CompletePath,
28+
action=CreateDir,
2929
default=None,
3030
help='The output directory where results will be safed.'
3131
)
@@ -174,20 +174,20 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
174174
else:
175175
coverage_all_samples = pd.read_csv("%s/average_cov_per_locus.txt" %input_dir, sep = '\t')
176176
# Return boolean for every field, depending on if its greater than the threshold
177-
thres_test = coverage_all_samples.ix[:,1:]>threshold
177+
thres_test = coverage_all_samples.iloc[:,1:]>threshold
178178
# Extract only those rows for which all fields returned 'True' and store in new df
179179
selected_rows = pd.DataFrame([])
180180
for line in thres_test.iterrows():
181181
line = line[1]
182182
if line.all():
183183
selected_rows = selected_rows.append(line)
184184
# Store all indices of the selected data (selected_rows) in a list
185-
indeces = list(selected_rows.index.get_values())
185+
indeces = list(selected_rows.index.values)
186186
# Use indices to extract rows from oriignal df and create new one from it
187187
loci_passing_test = coverage_all_samples.iloc[indeces,:].copy()
188188
list_of_good_loci = list(loci_passing_test.locus)
189189
# Calculate the read-depth sum across all samples for each locus and store as new column in df
190-
loci_passing_test['sum_per_locus'] = loci_passing_test.ix[:,1:].sum(axis=1)
190+
loci_passing_test['sum_per_locus'] = loci_passing_test.iloc[:,1:].sum(axis=1)
191191
# Sort the df by the 'sum' column to have the best covered loci on top
192192
loci_passing_test.sort_values('sum_per_locus', axis=0, ascending=False, inplace=True)
193193
# select best n rows
@@ -235,7 +235,7 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
235235
#locus_sequence = sequence.seq
236236
if locus_name_corrected in target_loci:
237237
sequence_collection.append(sequence)
238-
SeqIO.write(sequence_collection, "%s/%s_%s_selected_sequences.fasta" %(output_subfolder_dict[sample],sample,input_type), "fasta")
238+
SeqIO.write(sequence_collection, "%s/%s_%s_selected_sequences.fasta" %(output_subfolder_dict[sample],sample,input_type), "fasta-2line")
239239
# Now produce a new bam-file
240240
bam = '%s/%s*.bam' %(sample_subfolder_dict[sample],sample)
241241
target_files = glob.glob(bam)
@@ -248,7 +248,7 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
248248
convert_to_bam = 'samtools view -Sb %s > %s' %(sam_output_file,bam_output_file)
249249
os.system(convert_to_bam)
250250
sorted_bam_out = os.path.join(output_subfolder_dict[sample],"%s_%s_selected_loci_sorted.bam" %(sample,input_type))
251-
sort_bam = 'samtools sort %s %s' %(bam_output_file,sorted_bam_out.replace('.bam',''))
251+
sort_bam = 'samtools sort %s -o %s' %(bam_output_file,sorted_bam_out)
252252
os.system(sort_bam)
253253
index_bam = 'samtools index %s' %(sorted_bam_out)
254254
os.system(index_bam)
@@ -279,10 +279,10 @@ def main(args):
279279
output_folder = args.output
280280
n = args.n
281281
threshold = args.read_cov
282-
if not os.path.exists(output_folder):
283-
os.makedirs(output_folder)
284-
else:
285-
raise IOError("The directory {} already exists. Please check and remove by hand.".format(output_folder))
282+
#if not os.path.exists(output_folder):
283+
# os.makedirs(output_folder)
284+
#else:
285+
# raise IOError("The directory {} already exists. Please check and remove by hand.".format(output_folder))
286286
# Create a dictionary containing the bam-file paths for each sample and tell if data is phased or unphased
287287
sample_bam_dict, input_type = get_bam_path_dict(input_dir)
288288
if input_type == 'unphased':

build/lib/secapr/mafft.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from Bio import AlignIO
2020
from Bio.Alphabet import IUPAC, Gapped
2121

22-
from .generic_align import GenericAlign
22+
from secapr.generic_align import GenericAlign
2323

2424

2525
class Align(GenericAlign):

build/lib/secapr/muscle.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from Bio import AlignIO
2020
from Bio.Alphabet import IUPAC, Gapped
2121

22-
from .generic_align import GenericAlign
22+
from secapr.generic_align import GenericAlign
2323

2424

2525
class Align(GenericAlign):

build/lib/secapr/phase_alleles.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@
1616
import subprocess
1717
import pickle
1818
from Bio import SeqIO
19-
from .utils import CompletePath
20-
from .reference_assembly import bam_consensus, join_fastas
19+
from secapr.utils import CompletePath
20+
from secapr.reference_assembly import bam_consensus, join_fastas
21+
from secapr.helpers import CreateDir
22+
2123

2224
# Get arguments
2325
def add_arguments(parser):
@@ -31,7 +33,7 @@ def add_arguments(parser):
3133
parser.add_argument(
3234
'--output',
3335
required=True,
34-
action=CompletePath,
36+
action=CreateDir,
3537
default=None,
3638
help='The output directory where results will be safed.'
3739
)
@@ -91,8 +93,8 @@ def phase_bam(sorted_bam_file,sample_output_folder,min_cov,reference):
9193
allele_1_sorted_file = "%s.bam" %allele_1_sorted_base
9294

9395
# Sorting phased bam files:
94-
sort_phased_0 = "samtools sort %s %s" %(allele_0_file,allele_0_sorted_base)
95-
sort_phased_1 = "samtools sort %s %s" %(allele_1_file,allele_1_sorted_base)
96+
sort_phased_0 = "samtools sort -o %s %s" %(allele_0_sorted_file,allele_0_file)
97+
sort_phased_1 = "samtools sort -o %s %s" %(allele_1_sorted_file,allele_1_file)
9698
#sort_phased_0 = "samtools sort -o %s %s" %(allele_0_sorted_file, allele_0_file)
9799
#sort_phased_1 = "samtools sort -o %s %s" %(allele_1_sorted_file,allele_1_file)
98100
os.system(sort_phased_0)
@@ -150,14 +152,11 @@ def manage_homzygous_samples(fasta_dir, sample_id):
150152

151153

152154
def main(args):
155+
print('\n')
153156
min_cov = args.min_coverage
154157
reference = args.reference
155158
# Set working directory
156159
out_dir = args.output
157-
if not os.path.exists(out_dir):
158-
os.makedirs(out_dir)
159-
else:
160-
raise IOError("The directory {} already exists. Please check and remove by hand.".format(out_dir))
161160
input_folder = args.input
162161
sample_out_list = []
163162
# Iterate through all sample specific subfolders
@@ -179,7 +178,7 @@ def main(args):
179178
if file.endswith("sorted.bam"):
180179
sorted_bam = file
181180
sorted_bam_path = os.path.join(subfolder_path,sorted_bam)
182-
print(("#" * 50))
181+
print(('\n'+"#" * 50))
183182
print(('Processing sample %s' %sample))
184183
allele_fastas = phase_bam(sorted_bam_path,sample_output_folder,min_cov,reference)
185184

@@ -198,4 +197,5 @@ def main(args):
198197
os.remove(os.path.join(allele_fastas,allele0))
199198
os.remove(os.path.join(allele_fastas,allele1))
200199

201-
join_fastas(out_dir,sample_out_list)
200+
join_fastas(out_dir,sample_out_list)
201+
print('\n')

0 commit comments

Comments
 (0)