AntonelliLab
diff --git a/build/lib/secapr/_version.py b/build/lib/secapr/_version.py
Lines changed: 3 additions & 3 deletions
diff --git a/build/lib/secapr/assemble_reads.py b/build/lib/secapr/assemble_reads.py
Lines changed: 2 additions & 2 deletions
diff --git a/build/lib/secapr/locus_selection.py b/build/lib/secapr/locus_selection.py
Lines changed: 12 additions & 12 deletions
diff --git a/build/lib/secapr/mafft.py b/build/lib/secapr/mafft.py
Lines changed: 1 addition & 1 deletion
diff --git a/build/lib/secapr/muscle.py b/build/lib/secapr/muscle.py
Lines changed: 1 addition & 1 deletion
diff --git a/build/lib/secapr/phase_alleles.py b/build/lib/secapr/phase_alleles.py
Lines changed: 11 additions & 11 deletions
@@ -8,11 +8,11 @@
 
 version_json = '''
 {
- "date": "2020-09-02T23:47:50+0200",
+ "date": "2020-09-03T08:40:33+0200",
  "dirty": true,
  "error": null,
- "full-revisionid": "d0c7a485eca91ca359c58fe38fb3f53d53f6851d",
- "version": "1.1.12+41.gd0c7a48.dirty"
+ "full-revisionid": "6c64e8334194adde121523290caf950c97360619",
+ "version": "1.1.12+43.g6c64e83.dirty"
 }
 '''  # END VERSION_JSON
 
 
@@ -39,7 +39,7 @@ def add_arguments(parser):
     )
     parser.add_argument(
         '--assembler',
-        choices=["spades","abyss","trinity"],
+        choices=["spades","abyss"],# trinity support discontinued 
         default="spades",
         help="""The assembler to use (default = spades)."""
     )
@@ -57,7 +57,7 @@ def add_arguments(parser):
     parser.add_argument(
         '--max_memory',
         type=str,
-        help='Set the maximum memory to be used during assembly in GB (only available for Spades and Trinity). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'
+        help='Set the maximum memory to be used during assembly in GB (only available for Spades). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'
     )
     parser.add_argument(
         '--single_reads',
 
@@ -10,8 +10,8 @@
 import pandas as pd
 import pickle
 from Bio import SeqIO
-from .utils import CompletePath
-
+from secapr.utils import CompletePath
+from secapr.helpers import CreateDir
 
 # Get arguments
 def add_arguments(parser):
@@ -25,7 +25,7 @@ def add_arguments(parser):
     parser.add_argument(
         '--output',
         required=True,
-        action=CompletePath,
+        action=CreateDir,
         default=None,
         help='The output directory where results will be safed.'
     )
@@ -174,20 +174,20 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
     else:
         coverage_all_samples = pd.read_csv("%s/average_cov_per_locus.txt" %input_dir, sep = '\t')
     # Return boolean for every field, depending on if its greater than the threshold
-    thres_test = coverage_all_samples.ix[:,1:]>threshold
+    thres_test = coverage_all_samples.iloc[:,1:]>threshold
     # Extract only those rows for which all fields returned 'True' and store in new df
     selected_rows = pd.DataFrame([])
     for line in thres_test.iterrows():
         line = line[1]
         if line.all():
             selected_rows = selected_rows.append(line)
     # Store all indices of the selected data (selected_rows) in a list
-    indeces = list(selected_rows.index.get_values())
+    indeces = list(selected_rows.index.values)
     # Use indices to extract rows from oriignal df and create new one from it
     loci_passing_test = coverage_all_samples.iloc[indeces,:].copy()
     list_of_good_loci = list(loci_passing_test.locus)
     # Calculate the read-depth sum across all samples for each locus and store as new column in df 
-    loci_passing_test['sum_per_locus'] = loci_passing_test.ix[:,1:].sum(axis=1)
+    loci_passing_test['sum_per_locus'] = loci_passing_test.iloc[:,1:].sum(axis=1)
     # Sort the df by the 'sum' column to have the best covered loci on top
     loci_passing_test.sort_values('sum_per_locus', axis=0, ascending=False, inplace=True)
     # select best n rows
@@ -235,7 +235,7 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
             #locus_sequence = sequence.seq
             if locus_name_corrected in target_loci:
                 sequence_collection.append(sequence)
-        SeqIO.write(sequence_collection, "%s/%s_%s_selected_sequences.fasta" %(output_subfolder_dict[sample],sample,input_type), "fasta")
+        SeqIO.write(sequence_collection, "%s/%s_%s_selected_sequences.fasta" %(output_subfolder_dict[sample],sample,input_type), "fasta-2line")
         # Now produce a new bam-file
         bam = '%s/%s*.bam' %(sample_subfolder_dict[sample],sample)
         target_files = glob.glob(bam)
@@ -248,7 +248,7 @@ def extract_best_loci(subfolder_file_dict,sample_bam_dict,output_folder,n,thresh
         convert_to_bam = 'samtools view -Sb %s > %s' %(sam_output_file,bam_output_file)
         os.system(convert_to_bam)
         sorted_bam_out = os.path.join(output_subfolder_dict[sample],"%s_%s_selected_loci_sorted.bam" %(sample,input_type))
-        sort_bam = 'samtools sort %s %s' %(bam_output_file,sorted_bam_out.replace('.bam',''))
+        sort_bam = 'samtools sort %s -o %s' %(bam_output_file,sorted_bam_out)
         os.system(sort_bam)
         index_bam = 'samtools index %s' %(sorted_bam_out)
         os.system(index_bam)
@@ -279,10 +279,10 @@ def main(args):
     output_folder = args.output
     n = args.n
     threshold = args.read_cov
-    if not os.path.exists(output_folder):
-        os.makedirs(output_folder)
-    else:
-        raise IOError("The directory {} already exists.  Please check and remove by hand.".format(output_folder))
+    #if not os.path.exists(output_folder):
+    #    os.makedirs(output_folder)
+    #else:
+    #    raise IOError("The directory {} already exists.  Please check and remove by hand.".format(output_folder))
     # Create a dictionary containing the bam-file paths for each sample and tell if data is phased or unphased
     sample_bam_dict, input_type = get_bam_path_dict(input_dir)
     if input_type == 'unphased':
 
@@ -19,7 +19,7 @@
 from Bio import AlignIO
 from Bio.Alphabet import IUPAC, Gapped
 
-from .generic_align import GenericAlign
+from secapr.generic_align import GenericAlign
 
 
 class Align(GenericAlign):
 
@@ -19,7 +19,7 @@
 from Bio import AlignIO
 from Bio.Alphabet import IUPAC, Gapped
 
-from .generic_align import GenericAlign
+from secapr.generic_align import GenericAlign
 
 
 class Align(GenericAlign):
 
@@ -16,8 +16,10 @@
 import subprocess
 import pickle
 from Bio import SeqIO
-from .utils import CompletePath
-from .reference_assembly import bam_consensus, join_fastas
+from secapr.utils import CompletePath
+from secapr.reference_assembly import bam_consensus, join_fastas
+from secapr.helpers import CreateDir
+
 
 # Get arguments
 def add_arguments(parser):
@@ -31,7 +33,7 @@ def add_arguments(parser):
 	parser.add_argument(
 		'--output',
 		required=True,
-		action=CompletePath,
+		action=CreateDir,
 		default=None,
 		help='The output directory where results will be safed.'
 	)
@@ -91,8 +93,8 @@ def phase_bam(sorted_bam_file,sample_output_folder,min_cov,reference):
 	allele_1_sorted_file = "%s.bam" %allele_1_sorted_base
 
 	# Sorting phased bam files:
-	sort_phased_0 = "samtools sort %s %s" %(allele_0_file,allele_0_sorted_base)
-	sort_phased_1 = "samtools sort %s %s" %(allele_1_file,allele_1_sorted_base)
+	sort_phased_0 = "samtools sort -o %s %s" %(allele_0_sorted_file,allele_0_file)
+	sort_phased_1 = "samtools sort -o %s %s" %(allele_1_sorted_file,allele_1_file)
 	#sort_phased_0 = "samtools sort -o %s %s" %(allele_0_sorted_file, allele_0_file)
 	#sort_phased_1 = "samtools sort -o %s %s" %(allele_1_sorted_file,allele_1_file)
 	os.system(sort_phased_0)
@@ -150,14 +152,11 @@ def manage_homzygous_samples(fasta_dir, sample_id):
 
 
 def main(args):
+	print('\n')
 	min_cov = args.min_coverage
 	reference = args.reference
 	# Set working directory
 	out_dir = args.output
-	if not os.path.exists(out_dir):
-		os.makedirs(out_dir)
-	else:
-		raise IOError("The directory {} already exists.  Please check and remove by hand.".format(out_dir))
 	input_folder = args.input
 	sample_out_list = []
 	# Iterate through all sample specific subfolders
@@ -179,7 +178,7 @@ def main(args):
 					if file.endswith("sorted.bam"):
 						sorted_bam = file
 						sorted_bam_path = os.path.join(subfolder_path,sorted_bam)
-						print(("#" * 50))
+						print(('\n'+"#" * 50))
 						print(('Processing sample %s' %sample))
 						allele_fastas = phase_bam(sorted_bam_path,sample_output_folder,min_cov,reference)
 
@@ -198,4 +197,5 @@ def main(args):
 							os.remove(os.path.join(allele_fastas,allele0))
 							os.remove(os.path.join(allele_fastas,allele1))
 
-	join_fastas(out_dir,sample_out_list)
+	join_fastas(out_dir,sample_out_list)
+	print('\n')
Original file line number	Diff line number	Diff line change
`@@ -8,11 +8,11 @@`
`8`	`8`
`9`	`9`	`version_json = '''`
`10`	`10`	`{`
`11`		`- "date": "2020-09-02T23:47:50+0200",`
	`11`	`+ "date": "2020-09-03T08:40:33+0200",`
`12`	`12`	`"dirty": true,`
`13`	`13`	`"error": null,`
`14`		`- "full-revisionid": "d0c7a485eca91ca359c58fe38fb3f53d53f6851d",`
`15`		`- "version": "1.1.12+41.gd0c7a48.dirty"`
	`14`	`+ "full-revisionid": "6c64e8334194adde121523290caf950c97360619",`
	`15`	`+ "version": "1.1.12+43.g6c64e83.dirty"`
`16`	`16`	`}`
`17`	`17`	`''' # END VERSION_JSON`
`18`	`18`
Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ def add_arguments(parser):`
`39`	`39`	`)`
`40`	`40`	`parser.add_argument(`
`41`	`41`	`'--assembler',`
`42`		`- choices=["spades","abyss","trinity"],`
	`42`	`+ choices=["spades","abyss"],# trinity support discontinued`
`43`	`43`	`default="spades",`
`44`	`44`	`help="""The assembler to use (default = spades)."""`
`45`	`45`	`)`
`@@ -57,7 +57,7 @@ def add_arguments(parser):`
`57`	`57`	`parser.add_argument(`
`58`	`58`	`'--max_memory',`
`59`	`59`	`type=str,`
`60`		`- help='Set the maximum memory to be used during assembly in GB (only available for Spades and Trinity). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'`
	`60`	`+ help='Set the maximum memory to be used during assembly in GB (only available for Spades). This can be necessary when working with computing nodes with limited memory or to avoid over-allocation of computing resources on clusters which can in some cases cause your assembly to be stopped or interrupted.'`
`61`	`61`	`)`
`62`	`62`	`parser.add_argument(`
`63`	`63`	`'--single_reads',`