Skip to content

Commit 57348ac

Browse files
author
Tobias Hofmann
committed
bugfix trinity assembler
1 parent 375c9b0 commit 57348ac

File tree

4 files changed

+39
-18
lines changed

4 files changed

+39
-18
lines changed

recipe/meta.yaml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ package:
55
version: {{ version }}
66

77
source:
8-
fn: secapr_{{ version }}.tar.gz
98
url: https://github.com/AntonelliLab/seqcap_processor/archive/v{{ version }}.tar.gz
109
sha256: a14cea6d58154759da84c9626b17b6010626b907980e9e5bbde5ee9c2be5a3ca
1110

1211
build:
13-
skip: True # [not py27]
12+
skip: True # [not py27]
1413
number: 2
1514
script: python -m pip install --no-deps --ignore-installed .
1615
entry_points:
@@ -37,9 +36,9 @@ requirements:
3736
- lastz
3837
- mafft >=7.2
3938
- muscle
40-
- trimmomatic
39+
- trimmomatic ==0.33
4140
- abyss
42-
- trinity
41+
- trinity <=2.3.2
4342
- fastqc
4443
- pandas
4544
- numpy
@@ -57,4 +56,4 @@ about:
5756
home: 'https://github.com/AntonelliLab/seqcap_processor'
5857
license: MIT
5958
license_file: LICENSE
60-
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'
59+
summary: 'Process sequence-capture FASTQ files into alignments for phylogenetic analyses. Integrates allele phasing, producing haplotype alignments.'

secapr/assemble_reads.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def main(args):
182182

183183
def assembly_trinity(forw,backw,output_folder,id_sample,cores,min_length,max_memory):
184184
print ("De-novo assembly with Trinity of sample %s:" %id_sample)
185-
print(output_folder)
185+
#print(output_folder)
186186
command = [
187187
"Trinity",
188188
"--seqType",
@@ -204,14 +204,31 @@ def assembly_trinity(forw,backw,output_folder,id_sample,cores,min_length,max_mem
204204
"--output",
205205
output_folder
206206
]
207-
try:
208-
print ("Building contigs........")
209-
with open(os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample), 'w') as log_err_file:
210-
p = subprocess.Popen(command, stdout=log_err_file, stderr=log_err_file)
211-
p.communicate()
212-
print ("%s assembled. Trinity-stats are printed into %s" %(id_sample, os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)))
213-
except:
214-
print ("Trinity failed, maybe due to limited stack-size. Try increase stacksize with command 'zsh | ulimit -s unlimited | sh' and run again.")
207+
print ("Building contigs........")
208+
with open(os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample), 'w') as log_err_file:
209+
p = subprocess.Popen(command, stdout=log_err_file, stderr=log_err_file)
210+
p.communicate()
211+
filename = os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)
212+
file_object = open(filename, 'r')
213+
for line in file_object:
214+
if line.startswith('Error'):
215+
print(line)
216+
print ('SECAPR NOTE:\nTrinity is currently only functional in the Linux distribution of SECAPR due to Java incompatibilities.\n')
217+
#'However, the environment on MacOS machines can be easily altered by hand in order to properly run Trinity.\n',
218+
#'This might however compromise the functionality of other parts of the SECAPR pipeline, therefore we recommend undoing the changes made in the environment after using Trinity by following the instructions below.\n\n',
219+
#'In order to run the Trinity assembly on MacOS do the following:\n',
220+
#'1. within the SECAPR conda environment type: "conda install openjdk=7"\n',
221+
#'2. run the secapr assemble_reads function with Trinity (using the "--assembler trinity" flag)\n',
222+
#'3. after assembly rebuild the SECAPR default environment by typing "conda install trimmomatic=0.33"\n'
223+
sys.exit()
224+
elif line.startswith('Trinity run failed.'):
225+
print (''.join(file(filename)))
226+
print ('SECAPR NOTE:\nTrinity is currently only functional in the Linux distribution of SECAPR.\n')
227+
sys.exit()
228+
229+
print ("%s assembled. Trinity-stats are printed into %s" %(id_sample, os.path.join(output_folder, "%s_trinity_screen_out.txt" %id_sample)))
230+
#except:
231+
# print ("Trinity failed, maybe due to limited stack-size. Try increase stacksize with command 'zsh | ulimit -s unlimited | sh' and run again.")
215232

216233
def assembly_abyss(forw,backw,singlef,singleb,output_folder,id_sample,kmer,cores,args):
217234
print ("De-novo assembly with abyss of sample %s:" %id_sample)

secapr/phase_alleles.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def main(args):
158158
if os.path.isdir(path):
159159
subfolder_path = os.path.join(input_folder,subfolder)
160160
if subfolder_path.endswith('_remapped') or subfolder_path.endswith('_locus_selection'):
161-
sample = subfolder.split('_')[0]
161+
sample = '_'.join(subfolder.split('_')[:-1])
162162
sample_output_folder = os.path.join(out_dir,'%s_phased' %sample)
163163
if not os.path.exists(sample_output_folder):
164164
os.makedirs(sample_output_folder)

secapr/reference_assembly.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,13 @@ def clean_with_picard(sample_output_folder,sample_id,sorted_bam,log):
283283
"VALIDATION_STRINGENCY=LENIENT"
284284
]
285285
print ("Removing duplicate reads with Picard..........")
286-
with open(os.path.join(log, "picard_screen_out.txt"), 'w') as log_err_file:
287-
pi = subprocess.Popen(run_picard, stderr=log_err_file)
288-
pi.communicate()
286+
try:
287+
with open(os.path.join(log, "picard_screen_out.txt"), 'w') as log_err_file:
288+
pi = subprocess.Popen(run_picard, stderr=log_err_file)
289+
pi.communicate()
290+
except OSError:
291+
print('Not enough reads mapped to reference in order to run Picard. Try using the "--keep_duplicates" flag in order to avoid the use of Picard.')
292+
quit()
289293
print ("Duplicates successfully removed.")
290294
# Cleaning up a bit
291295
has_duplicates = "%s/including_duplicate_reads" %sample_output_folder
@@ -753,6 +757,7 @@ def main(args):
753757
bam_consensus_with_duplicates = bam_consensus(reference,dupl_bam,dupl_name_stem,dupl_output_folder,min_cov)
754758
join_fastas(out_dir,sample_out_list)
755759
# create file with read-coverage overview
760+
print("#" * 50)
756761
sample_bam_dict, input_type = get_bam_path_dict(out_dir)
757762
# currently only available for unphased data
758763
if input_type == 'unphased':

0 commit comments

Comments
 (0)