@@ -29,12 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
29
29
workflow PIPELINE_INITIALISATION {
30
30
31
31
take :
32
- version : boolean // Display version and exit
33
- help : boolean // Display help text
34
- validate_params : boolean // Validate parameters against the schema at runtime
35
- monochrome_logs : boolean // Do not use coloured log outputs
36
- nextflow_cli_args : List // List of positional nextflow CLI args
37
- outdir : String // The output directory where the results will be saved
32
+ version : boolean // Display version and exit
33
+ help : boolean // Display help text
34
+ validate_params : boolean // Validate parameters against the schema at runtime
35
+ monochrome_logs : boolean // Do not use coloured log outputs
36
+ nextflow_cli_args : List<String > // List of positional nextflow CLI args
37
+ outdir : String // The output directory where the results will be saved
38
+ input : Path // File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files
39
+ ena_metadata_fields : String // Comma-separated list of ENA metadata fields to fetch before downloading data
38
40
39
41
main :
40
42
@@ -69,6 +71,23 @@ workflow PIPELINE_INITIALISATION {
69
71
UTILS_NFCORE_PIPELINE (
70
72
nextflow_cli_args
71
73
)
74
+
75
+ //
76
+ // Auto-detect input id type
77
+ //
78
+ ids = file(input)
79
+ .splitCsv(header :false , sep :' ' , strip :true )
80
+ .collect { row -> row[0 ] }
81
+ .toUnique()
82
+ if (! isSraId(ids)) {
83
+ error(' Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!' )
84
+ }
85
+ if (! sraCheckENAMetadataFields(ena_metadata_fields)) {
86
+ error(" Invalid option: '${ ena_metadata_fields} '. Minimally required fields for '--ena_metadata_fields': '${ valid_ena_metadata_fields.join(',')} '" )
87
+ }
88
+
89
+ emit :
90
+ ids
72
91
}
73
92
74
93
/*
@@ -118,39 +137,29 @@ workflow PIPELINE_COMPLETION {
118
137
//
119
138
// Check if input ids are from the SRA
120
139
//
121
/**
 * Decide whether every supplied identifier looks like an SRA / ENA / GEO / DDBJ id.
 *
 * @param ids identifiers to test, one id per list element
 * @return true only when the list is non-empty and every element matches the
 *         accession pattern; false for an empty list or when any element fails
 */
def isSraId(ids : List<String>) -> boolean {
    // One of the known accession prefixes followed by one or more digits, nothing else
    def pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/

    // Collect the ids that do not look like an accession
    def no_match_ids = ids.findAll { id -> !(id =~ pattern) }

    // An empty list is not a valid set of ids; otherwise every id must have matched
    return ids.size() > 0 && no_match_ids.size() == 0
}
143
154
144
155
//
145
156
// Check and validate parameters
146
157
//
147
/**
 * Check that the requested ENA metadata fields include the minimal set needed
 * to download FastQ files.
 *
 * @param ena_metadata_fields comma-separated field names; when null or empty the
 *        minimal required set is used, so the check passes
 * @return true when every minimally required field is present (field names are
 *         trimmed and lower-cased before comparison), false otherwise
 */
def sraCheckENAMetadataFields(ena_metadata_fields : String) -> boolean {
    // Check minimal ENA fields are provided to download FastQ files
    def valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5']
    // Normalise user input so ' Run_Accession ' and 'run_accession' compare equal
    def actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect { field -> field.trim().toLowerCase() } : valid_ena_metadata_fields
    return actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)
}
155
164
156
165
//
0 commit comments