Skip to content

Commit c7e0d23

Browse files
committed
Restore validation of SRA ids and ENA metadata fields
Signed-off-by: Ben Sherman <[email protected]>
1 parent b9174ae commit c7e0d23

File tree

2 files changed

+39
-30
lines changed

2 files changed

+39
-30
lines changed

main.nf

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,9 @@ include { Sample } from './workflows/sra'
3232

3333
params {
3434

35-
// TODO: declare as Set<SraId> and construct SraId with isSraId()
3635
// File listing SRA/ENA/GEO/DDBJ identifiers (one per line) whose associated metadata and FastQ files are to be downloaded
37-
input: Set<String>
36+
input: Path
3837

39-
// TODO: declare as EnaMetadataFields and construct with sraCheckENAMetadataFields()
4038
// Comma-separated list of ENA metadata fields to fetch before downloading data.
4139
ena_metadata_fields: String = ''
4240

@@ -47,7 +45,7 @@ params {
4745
skip_fastq_download: Boolean = false
4846

4947
// dbGaP repository key.
50-
dbgap_key: Path? = null
48+
dbgap_key: Path?
5149

5250
}
5351

@@ -63,20 +61,22 @@ workflow {
6361
//
6462
// SUBWORKFLOW: Run initialisation tasks
6563
//
66-
PIPELINE_INITIALISATION (
64+
ids = PIPELINE_INITIALISATION (
6765
params.version,
6866
params.help,
6967
params.validate_params,
7068
params.monochrome_logs,
7169
args,
72-
workflow.outputDir
70+
workflow.outputDir,
71+
params.input,
72+
params.ena_metadata_fields
7373
)
7474

7575
//
7676
// WORKFLOW: Run primary workflows for the pipeline
7777
//
7878
sra = SRA (
79-
Channel.fromList(params.input),
79+
Channel.fromList(ids),
8080
[
8181
ena_metadata_fields: params.ena_metadata_fields,
8282
download_method: params.download_method,

subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
2929
workflow PIPELINE_INITIALISATION {
3030

3131
take:
32-
version : boolean // Display version and exit
33-
help : boolean // Display help text
34-
validate_params : boolean // Validate parameters against the schema at runtime
35-
monochrome_logs : boolean // Do not use coloured log outputs
36-
nextflow_cli_args : List // List of positional nextflow CLI args
37-
outdir : String // The output directory where the results will be saved
32+
version : boolean // Display version and exit
33+
help : boolean // Display help text
34+
validate_params : boolean // Validate parameters against the schema at runtime
35+
monochrome_logs : boolean // Do not use coloured log outputs
36+
nextflow_cli_args : List<String> // List of positional nextflow CLI args
37+
outdir : String // The output directory where the results will be saved
38+
input : Path // File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files
39+
ena_metadata_fields : String // Comma-separated list of ENA metadata fields to fetch before downloading data
3840

3941
main:
4042

@@ -69,6 +71,23 @@ workflow PIPELINE_INITIALISATION {
6971
UTILS_NFCORE_PIPELINE (
7072
nextflow_cli_args
7173
)
74+
75+
//
76+
// Read the unique ids from the input file and check that they are SRA / ENA / GEO / DDBJ ids
77+
//
78+
ids = file(input)
79+
.splitCsv(header:false, sep:'', strip:true)
80+
.collect { row -> row[0] }
81+
.toUnique()
82+
if (!isSraId(ids)) {
83+
error('Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!')
84+
}
85+
if (!sraCheckENAMetadataFields(ena_metadata_fields)) {
86+
error("Invalid option: '${ena_metadata_fields}'. Minimally required fields for '--ena_metadata_fields': '${valid_ena_metadata_fields.join(',')}'")
87+
}
88+
89+
emit:
90+
ids
7291
}
7392

7493
/*
@@ -118,39 +137,29 @@ workflow PIPELINE_COMPLETION {
118137
//
119138
// Check that every input id is a recognised SRA / ENA / GEO / DDBJ id
120139
//
121-
def isSraId(input: Path) -> boolean {
122-
def is_sra = false
140+
def isSraId(ids: List<String>) -> boolean {
123141
def total_ids = 0
124142
def no_match_ids = []
125143
def pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/
126-
input.eachLine { line ->
144+
ids.each { id ->
127145
total_ids += 1
128-
if (!(line =~ pattern)) {
129-
no_match_ids << line
146+
if (!(id =~ pattern)) {
147+
no_match_ids << id
130148
}
131149
}
132150

133151
def num_match = total_ids - no_match_ids.size()
134-
if (num_match > 0) {
135-
if (num_match == total_ids) {
136-
is_sra = true
137-
} else {
138-
error("Mixture of ids provided via --input: ${no_match_ids.join(', ')}\nPlease provide either SRA / ENA / GEO / DDBJ ids!")
139-
}
140-
}
141-
return is_sra
152+
return num_match > 0 && num_match == total_ids
142153
}
143154

144155
//
145156
// Check and validate parameters
146157
//
147-
def sraCheckENAMetadataFields(ena_metadata_fields) {
158+
def sraCheckENAMetadataFields(ena_metadata_fields: List<String>) -> boolean {
148159
// Check minimal ENA fields are provided to download FastQ files
149160
def valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5']
150161
def actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect{ it.trim().toLowerCase() } : valid_ena_metadata_fields
151-
if (!actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)) {
152-
error("Invalid option: '${ena_metadata_fields}'. Minimally required fields for '--ena_metadata_fields': '${valid_ena_metadata_fields.join(',')}'")
153-
}
162+
return actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)
154163
}
155164

156165
//

0 commit comments

Comments
 (0)