Skip to content

Commit 9bb4e36

Browse files
committed
Replace download method guessing with sum-of-product type
Signed-off-by: Ben Sherman <[email protected]>
1 parent c7e0d23 commit 9bb4e36

File tree

3 files changed

+30
-48
lines changed

3 files changed

+30
-48
lines changed

main.nf

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ include { SRA } from './workflows/sra'
2121
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
2222
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
2323
include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline'
24-
include { DownloadMethod } from './workflows/sra'
2524
include { Sample } from './workflows/sra'
2625

2726
/*
@@ -38,9 +37,6 @@ params {
3837
// Comma-separated list of ENA metadata fields to fetch before downloading data.
3938
ena_metadata_fields: String = ''
4039

41-
// Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.
42-
download_method: DownloadMethod = 'ftp'
43-
4440
// Only download metadata for public data database ids and don't download the FastQ files.
4541
skip_fastq_download: Boolean = false
4642

@@ -79,7 +75,6 @@ workflow {
7975
Channel.fromList(ids),
8076
[
8177
ena_metadata_fields: params.ena_metadata_fields,
82-
download_method: params.download_method,
8378
skip_fastq_download: params.skip_fastq_download,
8479
dbgap_key: params.dbgap_key
8580
]

nextflow_schema.json

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,6 @@
2828
"description": "Comma-separated list of ENA metadata fields to fetch before downloading data.",
2929
"help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)."
3030
},
31-
"download_method": {
32-
"type": "string",
33-
"default": "ftp",
34-
"fa_icon": "fas fa-download",
35-
"enum": ["aspera", "ftp", "sratools"],
36-
"description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.",
37-
"help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ."
38-
},
3931
"skip_fastq_download": {
4032
"type": "boolean",
4133
"fa_icon": "fas fa-fast-forward",

workflows/sra/main.nf

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -41,26 +41,22 @@ workflow SRA {
4141
//
4242
runinfo_ftp = runinfo.map(SRA_RUNINFO_TO_FTP)
4343

44-
// TODO: define record type(s) for expected meta-map
45-
sra_metadata = runinfo_ftp.scatter { tsv ->
46-
tsv.splitCsv(header:true, sep:'\t').unique()
47-
}
44+
sra_metadata = runinfo_ftp
45+
.scatter { tsv -> tsv.splitCsv(header:true, sep:'\t').unique() }
46+
.map { meta -> meta as SraMetadata }
4847

4948
//
5049
// MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
5150
//
5251
ftp_samples = sra_metadata
53-
.filter { meta ->
54-
!skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP
55-
}
52+
.filter { meta -> !skip_fastq_download && meta instanceof SraMetadata.Ftp }
5653
.map(SRA_FASTQ_FTP)
5754

5855
//
5956
// SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
6057
//
61-
sratools_metadata = sra_metadata.filter { meta ->
62-
!skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS
63-
}
58+
sratools_metadata = sra_metadata
59+
.filter { meta -> !skip_fastq_download && meta instanceof SraMetadata.Sratools }
6460

6561
sratools_samples = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
6662
sratools_metadata,
@@ -71,9 +67,7 @@ workflow SRA {
7167
// MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums
7268
//
7369
aspera_samples = sra_metadata
74-
.filter { meta ->
75-
!skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA
76-
}
70+
.filter { meta -> !skip_fastq_download && meta instanceof SraMetadata.Aspera }
7771
.map(ASPERA_CLI, user: 'era-fasp')
7872

7973
samples = ftp_samples
@@ -85,23 +79,6 @@ workflow SRA {
8579
metadata : Channel<Path> = runinfo_ftp
8680
}
8781

88-
/*
89-
========================================================================================
90-
FUNCTIONS
91-
========================================================================================
92-
*/
93-
94-
def getDownloadMethod(meta: Map<String,String>, userMethod: DownloadMethod) -> DownloadMethod {
95-
// meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera
96-
// For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz'
97-
// For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz'
98-
if (meta.fastq_aspera && userMethod == DownloadMethod.ASPERA)
99-
return DownloadMethod.ASPERA
100-
if ((!meta.fastq_aspera && !meta.fastq_1) || userMethod == DownloadMethod.SRATOOLS)
101-
return DownloadMethod.SRATOOLS
102-
return DownloadMethod.FTP
103-
}
104-
10582
/*
10683
========================================================================================
10784
TYPES
@@ -110,15 +87,33 @@ def getDownloadMethod(meta: Map<String,String>, userMethod: DownloadMethod) -> D
11087

11188
record SraParams {
11289
ena_metadata_fields : String
113-
download_method : DownloadMethod
11490
skip_fastq_download : boolean
11591
dbgap_key : Path?
11692
}
11793

118-
enum DownloadMethod {
119-
ASPERA,
120-
FTP,
121-
SRATOOLS
94+
enum SraMetadata {
95+
// meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera
96+
// For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz'
97+
// For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz'
98+
Aspera {
99+
id : String
100+
single_end : Boolean
101+
fastq_aspera : String
102+
md5_1 : String
103+
md5_2 : String?
104+
}
105+
Ftp {
106+
id : String
107+
single_end : Boolean
108+
fastq_1 : String
109+
fastq_2 : String?
110+
md5_1 : String
111+
md5_2 : String?
112+
}
113+
Sratools {
114+
id : String
115+
single_end : Boolean
116+
}
122117
}
123118

124119
record Sample {

0 commit comments

Comments
 (0)