Skip to content

Commit 3ac6da4

Browse files
authored
Merge branch 'dev' into fix/formatting
2 parents 83623cb + 9d91463 commit 3ac6da4

File tree

8 files changed

+60
-28
lines changed

8 files changed

+60
-28
lines changed

assets/samplesheet.tsv

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ RNA_ID RNA_BAM_FILE RNA_BAI_FILE DNA_VCF_FILE DNA_TBI_FILE DNA_ID DROP_GROUP PAI
22
HG00096 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00096_ncbi.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00096_ncbi.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21_ncbi.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21_ncbi.vcf.gz.tbi HG00096 outrider,fraser,mae,batch_0 TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896 ncbi
33
HG00103 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00103.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00103.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00103 outrider,fraser,mae,batch_1 TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959 ucsc
44
HG00106 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00106.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00106.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00106 outrider,outrider_external,fraser,fraser_external,mae,batch_1 TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731 ucsc
5-
HG00111 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00111 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
6-
HG00116 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00116 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
7-
HG00126 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00126 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
8-
HG00132 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00132 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
9-
HG00149 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00149 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
10-
HG00150 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00150 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144 v29
11-
HG00176 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00176.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz HG00176 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234 v29
5+
HG00111 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00111 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871 ucsc
6+
HG00116 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00116 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767 ucsc
7+
HG00126 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00126 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293 ucsc
8+
HG00132 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00132 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490 ucsc
9+
HG00149 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00149 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663 ucsc
10+
HG00150 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00150 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144 v29 ucsc
11+
HG00176 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00176.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz HG00176 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234 v29 ucsc
1212
HG00178 outrider_external no https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/external_count_data/geneCounts.tsv.gz v29
1313
HG00181 outrider_external no https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/external_count_data/geneCounts.tsv.gz v29
1414
HG00191 fraser_external no https://github.com/nf-core/test-datasets/raw/refs/heads/drop/data/inputs/external_count_data.tar.gz

assets/schema_input.json

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"type": "object",
99
"properties": {
1010
"RNA_ID": {
11-
"type": "string",
11+
"type": ["string", "integer"],
1212
"pattern": "^\\S+$",
1313
"errorMessage": "RNA ID must be provided and cannot contain spaces",
1414
"description": "Unique identifier for the RNA sample. Must not contain spaces.",
@@ -31,7 +31,7 @@
3131
"description": "Path to the RNA BAM index file. Must exist and cannot contain spaces."
3232
},
3333
"DNA_ID": {
34-
"type": "string",
34+
"type": ["string", "integer"],
3535
"pattern": "^\\S+$",
3636
"errorMessage": "DNA ID must be provided and cannot contain spaces",
3737
"meta": ["dna_id"],
@@ -82,7 +82,7 @@
8282
},
8383
"STRAND": {
8484
"type": "string",
85-
"description": "Strand of the RNA-seq data",
85+
"description": "Strand of the RNA-seq data. Within a DROP_GROUP, unstranded ('no') samples must not be mixed with stranded ('yes'/'reverse') samples",
8686
"enum": ["yes", "no", "reverse"],
8787
"meta": ["strand"],
8888
"default": "no"
@@ -111,7 +111,7 @@
111111
"description": "Genome source",
112112
"meta": ["genome"],
113113
"enum": ["ncbi", "ucsc"],
114-
"default": "ncbi"
114+
"default": "ucsc"
115115
},
116116
"SPLICE_COUNTS_DIR": {
117117
"type": "string",
@@ -123,8 +123,7 @@
123123
},
124124
"SEX": {
125125
"type": "string",
126-
"enum": ["m", "male", "f", "female"],
127-
"description": "The sex of the sample",
126+
"description": "The sex of the sample. Samples with 'm', 'male', 'f' or 'female' will be analysed in the sex-bias aberrant expression report",
128127
"meta": "sex"
129128
},
130129
"TISSUE": {
@@ -149,14 +148,15 @@
149148
{
150149
"required": ["SPLICE_COUNTS_DIR"]
151150
}
152-
]
151+
],
152+
"dependentRequired": {
153+
"DNA_ID": ["DNA_VCF_FILE", "GENOME"],
154+
"DNA_VCF_FILE": ["DNA_ID"]
155+
}
153156
},
154157
"allOf": [
155158
{
156159
"uniqueEntries": ["RNA_ID"]
157-
},
158-
{
159-
"uniqueEntries": ["DNA_ID"]
160160
}
161161
]
162162
}

main.nf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,16 @@ workflow {
6565
error "You must provide either a UCSC FASTA file (--ucsc_fasta) or a NCBI FASTA file (--ncbi_fasta)."
6666
}
6767

68-
def ucsc_fasta = params.ucsc_fasta ? Channel.value([[id: 'ucsc'], file(params.ucsc_fasta)]) : [[:], []]
69-
def ucsc_fai = params.ucsc_fai ? Channel.value([[id: 'ucsc'], file(params.ucsc_fai)]) : [[:], []]
70-
def ncbi_fasta = params.ncbi_fasta ? Channel.value([[id: 'ncbi'], file(params.ncbi_fasta)]) : [[:], []]
71-
def ncbi_fai = params.ncbi_fai ? Channel.value([[id: 'ncbi'], file(params.ncbi_fai)]) : [[:], []]
68+
def ucsc_fasta = params.ucsc_fasta ? Channel.value([[id: 'ucsc'], file(params.ucsc_fasta)]) : Channel.of([[id:'ucsc'], []])
69+
def ucsc_fai = params.ucsc_fai ? Channel.value([[id: 'ucsc'], file(params.ucsc_fai)]) : Channel.of([[id:'ucsc'], []])
70+
def ncbi_fasta = params.ncbi_fasta ? Channel.value([[id: 'ncbi'], file(params.ncbi_fasta)]) : Channel.of([[id:'ncbi'], []])
71+
def ncbi_fai = params.ncbi_fai ? Channel.value([[id: 'ncbi'], file(params.ncbi_fai)]) : Channel.of([[id:'ncbi'], []])
7272

7373
def qc_vcf = params.mae_qc_vcf ?
7474
Channel.value([[id: 'qc_vcf'], file(params.mae_qc_vcf), params.mae_qc_vcf_tbi ? file(params.mae_qc_vcf_tbi) : []]) :
7575
[[:], [], []]
7676

77-
def ucsc_dict = Channel.empty()
77+
def ucsc_dict = Channel.of([[id:'ucsc'], []])
7878
if (params.ucsc_dict) {
7979
ucsc_dict = Channel.value([[id: 'ucsc'], file(params.ucsc_dict)])
8080
} else if (params.ucsc_fasta) {
@@ -84,7 +84,7 @@ workflow {
8484
ucsc_dict = GATK4_CREATESEQUENCEDICTIONARY_UCSC.out.dict.collect()
8585
}
8686

87-
def ncbi_dict = Channel.empty()
87+
def ncbi_dict = Channel.of([[id:'ncbi'], []])
8888
if (params.ncbi_dict) {
8989
ncbi_dict = Channel.value([[id: 'ncbi'], file(params.ncbi_dict)])
9090
} else if (params.ncbi_fasta) {

modules/local/mae/createsnvs/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ process MAE_CREATESNVS {
88
'community.wave.seqera.io/library/bcftools_gatk4_htslib_samtools:255ed784054aa652' }"
99

1010
input:
11-
tuple val(meta), path(vcf), path(tbi), path(bam), path(bai), val(id)
11+
tuple val(meta), path(vcf), path(tbi), path(bam), path(bai), val(dna_id)
1212
path(ncbi2ucsc)
1313
path(ucsc2ncbi)
1414

@@ -28,7 +28,7 @@ process MAE_CREATESNVS {
2828
${ncbi2ucsc} \\
2929
${ucsc2ncbi} \\
3030
${vcf} \\
31-
${id} \\
31+
${dna_id} \\
3232
${bam} \\
3333
${prefix}.vcf.gz \\
3434
bcftools \\

modules/local/maeqc/dnarnamatrix/templates/create_matrix_dna_rna_cor.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ rna_samples <- as.character(c(${rna_ids.collect { "\"$it\"" }.join(', ')}))
2222
mae_res <- c(${res.collect { "\"$it\"" }.join(', ')})
2323

2424
rows_in_group <- sapply(strsplit(sa\$DROP_GROUP, ',|, '), function(d) "$drop_group" %in% d)
25-
vcf_cols <- sa[rows_in_group, .(DNA_ID, DNA_VCF_FILE)] %>% unique
26-
dna_samples <- vcf_cols\$DNA_ID
25+
vcf_cols <- sa[rows_in_group, .(DNA_ID, DNA_VCF_FILE)]
26+
dna_samples <- sort(vcf_cols\$DNA_ID)
2727
vcf_files <- c(${vcfs.collect { "\"$it\"" }.join(', ')})
2828

2929
# Read all RNA genotypes into a list

nextflow_schema.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"genome": {
4040
"type": "string",
4141
"description": "Name of iGenomes reference. Equivalent to the `genomeAssembly` parameter in the snakemake pipeline.",
42+
"enum": ["hg19", "hs37d5", "hg38", "GRCh38"],
4243
"fa_icon": "fas fa-book",
4344
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
4445
},

subworkflows/local/mae/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ workflow MAE {
5454
vcf && tbi && bam && bai
5555
}
5656
.map { meta, vcf, tbi, bam, bai ->
57-
[ meta, vcf, tbi, bam, bai, meta.id ]
57+
[ meta, vcf, tbi, bam, bai, meta.dna_id ]
5858
}
5959

6060
MAE_CREATESNVS(

subworkflows/local/utils_nfcore_drop_pipeline/main.nf

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ workflow PIPELINE_INITIALISATION {
8787

8888
def samplesheet_list = samplesheetToList(input, "${projectDir}/assets/schema_input.json")
8989

90+
// Enforce consistent strandedness per DROP_GROUP (no mixing 'no' with 'yes'/'reverse')
91+
validateGroupStrandedness(samplesheet_list)
92+
9093
def group_counts = [:]
9194
samplesheet_list.each { it ->
9295
def groups = it[0].drop_group.tokenize(",")
@@ -120,6 +123,8 @@ workflow PIPELINE_INITIALISATION {
120123
def ch_samplesheet = Channel.fromList(samplesheet_list)
121124
.map { meta, rna_bam, rna_bai, dna_vcf, dna_tbi, gene_counts, splice_counts ->
122125
def new_meta = meta + [
126+
id: meta.id as String,
127+
dna_id: meta.dna_id as String,
123128
// Add counts for combination of drop group and gene annotation
124129
drop_group_ann_counts:group_annotation_counts,
125130
// Add counts for drop group
@@ -302,3 +307,29 @@ def methodsDescriptionText(mqc_methods_yaml) {
302307

303308
return description_html.toString()
304309
}
310+
311+
//
312+
// Validate: within each DROP_GROUP, samples must be either all unstranded ('no') or all stranded ('yes'/'reverse'). Mixing is not allowed.
313+
// Each samplesheet entry is destructured into a meta map (read for `strand` and `drop_group`) plus six further values that are ignored here. Calls error() naming every group that contains both kinds.
314+
def validateGroupStrandedness(List samplesheet_list) {
315+
// Per-group flags, e.g. [group: [hasNo:bool, hasStranded:bool]]; withDefault supplies an all-false entry the first time a group is looked up.
316+
def flags = [:].withDefault { [hasNo:false, hasStranded:false] }
317+
318+
// Only `meta` is used below; the underscore-prefixed closure parameters are never read.
samplesheet_list.each { meta, _bam, _bai, _vcf, _tbi, _gene_counts, _splice_counts ->
319+
def s = meta.strand
320+
// drop_group is split on commas, so one sample can contribute to the flags of several groups.
def groups = meta.drop_group.tokenize(',')
321+
groups.each { g ->
322+
// Any strand value other than 'no', 'yes' or 'reverse' sets neither flag.
if (s == 'no') {
323+
flags[g].hasNo = true
324+
} else if (s == 'yes' || s == 'reverse') {
325+
flags[g].hasStranded = true
326+
}
327+
}
328+
}
329+
330+
// Groups that saw at least one unstranded and at least one stranded sample, sorted by name for a stable message.
def offending = flags.findAll { k, v -> v.hasNo && v.hasStranded }.keySet().sort()
331+
// NOTE(review): an empty list is already falsy in Groovy, so the isEmpty() check looks redundant.
if (offending && !offending.isEmpty()) {
332+
// error(...) is not defined in this view; presumably Nextflow's built-in that aborts the run. TODO confirm.
error("Samples within each DROP_GROUP must be consistently stranded or unstranded. " +
333+
"Mixed strandedness found in: ${offending.join(', ')}. Please analyze these groups separately.")
334+
}
335+
}

0 commit comments

Comments
 (0)