Skip to content

Commit 9d91463

Browse files
authored
Merge pull request #65 from fulaibaowang/fix_after_real_data
Fix after real data
2 parents 31993b6 + 3773abd commit 9d91463

File tree

8 files changed

+62
-30
lines changed

8 files changed

+62
-30
lines changed

assets/samplesheet.tsv

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ RNA_ID RNA_BAM_FILE RNA_BAI_FILE DNA_VCF_FILE DNA_TBI_FILE DNA_ID DROP_GROUP PAI
22
HG00096 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00096_ncbi.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00096_ncbi.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21_ncbi.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21_ncbi.vcf.gz.tbi HG00096 outrider,fraser,mae,batch_0 TRUE IntersectionStrict TRUE no HP:0009802,HP:0010896 ncbi
33
HG00103 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00103.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00103.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00103 outrider,fraser,mae,batch_1 TRUE IntersectionStrict TRUE no HP:0004582,HP:0031959 ucsc
44
HG00106 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00106.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00106.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00106 outrider,outrider_external,fraser,fraser_external,mae,batch_1 TRUE IntersectionStrict TRUE no HP:0002895,HP:0006731 ucsc
5-
HG00111 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00111 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871
6-
HG00116 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00116 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767
7-
HG00126 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00126 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293
8-
HG00132 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00132 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490
9-
HG00149 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00149 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663
10-
HG00150 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00150 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144 v29
11-
HG00176 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00176.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz HG00176 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234 v29
5+
HG00111 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00111.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00111 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0100491,HP:0100871 ucsc
6+
HG00116 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00116.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00116 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030613,HP:0012767 ucsc
7+
HG00126 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00126.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00126 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000290,HP:0000293 ucsc
8+
HG00132 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00132.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00132 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0006489,HP:0006490 ucsc
9+
HG00149 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00149.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00149 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0000014,HP:0000020,HP:0032663 ucsc
10+
HG00150 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00150.bam.bai https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz.tbi HG00150 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0030809,HP:0006144 v29 ucsc
11+
HG00176 https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/rna_bam/HG00176.bam https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/dna_vcf/demo_chr21.vcf.gz HG00176 outrider,outrider_external,fraser,fraser_external TRUE IntersectionStrict TRUE no HP:0005215,HP:0010234 v29 ucsc
1212
HG00178 outrider_external no https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/external_count_data/geneCounts.tsv.gz v29
1313
HG00181 outrider_external no https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/external_count_data/geneCounts.tsv.gz v29
1414
HG00191 fraser_external no https://github.com/nf-core/test-datasets/raw/refs/heads/drop/data/inputs/external_count_data.tar.gz

assets/schema_input.json

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"type": "object",
99
"properties": {
1010
"RNA_ID": {
11-
"type": "string",
11+
"type": ["string", "integer"],
1212
"pattern": "^\\S+$",
1313
"errorMessage": "RNA ID must be provided and cannot contain spaces",
1414
"description": "Unique identifier for the RNA sample. Must not contain spaces.",
@@ -31,7 +31,7 @@
3131
"description": "Path to the RNA BAM index file. Must exist and cannot contain spaces."
3232
},
3333
"DNA_ID": {
34-
"type": "string",
34+
"type": ["string", "integer"],
3535
"pattern": "^\\S+$",
3636
"errorMessage": "DNA ID must be provided and cannot contain spaces",
3737
"meta": ["dna_id"],
@@ -82,7 +82,7 @@
8282
},
8383
"STRAND": {
8484
"type": "string",
85-
"description": "Strand of the RNA-seq data",
85+
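"description": "Strand of the RNA-seq data. Must be consistent within each DROP_GROUP: unstranded ('no') samples cannot be mixed with stranded ('yes'/'reverse') samples",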
8686
"enum": ["yes", "no", "reverse"],
8787
"meta": ["strand"],
8888
"default": "no"
@@ -111,7 +111,7 @@
111111
"description": "Genome source",
112112
"meta": ["genome"],
113113
"enum": ["ncbi", "ucsc"],
114-
"default": "ncbi"
114+
"default": "ucsc"
115115
},
116116
"SPLICE_COUNTS_DIR": {
117117
"type": "string",
@@ -123,8 +123,7 @@
123123
},
124124
"SEX": {
125125
"type": "string",
126-
"enum": ["m", "male", "f", "female"],
127-
"description": "The sex of the sample",
126+
"description": "The sex of the sample. Samples with 'm', 'male', 'f' or 'female' will be analysed for the sex-bias aberrant expression report",
128127
"meta": "sex"
129128
},
130129
"TISSUE": {
@@ -149,14 +148,15 @@
149148
{
150149
"required": ["SPLICE_COUNTS_DIR"]
151150
}
152-
]
151+
],
152+
"dependentRequired": {
153+
"DNA_ID": ["DNA_VCF_FILE", "GENOME"],
154+
"DNA_VCF_FILE": ["DNA_ID"]
155+
}
153156
},
154157
"allOf": [
155158
{
156159
"uniqueEntries": ["RNA_ID"]
157-
},
158-
{
159-
"uniqueEntries": ["DNA_ID"]
160160
}
161161
]
162162
}

main.nf

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,20 +68,20 @@ workflow {
6868

6969
// Use the UCSC FASTA and FAI files if provided, otherwise use the NCBI FASTA and FAI files
7070
def fasta = params.ucsc_fasta ? Channel.value([[id: 'fasta'], file(params.ucsc_fasta)]) :
71-
params.ncbi_fasta ? Channel.value([[id: 'fasta'], file(params.ncbi_fasta)]) : [[:], []]
71+
params.ncbi_fasta ? Channel.value([[id: 'fasta'], file(params.ncbi_fasta)]) : Channel.empty()
7272
def fai = params.ucsc_fai ? Channel.value([[id: 'fai'], file(params.ucsc_fai)]) :
73-
params.ncbi_fai ? Channel.value([[id: 'fai'], file(params.ncbi_fai)]) : [[:], []]
73+
params.ncbi_fai ? Channel.value([[id: 'fai'], file(params.ncbi_fai)]) : Channel.empty()
7474

75-
def ucsc_fasta = params.ucsc_fasta ? Channel.value([[id: 'ucsc'], file(params.ucsc_fasta)]) : [[:], []]
76-
def ucsc_fai = params.ucsc_fai ? Channel.value([[id: 'ucsc'], file(params.ucsc_fai)]) : [[:], []]
77-
def ncbi_fasta = params.ncbi_fasta ? Channel.value([[id: 'ncbi'], file(params.ncbi_fasta)]) : [[:], []]
78-
def ncbi_fai = params.ncbi_fai ? Channel.value([[id: 'ncbi'], file(params.ncbi_fai)]) : [[:], []]
75+
def ucsc_fasta = params.ucsc_fasta ? Channel.value([[id: 'ucsc'], file(params.ucsc_fasta)]) : Channel.of([[id:'ucsc'], []])
76+
def ucsc_fai = params.ucsc_fai ? Channel.value([[id: 'ucsc'], file(params.ucsc_fai)]) : Channel.of([[id:'ucsc'], []])
77+
def ncbi_fasta = params.ncbi_fasta ? Channel.value([[id: 'ncbi'], file(params.ncbi_fasta)]) : Channel.of([[id:'ncbi'], []])
78+
def ncbi_fai = params.ncbi_fai ? Channel.value([[id: 'ncbi'], file(params.ncbi_fai)]) : Channel.of([[id:'ncbi'], []])
7979

8080
def qc_vcf = params.mae_qc_vcf ?
8181
Channel.value([[id: 'qc_vcf'], file(params.mae_qc_vcf), params.mae_qc_vcf_tbi ? file(params.mae_qc_vcf_tbi) : []]) :
8282
[[:], [], []]
8383

84-
def ucsc_dict = Channel.empty()
84+
def ucsc_dict = Channel.of([[id:'ucsc'], []])
8585
if (params.ucsc_dict) {
8686
ucsc_dict = Channel.value([[id: 'ucsc'], file(params.ucsc_dict)])
8787
} else if (params.ucsc_fasta) {
@@ -91,7 +91,7 @@ workflow {
9191
ucsc_dict = GATK4_CREATESEQUENCEDICTIONARY_UCSC.out.dict.collect()
9292
}
9393

94-
def ncbi_dict = Channel.empty()
94+
def ncbi_dict = Channel.of([[id:'ncbi'], []])
9595
if (params.ncbi_dict) {
9696
ncbi_dict = Channel.value([[id: 'ncbi'], file(params.ncbi_dict)])
9797
} else if (params.ncbi_fasta) {

modules/local/mae/createsnvs/main.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ process MAE_CREATESNVS {
88
'community.wave.seqera.io/library/bcftools_gatk4_htslib_samtools:255ed784054aa652' }"
99

1010
input:
11-
tuple val(meta), path(vcf), path(tbi), path(bam), path(bai), val(id)
11+
tuple val(meta), path(vcf), path(tbi), path(bam), path(bai), val(dna_id)
1212
path(ncbi2ucsc)
1313
path(ucsc2ncbi)
1414

@@ -28,7 +28,7 @@ process MAE_CREATESNVS {
2828
${ncbi2ucsc} \\
2929
${ucsc2ncbi} \\
3030
${vcf} \\
31-
${id} \\
31+
${dna_id} \\
3232
${bam} \\
3333
${prefix}.vcf.gz \\
3434
bcftools \\

modules/local/maeqc/dnarnamatrix/templates/create_matrix_dna_rna_cor.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ rna_samples <- as.character(c(${rna_ids.collect { "\"$it\"" }.join(', ')}))
2222
mae_res <- c(${res.collect { "\"$it\"" }.join(', ')})
2323

2424
rows_in_group <- sapply(strsplit(sa\$DROP_GROUP, ',|, '), function(d) "$drop_group" %in% d)
25-
vcf_cols <- sa[rows_in_group, .(DNA_ID, DNA_VCF_FILE)] %>% unique
26-
dna_samples <- vcf_cols\$DNA_ID
25+
vcf_cols <- sa[rows_in_group, .(DNA_ID, DNA_VCF_FILE)]
26+
dna_samples <- sort(vcf_cols\$DNA_ID)
2727
vcf_files <- c(${vcfs.collect { "\"$it\"" }.join(', ')})
2828

2929
# Read all RNA genotypes into a list

nextflow_schema.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"genome": {
4040
"type": "string",
4141
"description": "Name of iGenomes reference. Equivalent to the `genomeAssembly` parameter in the snakemake pipeline.",
42+
"enum": ["hg19", "hs37d5", "hg38", "GRCh38"],
4243
"fa_icon": "fas fa-book",
4344
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
4445
},

subworkflows/local/mae/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ workflow MAE {
5353
vcf && tbi && bam && bai
5454
}
5555
.map { meta, vcf, tbi, bam, bai ->
56-
[ meta, vcf, tbi, bam, bai, meta.id ]
56+
[ meta, vcf, tbi, bam, bai, meta.dna_id ]
5757
}
5858

5959
MAE_CREATESNVS(

subworkflows/local/utils_nfcore_drop_pipeline/main.nf

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ workflow PIPELINE_INITIALISATION {
8888

8989
def samplesheet_list = samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")
9090

91+
// Enforce consistent strandedness per DROP_GROUP (no mixing 'no' with 'yes'/'reverse')
92+
validateGroupStrandedness(samplesheet_list)
93+
9194
def group_counts = [:]
9295
samplesheet_list.each { it ->
9396
def groups = it[0].drop_group.tokenize(",")
@@ -121,6 +124,8 @@ workflow PIPELINE_INITIALISATION {
121124
def ch_samplesheet = Channel.fromList(samplesheet_list)
122125
.map { meta, rna_bam, rna_bai, dna_vcf, dna_tbi, gene_counts, splice_counts ->
123126
def new_meta = meta + [
127+
id: meta.id as String,
128+
dna_id: meta.dna_id as String,
124129
// Add counts for combination of drop group and gene annotation
125130
drop_group_ann_counts:group_annotation_counts,
126131
// Add counts for drop group
@@ -303,3 +308,29 @@ def methodsDescriptionText(mqc_methods_yaml) {
303308

304309
return description_html.toString()
305310
}
311+
312+
// Validate that strandedness is consistent inside every DROP_GROUP of the samplesheet.
313+
// Within each DROP_GROUP, samples must be either all unstranded ('no') or all stranded ('yes'/'reverse'). Mixing is not allowed.
314+
// Calls error() naming the offending groups (sorted) when a mix is found; any other strand value sets neither flag.
315+
def validateGroupStrandedness(List samplesheet_list) {
316+
// Per-group flags, e.g. [group: [hasNo:bool, hasStranded:bool]]; withDefault supplies a fresh flag map the first time a group is looked up
317+
def flags = [:].withDefault { [hasNo:false, hasStranded:false] }
318+
319+
samplesheet_list.each { meta, _bam, _bai, _vcf, _tbi, _gene_counts, _splice_counts -> // only meta is read; the other tuple fields are unused here
320+
def s = meta.strand
321+
def groups = meta.drop_group.tokenize(',') // a sample can list several comma-separated groups
322+
groups.each { g ->
323+
if (s == 'no') {
324+
flags[g].hasNo = true
325+
} else if (s == 'yes' || s == 'reverse') {
326+
flags[g].hasStranded = true
327+
}
328+
}
329+
}
330+
331+
def offending = flags.findAll { k, v -> v.hasNo && v.hasStranded }.keySet().sort() // groups that saw both an unstranded and a stranded sample
332+
if (offending && !offending.isEmpty()) { // NOTE(review): an empty list is already falsy in Groovy, so the isEmpty() check looks redundant
333+
error("Samples within each DROP_GROUP must be consistently stranded or unstranded. " +
334+
"Mixed strandedness found in: ${offending.join(', ')}. Please analyze these groups separately.")
335+
}
336+
}

0 commit comments

Comments
 (0)