Merge pull request #79 from fulaibaowang/ae_skip-instead-of-run-and-documentation

nvnieuwk · web-flow · commit 6de58a64b854 · 2025-09-12T10:56:40.000+02:00
Ae skip instead of run and documentation
diff --git a/README.md b/README.md
@@ -62,9 +62,6 @@ nextflow run nf-core/drop \
    --outdir <OUTDIR> \
    --genome hg19 \
    --gene_annotation <path/to/gene/annotation/yaml> \
-   --ae_run true \
-   --as_run true \
-   --mae_run true \
    --ucsc_fasta <path/to/fasta>
 ```
 
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -35,7 +35,7 @@
                 "pattern": "^\\S+$",
                 "errorMessage": "DNA ID must be provided and cannot contain spaces",
                 "meta": ["dna_id"],
-                "description": "Unique identifier for the DNA sample. Must not contain spaces."
+                "description": "Identifier for the DNA sample. Must not contain spaces."
             },
             "DNA_VCF_FILE": {
                 "type": "string",
@@ -96,8 +96,8 @@
                 "type": "string",
                 "format": "file-path",
                 "exists": true,
-                "pattern": "^\\S+\\.tsv\\.gz$",
-                "errorMessage": "The gene counts file has to exist, cannot contain spaces and must have extension '.tsv.gz'",
+                "pattern": "^\\S+\\.tsv(\\.gz)?$",
+                "errorMessage": "The gene counts file has to exist, cannot contain spaces and must have extension '.tsv' or '.tsv.gz'",
                 "description": "Path to the gene counts file. Must exist and cannot contain spaces."
             },
             "GENE_ANNOTATION": {
diff --git a/conf/test.config b/conf/test.config
@@ -38,7 +38,6 @@ params {
     ncbi_fai = "https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/chr21_ncbi.fa.fai"
 
     // Aberrant expression
-    ae_run = true
     ae_groups = 'outrider,outrider_external'
     ae_genes_to_test = 'https://github.com/gagneurlab/drop_demo_data/raw/refs/heads/main/Data/genes_to_test.yaml'
     ae_fpkm_cutoff = 1
@@ -49,7 +48,6 @@ params {
     ae_yield_size = 2000000
 
     // Aberrant splicing
-    as_run = true
     as_groups = 'fraser,fraser_external'
     as_recount = true
     as_long_read = false
@@ -67,7 +65,6 @@ params {
     as_delta_psi_cutoff = 0.05
 
     // MAE
-    mae_run = true
     mae_groups = 'mae'
     mae_gatk_header_check = false
     mae_padj_cutoff = 0.5
diff --git a/docs/output.md b/docs/output.md
@@ -79,7 +79,7 @@ Additionally the `mae` subworkflow creates the following files:
 <summary>Output files</summary>
 
 - `multiqc/`
-  - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
+  - `multiqc_report.html`: a merged report for all subworkflows.
   - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
   - `multiqc_plots/`: directory containing static images from the report in various formats.
 
diff --git a/docs/usage.md b/docs/usage.md
@@ -6,7 +6,7 @@
 
 ## Introduction
 
-nf-core/drop allows controlling which subworkflows to run via parameters (`--ae_run` (Aberrant Expression), `--as_run` (Aberrant Splicing), `--mae_run` (Mono-Allelic Expression)). By default, each subworkflow is set to false. We describe different global and module-specific parameters in the [parameter documentation](https://nf-co.re/drop/parameters).
+nf-core/drop allows controlling which subworkflows to run. By default, all subworkflows will run. You can skip individual subworkflows via the parameters `--ae_skip` (Aberrant Expression), `--as_skip` (Aberrant Splicing) and `--mae_skip` (Mono-Allelic Expression). We describe different global and module-specific parameters in the [parameter documentation](https://nf-co.re/drop/parameters).
 
 ## Samplesheet input
 
@@ -100,12 +100,32 @@ the `DROP_GROUP` BLOOD_AS for the aberrant expression module (containing S10R, E
 
 An [example samplesheet](../assets/samplesheet.tsv) has been provided with the pipeline.
 
+| Column              | Description                                                                                                                                                                                          |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `RNA_ID`            | Unique identifier for the RNA sample.                                                                                                                                                                |
+| `RNA_BAM_FILE`      | Path to the RNA BAM file.                                                                                                                                                                            |
+| `RNA_BAI_FILE`      | Path to the RNA BAM index file. It will be automatically generated from the BAM files if not given.                                                                                                  |
+| `DNA_ID`            | Identifier for the DNA sample. It shall be the sample header in the VCF file.                                                                                                                        |
+| `DNA_VCF_FILE`      | Path to the DNA VCF file.                                                                                                                                                                            |
+| `DNA_TBI_FILE`      | Path to the DNA VCF index file. It will be automatically generated from the VCF files if not given.                                                                                                  |
+| `DROP_GROUP`        | See [above](#drop_group).                                                                                                                                                                            |
+| `PAIRED_END`        | Indicates if the input is paired-end or single-end. Default: `true` (paired-end). Refer to the documentation of [HTSeq](https://htseq.readthedocs.io/en/latest/).                                    |
+| `COUNT_MODE`        | Count mode. Default: `IntersectionStrict`. Options: `union`, `IntersectionStrict`, `IntersectionNotEmpty`. Refer to the documentation of [HTSeq](https://htseq.readthedocs.io/en/latest/).           |
+| `COUNT_OVERLAPS`    | Indicates if overlaps should be counted. Default: `true`. Refer to the documentation of [HTSeq](https://htseq.readthedocs.io/en/latest/).                                                            |
+| `STRAND`            | Samples within each `DROP_GROUP` should either be stranded (`yes`, `reverse`, or a combination of `yes` and `reverse`) or unstranded (only `no`), and this can vary between different `DROP_GROUP`s. |
+| `HPO_TERMS`         | Comma-separated list of HPO terms associated with the sample.                                                                                                                                        |
+| `GENE_COUNTS_FILE`  | Path to the gene counts file (`.tsv` or `.tsv.gz`). See details also [above](#aberrant-expression).                                                                                                  |
+| `GENE_ANNOTATION`   | Gene annotation in YAML format, e.g. [assets/gene_annotation.yaml](../assets/gene_annotation.yaml).                                                                                                  |
+| `GENOME`            | See details [above](#mae). Default: `ucsc`. Options: `ncbi`, `ucsc`.                                                                                                                                 |
+| `SPLICE_COUNTS_DIR` | Path to the splice counts directory. See details [above](#aberrant-splicing).                                                                                                                        |
+| `SEX`               | Sex of the sample. Samples of `m`, `male`, `f`, `female` are analysed for sex bias aberrant expression report.                                                                                       |
+
 ## Running the pipeline
 
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/drop --input ./samplesheet.tsv --outdir ./results --genome hg19 --gene_annotation ./gene_annotation.yaml -profile docker --ae_run true
+nextflow run nf-core/drop --input ./samplesheet.tsv --outdir ./results --genome hg19 --gene_annotation ./gene_annotation.yaml -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
diff --git a/main.nf b/main.nf
@@ -141,18 +141,18 @@ workflow {
         ec_exclude_groups,
 
         // Aberrant expression parameters
-        params.ae_run,
+        params.ae_skip,
         ae_groups,
         ae_genes_to_test,
 
         // Aberrant splicing parameters
-        params.as_run,
+        params.as_skip,
         as_groups,
         params.as_fraser_version,
         as_genes_to_test,
 
         // Mono Allelic Expression parameters
-        params.mae_run,
+        params.mae_skip,
         mae_groups,
         mae_qc_groups
     )
diff --git a/nextflow.config b/nextflow.config
@@ -29,7 +29,7 @@ params {
     ec_exclude_groups          = null
 
     // Aberrant expression
-    ae_run                     = false
+    ae_skip                    = false
     ae_groups                  = ""
     ae_min_ids                 = 1
     ae_fpkm_cutoff             = 1
@@ -41,7 +41,7 @@ params {
     ae_genes_to_test           = null
 
     // Aberrant splicing
-    as_run                     = false
+    as_skip                    = false
     as_groups                  = ""
     as_min_ids                 = 1
     as_recount                 = false
@@ -60,7 +60,7 @@ params {
     as_genes_to_test           = null
 
     // Mono-allelic expression
-    mae_run                    = false
+    mae_skip                   = false
     mae_groups                 = null
     mae_gatk_header_check      = false
     mae_padj_cutoff            = 0.05
diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -89,9 +89,9 @@
             "fa_icon": "fas fa-file-export",
             "description": "Aberrant expression options.",
             "properties": {
-                "ae_run": {
+                "ae_skip": {
                     "type": "boolean",
-                    "description": "Run aberrant expression analysis.",
+                    "description": "Skip aberrant expression analysis.",
                     "fa_icon": "fas fa-file-export"
                 },
                 "ae_groups": {
@@ -163,9 +163,9 @@
             "fa_icon": "fas fa-file-export",
             "description": "Aberrant splicing options.",
             "properties": {
-                "as_run": {
+                "as_skip": {
                     "type": "boolean",
-                    "description": "Run aberrant splicing analysis.",
+                    "description": "Skip aberrant splicing analysis.",
                     "fa_icon": "fas fa-file-export"
                 },
                 "as_groups": {
@@ -282,9 +282,9 @@
             "fa_icon": "fas fa-file-export",
             "description": "Mono-allelic expression options.",
             "properties": {
-                "mae_run": {
+                "mae_skip": {
                     "type": "boolean",
-                    "description": "Run mono-allelic expression analysis.",
+                    "description": "Skip mono-allelic expression analysis.",
                     "fa_icon": "fas fa-file-export"
                 },
                 "mae_groups": {
diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
diff --git a/subworkflows/local/utils_nfcore_drop_pipeline/main.nf b/subworkflows/local/utils_nfcore_drop_pipeline/main.nf
@@ -199,7 +199,7 @@ workflow PIPELINE_COMPLETION {
 def validateInputParameters() {
     genomeExistsError()
 
-    if (params.ae_run && params.gene_annotation == null) {
+    if ((!params.ae_skip) && params.gene_annotation == null) {
         error("Please provide a gene annotation file using the --gene_annotation parameter when running the aberrant expression analysis.")
     }
 }
diff --git a/tests/aberrant_expression.nf.test b/tests/aberrant_expression.nf.test
@@ -8,9 +8,9 @@ nextflow_pipeline {
         when {
             params {
                 outdir = "${outputDir}"
-                ae_run = true
-                as_run = false
-                mae_run = false
+                ae_skip = false
+                as_skip = true
+                mae_skip = true
             }
         }
 
diff --git a/tests/aberrant_splicing.nf.test b/tests/aberrant_splicing.nf.test
@@ -8,9 +8,9 @@ nextflow_pipeline {
         when {
             params {
                 outdir = "${outputDir}"
-                ae_run = false
-                as_run = true
-                mae_run = false
+                ae_skip = true
+                as_skip = false
+                mae_skip = true
             }
         }
 
diff --git a/tests/mae.nf.test b/tests/mae.nf.test
@@ -12,9 +12,9 @@ nextflow_pipeline {
         when {
             params {
                 outdir = "${outputDir}"
-                ae_run = false
-                as_run = false
-                mae_run = true
+                ae_skip = true
+                as_skip = true
+                mae_skip = false
             }
         }
 
diff --git a/workflows/drop.nf b/workflows/drop.nf
@@ -47,18 +47,18 @@ workflow DROP {
     ec_exclude_groups   // list:          A list of groups to exclude from the counts export
 
     // Aberrant expression parameters
-    ae_run              // boolean:       Run aberrant expression analysis
+    ae_skip             // boolean:       Skip aberrant expression analysis
     ae_groups           // list:          A list of groups to exclude from the aberrant expression analysis
     ae_genes_to_test    // map:           A map containing the names of genes to test
 
     // Aberrant splicing parameters
-    as_run              // boolean:       Run aberrant splicing analysis
+    as_skip             // boolean:       Skip aberrant splicing analysis
     as_groups           // list:          A list of groups to exclude from the aberrant splicing analysis
     as_fraser_version   // string:        Fraser version to use for aberrant splicing analysis
     as_genes_to_test    // map:           A map containing the names of genes to test
 
     // Mono Allelic Expression parameters
-    mae_run             // boolean:       Run mono allelic expression analysis
+    mae_skip            // boolean:       Skip mono allelic expression analysis
     mae_groups          // list:          A list of groups to exclude from the mono allelic expression analysis
     mae_qc_groups       // list:          A list of groups to exclude from QC steps in the mono allelic expression analysis
 
@@ -137,7 +137,7 @@ workflow DROP {
     // Aberrant expression
     //
 
-    if(ae_run) {
+    if(!ae_skip) {
         ABERRANTEXPRESSION(
             input.abberantexpression,
             PREPROCESSGENEANNOTATION.out.count_ranges,
@@ -158,7 +158,7 @@ workflow DROP {
     // Aberrant splicing
     //
 
-    if (as_run) {
+    if (!as_skip) {
         ABERRANTSPLICING(
             input.aberrantsplicing,
             PREPROCESSGENEANNOTATION.out.txdb,
@@ -179,7 +179,7 @@ workflow DROP {
     // Mono Allelic Expression
     //
 
-    if(mae_run) {
+    if(!mae_skip) {
         MAE(
             input.mae,
             ucsc_fasta,

Original file line number	Diff line number	Diff line change
`@@ -199,7 +199,7 @@ workflow PIPELINE_COMPLETION {`
`199`	`199`	`def validateInputParameters() {`
`200`	`200`	`genomeExistsError()`
`201`	`201`
`202`		`- if (params.ae_run && params.gene_annotation == null) {`
	`202`	`+ if ((!params.ae_skip) && params.gene_annotation == null) {`
`203`	`203`	`error("Please provide a gene annotation file using the --gene_annotation parameter when running the aberrant expression analysis.")`
`204`	`204`	`}`
`205`	`205`	`}`