nf-core · pinin4fjords · Oct 21, 2025 · Oct 5, 2025 · Oct 6, 2025 · Oct 6, 2025
@@ -39,7 +39,8 @@
         "modules/nf-core/deepvariant/postprocessvariants",
         "modules/nf-core/deepvariant/rundeepvariant",
         "modules/nf-core/deepvariant/vcfstatsreport",
-        "modules/nf-core/diann/insilicolibrarygeneration",
+        "subworkflows/nf-core/dia_proteomics_analysis",
+        "modules/nf-core/diann",
         "modules/nf-core/doubletdetection",
         "modules/nf-core/fastk/fastk",
         "modules/nf-core/fastk/histex",

diff --git a/modules/nf-core/diann/main.nf b/modules/nf-core/diann/main.nf
@@ -0,0 +1,118 @@
+// Generic DIA-NN wrapper: one `diann` call whose behaviour is driven by which inputs are non-empty plus task.ext.args
+process DIANN {
+    tag "$meta.id"
+    label 'process_high'
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
+        'docker.io/biocontainers/diann:v1.8.1_cv1' }"
+
+    input:
+    tuple val(meta), path(ms_files), val(ms_file_names), path(fasta), path(library), path(quant, stageAs: 'quant/*')
+
+    output:
+    // Library outputs
+    tuple val(meta), path("${prefix}.predicted.speclib"), emit: predict_speclib, optional: true
+    tuple val(meta), path("${prefix}.speclib"), emit: final_speclib, optional: true
+    tuple val(meta), path("${prefix}.tsv.skyline.speclib"), emit: skyline_speclib, optional: true
+
+    // Quantification outputs
+    tuple val(meta), path("*.quant"), emit: diann_quant, optional: true
+
+    // Report outputs (from final quantification)
+    tuple val(meta), path("${prefix}.tsv"), emit: main_report, optional: true
+    tuple val(meta), path("${prefix}.parquet"), emit: report_parquet, optional: true
+    tuple val(meta), path("${prefix}.manifest.txt"), emit: report_manifest, optional: true
+    tuple val(meta), path("${prefix}.protein_description.tsv"), emit: protein_description, optional: true
+    tuple val(meta), path("${prefix}.stats.tsv"), emit: report_stats, optional: true
+    tuple val(meta), path("${prefix}.pr_matrix.tsv"), emit: pr_matrix, optional: true
+    tuple val(meta), path("${prefix}.pg_matrix.tsv"), emit: pg_matrix, optional: true
+    tuple val(meta), path("${prefix}.gg_matrix.tsv"), emit: gg_matrix, optional: true
+    tuple val(meta), path("${prefix}.unique_genes_matrix.tsv"), emit: unique_gene_matrix, optional: true
+
+    // Common outputs
+    tuple val(meta), path("*.log.txt"), emit: log
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "DIANN module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+    // No `def`: the output: paths interpolate prefix, so it must be task-scoped; a GString is never falsy, hence the explicit id check
+    prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : 'diann')
+
+    // MS inputs come in two flavours; staged files take precedence over bare names:
+    // - ms_files: actual file paths, used when DIA-NN needs to read the raw MS data
+    // - ms_file_names: basenames only, used with --use-quant to match the .quant files in quant/ without staging the MS files
+    def ms_input = ''
+    if (ms_files && ms_files != []) {
+        ms_input = ms_files instanceof List ? ms_files.collect{ "--f ${it}" }.join(' ') : "--f ${ms_files}"
+    } else if (ms_file_names && ms_file_names != []) {
+        ms_input = ms_file_names instanceof List ? ms_file_names.collect{ "--f ${it}" }.join(' ') : "--f ${ms_file_names}"
+    }
+
+    def fasta_input = fasta && fasta != [] ? "--fasta ${fasta}" : ''
+    def lib_input = library && library != [] ? "--lib ${library}" : ''
+
+    // With staged quant files, --temp points DIA-NN at quant/ and --use-quant makes it reuse them (the two flags go together)
+    def quant_args = quant && quant != [] ? "--temp ./quant/ --use-quant" : "--temp ./"
+
+    """
+    diann \\
+        ${ms_input} \\
+        ${fasta_input} \\
+        ${lib_input} \\
+        --threads ${task.cpus} \\
+        --out-lib ${prefix} \\
+        --out ${prefix}.tsv \\
+        ${quant_args} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
+    END_VERSIONS
+    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "DIANN module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    // No `def` here either: a script-local prefix would not be visible to the output: paths
+    prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : 'diann')
+
+    """
+    # Library outputs
+    touch ${prefix}.predicted.speclib
+    touch ${prefix}.speclib
+    touch ${prefix}.tsv.skyline.speclib
+
+    # Quant outputs
+    touch ${prefix}.quant
+
+    # Report outputs
+    touch ${prefix}.tsv
+    touch ${prefix}.parquet
+    touch ${prefix}.manifest.txt
+    touch ${prefix}.protein_description.tsv
+    touch ${prefix}.stats.tsv
+    touch ${prefix}.pr_matrix.tsv
+    touch ${prefix}.pg_matrix.tsv
+    touch ${prefix}.gg_matrix.tsv
+    touch ${prefix}.unique_genes_matrix.tsv
+
+    # Common outputs (must match the mandatory "*.log.txt" output)
+    touch ${prefix}.log.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        DIA-NN: 1.8.1
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/diann/meta.yml b/modules/nf-core/diann/meta.yml
@@ -0,0 +1,235 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "diann"
+description: Generic DIA-NN module for running any DIA-NN operation including in-silico library generation, preliminary analysis, empirical library assembly, individual analysis, and final quantification
+keywords:
+  - proteomics
+  - mass spectrometry
+  - DIA
+  - spectral library
+  - quantification
+tools:
+  - "diann":
+      description: "DIA-NN - a fast and easy to use tool for processing data independent acquisition (DIA) proteomics data"
+      homepage: "https://github.com/vdemichev/DiaNN"
+      documentation: "https://github.com/vdemichev/DiaNN#readme"
+      tool_dev_url: "https://github.com/vdemichev/DiaNN"
+      licence: ['Custom', 'https://raw.githubusercontent.com/vdemichev/DiaNN/master/LICENSE.txt']
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - ms_files:
+        type: file
+        description: |
+          MS data file(s) in mzML or Bruker .d format (can be single file or list).
+          Thermo RAW files are only supported on Linux with DIA-NN 2.0+; older versions
+          require conversion to mzML or .d format first.
+          For preliminary/assembly/individual analysis, these are actual file paths.
+          For final quantification with --use-quant, this should be an empty list.
+        pattern: "*.{mzML,raw,d}"
+    - ms_file_names:
+        type: string
+        description: |
+          MS file basenames (not paths) as strings (can be single name or list).
+          Used for final quantification step with --use-quant where only filenames are needed.
+          For other analysis steps, this should be an empty list.
+          Example: ['sample1.mzML', 'sample2.mzML'] or []
+    - fasta:
+        type: file
+        description: |
+          FASTA database file for peptide searches.
+          Pass an empty list [] if not needed for the specific analysis step; any staged file (including a placeholder) is passed to --fasta.
+        pattern: "*.{fasta,fa}"
+    - library:
+        type: file
+        description: |
+          Spectral library file in .speclib or .tsv format.
+          Pass an empty list [] if not needed for the specific analysis step; any staged file (including a placeholder) is passed to --lib.
+        pattern: "*.{speclib,tsv}"
+    - quant:
+        type: file
+        description: |
+          One or more .quant files from a previous DIA-NN analysis (staged together into a 'quant/' directory).
+          When provided, enables --use-quant mode to reuse cached quantification results,
+          improving performance for empirical library assembly and final quantification.
+          Pass empty list [] if not needed. Files are staged as 'quant/*' in the work directory.
+
+output:
+  predict_speclib:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.predicted.speclib":
+          type: file
+          description: |
+            Predicted spectral library from in-silico generation.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.predicted.speclib"
+  final_speclib:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.speclib":
+          type: file
+          description: |
+            Empirical spectral library refined from experimental data.
+            Produced by the library assembly step, which combines predicted library
+            information with actual MS measurements to improve search accuracy.
+          pattern: "*.speclib"
+  skyline_speclib:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.tsv.skyline.speclib":
+          type: file
+          description: |
+            Spectral library in Skyline format for use with Skyline software.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.tsv.skyline.speclib"
+  diann_quant:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.quant":
+          type: file
+          description: Quantification results in .quant format (intermediate output for empirical library assembly and final quantification)
+          pattern: "*.quant"
+  main_report:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.tsv":
+          type: file
+          description: |
+            Main DIA-NN report in TSV format containing peptide and protein quantification.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.tsv"
+  report_parquet:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.parquet":
+          type: file
+          description: |
+            Main DIA-NN report in Parquet format for efficient data access.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.parquet"
+  report_manifest:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.manifest.txt":
+          type: file
+          description: |
+            Report manifest file listing all output files.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.manifest.txt"
+  protein_description:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.protein_description.tsv":
+          type: file
+          description: |
+            Protein descriptions extracted from FASTA headers.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.protein_description.tsv"
+  report_stats:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.stats.tsv":
+          type: file
+          description: |
+            Report statistics including identification and quantification metrics.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.stats.tsv"
+  pr_matrix:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.pr_matrix.tsv":
+          type: file
+          description: |
+            Precursor-level quantification matrix (peptides across runs).
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.pr_matrix.tsv"
+  pg_matrix:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.pg_matrix.tsv":
+          type: file
+          description: |
+            Protein group-level quantification matrix.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.pg_matrix.tsv"
+  gg_matrix:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.gg_matrix.tsv":
+          type: file
+          description: |
+            Gene group-level quantification matrix.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.gg_matrix.tsv"
+  unique_gene_matrix:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "${prefix}.unique_genes_matrix.tsv":
+          type: file
+          description: |
+            Unique genes quantification matrix.
+            Filename is determined by the prefix (task.ext.prefix or meta.id).
+          pattern: "*.unique_genes_matrix.tsv"
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.log.txt":
+          type: file
+          description: DIA-NN log file containing run information and recommended settings
+          pattern: "*.log.txt"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+authors:
+  - "@pinin4fjords"
+maintainers:
+  - "@pinin4fjords"