Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
63650e6
Add initial subworkflow working copy
pinin4fjords Oct 5, 2025
02305e0
Actually, we only need a single diann module
pinin4fjords Oct 6, 2025
8dcb8d8
Merge branch 'master' into add_diann_subworkflow_modules
pinin4fjords Oct 6, 2025
36c0c39
msstats module will be a bit more work- defer until later
pinin4fjords Oct 6, 2025
bd18493
Fix mzmlstatistics test
pinin4fjords Oct 6, 2025
99a2466
Fix dianncfg test
pinin4fjords Oct 6, 2025
cd0ec87
Fix diann2mztab test
pinin4fjords Oct 6, 2025
c04fdd6
Revert testing change
pinin4fjords Oct 6, 2025
3f0dd59
Merge branch 'add_diann_subworkflow_modules' of github.com:nf-core/mo…
pinin4fjords Oct 6, 2025
7315f7a
Tidy up
pinin4fjords Oct 6, 2025
660cbba
Try to address linting issues
pinin4fjords Oct 6, 2025
cdf1a7b
more linting fixes
pinin4fjords Oct 6, 2025
ce5f1d9
Hopefully last subworkflow meta fix
pinin4fjords Oct 6, 2025
8910034
None of that Conda thanks
pinin4fjords Oct 6, 2025
c022958
Misc fixes
pinin4fjords Oct 6, 2025
d3146ed
Remove another cpus directive
pinin4fjords Oct 6, 2025
53dbf85
Comment clarification
pinin4fjords Oct 6, 2025
ded6d1c
Remove unsnapshottable things
pinin4fjords Oct 6, 2025
983ecb1
restore test data path
pinin4fjords Oct 6, 2025
929f19d
Replace placeholder string checks with empty list checks in diann module
pinin4fjords Oct 16, 2025
ff59458
Correct outputs, prefixing
pinin4fjords Oct 17, 2025
599ccf8
Subworkflow-level fixes
pinin4fjords Oct 17, 2025
5fb66db
Fix meta ymls
pinin4fjords Oct 17, 2025
f8689d3
clarify in meta.yml
pinin4fjords Oct 17, 2025
27d2804
misc module fixes
pinin4fjords Oct 17, 2025
b6d348f
Fix DIANN module to automatically add --use-quant with --temp
pinin4fjords Oct 17, 2025
79ce7a6
Update DIANN meta.yml: clarify quant parameter is optional
pinin4fjords Oct 17, 2025
c32c0d1
Add conda support to quantmsutils modules
pinin4fjords Oct 17, 2025
d14af0e
Clarify dianncfg only supports Unimod modifications
pinin4fjords Oct 17, 2025
6d1253b
Add --rt-profiling to in-silico library generation
pinin4fjords Oct 17, 2025
9c5fdd6
Clarify ms_files vs ms_file_names usage in DIANN module
pinin4fjords Oct 17, 2025
d765af2
Clarify RAW file support is version-specific in DIANN
pinin4fjords Oct 17, 2025
5f9f551
Merge branch 'master' into add_diann_subworkflow_modules
pinin4fjords Oct 17, 2025
12da55c
Fix DIANN meta.yml output patterns to use ${prefix}
pinin4fjords Oct 17, 2025
cf00e54
Merge branch 'add_diann_subworkflow_modules' of github.com:nf-core/mo…
pinin4fjords Oct 17, 2025
a8ddb83
Add missing when condition to quantmsutils/mzmlstatistics
pinin4fjords Oct 17, 2025
9492801
Add ThermoRawFileParser support to DIA proteomics subworkflow
pinin4fjords Oct 17, 2025
06af1f4
Revert "Add ThermoRawFileParser support to DIA proteomics subworkflow"
pinin4fjords Oct 20, 2025
5d36e59
Merge branch 'master' into add_diann_subworkflow_modules
pinin4fjords Oct 20, 2025
95f24b9
Revert parquet in test
pinin4fjords Oct 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/skip_nf_test.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"modules/nf-core/deepvariant/postprocessvariants",
"modules/nf-core/deepvariant/rundeepvariant",
"modules/nf-core/deepvariant/vcfstatsreport",
"modules/nf-core/diann/insilicolibrarygeneration",
"subworkflows/nf-core/dia_proteomics_analysis",
"modules/nf-core/diann",
"modules/nf-core/doubletdetection",
"modules/nf-core/fastk/fastk",
"modules/nf-core/fastk/histex",
Expand Down
118 changes: 118 additions & 0 deletions modules/nf-core/diann/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Generic DIA-NN process: builds a single `diann` command line from whichever
// inputs are non-empty (MS files or file names, FASTA, spectral library, cached
// .quant files) plus free-form `task.ext.args`, and collects whatever library,
// quant and report files that invocation produced.
process DIANN {
    tag "$meta.id"
    label 'process_high'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/diann/v1.8.1_cv1/diann_v1.8.1_cv1.img' :
        'docker.io/biocontainers/diann:v1.8.1_cv1' }"

    input:
    // Single tuple channel. Any of ms_files / ms_file_names / fasta / library / quant
    // may be an empty list ([]) when not needed for the requested DIA-NN step.
    // Paths passed as `quant` are staged under ./quant/ in the work directory.
    tuple val(meta), path(ms_files), val(ms_file_names), path(fasta), path(library), path(quant, stageAs: 'quant/*')

    output:
    // Every output except the log and versions.yml is optional, because which
    // files exist depends on the DIA-NN operation selected through the inputs
    // and task.ext.args.

    // Library outputs
    tuple val(meta), path("${prefix}.predicted.speclib"), emit: predict_speclib, optional: true
    tuple val(meta), path("${prefix}.speclib"), emit: final_speclib, optional: true
    tuple val(meta), path("${prefix}.tsv.skyline.speclib"), emit: skyline_speclib, optional: true

    // Quantification outputs
    tuple val(meta), path("*.quant"), emit: diann_quant, optional: true

    // Report outputs (from final quantification)
    tuple val(meta), path("${prefix}.tsv"), emit: main_report, optional: true
    tuple val(meta), path("${prefix}.parquet"), emit: report_parquet, optional: true
    tuple val(meta), path("${prefix}.manifest.txt"), emit: report_manifest, optional: true
    tuple val(meta), path("${prefix}.protein_description.tsv"), emit: protein_description, optional: true
    tuple val(meta), path("${prefix}.stats.tsv"), emit: report_stats, optional: true
    tuple val(meta), path("${prefix}.pr_matrix.tsv"), emit: pr_matrix, optional: true
    tuple val(meta), path("${prefix}.pg_matrix.tsv"), emit: pg_matrix, optional: true
    tuple val(meta), path("${prefix}.gg_matrix.tsv"), emit: gg_matrix, optional: true
    tuple val(meta), path("${prefix}.unique_genes_matrix.tsv"), emit: unique_gene_matrix, optional: true

    // Common outputs
    tuple val(meta), path("*.log.txt"), emit: log
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error "DIANN module does not support Conda. Please use Docker / Singularity / Podman instead."
    }
    def args = task.ext.args ?: ''
    // `prefix` is intentionally assigned without `def`: the output block above
    // interpolates ${prefix}, so it must not be a script-local variable.
    prefix = task.ext.prefix ?: "${meta.id}" ?: "diann"

    // Handle MS files input: two modes depending on whether we need actual files or just names
    // - ms_files: Actual file paths - used when DIA-NN needs to read raw MS data
    // - ms_file_names: Just basenames - used with --use-quant when DIA-NN only needs file names
    //   to match against preprocessed .quant files in quant/ directory, avoiding unnecessary file staging
    // Staged files take precedence; names are only used when no files were staged.
    def ms_input = ''
    if (ms_files && ms_files != []) {
        ms_input = ms_files instanceof List ? ms_files.collect{ "--f ${it}" }.join(' ') : "--f ${ms_files}"
    } else if (ms_file_names && ms_file_names != []) {
        ms_input = ms_file_names instanceof List ? ms_file_names.collect{ "--f ${it}" }.join(' ') : "--f ${ms_file_names}"
    }

    // An empty value simply omits the corresponding flag from the command line
    def fasta_input = fasta && fasta != [] ? "--fasta ${fasta}" : ''
    def lib_input = library && library != [] ? "--lib ${library}" : ''

    // When quant files are provided, set temp directory and enable --use-quant
    // These flags must be used together: --temp points to quant files, --use-quant tells DIA-NN to use them
    def quant_args = quant && quant != [] ? "--temp ./quant/ --use-quant" : "--temp ./"

    """
    diann \\
        ${ms_input} \\
        ${fasta_input} \\
        ${lib_input} \\
        --threads ${task.cpus} \\
        --out-lib ${prefix} \\
        --out ${prefix}.tsv \\
        ${quant_args} \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        DIA-NN: \$(diann 2>&1 | grep "DIA-NN" | grep -oP "\\d+\\.\\d+(\\.\\w+)*(\\.[\\d]+)?")
    END_VERSIONS
    """

    stub:
    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        error "DIANN module does not support Conda. Please use Docker / Singularity / Podman instead."
    }
    // Assigned without `def`, exactly as in the script section, so that the
    // ${prefix}-based output paths also resolve when running with -stub.
    prefix = task.ext.prefix ?: "${meta.id}" ?: "diann"

    """
    # Library outputs
    touch ${prefix}.predicted.speclib
    touch ${prefix}.speclib
    touch ${prefix}.tsv.skyline.speclib

    # Quant outputs
    touch ${prefix}.quant

    # Report outputs
    touch ${prefix}.tsv
    touch ${prefix}.parquet
    touch ${prefix}.manifest.txt
    touch ${prefix}.protein_description.tsv
    touch ${prefix}.stats.tsv
    touch ${prefix}.pr_matrix.tsv
    touch ${prefix}.pg_matrix.tsv
    touch ${prefix}.gg_matrix.tsv
    touch ${prefix}.unique_genes_matrix.tsv

    # Common outputs (must match the mandatory "*.log.txt" output pattern)
    touch ${prefix}.log.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        DIA-NN: 1.8.1
    END_VERSIONS
    """
}
235 changes: 235 additions & 0 deletions modules/nf-core/diann/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "diann"
description: Generic DIA-NN module for running any DIA-NN operation including in-silico library generation, preliminary analysis, empirical library assembly, individual analysis, and final quantification
keywords:
  - proteomics
  - mass spectrometry
  - DIA
  - spectral library
  - quantification
tools:
  - "diann":
      description: "DIA-NN - a fast and easy to use tool for processing data independent acquisition (DIA) proteomics data"
      homepage: "https://github.com/vdemichev/DiaNN"
      documentation: "https://github.com/vdemichev/DiaNN#readme"
      tool_dev_url: "https://github.com/vdemichev/DiaNN"
      licence: ['Custom', 'https://raw.githubusercontent.com/vdemichev/DiaNN/master/LICENSE.txt']

# All inputs arrive as one tuple, in the same order as the `input:` declaration
# in main.nf. Optional elements are switched off by passing an empty list ([]).
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - ms_files:
        type: file
        description: |
          MS data file(s) in mzML or Bruker .d format (can be single file or list).
          Thermo RAW files are only supported on Linux with DIA-NN 2.0+; older versions
          require conversion to mzML or .d format first.
          For preliminary/assembly/individual analysis, these are actual file paths.
          For final quantification with --use-quant, this should be an empty list.
        pattern: "*.{mzML,raw,d}"
    - ms_file_names:
        type: string
        description: |
          MS file basenames (not paths) as strings (can be single name or list).
          Used for final quantification step with --use-quant where only filenames are needed.
          Only used when ms_files is empty; for other analysis steps, this should be an empty list.
          Example: ['sample1.mzML', 'sample2.mzML'] or []
    - fasta:
        type: file
        description: |
          FASTA database file for peptide searches.
          Pass an empty list [] if not needed for the specific analysis step;
          any file supplied here is passed to DIA-NN via --fasta.
        pattern: "*.{fasta,fa}"
    - library:
        type: file
        description: |
          Spectral library file in .speclib or .tsv format.
          Pass an empty list [] if not needed for the specific analysis step;
          any file supplied here is passed to DIA-NN via --lib.
        pattern: "*.{speclib,tsv}"
    - quant:
        type: file
        description: |
          .quant file(s) from a previous DIA-NN analysis.
          When provided, enables --use-quant mode to reuse cached quantification results,
          improving performance for empirical library assembly and final quantification.
          Pass empty list [] if not needed. Files are staged as 'quant/*' in the work directory.
        pattern: "*.quant"

# Every output except `log` and `versions` is optional in main.nf: which files
# are produced depends on the DIA-NN operation that was run.
output:
  predict_speclib:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.predicted.speclib":
          type: file
          description: |
            Predicted spectral library from in-silico generation.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.predicted.speclib"
  final_speclib:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.speclib":
          type: file
          description: |
            Empirical spectral library refined from experimental data.
            Produced by the library assembly step, which combines predicted library
            information with actual MS measurements to improve search accuracy.
          pattern: "*.speclib"
  skyline_speclib:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.tsv.skyline.speclib":
          type: file
          description: |
            Spectral library in Skyline format for use with Skyline software.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.tsv.skyline.speclib"
  diann_quant:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.quant":
          type: file
          description: Quantification results in .quant format (intermediate output for empirical library assembly and final quantification)
          pattern: "*.quant"
  main_report:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.tsv":
          type: file
          description: |
            Main DIA-NN report in TSV format containing peptide and protein quantification.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.tsv"
  report_parquet:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.parquet":
          type: file
          description: |
            Main DIA-NN report in Parquet format for efficient data access.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.parquet"
  report_manifest:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.manifest.txt":
          type: file
          description: |
            Report manifest file listing all output files.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.manifest.txt"
  protein_description:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.protein_description.tsv":
          type: file
          description: |
            Protein descriptions extracted from FASTA headers.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.protein_description.tsv"
  report_stats:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.stats.tsv":
          type: file
          description: |
            Report statistics including identification and quantification metrics.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.stats.tsv"
  pr_matrix:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.pr_matrix.tsv":
          type: file
          description: |
            Precursor-level quantification matrix (peptides across runs).
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.pr_matrix.tsv"
  pg_matrix:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.pg_matrix.tsv":
          type: file
          description: |
            Protein group-level quantification matrix.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.pg_matrix.tsv"
  gg_matrix:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.gg_matrix.tsv":
          type: file
          description: |
            Gene group-level quantification matrix.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.gg_matrix.tsv"
  unique_gene_matrix:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "${prefix}.unique_genes_matrix.tsv":
          type: file
          description: |
            Unique genes quantification matrix.
            Filename is determined by the prefix (task.ext.prefix or meta.id).
          pattern: "*.unique_genes_matrix.tsv"
  log:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1' ]`
      - "*.log.txt":
          type: file
          description: DIA-NN log file containing run information and recommended settings
          pattern: "*.log.txt"
  versions:
    - versions.yml:
        type: file
        description: File containing software versions
        pattern: "versions.yml"

authors:
  - "@pinin4fjords"
maintainers:
  - "@pinin4fjords"
Loading
Loading