Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f911ef4
feat: minimap2 tool wrapper
adthrasher Dec 3, 2025
fd2bca4
refactor: handle optionally gzipped reference
adthrasher Dec 3, 2025
1ebe2a0
chore: fill in options
adthrasher Dec 3, 2025
9533656
chore: add samtools to minimap2 image and convert to BAM
adthrasher Dec 3, 2025
1b28075
chore: lint
adthrasher Dec 3, 2025
0ac46db
chore: add disk specification
adthrasher Dec 3, 2025
5161669
feat: add bwa-mem2 task
adthrasher Dec 3, 2025
af994e2
chore: lint
adthrasher Dec 4, 2025
00deed4
feat: add hisat2 task
adthrasher Dec 4, 2025
4434666
chore: change base image as other segfaults
adthrasher Dec 4, 2025
b07d769
feat: add `vg` indexing
adthrasher Dec 5, 2025
cb47772
chore: localize fasta for indexing
adthrasher Dec 5, 2025
39ca901
feat: add vg giraffe task
adthrasher Dec 5, 2025
a921e17
chore: avoid writing intermediate SAM to disk
adthrasher Dec 9, 2025
733368e
Merge branch 'feat/minimap2' of https://github.com/stjudecloud/workfl…
adthrasher Dec 9, 2025
2dc478a
chore: use database prefix
adthrasher Dec 12, 2025
1c832fc
chore: bump resources for azure
adthrasher Dec 12, 2025
bd84132
chore: format+lint
adthrasher Dec 12, 2025
8abaf9c
chore: remove memory oversubscribe
adthrasher Dec 18, 2025
d42fbfb
feat: add strelka and manta wrappers
adthrasher Dec 18, 2025
589190d
feat: add clair3 wrapper
adthrasher Dec 18, 2025
efb1d50
chore: fix invocation
adthrasher Dec 18, 2025
25681c7
feat: add NGSEP wrapper
adthrasher Dec 18, 2025
1083326
feat: add deepsomatic and deepvariant wrappers with GPU support
adthrasher Dec 18, 2025
30d1159
chore: change hisat2 output to BAM
adthrasher Dec 19, 2025
e58c2ed
chore: write to stdout instead of fifo
adthrasher Dec 19, 2025
e680a5b
chore: add undocumented FAI requirement
adthrasher Dec 22, 2025
60dce46
chore: add error checking to minimap2
adthrasher Dec 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docker/bwamem2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image bundling bwa-mem2 together with samtools.
# Stage 1: the samtools biocontainer, used only as a source of files to copy.
FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
# Stage 2 (final image): the bwa-mem2 biocontainer.
FROM quay.io/biocontainers/bwa-mem2:2.3--he70b90d_0

# Overlay the samtools stage's /usr/local bin, lib and libexec trees onto the
# final image so `samtools` is available alongside `bwa-mem2`.
COPY --from=samtools /usr/local/bin/ /usr/local/bin/
COPY --from=samtools /usr/local/lib/ /usr/local/lib/
COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/

# Running the container with no explicit entrypoint invokes bwa-mem2.
ENTRYPOINT [ "bwa-mem2" ]
5 changes: 5 additions & 0 deletions docker/bwamem2/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "bwamem2",
"version": "2.3",
"revision": "0"
}
8 changes: 8 additions & 0 deletions docker/hisat2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image bundling HISAT2 together with samtools.
# Stage 1: the samtools biocontainer, used only as a source of files to copy.
FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
# Stage 2 (final image): the hisat2 biocontainer.
FROM quay.io/biocontainers/hisat2:2.2.1--hdbdd923_7

# Overlay the samtools stage's /usr/local bin, lib and libexec trees onto the
# final image so `samtools` is available alongside `hisat2`.
COPY --from=samtools /usr/local/bin/ /usr/local/bin/
COPY --from=samtools /usr/local/lib/ /usr/local/lib/
COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/

# Running the container with no explicit entrypoint invokes hisat2.
ENTRYPOINT [ "hisat2" ]
5 changes: 5 additions & 0 deletions docker/hisat2/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "hisat2",
"version": "2.2.1",
"revision": "0"
}
8 changes: 8 additions & 0 deletions docker/minimap2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image bundling minimap2 together with samtools.
# Stage 1: the samtools biocontainer, used only as a source of files to copy.
FROM quay.io/biocontainers/samtools:1.17--h00cdaf9_0 AS samtools
# Stage 2 (final image): the minimap2 biocontainer.
FROM quay.io/biocontainers/minimap2:2.30--h577a1d6_0

# Overlay the samtools stage's /usr/local bin, lib and libexec trees onto the
# final image so `samtools` is available alongside `minimap2`.
COPY --from=samtools /usr/local/bin/ /usr/local/bin/
COPY --from=samtools /usr/local/lib/ /usr/local/lib/
COPY --from=samtools /usr/local/libexec/ /usr/local/libexec/

# Running the container with no explicit entrypoint invokes minimap2.
ENTRYPOINT [ "minimap2" ]
5 changes: 5 additions & 0 deletions docker/minimap2/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "minimap2",
"version": "2.30",
"revision": "0"
}
5 changes: 5 additions & 0 deletions docker/ngsep/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Base image supplying the Java runtime used to launch the NGSEP jar.
# NOTE(review): confirm NGSEPcore 5.1.0 supports a Java 8 runtime; the tag is
# also a floating major-version tag rather than a pinned release.
FROM eclipse-temurin:8

Check failure

Code scanning / Snyk Container

Medium severity - Directory Traversal vulnerability in pam High

This file introduces a vulnerable pam package with a medium severity vulnerability.

Check failure

Code scanning / Snyk Container

Low severity - Allocation of Resources Without Limits or Throttling vulnerability in glibc High

This file introduces a vulnerable glibc package with a low severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Open Redirect vulnerability in wget Medium

This file introduces a vulnerable wget package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Directory Traversal vulnerability in tar Warning

This file introduces a vulnerable tar package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Out-of-bounds Read vulnerability in libpng1.6 Warning

This file introduces a vulnerable libpng1.6 package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Heap-based Buffer Overflow vulnerability in libpng1.6 Warning

This file introduces a vulnerable libpng1.6 package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Out-of-bounds Read vulnerability in libpng1.6 Warning

This file introduces a vulnerable libpng1.6 package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Out-of-bounds Read vulnerability in libpng1.6 Warning

This file introduces a vulnerable libpng1.6 package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Medium severity - Algorithmic Complexity vulnerability in expat Medium

This file introduces a vulnerable expat package with a medium severity vulnerability.

Check warning

Code scanning / Snyk Container

Low severity - Improper Input Validation vulnerability in coreutils Medium

This file introduces a vulnerable coreutils package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - Information Exposure vulnerability in libgcrypt20 Note

This file introduces a vulnerable libgcrypt20 package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - Out-of-bounds Write vulnerability in gnupg2 Low

This file introduces a vulnerable gnupg2 package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - CVE-2025-0167 vulnerability in curl Note

This file introduces a vulnerable curl package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - CVE-2024-41996 vulnerability in openssl Note

This file introduces a vulnerable openssl package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - CVE-2025-10148 vulnerability in curl Note

This file introduces a vulnerable curl package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - CVE-2024-56433 vulnerability in shadow Note

This file introduces a vulnerable shadow package with a low severity vulnerability.

Check notice

Code scanning / Snyk Container

Low severity - CVE-2025-9086 vulnerability in curl Note

This file introduces a vulnerable curl package with a low severity vulnerability.

# Download the NGSEPcore 5.1.0 release jar from GitHub to a fixed path.
# NOTE(review): the download is not checksum-verified.
RUN wget https://github.com/NGSEP/NGSEPcore/releases/download/v5.1.0/NGSEPcore_5.1.0.jar -O /usr/local/bin/NGSEPcore.jar

# Running the container with no explicit entrypoint launches the jar; any
# container arguments are passed through to NGSEP.
ENTRYPOINT [ "java", "-jar", "/usr/local/bin/NGSEPcore.jar" ]
5 changes: 5 additions & 0 deletions docker/ngsep/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"name": "ngsep",
"version": "5.1.0",
"revision": "0"
}
130 changes: 130 additions & 0 deletions tools/bwamem2.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
version 1.2

# Align gzipped FASTQ reads against a BWA-MEM2 index and emit a BAM file.
#
# The index is supplied as a gzipped tarball, extracted into `bwa_db/`, and the
# index prefix is discovered from the `.ann` file inside it. The output of
# `bwa-mem2 mem` is piped straight into `samtools view`, so no intermediate SAM
# is written to disk.
task align {
    meta {
        description: "Align DNA sequences against a large reference database using BWA-MEM2"
        outputs: {
            alignments: "The output alignment file in BAM format"
        }
    }

    parameter_meta {
        read_one_fastq_gz: "Input gzipped FASTQ read one file to align with BWA-MEM2"
        reference_index: "Gzipped tar archive containing the BWA-MEM2 index files for the reference genome"
        read_group: "The read group string to be included in the SAM header. Format: '@RG\\tID:foo\\tSM:bar'"
        read_two_fastq_gz: "Input gzipped FASTQ read two file to align with BWA-MEM2"
        prefix: "Prefix for the BAM file. The extension `.bam` will be added."
        smart_pairing: "If true, enable smart pairing mode (`-p`) for paired-end reads"
        skip_mate_rescue: "If true, skip mate rescue (`-S`) for paired-end reads"
        threads: "Number of threads to use for alignment"
        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
        seed_length: "Minimum seed length (`-k`) used by the BWA-MEM2 aligner"
        min_score: "Minimum score threshold (`-T`) for reporting alignments"
    }

    input {
        File read_one_fastq_gz
        File reference_index
        String read_group
        File? read_two_fastq_gz
        # Default prefix: read-one basename with any read-number suffix,
        # `.subsampled` marker and FASTQ extension stripped.
        String prefix = sub(
            basename(read_one_fastq_gz),
            "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$",
            ""
        )
        Boolean smart_pairing = false
        Boolean skip_mate_rescue = false
        Int threads = 4
        Int modify_disk_size_gb = 0
        Int seed_length = 19
        Int min_score = 30
    }

    String output_name = prefix + ".bam"
    # Disk estimate: twice the FASTQ input size, plus room for the index, plus
    # fixed headroom. The index tarball stays on disk while its extracted copy
    # is written next to it, so the tarball size is scaled (heuristic factor)
    # rather than counted once.
    Int disk_size_gb = ceil((
        size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")
    ) * 2)
        + ceil(size(reference_index, "GiB") * 3)
        + 10
        + modify_disk_size_gb

    command <<<
        set -euo pipefail

        # Unpack the index and derive its prefix from the `.ann` file name.
        mkdir bwa_db
        tar -C bwa_db -xzf "~{reference_index}" --no-same-owner
        PREFIX=$(basename bwa_db/*.ann ".ann")

        # Stream alignments into samtools to write BAM directly.
        bwa-mem2 mem \
            -t ~{threads} \
            -R "~{read_group}" \
            -k ~{seed_length} \
            -T ~{min_score} \
            ~{if smart_pairing then "-p" else ""} \
            ~{if skip_mate_rescue then "-S" else ""} \
            bwa_db/"$PREFIX" \
            "~{read_one_fastq_gz}" \
            ~{if defined(read_two_fastq_gz) then "\"~{read_two_fastq_gz}\"" else ""} |
            samtools view -b -o "~{output_name}" -
    >>>

    output {
        File alignments = output_name
    }

    requirements {
        container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
        cpu: threads
        memory: "64 GB"
        disks: "~{disk_size_gb} GB"
    }
}

# Build a BWA-MEM2 index for a reference FASTA and package it as a gzipped
# tarball.
#
# The FASTA may be gzipped or plain: the command first tries to decompress it
# into the working directory and, if that fails, symlinks the original instead.
# Every file whose name starts with the FASTA name (the FASTA itself plus the
# index files written by `bwa-mem2 index`) is added to the archive.
task index {
    meta {
        description: "Index a reference genome for alignment with BWA-MEM2"
        outputs: {
            reference_index: "Gzipped tar archive containing the BWA-MEM2 index files for the reference genome"
        }
    }

    parameter_meta {
        reference_fasta: "The reference genome in FASTA format to be indexed"
        db_name: "The base name for the output index files"
        modify_disk_size_gb: "Additional disk space to allocate (in GB)"
    }

    input {
        File reference_fasta
        String db_name = "reference"
        Int modify_disk_size_gb = 0
    }

    Float input_fasta_size = size(reference_fasta, "GiB")
    # NOTE(review): the estimate is twice the (possibly compressed) FASTA size
    # plus 10 GB; confirm this covers the generated index for large genomes.
    Int disk_size_gb = ceil(input_fasta_size * 2) + 10 + modify_disk_size_gb
    String bwa_db_out_name = db_name + ".tar.gz"

    command <<<
        set -euo pipefail

        # Quoted so that file names containing spaces or glob characters are
        # kept intact.
        ref_fasta="~{basename(reference_fasta, ".gz")}"
        # Decompress if gzipped; otherwise fall back to linking the plain FASTA.
        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
            || ln -sf "~{reference_fasta}" "$ref_fasta"

        bwa-mem2 index \
            "$ref_fasta"

        # `-h` follows the symlink created for uncompressed inputs so the
        # archive stores the FASTA contents rather than a link that dangles
        # once extracted elsewhere.
        tar -czhf "~{bwa_db_out_name}" "$ref_fasta"*
    >>>

    output {
        File reference_index = bwa_db_out_name
    }

    requirements {
        container: "ghcr.io/stjudecloud/bwamem2:branch-minimap2-2.3-0"
        cpu: 1
        memory: "120 GB"
        disks: "~{disk_size_gb} GB"
    }
}
90 changes: 90 additions & 0 deletions tools/clair.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
version 1.2

# Run the Clair3 small-variant caller on an aligned BAM.
#
# The reference FASTA (gzipped or plain) and the BAM are staged into the
# working directory, where their `.fai` / `.bai` indexes are generated with
# samtools, because `run_clair3.sh` expects both indexes to sit next to the
# files it is given. The staged copies and indexes are removed afterwards so
# only the Clair3 output directory remains.
task clair3 {
    meta {
        description: "Run Clair3 variant caller for small variants using deep neural networks"
        outputs: {
            pileup_vcf: "VCF file with variants called using pileup model",
            full_alignment_vcf: "VCF file with variants called using full-alignment model",
            merged_vcf: "Final merged VCF file with variants from both models",
        }
    }

    parameter_meta {
        reference_fasta: "Reference genome in FASTA format"
        bam: "Input BAM file with aligned reads"
        model: "Pre-trained Clair3 model to use for variant calling"
        bed_regions: "Optional BED file specifying regions to call variants in"
        vcf_candidates: "Optional VCF file with candidate variants to consider"
        output_dir: "Directory to store Clair3 output"
        platform: {
            description: "Sequencing platform used to generate the reads",
            choices: [
                "ont",
                "hifi",
                "ilmn",
            ],
        }
        all_contigs: "Boolean indicating whether to include all contigs in variant calling. If false only chr{1..22,X,Y} are called."
        print_ref_calls: "Boolean indicating whether to print reference calls in the output VCF"
        gvcf: "Boolean indicating whether to output gVCF format"
        threads: "Number of threads to use"
        modify_disk_size_gb: "Additional disk size in GB to allocate"
    }

    input {
        File reference_fasta
        File bam
        # NOTE(review): Clair3 documents `--model_path` as a folder of model
        # files; confirm that passing a single `File` works as intended.
        File model
        File? bed_regions
        File? vcf_candidates
        String output_dir = "clair3_output"
        String platform = "ilmn"
        Boolean all_contigs = false
        Boolean print_ref_calls = false
        Boolean gvcf = false
        Int threads = 4
        Int modify_disk_size_gb = 0
    }

    # Disk estimate: twice the reference, the BAM once, plus fixed headroom.
    Int disk_size_gb = ceil(size(reference_fasta, "GiB") * 2)
        + ceil(size(bam, "GiB"))
        + 20
        + modify_disk_size_gb

    command <<<
        set -euo pipefail

        # Stage the reference: decompress if gzipped, otherwise link the plain
        # FASTA. Quoted so unusual file names are kept intact.
        ref_fasta="~{basename(reference_fasta, ".gz")}"
        gunzip -c "~{reference_fasta}" > "$ref_fasta" \
            || ln -sf "~{reference_fasta}" "$ref_fasta"
        # Clair3 aborts unless `<ref>.fai` exists next to the reference.
        samtools faidx "$ref_fasta"

        # Clair3 also aborts unless a BAM index sits next to the BAM. Link the
        # BAM into the working directory and index it there, so nothing is
        # written into the input location.
        bam_link="~{basename(bam)}"
        ln -sf "~{bam}" "$bam_link"
        samtools index -@ ~{threads} "$bam_link"

        run_clair3.sh \
            --bam_fn="$bam_link" \
            --ref_fn="$ref_fasta" \
            --threads="~{threads}" \
            --platform="~{platform}" \
            --model_path="~{model}" \
            --output="~{output_dir}" \
            ~{if all_contigs then "--include_all_ctgs" else ""} \
            ~{if print_ref_calls then "--print_ref_calls" else ""} \
            ~{if defined(bed_regions) then "--bed_fn='~{bed_regions}'" else ""} \
            ~{if defined(vcf_candidates) then "--vcf_fn='~{vcf_candidates}'" else ""} \
            ~{if gvcf then "--gvcf" else ""}

        # Remove the staged inputs and their indexes.
        rm -rf "$ref_fasta" "$ref_fasta.fai" "$bam_link" "$bam_link.bai"
    >>>

    output {
        File pileup_vcf = "~{output_dir}/pileup.vcf.gz"
        File full_alignment_vcf = "~{output_dir}/full_alignment.vcf.gz"
        File merged_vcf = "~{output_dir}/merge_output.vcf.gz"
    }

    requirements {
        container: "quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0"
        cpu: threads
        memory: "16 GB"
        disks: "~{disk_size_gb} GB"
    }
}
Loading
Loading