-
Notifications
You must be signed in to change notification settings - Fork 911
feat: samclip module #8999
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
feat: samclip module #8999
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the samclip module.
# NOTE(review): channel order presumably determines lookup priority - keep it as listed unless confirmed otherwise.
channels: | ||
- conda-forge | ||
- bioconda | ||
- nodefaults | ||
# Exact version pins: samclip itself plus the samtools build that the module's
# command pipeline (view / sort / fixmate) calls.
dependencies: | ||
- bioconda::samclip=0.4.0 | ||
- bioconda::samtools=1.22.1 |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,54 @@ | ||||||||
// SAMCLIP: streams a BAM through samclip against a reference FASTA and writes a
// re-sorted, mate-fixed BAM.
//
// Inputs:
//   meta, bam                            - sample map and the BAM to filter
//   meta2, reference, reference_index    - reference map, FASTA (optionally .gz) and its index
// Outputs:
//   bam       - tuple of meta and the produced BAM
//   versions  - versions.yml with the samclip and samtools versions
process SAMCLIP {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'oras://community.wave.seqera.io/library/samclip_samtools:7af2916e4ae6f461'
        : 'community.wave.seqera.io/library/samclip_samtools:00cc7aefd75be672'}"

    input:
    tuple val(meta), path(bam)
    tuple val(meta2), path(reference), path(reference_index)

    output:
    tuple val(meta), path("*.bam"), emit: bam
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // NOTE(review): a reviewer suggested an underscore-joined default ("<id>_samclip").
    // Changing the default renames the output file and invalidates the recorded
    // nf-test snapshot, so it is left unchanged here.
    def prefix = task.ext.prefix ?: "${meta.id}.samclip"
    def is_compressed = reference.getName().endsWith(".gz")
    def ref_filename = reference.getName().replaceAll(/\.gz$/, "")
    // samclip is given the uncompressed file name, so a gzipped reference is
    // unpacked first and the unpacked copy is removed once the pipeline is done.
    def decompress_ref = is_compressed ? "gzip -c -d ${reference} > ${ref_filename}" : ""
    def cleanup_ref = is_compressed ? "rm -f ${ref_filename}" : ""
    // The final redirect would otherwise write onto the staged input BAM.
    if ("${bam}" == "${prefix}.bam") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }
    // NOTE(review): the samtools/samclip command line below is intentionally kept
    // byte-identical. samtools presumably records its arguments in the BAM header,
    // which would change the BAM md5 stored in the test snapshot - confirm.
    // Follow-ups that need a snapshot refresh:
    //   * `-T /tmp` hard-codes a temp location outside the task work directory;
    //   * a reviewer asked whether the intermediate name-sorted stream could be
    //     uncompressed instead of BAM to save time.
    """
    # decompress reference if gzipped
    ${decompress_ref}

    samtools view -h --output-fmt sam ${bam} | \\
    samclip ${args} --ref ${ref_filename} | \\
    samtools sort -n -O BAM -T /tmp | \\
    samtools fixmate -m - - | \\
    samtools sort -O BAM > ${prefix}.bam

    # remove the decompressed reference copy, if one was made
    ${cleanup_ref}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samclip: \$(echo \$(samclip --version 2>&1) | sed 's/^.*samclip //g' )
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}.samclip"
    // Same name-collision guard as the real script, so stub runs fail the same way.
    if ("${bam}" == "${prefix}.bam") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }
    """
    touch ${prefix}.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samclip: \$(echo \$(samclip --version 2>&1) | sed 's/^.*samclip //g' )
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,70 @@ | ||||||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: samclip
description: Filters SAM file for soft and hard clipped alignments
keywords:
  - soft-clipped reads
  - hard-clipped reads
  - genomics
  - sam
tools:
  - samclip:
      description: Filters SAM file for soft and hard clipped alignments
      homepage: https://github.com/tseemann/samclip
      documentation: https://github.com/tseemann/samclip
      tool_dev_url: https://github.com/tseemann/samclip
      doi: "no DOI available"
      licence: ["GPL v3"]
      identifier: biotools:samclip

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1' ]`
    - bam:
        type: file
        description: BAM file
        pattern: "*.bam"
        ontologies:
          - edam: http://edamontology.org/format_2572 # BAM
  - - meta2:
        type: map
        description: |
          Groovy Map containing fasta reference information
          e.g. [ id:'test' ]
    - reference:
        type: file
        # gzip-compressed FASTA is accepted: main.nf decompresses a *.gz reference
        # before handing it to samclip.
        description: |
          reference FASTA file, optionally gzip-compressed
        pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
    - reference_index:
        type: file
        description: |
          reference FASTA file index
        pattern: "*.fai"
        ontologies: []
output:
  bam:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. [ id:'test']
      - "*.bam":
          type: file
          description: Filtered BAM
          pattern: "*.bam"
          ontologies:
            - edam: http://edamontology.org/format_2572 # BAM
  versions:
    - versions.yml:
        type: file
        description: File containing software versions
        pattern: versions.yml
        ontologies:
          - edam: http://edamontology.org/format_3750 # YAML
authors:
  - "@emmcauley"
maintainers:
  - "@emmcauley"
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,81 @@ | ||||
// nf-test suite for the SAMCLIP process: one real run and one stub run, both on
// the NA12878 chr22 BAM with the homo_sapiens test genome.
nextflow_process {

    name "Test Process SAMCLIP"
    script "../main.nf"
    process "SAMCLIP"

    tag "modules"
    tag "modules_nfcore"
    tag "samclip"

    test("test-data - NA12878.chr22.bam") {

        when {
            process {
                """
                // SAMCLIP declares `tuple val(meta), path(bam)`, so only the BAM is passed (no index).
                input[0] = [
                    [ id:'NA12878_chr22' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam', checkIfExists: true)
                ]

                input[1] = [
                    [ id:'chr22_ref' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("test-data - NA12878.chr22.bam - stub") {

        options "-stub"

        when {
            process {
                """
                // SAMCLIP declares `tuple val(meta), path(bam)`, so only the BAM is passed (no index).
                input[0] = [
                    [ id:'NA12878_chr22' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam', checkIfExists: true)
                ]

                input[1] = [
                    [ id:'chr22_ref' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                // The stub snapshot additionally records the parsed versions.yml content.
                { assert snapshot(
                    process.out,
                    process.out.versions.collect{ path(it).yaml }
                    ).match() }
            )
        }

    }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
{ | ||
"test-data - NA12878.chr22.bam": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "NA12878_chr22" | ||
}, | ||
"NA12878_chr22.samclip.bam:md5,d5b1f9b1420caf42922adb0fe1080f02" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,b47fb65111012518aa3353d09574787d" | ||
], | ||
"bam": [ | ||
[ | ||
{ | ||
"id": "NA12878_chr22" | ||
}, | ||
"NA12878_chr22.samclip.bam:md5,d5b1f9b1420caf42922adb0fe1080f02" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,b47fb65111012518aa3353d09574787d" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "25.04.6" | ||
}, | ||
"timestamp": "2025-09-14T14:22:07.416493" | ||
}, | ||
"test-data - NA12878.chr22.bam - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "NA12878_chr22" | ||
}, | ||
"NA12878_chr22.samclip.bam:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,b47fb65111012518aa3353d09574787d" | ||
], | ||
"bam": [ | ||
[ | ||
{ | ||
"id": "NA12878_chr22" | ||
}, | ||
"NA12878_chr22.samclip.bam:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,b47fb65111012518aa3353d09574787d" | ||
] | ||
}, | ||
[ | ||
{ | ||
"SAMCLIP": { | ||
"samclip": "0.4.0", | ||
"samtools": "1.22.1" | ||
} | ||
} | ||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "25.04.6" | ||
}, | ||
"timestamp": "2025-09-14T14:05:27.817109" | ||
} | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can delete this file now that you are no longer using samtools_view.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Process-scoped settings: extra command-line arguments for SAMTOOLS_VIEW.
// NOTE(review): the selector names SAMTOOLS_VIEW, while the module reviewed on
// this page is SAMCLIP - this looks like a leftover; confirm nothing includes
// SAMTOOLS_VIEW and delete the file.
process {
    withName: 'SAMTOOLS_VIEW' {
        ext.args = '-h --output-fmt bam'
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think most modules use an underscore to join the extra part of the prefix: