Skip to content

Commit

Permalink
update for vcfpolyx (#6641)
Browse files Browse the repository at this point in the history
* update for vcfpolyx

* fix #6641 (comment)

* remove default param

* add test with bed

* prevent md5sum for empty file
  • Loading branch information
lindenb authored Sep 17, 2024
1 parent 7c316ca commit d5a553e
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 31 deletions.
1 change: 1 addition & 0 deletions modules/nf-core/jvarkit/vcfpolyx/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ channels:
- conda-forge
dependencies:
# Package specs use the conda "channel::package=version" form (double colon).
- "bioconda::jvarkit=2024.08.25"
- "bioconda::bcftools=1.20"
41 changes: 28 additions & 13 deletions modules/nf-core/jvarkit/vcfpolyx/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process JVARKIT_VCFPOLYX {
'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }"

input:
tuple val(meta), path(vcf)
tuple val(meta), path(vcf), path(tbi), path(regions_file)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(dict)
Expand All @@ -23,24 +23,28 @@ process JVARKIT_VCFPOLYX {
task.ext.when == null || task.ext.when

script:
def args1 = task.ext.args1 ?: ''
def args2 = meta.vcfpolyx_args ?: (task.ext.args2 ?: ' --tag POLYX --max-repeats 10 ')
def args3 = task.ext.args3 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def args1 = task.ext.args1 ?: ''
def args2 = task.ext.args2 ?: ''
def args3 = task.ext.args3 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def regions_cmd = regions_file ? (tbi ? " --regions-file" : " --targets-file") + " '${regions_file}' " : ""

extension = args3.contains("--output-type b") || args3.contains("-Ob") ? "bcf.gz" :
args3.contains("--output-type u") || args3.contains("-Ou") ? "bcf" :
args3.contains("--output-type z") || args3.contains("-Oz") ? "vcf.gz" :
args3.contains("--output-type v") || args3.contains("-Ov") ? "vcf" :
"vcf"
extension = getVcfExtension(args3); /* custom function, see below */

if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
"""
mkdir -p TMP
bcftools view -O v ${args1} "${vcf}" |\\
jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx --reference "${fasta}" ${args2} |\\
bcftools view --output "${prefix}.${extension}" ${args3}
bcftools view -O v \\
${regions_cmd} \\
${args1} \\
"${vcf}" |\\
jvarkit -Xmx${task.memory.giga}g -XX:-UsePerfData -Djava.io.tmpdir=TMP vcfpolyx \\
--reference "${fasta}" \\
${args2} |\\
bcftools view \\
--output "${prefix}.${extension}" \\
${args3}
rm -rf TMP
Expand All @@ -53,6 +57,8 @@ process JVARKIT_VCFPOLYX {

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def args3 = task.ext.args3 ?: ''
extension = getVcfExtension(args3); /* custom function, see below */
"""
touch "${prefix}.${extension}"
Expand All @@ -63,3 +69,12 @@ process JVARKIT_VCFPOLYX {
END_VERSIONS
"""
}

/**
 * Derive the output file extension from the arguments given to the final
 * `bcftools view` call.
 *
 * The output type is recognised in the spellings `-Ox`, `-O x`,
 * `--output-type x` and `--output-type=x`, where x is one of b, u, z, v.
 * When several output types are present the precedence is b, u, z, v
 * (not the order of appearance).
 *
 * @param args the bcftools argument string (may be empty or null)
 * @return "bcf.gz" for b, "bcf" for u, "vcf.gz" for z, "vcf" for v;
 *         "vcf" when no output type is specified
 */
String getVcfExtension(String args) {
    // No arguments at all: fall back to the plain-text VCF extension.
    if (!args) {
        return "vcf"
    }

    // Insertion order of this map defines the precedence b > u > z > v.
    def extensionByType = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"]

    // Short option with or without a space, long option with a space or '='.
    def optionPrefix = "(?:-O\\s*|--output-type(?:=|\\s+))"

    def hit = extensionByType.find { type, ext ->
        (args =~ (optionPrefix + type)).find()
    }
    return hit ? hit.value : "vcf"
}
25 changes: 16 additions & 9 deletions modules/nf-core/jvarkit/vcfpolyx/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ description: annotate VCF files for poly repeats
keywords:
- vcf
- bcf
- variant
- annotation
- repeats
- polyx
tools:
- "jvarkit":
description: "Java utilities for Bioinformatics."
Expand All @@ -15,6 +17,7 @@ tools:
tool_dev_url: "https://github.com/lindenb/jvarkit"
doi: "10.6084/m9.figshare.1425030"
licence: ["MIT License"]
args_id: "$args2"

- "bcftools":
description: |
Expand All @@ -23,48 +26,52 @@ tools:
documentation: "http://www.htslib.org/doc/bcftools.html"
doi: "10.1093/bioinformatics/btp352"
licence: ["MIT"]

args_id: ["$args1", "$args3"]
input:
- meta:
type: map
description: |
Groovy Map containing VCF information
e.g. [ id:'test_reference' ]
- meta2:
type: map
description: |
Groovy Map containing fasta information
e.g. [ id:'test_reference' ]
- meta3:
type: map
description: |
Groovy Map containing fasta.fai information
e.g. [ id:'test_reference' ]
- meta4:
type: map
description: |
Groovy Map containing fasta.dict information
e.g. [ id:'test_reference' ]
- vcf:
type: file
description: Input VCF/BCF file to annotate
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"

- tbi:
type: file
description: Optional VCF/BCF index file
pattern: "*.{tbi,csi}"
- regions_file:
type: file
description: Optional. Restrict to regions listed in a file
pattern: "*.{bed,bed.gz,txt,tsv}"
- fasta:
type: file
description: Reference genome FASTA file
pattern: "*.fasta"

- fai:
type: file
description: FASTA index (.fai) of the reference genome
pattern: "*.fasta.fai"

- dict:
type: file
description: Sequence dictionary (.dict) of the reference genome
pattern: "*.dict"

output:
- meta:
type: map
Expand Down
71 changes: 65 additions & 6 deletions modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ nextflow_process {
name "Test Process JVARKIT_VCFPOLYX"
script "../main.nf"
process "JVARKIT_VCFPOLYX"

config "./nextflow.config"

tag "modules"
tag "modules_nfcore"
tag "jvarkit"
Expand All @@ -17,7 +18,9 @@ nextflow_process {
"""
input[0] =[
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[],
[]
]
input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
Expand All @@ -30,14 +33,70 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(
path(process.out.vcf[0][1]).vcf.variantsMD5,
process.out.versions
).match()
}
path(process.out.vcf[0][1]).vcf.variantsMD5,
process.out.versions
).match()
}
)
}

}


// Runs the process on a plain VCF together with a BED file of regions.
// The index slot of the first input tuple is left empty ([]).
test("sarscov2 - vcf+bed") {

// Inputs: [meta, vcf, index (empty), regions BED], then fasta, fai and dict.
when {
process {
"""
input[0] =[
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[],
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
]
input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]
"""
}
}

// Only the existence of the output VCF is checked; its content is not
// snapshotted. The versions channel is compared against the snapshot.
then {
assertAll(
{ assert process.success },
{ assert file(process.out.vcf[0][1]).exists() },
{ assert snapshot(process.out.versions).match() }
)
}

}

// Runs the process with the "-stub" option on a plain VCF, with neither an
// index nor a regions file (both slots are empty lists).
test("sarscov2 - vcf - stub") {

options "-stub"

// Inputs: [meta, vcf, index (empty), regions (empty)], then fasta, fai and dict.
when {
process {
"""
input[0] =[
[id:"vcf_test"],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
[],
[]
]
input[1] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
input[2] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
input[3] = [ [:] , file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]
"""
}
}

// The output file path and the versions channel are both snapshotted.
then {
assertAll(
{ assert process.success },
{ assert snapshot(path(process.out.vcf[0][1]), process.out.versions).match() }
)
}

}
}
31 changes: 28 additions & 3 deletions modules/nf-core/jvarkit/vcfpolyx/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,39 @@
"sarscov2 - vcf": {
"content": [
"65a03a6057dc74467c2b7b17230e7f14",
[
"versions.yml:md5,b3c351a56da9062295ef90011a9cd48c"
]
[
"versions.yml:md5,b3c351a56da9062295ef90011a9cd48c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},
"sarscov2 - vcf+bed": {
"content": [
[
"versions.yml:md5,b3c351a56da9062295ef90011a9cd48c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},
"sarscov2 - vcf - stub": {
"content": [
"vcf_test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"versions.yml:md5,b3c351a56da9062295ef90011a9cd48c"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-13T17:17:41.14805166"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/jvarkit/vcfpolyx/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test configuration for the JVARKIT_VCFPOLYX module.
process {
    withName: 'JVARKIT_VCFPOLYX' {
        // args2 is the option string the module inserts into its
        // `jvarkit vcfpolyx` command line.
        ext.args2 = " --tag POLYX --max-repeats 10 "
    }
}

0 comments on commit d5a553e

Please sign in to comment.