Skip to content

Commit

Permalink
Merge pull request #6328 from fubar2/sniffles
Browse files Browse the repository at this point in the history
Update Sniffles from 1.0.12 to 2.4
  • Loading branch information
bgruening authored Sep 24, 2024
2 parents bf308a3 + 3d2fd0d commit d2d7bf4
Show file tree
Hide file tree
Showing 6 changed files with 366 additions and 318 deletions.
249 changes: 92 additions & 157 deletions tools/sniffles/sniffles.xml

Large diffs are not rendered by default.

87 changes: 55 additions & 32 deletions tools/sniffles/test-data/expected_outcome3.vcf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
##fileformat=VCFv4.1
##source=Sniffles
##fileDate=20200901:51:47 AMef_minus
##fileformat=VCFv4.2
##source=Sniffles2_2.4
##command="/home/ross/miniconda3/envs/[email protected]/bin/sniffles -t 1 -i input.bam -v /tmp/tmpxu4n4sep/job_working_directory/000/6/outputs/dataset_424b7739-58c9-4942-8a28-964803e1e0e7.dat --minsupport 1 --max-splits-kb 0.1 --minsvlen 50 --mapq 20 --min-alignment-length 100 --cluster-binsize 100 --cluster-r 2.5 --allow-overwrite"
##fileDate="2024/09/14 14:16:19"
##contig=<ID=1,length=249250621>
##contig=<ID=2,length=243199373>
##contig=<ID=3,length=198022430>
Expand Down Expand Up @@ -87,36 +88,58 @@
##contig=<ID=GL000192.1,length=547496>
##contig=<ID=NC_007605,length=171823>
##contig=<ID=hs37d5,length=35477943>
##ALT=<ID=INS,Description="Insertion">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
##ALT=<ID=TRA,Description="Translocation">
##ALT=<ID=INS,Description="Insertion">
##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -.">
##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref.">
##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias.">
##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)">
##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
##ALT=<ID=BND,Description="Breakend; Translocation">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam
21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=DR,Number=1,Type=Integer,Description="Number of reference reads">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of variant reads">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase-block, zero if none or not phased">
##FORMAT=<ID=ID,Number=1,Type=String,Description="Individual sample SV ID for multi-sample output">
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=GT,Description="Genotype filter">
##FILTER=<ID=SUPPORT_MIN,Description="Minimum read support filter">
##FILTER=<ID=STDEV_POS,Description="SV Breakpoint standard deviation filter">
##FILTER=<ID=STDEV_LEN,Description="SV length standard deviation filter">
##FILTER=<ID=COV_MIN,Description="Minimum coverage filter">
##FILTER=<ID=COV_MIN_GT,Description="Minimum coverage filter (missing genotype)">
##FILTER=<ID=COV_CHANGE,Description="Coverage change filter">
##FILTER=<ID=COV_CHANGE_INS,Description="Coverage change filter for INS">
##FILTER=<ID=COV_CHANGE_FRAC_US,Description="Coverage fractional change filter: upstream-start">
##FILTER=<ID=COV_CHANGE_FRAC_SC,Description="Coverage fractional change filter: start-center">
##FILTER=<ID=COV_CHANGE_FRAC_CE,Description="Coverage fractional change filter: center-end">
##FILTER=<ID=COV_CHANGE_FRAC_ED,Description="Coverage fractional change filter: end-downstream">
##FILTER=<ID=MOSAIC_AF,Description="Mosaic variant allele frequency filter">
##FILTER=<ID=NOT_MOSAIC_AF,Description="Variant allele frequency filter for non-mosaic">
##FILTER=<ID=ALN_NM,Description="Length adjusted mismatch filter">
##FILTER=<ID=STRAND_BND,Description="Strand support filter for BNDs">
##FILTER=<ID=STRAND,Description="Strand support filter for germline SVs">
##FILTER=<ID=STRAND_MOSAIC,Description="Strand support filter for mosaic SVs">
##FILTER=<ID=SVLEN_MIN,Description="SV length filter">
##FILTER=<ID=SVLEN_MIN_MOSAIC,Description="SV length filter for mosaic SVs">
##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Structural variation with precise breakpoints">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Structural variation with imprecise breakpoints">
##INFO=<ID=MOSAIC,Number=0,Type=Flag,Description="Structural variation classified as putative mosaic">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of structural variation">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variation">
##INFO=<ID=CHR2,Number=1,Type=String,Description="Mate chromsome for BND SVs">
##INFO=<ID=SUPPORT,Number=1,Type=Integer,Description="Number of reads supporting the structural variation">
##INFO=<ID=SUPPORT_INLINE,Number=1,Type=Integer,Description="Number of reads supporting an INS/DEL SV (non-split events only)">
##INFO=<ID=SUPPORT_LONG,Number=1,Type=Integer,Description="Number of soft-clipped reads putatively supporting the long insertion SV">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of structural variation">
##INFO=<ID=STDEV_POS,Number=1,Type=Float,Description="Standard deviation of structural variation start position">
##INFO=<ID=STDEV_LEN,Number=1,Type=Float,Description="Standard deviation of structural variation length">
##INFO=<ID=COVERAGE,Number=.,Type=Float,Description="Coverages near upstream, start, center, end, downstream of structural variation">
##INFO=<ID=STRAND,Number=1,Type=String,Description="Strands of supporting reads for structural variant">
##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count, summed up over all samples">
##INFO=<ID=SUPP_VEC,Number=1,Type=String,Description="List of read support for all samples">
##INFO=<ID=CONSENSUS_SUPPORT,Number=1,Type=Integer,Description="Number of reads that support the generated insertion (INS) consensus sequence">
##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of supporting reads (if enabled with --output-rnames)">
##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency">
##INFO=<ID=NM,Number=.,Type=Float,Description="Mean number of query alignment length adjusted mismatches of supporting reads">
##INFO=<ID=PHASE,Number=.,Type=String,Description="Phasing information derived from supporting reads, represented as list of: HAPLOTYPE,PHASESET,HAPLOTYPE_SUPPORT,PHASESET_SUPPORT,HAPLOTYPE_FILTER,PHASESET_FILTER">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
21 21492142 Sniffles2.DEL.0S14 N <DEL> 52 PASS PRECISE;SVTYPE=DEL;SVLEN=-506;END=21492648;SUPPORT=48;COVERAGE=46,43,43,43,48;STRAND=+-;AF=1.000;STDEV_LEN=0.509;STDEV_POS=1.404 GT:GQ:DR:DV 1/1:60:0:48
87 changes: 55 additions & 32 deletions tools/sniffles/test-data/expected_outcome4.vcf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
##fileformat=VCFv4.1
##source=Sniffles
##fileDate=20200901:51:57 AMef_minus
##fileformat=VCFv4.2
##source=Sniffles2_2.4
##command="/home/ross/miniconda3/envs/[email protected]/bin/sniffles -t 1 -i input.bam -v /tmp/tmpxu4n4sep/job_working_directory/000/8/outputs/dataset_b4585ddd-d52d-4087-9461-cb14a87c00d4.dat --minsupport auto --max-splits-kb 0.1 --minsvlen 50 --mapq 20 --min-alignment-length 100 --cluster-binsize 5 --cluster-r 2.5 --allow-overwrite"
##fileDate="2024/09/14 14:16:58"
##contig=<ID=1,length=249250621>
##contig=<ID=2,length=243199373>
##contig=<ID=3,length=198022430>
Expand Down Expand Up @@ -87,36 +88,58 @@
##contig=<ID=GL000192.1,length=547496>
##contig=<ID=NC_007605,length=171823>
##contig=<ID=hs37d5,length=35477943>
##ALT=<ID=INS,Description="Insertion">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
##ALT=<ID=TRA,Description="Translocation">
##ALT=<ID=INS,Description="Insertion">
##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -.">
##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref.">
##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias.">
##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)">
##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
##ALT=<ID=BND,Description="Breakend; Translocation">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT input.bam
21 21492143 0 AAAATATGTTTTAAATTGTTGATGATTTCAAATATTACAGGAATAGAAACTTTAACTTAACACAGAATGATTATCTGGCTTCCTTCTGTAAAATATCTTAAAGGTTAATGTGGATTTGAATTGCACAACATTCCAAATGCTTCTCCCCCTTTAAAAAGAATAGTCTTATCTTTTAAAAAGAATACTCATATCTTTTATTTTTCTTATGCAAGAGCAAAAATAAGGAAAAAATATATTATTCAGGAGAATCATGGCAACAATTTAAGGAAGACAAAACCAGTCTTTAGCAACCAGTATACATATATATCATCTTTTTTTCTGCTTTAGGGTAGGTTGCTTCTATCACCAACCTGTTCCAAATCCTCCTCTTACATGCACCATTAAAACATACTCTTTCAAAAACGAGGTGATAAAATCACAAATATCAATCTATCGTTCAGAAGAAGGTACCTTTATTTTACCTTAAAGGAATTTGATATATAATGGAGAAAAGAAAATTACTTTCT N . PASS PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=21;END=21492649;STD_quant_start=0.000000;STD_quant_stop=0.000000;Kurtosis_quant_start=0.572582;Kurtosis_quant_stop=1.417662;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-506;STRANDS=+-;STRANDS2=22,26,22,26;RE=48;REF_strand=19,24;Strandbias_pval=1;AF=0.527473 GT:DR:DV 0/1:43:48
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=DR,Number=1,Type=Integer,Description="Number of reference reads">
##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of variant reads">
##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Phase-block, zero if none or not phased">
##FORMAT=<ID=ID,Number=1,Type=String,Description="Individual sample SV ID for multi-sample output">
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=GT,Description="Genotype filter">
##FILTER=<ID=SUPPORT_MIN,Description="Minimum read support filter">
##FILTER=<ID=STDEV_POS,Description="SV Breakpoint standard deviation filter">
##FILTER=<ID=STDEV_LEN,Description="SV length standard deviation filter">
##FILTER=<ID=COV_MIN,Description="Minimum coverage filter">
##FILTER=<ID=COV_MIN_GT,Description="Minimum coverage filter (missing genotype)">
##FILTER=<ID=COV_CHANGE,Description="Coverage change filter">
##FILTER=<ID=COV_CHANGE_INS,Description="Coverage change filter for INS">
##FILTER=<ID=COV_CHANGE_FRAC_US,Description="Coverage fractional change filter: upstream-start">
##FILTER=<ID=COV_CHANGE_FRAC_SC,Description="Coverage fractional change filter: start-center">
##FILTER=<ID=COV_CHANGE_FRAC_CE,Description="Coverage fractional change filter: center-end">
##FILTER=<ID=COV_CHANGE_FRAC_ED,Description="Coverage fractional change filter: end-downstream">
##FILTER=<ID=MOSAIC_AF,Description="Mosaic variant allele frequency filter">
##FILTER=<ID=NOT_MOSAIC_AF,Description="Variant allele frequency filter for non-mosaic">
##FILTER=<ID=ALN_NM,Description="Length adjusted mismatch filter">
##FILTER=<ID=STRAND_BND,Description="Strand support filter for BNDs">
##FILTER=<ID=STRAND,Description="Strand support filter for germline SVs">
##FILTER=<ID=STRAND_MOSAIC,Description="Strand support filter for mosaic SVs">
##FILTER=<ID=SVLEN_MIN,Description="SV length filter">
##FILTER=<ID=SVLEN_MIN_MOSAIC,Description="SV length filter for mosaic SVs">
##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Structural variation with precise breakpoints">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Structural variation with imprecise breakpoints">
##INFO=<ID=MOSAIC,Number=0,Type=Flag,Description="Structural variation classified as putative mosaic">
##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of structural variation">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variation">
##INFO=<ID=CHR2,Number=1,Type=String,Description="Mate chromsome for BND SVs">
##INFO=<ID=SUPPORT,Number=1,Type=Integer,Description="Number of reads supporting the structural variation">
##INFO=<ID=SUPPORT_INLINE,Number=1,Type=Integer,Description="Number of reads supporting an INS/DEL SV (non-split events only)">
##INFO=<ID=SUPPORT_LONG,Number=1,Type=Integer,Description="Number of soft-clipped reads putatively supporting the long insertion SV">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of structural variation">
##INFO=<ID=STDEV_POS,Number=1,Type=Float,Description="Standard deviation of structural variation start position">
##INFO=<ID=STDEV_LEN,Number=1,Type=Float,Description="Standard deviation of structural variation length">
##INFO=<ID=COVERAGE,Number=.,Type=Float,Description="Coverages near upstream, start, center, end, downstream of structural variation">
##INFO=<ID=STRAND,Number=1,Type=String,Description="Strands of supporting reads for structural variant">
##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count, summed up over all samples">
##INFO=<ID=SUPP_VEC,Number=1,Type=String,Description="List of read support for all samples">
##INFO=<ID=CONSENSUS_SUPPORT,Number=1,Type=Integer,Description="Number of reads that support the generated insertion (INS) consensus sequence">
##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of supporting reads (if enabled with --output-rnames)">
##INFO=<ID=AF,Number=1,Type=Float,Description="Allele Frequency">
##INFO=<ID=NM,Number=.,Type=Float,Description="Mean number of query alignment length adjusted mismatches of supporting reads">
##INFO=<ID=PHASE,Number=.,Type=String,Description="Phasing information derived from supporting reads, represented as list of: HAPLOTYPE,PHASESET,HAPLOTYPE_SUPPORT,PHASESET_SUPPORT,HAPLOTYPE_FILTER,PHASESET_FILTER">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
21 21492142 Sniffles2.DEL.1S14 N <DEL> 52 PASS PRECISE;SVTYPE=DEL;SVLEN=-506;END=21492648;SUPPORT=47;COVERAGE=48,43,43,43,51;STRAND=+-;AF=1.000;STDEV_LEN=0.500;STDEV_POS=1.384 GT:GQ:DR:DV 1/1:60:0:47
Loading

0 comments on commit d2d7bf4

Please sign in to comment.