From ee31055caba89b2e6565a7a22ab3cf1dadaeb84c Mon Sep 17 00:00:00 2001 From: Mootor Date: Tue, 9 Jul 2024 16:00:30 -0700 Subject: [PATCH 01/13] initial code for wrapper based on Battenberg v2.2.9 battenberg_wgs.R --- battenberg_wgs.R | 103 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 battenberg_wgs.R diff --git a/battenberg_wgs.R b/battenberg_wgs.R new file mode 100644 index 0000000..9d6dd44 --- /dev/null +++ b/battenberg_wgs.R @@ -0,0 +1,103 @@ +############################################################################### +# A pure R Battenberg v2.2.9 WGS pipeline implementation. +############################################################################### +library(Battenberg); +library(optparse); + +option_list = list( + make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), + make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), + make_option(c("--tb"), type="character", default=NULL, help="Tumour BAM file", metavar="character"), + make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), + make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), + make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), + make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), + make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. 
This expects the files on disk", metavar="character"), + make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), + make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character") + ); + +opt_parser = OptionParser(option_list=option_list); +opt = parse_args(opt_parser); + +TUMOURNAME = opt$tumourname; +NORMALNAME = opt$normalname; +NORMALBAM = opt$nb; +TUMOURBAM = opt$tb; +IS.MALE = opt$sex=="male" | opt$sex=="Male"; +RUN_DIR = opt$output; +SKIP_ALLELECOUNTING = opt$skip_allelecount; +SKIP_PREPROCESSING = opt$skip_preprocessing; +SKIP_PHASING = opt$skip_phasing; +NTHREADS = opt$cpu; +PRIOR_BREAKPOINTS_FILE = opt$bp; + +# General static +IMPUTEINFOFILE = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_impute_v3/impute_info.txt" +G1000PREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr" +G1000PREFIX_AC = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr" +GCCORRECTPREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_wgs_gc_correction_1000g_v3_noNA/1000_genomes_GC_corr_chr_" +REPLICCORRECTPREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_" +IMPUTE_EXE = "impute2" + +PLATFORM_GAMMA = 1 +PHASING_GAMMA = 1 +SEGMENTATION_GAMMA = 10 +SEGMENTATIIN_KMIN = 3 +PHASING_KMIN = 1 +CLONALITY_DIST_METRIC = 0 +ASCAT_DIST_METRIC = 1 +MIN_PLOIDY = 1.6 +MAX_PLOIDY = 4.8 +MIN_RHO = 0.1 
+MIN_GOODNESS_OF_FIT = 0.63 +BALANCED_THRESHOLD = 0.51 +MIN_NORMAL_DEPTH = 10 +MIN_BASE_QUAL = 20 +MIN_MAP_QUAL = 35 +CALC_SEG_BAF_OPTION = 3 + +# WGS specific static +ALLELECOUNTER = "alleleCounter" +PROBLEMLOCI = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_probloci/probloci_270415.txt.gz" + +# Change to work directory and load the chromosome information +setwd(RUN_DIR) + +battenberg(tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURBAM, + normal_data_file=NORMALBAM, + ismale=IS.MALE, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + g1000allelesprefix=G1000PREFIX_AC, + gccorrectprefix=GCCORRECTPREFIX, + repliccorrectprefix=REPLICCORRECTPREFIX, + problemloci=PROBLEMLOCI, + data_type="wgs", + impute_exe=IMPUTE_EXE, + allelecounter_exe=ALLELECOUNTER, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATIIN_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + min_normal_depth=MIN_NORMAL_DEPTH, + min_base_qual=MIN_BASE_QUAL, + min_map_qual=MIN_MAP_QUAL, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_allele_counting=SKIP_ALLELECOUNTING, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING, + prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE + ) From 42c7d33260693050b4062a6edd8afe0de08357fb Mon Sep 17 00:00:00 2001 From: Mootor Date: Tue, 9 Jul 2024 16:01:36 -0700 Subject: [PATCH 02/13] fix basic linting --- battenberg_wgs.R | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index 9d6dd44..b34160e 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -34,36 +34,36 @@ 
NTHREADS = opt$cpu; PRIOR_BREAKPOINTS_FILE = opt$bp; # General static -IMPUTEINFOFILE = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_impute_v3/impute_info.txt" -G1000PREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesAlleles2012_chr" -G1000PREFIX_AC = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_1000genomesloci2012_v3/1000genomesloci2012_chr" -GCCORRECTPREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_wgs_gc_correction_1000g_v3_noNA/1000_genomes_GC_corr_chr_" -REPLICCORRECTPREFIX = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_" -IMPUTE_EXE = "impute2" +IMPUTEINFOFILE = "/opt/battenberg_reference/impute_info.txt"; +G1000PREFIX = "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr"; +G1000PREFIX_AC = "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr"; +GCCORRECTPREFIX = "/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr"; +REPLICCORRECTPREFIX = "/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr"; +IMPUTE_EXE = "impute2"; -PLATFORM_GAMMA = 1 -PHASING_GAMMA = 1 -SEGMENTATION_GAMMA = 10 -SEGMENTATIIN_KMIN = 3 -PHASING_KMIN = 1 -CLONALITY_DIST_METRIC = 0 -ASCAT_DIST_METRIC = 1 -MIN_PLOIDY = 1.6 -MAX_PLOIDY = 4.8 -MIN_RHO = 0.1 -MIN_GOODNESS_OF_FIT = 0.63 -BALANCED_THRESHOLD = 0.51 -MIN_NORMAL_DEPTH = 10 -MIN_BASE_QUAL = 20 -MIN_MAP_QUAL = 35 -CALC_SEG_BAF_OPTION = 3 +PLATFORM_GAMMA = 1; +PHASING_GAMMA = 1; +SEGMENTATION_GAMMA = 10; +SEGMENTATIIN_KMIN = 3; +PHASING_KMIN = 1; +CLONALITY_DIST_METRIC = 0; +ASCAT_DIST_METRIC = 1; +MIN_PLOIDY = 1.6; +MAX_PLOIDY = 4.8; +MIN_RHO = 0.1; +MIN_GOODNESS_OF_FIT = 0.63; +BALANCED_THRESHOLD = 0.51; +MIN_NORMAL_DEPTH = 10; +MIN_BASE_QUAL = 20; +MIN_MAP_QUAL = 35; 
+CALC_SEG_BAF_OPTION = 3; # WGS specific static -ALLELECOUNTER = "alleleCounter" -PROBLEMLOCI = "/lustre/scratch117/casm/team219/sd11/reference/GenomeFiles/battenberg_probloci/probloci_270415.txt.gz" +ALLELECOUNTER = "alleleCounter"; +PROBLEMLOCI = "/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz"; # Change to work directory and load the chromosome information -setwd(RUN_DIR) +setwd(RUN_DIR); battenberg(tumourname=TUMOURNAME, normalname=NORMALNAME, @@ -100,4 +100,4 @@ battenberg(tumourname=TUMOURNAME, skip_preprocessing=SKIP_PREPROCESSING, skip_phasing=SKIP_PHASING, prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE - ) + ); From 9298514bcd7c8ddd7312a520bf60563c554ff760 Mon Sep 17 00:00:00 2001 From: Mootor Date: Tue, 9 Jul 2024 16:32:39 -0700 Subject: [PATCH 03/13] parameterize min/max ploidy and rho --- battenberg_wgs.R | 86 ++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index b34160e..8f49bbe 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -15,7 +15,11 @@ option_list = list( make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. 
This expects the files on disk", metavar="character"), make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), - make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character") + make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), + make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), + make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), + make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), + make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character") ); opt_parser = OptionParser(option_list=option_list); @@ -32,6 +36,10 @@ SKIP_PREPROCESSING = opt$skip_preprocessing; SKIP_PHASING = opt$skip_phasing; NTHREADS = opt$cpu; PRIOR_BREAKPOINTS_FILE = opt$bp; +MIN_PLOIDY = opt$min_ploidy; +MAX_PLOIDY = opt$max_ploidy; +MIN_RHO = opt$min_rho; +MAX_RHO = opt$max_rho; # General static IMPUTEINFOFILE = "/opt/battenberg_reference/impute_info.txt"; @@ -48,9 +56,6 @@ SEGMENTATIIN_KMIN = 3; PHASING_KMIN = 1; CLONALITY_DIST_METRIC = 0; ASCAT_DIST_METRIC = 1; -MIN_PLOIDY = 1.6; -MAX_PLOIDY = 4.8; -MIN_RHO = 0.1; MIN_GOODNESS_OF_FIT = 0.63; BALANCED_THRESHOLD = 0.51; MIN_NORMAL_DEPTH = 10; @@ -65,39 +70,40 @@ PROBLEMLOCI = "/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz # Change to work directory and load the chromosome information setwd(RUN_DIR); -battenberg(tumourname=TUMOURNAME, - normalname=NORMALNAME, - tumour_data_file=TUMOURBAM, - 
normal_data_file=NORMALBAM, - ismale=IS.MALE, - imputeinfofile=IMPUTEINFOFILE, - g1000prefix=G1000PREFIX, - g1000allelesprefix=G1000PREFIX_AC, - gccorrectprefix=GCCORRECTPREFIX, - repliccorrectprefix=REPLICCORRECTPREFIX, - problemloci=PROBLEMLOCI, - data_type="wgs", - impute_exe=IMPUTE_EXE, - allelecounter_exe=ALLELECOUNTER, - nthreads=NTHREADS, - platform_gamma=PLATFORM_GAMMA, - phasing_gamma=PHASING_GAMMA, - segmentation_gamma=SEGMENTATION_GAMMA, - segmentation_kmin=SEGMENTATIIN_KMIN, - phasing_kmin=PHASING_KMIN, - clonality_dist_metric=CLONALITY_DIST_METRIC, - ascat_dist_metric=ASCAT_DIST_METRIC, - min_ploidy=MIN_PLOIDY, - max_ploidy=MAX_PLOIDY, - min_rho=MIN_RHO, - min_goodness=MIN_GOODNESS_OF_FIT, - uninformative_BAF_threshold=BALANCED_THRESHOLD, - min_normal_depth=MIN_NORMAL_DEPTH, - min_base_qual=MIN_BASE_QUAL, - min_map_qual=MIN_MAP_QUAL, - calc_seg_baf_option=CALC_SEG_BAF_OPTION, - skip_allele_counting=SKIP_ALLELECOUNTING, - skip_preprocessing=SKIP_PREPROCESSING, - skip_phasing=SKIP_PHASING, - prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE - ); +battenberg( + tumourname=TUMOURNAME, + normalname=NORMALNAME, + tumour_data_file=TUMOURBAM, + normal_data_file=NORMALBAM, + ismale=IS.MALE, + imputeinfofile=IMPUTEINFOFILE, + g1000prefix=G1000PREFIX, + g1000allelesprefix=G1000PREFIX_AC, + gccorrectprefix=GCCORRECTPREFIX, + repliccorrectprefix=REPLICCORRECTPREFIX, + problemloci=PROBLEMLOCI, + data_type="wgs", + impute_exe=IMPUTE_EXE, + allelecounter_exe=ALLELECOUNTER, + nthreads=NTHREADS, + platform_gamma=PLATFORM_GAMMA, + phasing_gamma=PHASING_GAMMA, + segmentation_gamma=SEGMENTATION_GAMMA, + segmentation_kmin=SEGMENTATIIN_KMIN, + phasing_kmin=PHASING_KMIN, + clonality_dist_metric=CLONALITY_DIST_METRIC, + ascat_dist_metric=ASCAT_DIST_METRIC, + min_ploidy=MIN_PLOIDY, + max_ploidy=MAX_PLOIDY, + min_rho=MIN_RHO, + min_goodness=MIN_GOODNESS_OF_FIT, + uninformative_BAF_threshold=BALANCED_THRESHOLD, + min_normal_depth=MIN_NORMAL_DEPTH, + min_base_qual=MIN_BASE_QUAL, + 
min_map_qual=MIN_MAP_QUAL, + calc_seg_baf_option=CALC_SEG_BAF_OPTION, + skip_allele_counting=SKIP_ALLELECOUNTING, + skip_preprocessing=SKIP_PREPROCESSING, + skip_phasing=SKIP_PHASING, + prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE + ); From fa2cc1df831d103eb261fc6a34e03272958c5575 Mon Sep 17 00:00:00 2001 From: Mootor Date: Tue, 9 Jul 2024 18:34:43 -0700 Subject: [PATCH 04/13] add gamma parameters --- battenberg_wgs.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index 8f49bbe..ec15e17 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -19,7 +19,15 @@ option_list = list( make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), - make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character") + make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character"), + make_option(c("--platform_gamma"), type="numeric", default=1, help="Platform specific gamma value (0.55 for SNP6, 1 for NGS)", metavar="character"), + make_option(c("--phasing_gamma"), type="numeric", default=1, help="Gamma parameter used when correcting phasing mistakes (Default: 1)", metavar="character"), + make_option(c("--segmentation_gamma"), type="numeric", default=10, help="The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)", metavar="character") + #, + #make_option(c("--"), type="", default=, help="", metavar="character"), + #make_option(c("--"), type="", default=, help="", metavar="character"), + #make_option(c("--"), type="", default=, help="", metavar="character"), + #make_option(c("--"), type="", default=, help="", metavar="character"), ); opt_parser = OptionParser(option_list=option_list); @@ -40,6 +48,9 @@ MIN_PLOIDY = opt$min_ploidy; MAX_PLOIDY = opt$max_ploidy; MIN_RHO = opt$min_rho; MAX_RHO = opt$max_rho; +PLATFORM_GAMMA = opt$platform_gamma; +PHASING_GAMMA = opt$phasing_gamma; +SEGMENTATION_GAMMA = opt$segmentation_gamma; # General static IMPUTEINFOFILE = "/opt/battenberg_reference/impute_info.txt"; @@ -49,9 +60,6 @@ GCCORRECTPREFIX = "/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes REPLICCORRECTPREFIX = "/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr"; IMPUTE_EXE = "impute2"; -PLATFORM_GAMMA = 1; -PHASING_GAMMA = 1; -SEGMENTATION_GAMMA = 10; SEGMENTATIIN_KMIN = 3; PHASING_KMIN = 1; CLONALITY_DIST_METRIC = 0; From f7133026e53f7129773d307c15e615241a5cc0d3 Mon Sep 17 00:00:00 2001 From: Mootor Date: Thu, 11 Jul 2024 18:48:56 -0700 Subject: [PATCH 05/13] parameterize existing parameters in the script --- battenberg_wgs.R | 136 +++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 65 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index ec15e17..34a63d8 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -4,76 +4,82 @@ library(Battenberg); library(optparse); -option_list = list( - make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), - make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), - make_option(c("--tb"), 
type="character", default=NULL, help="Tumour BAM file", metavar="character"), - make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), - make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), - make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), - make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), - make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), - make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), - make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), - make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), - make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), - make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), - make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), - make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character"), - make_option(c("--platform_gamma"), type="numeric", default=1, help="Platform specific gamma value (0.55 for SNP6, 1 for NGS)", 
metavar="character"), - make_option(c("--phasing_gamma"), type="numeric", default=1, help="Gamma parameter used when correcting phasing mistakes (Default: 1)", metavar="character"), - make_option(c("--segmentation_gamma"), type="numeric", default=10, help="The gamma parameter controls the size of the penalty of starting a new segment during segmentation. It is therefore the key parameter for controlling the number of segments (Default: 10)", metavar="character") - #, - #make_option(c("--"), type="", default=, help="", metavar="character"), - #make_option(c("--"), type="", default=, help="", metavar="character"), - #make_option(c("--"), type="", default=, help="", metavar="character"), - #make_option(c("--"), type="", default=, help="", metavar="character"), - ); +option_list <- list( + make_option(c("-t", "--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), + make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), + make_option(c("--tb"), type="character", default=NULL, help="Tumour BAM file", metavar="character"), + make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), + make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), + make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), + make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), + make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. 
This expects the files on disk", metavar="character"), + make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. This expects the files on disk", metavar="character"), + make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), + make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), + make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), + make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), + make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), + make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character"), + make_option(c("--platform_gamma"), type="numeric", default=1, help="Platform specific gamma value (0.55 for SNP6, 1 for NGS)", metavar="character"), + make_option(c("--phasing_gamma"), type="numeric", default=1, help="Gamma parameter used when correcting phasing mistakes (Default: 1)", metavar="character"), + make_option(c("--segmentation_gamma"), type="numeric", default=10, help="The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)", metavar="character"), + make_option(c("--segmentation_kmin"), type="numeric", default=3, help="Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3)", metavar="character"), + make_option(c("--phasing_kmin"), type="numeric", default=1, help="Kmin used when correcting for phasing mistakes (Default: 3)", metavar="character"), + make_option(c("--clonality_dist_metric"), type="numeric", default=0, help="Distance metric to use when choosing purity/ploidy combinations (Default: 0)", metavar="character"), + make_option(c("--ascat_dist_metric"), type="numeric", default=1, help="Distance metric to use when choosing purity/ploidy combinations (Default: 1)", metavar="character"), + make_option(c("--min_goodness_of_fit"), type="double", default=0.63, help="Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)", metavar="character"), + make_option(c("--balanced_threshold"), type="double", default=0.51, help="The threshold beyond which BAF becomes uninformative (Default: 0.51)", metavar="character"), + make_option(c("--min_normal_depth"), type="numeric", default=10, help="Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)", metavar="character"), + make_option(c("--min_base_qual"), type="numeric", default=20, help="Minimum base quality required for a read to be counted when allele counting (Default: 20)", metavar="character"), + make_option(c("--min_map_qual"), type="numeric", default=35, help="Minimum mapping quality required for a read to be counted when allele counting (Default: 35)", metavar="character"), + make_option(c("--calc_seg_baf_option"), type="numeric", default=3, help="Sets way to calculate BAF per segment: 1=mean, 2=median, 3=ifelse median==0 | 1, mean, median (Default: 3)", metavar="character"), + 
make_option(c("--data_type"), type="character", default="wgs", help="String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)", metavar="character") + ); -opt_parser = OptionParser(option_list=option_list); -opt = parse_args(opt_parser); +opt_parser <- OptionParser(option_list=option_list); +opt <- parse_args(opt_parser); -TUMOURNAME = opt$tumourname; -NORMALNAME = opt$normalname; -NORMALBAM = opt$nb; -TUMOURBAM = opt$tb; -IS.MALE = opt$sex=="male" | opt$sex=="Male"; -RUN_DIR = opt$output; -SKIP_ALLELECOUNTING = opt$skip_allelecount; -SKIP_PREPROCESSING = opt$skip_preprocessing; -SKIP_PHASING = opt$skip_phasing; -NTHREADS = opt$cpu; -PRIOR_BREAKPOINTS_FILE = opt$bp; -MIN_PLOIDY = opt$min_ploidy; -MAX_PLOIDY = opt$max_ploidy; -MIN_RHO = opt$min_rho; -MAX_RHO = opt$max_rho; -PLATFORM_GAMMA = opt$platform_gamma; -PHASING_GAMMA = opt$phasing_gamma; -SEGMENTATION_GAMMA = opt$segmentation_gamma; +TUMOURNAME <- opt$tumourname; +NORMALNAME <- opt$normalname; +NORMALBAM <- opt$nb; +TUMOURBAM <- opt$tb; +IS.MALE <- opt$sex=="male" | opt$sex=="Male"; +RUN_DIR <- opt$output; +SKIP_ALLELECOUNTING <- opt$skip_allelecount; +SKIP_PREPROCESSING <- opt$skip_preprocessing; +SKIP_PHASING <- opt$skip_phasing; +NTHREADS <- opt$cpu; +PRIOR_BREAKPOINTS_FILE <- opt$bp; +MIN_PLOIDY <- opt$min_ploidy; +MAX_PLOIDY <- opt$max_ploidy; +MIN_RHO <- opt$min_rho; +MAX_RHO <- opt$max_rho; +PLATFORM_GAMMA <- opt$platform_gamma; +PHASING_GAMMA <- opt$phasing_gamma; +SEGMENTATION_GAMMA <- opt$segmentation_gamma; +SEGMENTATIIN_KMIN <- opt$segmentation_kmin; +PHASING_KMIN <- opt$phasing_kmin; +CLONALITY_DIST_METRIC <- opt$clonality_dist_metric; +ASCAT_DIST_METRIC <- opt$ascat_dist_metric; +MIN_GOODNESS_OF_FIT <- opt$min_goodness_of_fit; +BALANCED_THRESHOLD <- opt$balanced_threshold; +MIN_NORMAL_DEPTH <- opt$min_normal_depth; +MIN_BASE_QUAL <- opt$min_base_qual; +MIN_MAP_QUAL <- opt$min_map_qual; +CALC_SEG_BAF_OPTION <- opt$calc_seg_baf_option; +DATA_TYPE <- 
opt$data_type; # General static -IMPUTEINFOFILE = "/opt/battenberg_reference/impute_info.txt"; -G1000PREFIX = "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr"; -G1000PREFIX_AC = "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr"; -GCCORRECTPREFIX = "/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr"; -REPLICCORRECTPREFIX = "/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr"; -IMPUTE_EXE = "impute2"; - -SEGMENTATIIN_KMIN = 3; -PHASING_KMIN = 1; -CLONALITY_DIST_METRIC = 0; -ASCAT_DIST_METRIC = 1; -MIN_GOODNESS_OF_FIT = 0.63; -BALANCED_THRESHOLD = 0.51; -MIN_NORMAL_DEPTH = 10; -MIN_BASE_QUAL = 20; -MIN_MAP_QUAL = 35; -CALC_SEG_BAF_OPTION = 3; +IMPUTEINFOFILE <- "/opt/battenberg_reference/impute_info.txt"; +G1000PREFIX <- "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr"; +G1000PREFIX_AC <- "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr"; +GCCORRECTPREFIX <- "/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr"; +REPLICCORRECTPREFIX <- "/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr"; +IMPUTE_EXE <- "impute2"; # WGS specific static -ALLELECOUNTER = "alleleCounter"; -PROBLEMLOCI = "/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz"; +ALLELECOUNTER <- "alleleCounter"; +PROBLEMLOCI <- "/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz"; # Change to work directory and load the chromosome information setwd(RUN_DIR); @@ -90,7 +96,7 @@ battenberg( gccorrectprefix=GCCORRECTPREFIX, repliccorrectprefix=REPLICCORRECTPREFIX, problemloci=PROBLEMLOCI, - data_type="wgs", + data_type=DATA_TYPE, impute_exe=IMPUTE_EXE, allelecounter_exe=ALLELECOUNTER, nthreads=NTHREADS, From 0cf7438b9e9c7c6b8580eab8d88097feaac22052 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 
09:51:19 -0700 Subject: [PATCH 06/13] remove max_rho temporarily and fix a parameter typo --- battenberg_wgs.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index 34a63d8..d46bfac 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -19,7 +19,6 @@ option_list <- list( make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), - make_option(c("--max_rho"), type="double", default=1.0, help="The maximum cellularity to consider", metavar="character"), make_option(c("--platform_gamma"), type="numeric", default=1, help="Platform specific gamma value (0.55 for SNP6, 1 for NGS)", metavar="character"), make_option(c("--phasing_gamma"), type="numeric", default=1, help="Gamma parameter used when correcting phasing mistakes (Default: 1)", metavar="character"), make_option(c("--segmentation_gamma"), type="numeric", default=10, help="The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)", metavar="character"), @@ -53,11 +52,10 @@ PRIOR_BREAKPOINTS_FILE <- opt$bp; MIN_PLOIDY <- opt$min_ploidy; MAX_PLOIDY <- opt$max_ploidy; MIN_RHO <- opt$min_rho; -MAX_RHO <- opt$max_rho; PLATFORM_GAMMA <- opt$platform_gamma; PHASING_GAMMA <- opt$phasing_gamma; SEGMENTATION_GAMMA <- opt$segmentation_gamma; -SEGMENTATIIN_KMIN <- opt$segmentation_kmin; +SEGMENTATION_KMIN <- opt$segmentation_kmin; PHASING_KMIN <- opt$phasing_kmin; CLONALITY_DIST_METRIC <- opt$clonality_dist_metric; ASCAT_DIST_METRIC <- opt$ascat_dist_metric; @@ -103,7 +101,7 @@ battenberg( platform_gamma=PLATFORM_GAMMA, phasing_gamma=PHASING_GAMMA, segmentation_gamma=SEGMENTATION_GAMMA, - segmentation_kmin=SEGMENTATIIN_KMIN, + segmentation_kmin=SEGMENTATION_KMIN, phasing_kmin=PHASING_KMIN, clonality_dist_metric=CLONALITY_DIST_METRIC, ascat_dist_metric=ASCAT_DIST_METRIC, From 3ff9be952e873c06f03f6c58e83a51bd52e4ac45 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 09:58:40 -0700 Subject: [PATCH 07/13] add battenberg_wgs.R to Dockerfile and remove path modification script --- Dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index df46e21..608dc88 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,10 +34,9 @@ RUN R -q -e 'install.packages("devtools", dependencies = TRUE)' && \ R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT@v3.1.2")' && \ R -q -e 'devtools::install_github("Wedge-Oxford/battenberg@v2.2.9")' -# Modify paths to reference files -COPY modify_reference_path.sh /usr/local/bin/modify_reference_path.sh -RUN chmod +x /usr/local/bin/modify_reference_path.sh && \ - bash /usr/local/bin/modify_reference_path.sh /usr/local/lib/R/site-library/Battenberg/example/battenberg_wgs.R /usr/local/bin/battenberg_wgs.R +# Add custom Battenberg R wrapper +COPY battenberg_wgs.R /usr/local/bin/battenberg_wgs.R +RUN chmod +x 
/usr/local/bin/battenberg_wgs.R RUN ln -sf /usr/local/lib/R/site-library/Battenberg/example/filter_sv_brass.R /usr/local/bin/filter_sv_brass.R && \ ln -sf /usr/local/lib/R/site-library/Battenberg/example/battenberg_cleanup.sh /usr/local/bin/battenberg_cleanup.sh From e8e80856b47ba48b46828917969520be672b579f Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 09:59:22 -0700 Subject: [PATCH 08/13] remove path modification script from repo --- modify_reference_path.sh | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 modify_reference_path.sh diff --git a/modify_reference_path.sh b/modify_reference_path.sh deleted file mode 100644 index 4c3d7d1..0000000 --- a/modify_reference_path.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/sh - -refpath_default=$1 -refpath_modified=$2 - -cat "${refpath_default}" | \ - sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/impute_info.txt\"|' | \ - sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr\"|' | \ - sed 's|G1000PREFIX_AC = \".*|G1000PREFIX_AC = \"/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr\"|' | \ - sed 's|GCCORRECTPREFIX = \".*|GCCORRECTPREFIX = \"/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr\"|' | \ - sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz\"|' | \ - sed 's|REPLICCORRECTPREFIX = \".*|REPLICCORRECTPREFIX = \"/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr\"|' > "${refpath_modified}" From 7be441977e775a95e3b2c9fb31db3fc0603ffd29 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 10:23:44 -0700 Subject: [PATCH 09/13] Update CHANGELOG.md --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b122cfa..4059582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md 
@@ -6,8 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). --- +## Unreleased +### Added +- Add custom `battenberg_wgs.R` + +### Removed +- Remove `modify_reference_path.sh` -## [2.2.9] - 2023-06-27 +## [2.2.9] - 2023-06-27 [YANKED] ### Added - Add `modify_reference_path.sh` - Add GRCh37 and GRCh38 resource paths to `README.md` From b29312f39d558e0d9537934c679f0a34dca0bed9 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 10:23:53 -0700 Subject: [PATCH 10/13] remove comments --- metadata.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata.yaml b/metadata.yaml index 6c33da3..42a3af8 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -4,7 +4,7 @@ description: 'Docker repository for Wedge-lab/battenberg' maintainers: ['mmootor@mednet.ucla.edu'] languages: ['Dockerfile'] tools: ['battenberg'] -version: ['2.2.9'] # Tool version number -purpose: 'Whole Genome Sequencing subclonal copy number caller' # Description of what this tool does -references: 'https://github.com/Wedge-lab/battenberg' # is the tool/dependencies published, is there a confluence page -image_name: 'battenberg' # name of the new docker image +version: ['2.2.9'] +purpose: 'Whole Genome Sequencing subclonal copy number caller' +references: 'https://github.com/Wedge-lab/battenberg' +image_name: 'battenberg' From 25f07d46b9576cbdc7b038d56ee40554eaba99f4 Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 10:38:05 -0700 Subject: [PATCH 11/13] fix linting in battenberg_wgs.R --- battenberg_wgs.R | 194 +++++++++++++++++++++++------------------------ 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index d46bfac..30774a4 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -4,118 +4,118 @@ library(Battenberg); library(optparse); -option_list <- list( - make_option(c("-t", 
"--tumourname"), type="character", default=NULL, help="Samplename of the tumour", metavar="character"), - make_option(c("-n", "--normalname"), type="character", default=NULL, help="Samplename of the normal", metavar="character"), - make_option(c("--tb"), type="character", default=NULL, help="Tumour BAM file", metavar="character"), - make_option(c("--nb"), type="character", default=NULL, help="Normal BAM file", metavar="character"), - make_option(c("--sex"), type="character", default=NULL, help="Sex of the sample", metavar="character"), - make_option(c("-o", "--output"), type="character", default=NULL, help="Directory where output will be written", metavar="character"), - make_option(c("--skip_allelecount"), type="logical", default=FALSE, action="store_true", help="Provide when alleles don't have to be counted. This expects allelecount files on disk", metavar="character"), - make_option(c("--skip_preprocessing"), type="logical", default=FALSE, action="store_true", help="Provide when pre-processing has previously completed. This expects the files on disk", metavar="character"), - make_option(c("--skip_phasing"), type="logical", default=FALSE, action="store_true", help="Provide when phasing has previously completed. 
This expects the files on disk", metavar="character"), - make_option(c("--cpu"), type="numeric", default=8, help="The number of CPU cores to be used by the pipeline (Default: 8)", metavar="character"), - make_option(c("--bp"), type="character", default=NULL, help="Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation", metavar="character"), - make_option(c("--min_ploidy"), type="double", default=1.6, help="The minimum ploidy to consider", metavar="character"), - make_option(c("--max_ploidy"), type="double", default=4.8, help="The maximum ploidy to consider", metavar="character"), - make_option(c("--min_rho"), type="double", default=0.1, help="The minimum cellularity to consider", metavar="character"), - make_option(c("--platform_gamma"), type="numeric", default=1, help="Platform specific gamma value (0.55 for SNP6, 1 for NGS)", metavar="character"), - make_option(c("--phasing_gamma"), type="numeric", default=1, help="Gamma parameter used when correcting phasing mistakes (Default: 1)", metavar="character"), - make_option(c("--segmentation_gamma"), type="numeric", default=10, help="The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)", metavar="character"), - make_option(c("--segmentation_kmin"), type="numeric", default=3, help="Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3)", metavar="character"), - make_option(c("--phasing_kmin"), type="numeric", default=1, help="Kmin used when correcting for phasing mistakes (Default: 3)", metavar="character"), - make_option(c("--clonality_dist_metric"), type="numeric", default=0, help="Distance metric to use when choosing purity/ploidy combinations (Default: 0)", metavar="character"), - make_option(c("--ascat_dist_metric"), type="numeric", default=1, help="Distance metric to use when choosing purity/ploidy combinations (Default: 1)", metavar="character"), - make_option(c("--min_goodness_of_fit"), type="double", default=0.63, help="Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)", metavar="character"), - make_option(c("--balanced_threshold"), type="double", default=0.51, help="The threshold beyond which BAF becomes uninformative (Default: 0.51)", metavar="character"), - make_option(c("--min_normal_depth"), type="numeric", default=10, help="Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)", metavar="character"), - make_option(c("--min_base_qual"), type="numeric", default=20, help="Minimum base quality required for a read to be counted when allele counting (Default: 20)", metavar="character"), - make_option(c("--min_map_qual"), type="numeric", default=35, help="Minimum mapping quality required for a read to be counted when allele counting (Default: 35)", metavar="character"), - make_option(c("--calc_seg_baf_option"), type="numeric", default=3, help="Sets way to calculate BAF per segment: 1=mean, 2=median, 3=ifelse median==0 | 1, mean, median (Default: 3)", metavar="character"), - 
make_option(c("--data_type"), type="character", default="wgs", help="String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)", metavar="character") +option.list <- list( + make_option(c('-t', '--tumourname'), type = 'character', default = NULL, help = 'Samplename of the tumour', metavar = 'character'), + make_option(c('-n', '--normalname'), type = 'character', default = NULL, help = 'Samplename of the normal', metavar = 'character'), + make_option(c('--tb'), type = 'character', default = NULL, help = 'Tumour BAM file', metavar = 'character'), + make_option(c('--nb'), type = 'character', default = NULL, help = 'Normal BAM file', metavar = 'character'), + make_option(c('--sex'), type = 'character', default = NULL, help = 'Sex of the sample', metavar = 'character'), + make_option(c('-o', '--output'), type = 'character', default = NULL, help = 'Directory where output will be written', metavar = 'character'), + make_option(c('--skip_allelecount'), type = 'logical', default = FALSE, action = 'store_true', help = 'Provide when alleles don\'t have to be counted. This expects allelecount files on disk', metavar = 'character'), + make_option(c('--skip_preprocessing'), type = 'logical', default = FALSE, action = 'store_true', help = 'Provide when pre-processing has previously completed. This expects the files on disk', metavar = 'character'), + make_option(c('--skip_phasing'), type = 'logical', default = FALSE, action = 'store_true', help = 'Provide when phasing has previously completed. 
This expects the files on disk', metavar = 'character'), + make_option(c('--cpu'), type = 'numeric', default = 8, help = 'The number of CPU cores to be used by the pipeline (Default: 8)', metavar = 'character'), + make_option(c('--bp'), type = 'character', default = NULL, help = 'Optional two column file (chromosome and position) specifying prior breakpoints to be used during segmentation', metavar = 'character'), + make_option(c('--min_ploidy'), type = 'double', default = 1.6, help = 'The minimum ploidy to consider', metavar = 'character'), + make_option(c('--max_ploidy'), type = 'double', default = 4.8, help = 'The maximum ploidy to consider', metavar = 'character'), + make_option(c('--min_rho'), type = 'double', default = 0.1, help = 'The minimum cellularity to consider', metavar = 'character'), + make_option(c('--platform_gamma'), type = 'numeric', default = 1, help = 'Platform specific gamma value (0.55 for SNP6, 1 for NGS)', metavar = 'character'), + make_option(c('--phasing_gamma'), type = 'numeric', default = 1, help = 'Gamma parameter used when correcting phasing mistakes (Default: 1)', metavar = 'character'), + make_option(c('--segmentation_gamma'), type = 'numeric', default = 10, help = 'The gamma parameter controls the size of the penalty of starting a new segment during segmentation. 
It is therefore the key parameter for controlling the number of segments (Default: 10)', metavar = 'character'), + make_option(c('--segmentation_kmin'), type = 'numeric', default = 3, help = 'Kmin represents the minimum number of probes/SNPs that a segment should consist of (Default: 3)', metavar = 'character'), + make_option(c('--phasing_kmin'), type = 'numeric', default = 1, help = 'Kmin used when correcting for phasing mistakes (Default: 1)', metavar = 'character'), + make_option(c('--clonality_dist_metric'), type = 'numeric', default = 0, help = 'Distance metric to use when choosing purity/ploidy combinations (Default: 0)', metavar = 'character'), + make_option(c('--ascat_dist_metric'), type = 'numeric', default = 1, help = 'Distance metric to use when choosing purity/ploidy combinations (Default: 1)', metavar = 'character'), + make_option(c('--min_goodness_of_fit'), type = 'double', default = 0.63, help = 'Minimum goodness of fit required for a purity/ploidy combination to be accepted as a solution (Default: 0.63)', metavar = 'character'), + make_option(c('--balanced_threshold'), type = 'double', default = 0.51, help = 'The threshold beyond which BAF becomes uninformative (Default: 0.51)', metavar = 'character'), + make_option(c('--min_normal_depth'), type = 'numeric', default = 10, help = 'Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)', metavar = 'character'), + make_option(c('--min_base_qual'), type = 'numeric', default = 20, help = 'Minimum base quality required for a read to be counted when allele counting (Default: 20)', metavar = 'character'), + make_option(c('--min_map_qual'), type = 'numeric', default = 35, help = 'Minimum mapping quality required for a read to be counted when allele counting (Default: 35)', metavar = 'character'), + make_option(c('--calc_seg_baf_option'), type = 'numeric', default = 3, help = 'Sets way to calculate BAF per segment: 1=mean, 2=median, 3=ifelse median==0 
| 1, mean, median (Default: 3)', metavar = 'character'), + make_option(c('--data_type'), type = 'character', default = 'wgs', help = 'String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)', metavar = 'character') ); -opt_parser <- OptionParser(option_list=option_list); -opt <- parse_args(opt_parser); +opt.parser <- OptionParser(option_list = option.list); +opt <- parse_args(opt.parser); TUMOURNAME <- opt$tumourname; NORMALNAME <- opt$normalname; NORMALBAM <- opt$nb; TUMOURBAM <- opt$tb; -IS.MALE <- opt$sex=="male" | opt$sex=="Male"; -RUN_DIR <- opt$output; -SKIP_ALLELECOUNTING <- opt$skip_allelecount; -SKIP_PREPROCESSING <- opt$skip_preprocessing; -SKIP_PHASING <- opt$skip_phasing; +IS.MALE <- opt$sex == 'male' | opt$sex == 'Male'; +RUN.DIR <- opt$output; +SKIP.ALLELECOUNTING <- opt$skip_allelecount; +SKIP.PREPROCESSING <- opt$skip_preprocessing; +SKIP.PHASING <- opt$skip_phasing; NTHREADS <- opt$cpu; -PRIOR_BREAKPOINTS_FILE <- opt$bp; -MIN_PLOIDY <- opt$min_ploidy; -MAX_PLOIDY <- opt$max_ploidy; -MIN_RHO <- opt$min_rho; -PLATFORM_GAMMA <- opt$platform_gamma; -PHASING_GAMMA <- opt$phasing_gamma; -SEGMENTATION_GAMMA <- opt$segmentation_gamma; -SEGMENTATION_KMIN <- opt$segmentation_kmin; -PHASING_KMIN <- opt$phasing_kmin; -CLONALITY_DIST_METRIC <- opt$clonality_dist_metric; -ASCAT_DIST_METRIC <- opt$ascat_dist_metric; -MIN_GOODNESS_OF_FIT <- opt$min_goodness_of_fit; -BALANCED_THRESHOLD <- opt$balanced_threshold; -MIN_NORMAL_DEPTH <- opt$min_normal_depth; -MIN_BASE_QUAL <- opt$min_base_qual; -MIN_MAP_QUAL <- opt$min_map_qual; -CALC_SEG_BAF_OPTION <- opt$calc_seg_baf_option; -DATA_TYPE <- opt$data_type; +PRIOR.BREAKPOINTS.FILE <- opt$bp; +MIN.PLOIDY <- opt$min_ploidy; +MAX.PLOIDY <- opt$max_ploidy; +MIN.RHO <- opt$min_rho; +PLATFORM.GAMMA <- opt$platform_gamma; +PHASING.GAMMA <- opt$phasing_gamma; +SEGMENTATION.GAMMA <- opt$segmentation_gamma; +SEGMENTATION.KMIN <- opt$segmentation_kmin; +PHASING.KMIN <- opt$phasing_kmin; 
+CLONALITY.DIST.METRIC <- opt$clonality_dist_metric; +ASCAT.DIST.METRIC <- opt$ascat_dist_metric; +MIN.GOODNESS.OF.FIT <- opt$min_goodness_of_fit; +BALANCED.THRESHOLD <- opt$balanced_threshold; +MIN.NORMAL.DEPTH <- opt$min_normal_depth; +MIN.BASE.QUAL <- opt$min_base_qual; +MIN.MAP.QUAL <- opt$min_map_qual; +CALC.SEG.BAF.OPTION <- opt$calc_seg_baf_option; +DATA.TYPE <- opt$data_type; # General static -IMPUTEINFOFILE <- "/opt/battenberg_reference/impute_info.txt"; -G1000PREFIX <- "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr"; -G1000PREFIX_AC <- "/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr"; -GCCORRECTPREFIX <- "/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr"; -REPLICCORRECTPREFIX <- "/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr"; -IMPUTE_EXE <- "impute2"; +IMPUTEINFOFILE <- '/opt/battenberg_reference/impute_info.txt'; +G1000PREFIX <- '/opt/battenberg_reference/1000_genomes_loci/1000_genomes_allele_index_chr'; +G1000PREFIX.AC <- '/opt/battenberg_reference/1000_genomes_loci/1000_genomes_loci_chr'; +GCCORRECTPREFIX <- '/opt/battenberg_reference/1000_genomes_gcContent/1000_genomes_GC_corr_chr'; +REPLICCORRECTPREFIX <- '/opt/battenberg_reference/battenberg_wgs_replication_timing_correction_1000_genomes/1000_genomes_replication_timing_chr'; +IMPUTE.EXE <- 'impute2'; # WGS specific static -ALLELECOUNTER <- "alleleCounter"; -PROBLEMLOCI <- "/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz"; +ALLELECOUNTER <- 'alleleCounter'; +PROBLEMLOCI <- '/opt/battenberg_reference/battenberg_problem_loci/probloci.txt.gz'; # Change to work directory and load the chromosome information -setwd(RUN_DIR); +setwd(RUN.DIR); battenberg( - tumourname=TUMOURNAME, - normalname=NORMALNAME, - tumour_data_file=TUMOURBAM, - normal_data_file=NORMALBAM, + tumourname = TUMOURNAME, + normalname = NORMALNAME, + tumour_data_file = 
TUMOURBAM, + normal_data_file = NORMALBAM, ismale=IS.MALE, - imputeinfofile=IMPUTEINFOFILE, - g1000prefix=G1000PREFIX, - g1000allelesprefix=G1000PREFIX_AC, - gccorrectprefix=GCCORRECTPREFIX, - repliccorrectprefix=REPLICCORRECTPREFIX, - problemloci=PROBLEMLOCI, - data_type=DATA_TYPE, - impute_exe=IMPUTE_EXE, - allelecounter_exe=ALLELECOUNTER, - nthreads=NTHREADS, - platform_gamma=PLATFORM_GAMMA, - phasing_gamma=PHASING_GAMMA, - segmentation_gamma=SEGMENTATION_GAMMA, - segmentation_kmin=SEGMENTATION_KMIN, - phasing_kmin=PHASING_KMIN, - clonality_dist_metric=CLONALITY_DIST_METRIC, - ascat_dist_metric=ASCAT_DIST_METRIC, - min_ploidy=MIN_PLOIDY, - max_ploidy=MAX_PLOIDY, - min_rho=MIN_RHO, - min_goodness=MIN_GOODNESS_OF_FIT, - uninformative_BAF_threshold=BALANCED_THRESHOLD, - min_normal_depth=MIN_NORMAL_DEPTH, - min_base_qual=MIN_BASE_QUAL, - min_map_qual=MIN_MAP_QUAL, - calc_seg_baf_option=CALC_SEG_BAF_OPTION, - skip_allele_counting=SKIP_ALLELECOUNTING, - skip_preprocessing=SKIP_PREPROCESSING, - skip_phasing=SKIP_PHASING, - prior_breakpoints_file=PRIOR_BREAKPOINTS_FILE + imputeinfofile = IMPUTEINFOFILE, + g1000prefix = G1000PREFIX, + g1000allelesprefix = G1000PREFIX.AC, + gccorrectprefix = GCCORRECTPREFIX, + repliccorrectprefix = REPLICCORRECTPREFIX, + problemloci = PROBLEMLOCI, + data_type = DATA.TYPE, + impute_exe = IMPUTE.EXE, + allelecounter_exe = ALLELECOUNTER, + nthreads = NTHREADS, + platform_gamma = PLATFORM.GAMMA, + phasing_gamma = PHASING.GAMMA, + segmentation_gamma = SEGMENTATION.GAMMA, + segmentation_kmin = SEGMENTATION.KMIN, + phasing_kmin = PHASING.KMIN, + clonality_dist_metric = CLONALITY.DIST.METRIC, + ascat_dist_metric = ASCAT.DIST.METRIC, + min_ploidy = MIN.PLOIDY, + max_ploidy = MAX.PLOIDY, + min_rho = MIN.RHO, + min_goodness = MIN.GOODNESS.OF.FIT, + uninformative_BAF_threshold = BALANCED.THRESHOLD, + min_normal_depth = MIN.NORMAL.DEPTH, + min_base_qual = MIN.BASE.QUAL, + min_map_qual = MIN.MAP.QUAL, + calc_seg_baf_option = CALC.SEG.BAF.OPTION, + 
skip_allele_counting = SKIP.ALLELECOUNTING, + skip_preprocessing = SKIP.PREPROCESSING, + skip_phasing = SKIP.PHASING, + prior_breakpoints_file = PRIOR.BREAKPOINTS.FILE ); From 3a6f8382281961b6597971ed3ea170dd1e504b5d Mon Sep 17 00:00:00 2001 From: Mootor Date: Fri, 12 Jul 2024 10:42:22 -0700 Subject: [PATCH 12/13] fix linting --- battenberg_wgs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/battenberg_wgs.R b/battenberg_wgs.R index 30774a4..fa82b3e 100644 --- a/battenberg_wgs.R +++ b/battenberg_wgs.R @@ -87,7 +87,7 @@ battenberg( normalname = NORMALNAME, tumour_data_file = TUMOURBAM, normal_data_file = NORMALBAM, - ismale=IS.MALE, + ismale = IS.MALE, imputeinfofile = IMPUTEINFOFILE, g1000prefix = G1000PREFIX, g1000allelesprefix = G1000PREFIX.AC, From e921a9a438ec45bcd50bc7690226005d3a65ed34 Mon Sep 17 00:00:00 2001 From: Mootor Date: Wed, 17 Jul 2024 15:10:11 -0700 Subject: [PATCH 13/13] add r-base image to dockerfile --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 608dc88..5636ca1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,6 +30,7 @@ RUN R -q -e 'install.packages("BiocManager")' && \ "gridExtra","doParallel","foreach", "splines", "VariantAnnotation", "copynumber"))' # Install devtools, ASCAT & Battenberg +FROM r-base:latest RUN R -q -e 'install.packages("devtools", dependencies = TRUE)' && \ R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT@v3.1.2")' && \ R -q -e 'devtools::install_github("Wedge-Oxford/battenberg@v2.2.9")'