diff --git a/.github/workflows/render-puml.yaml b/.github/workflows/render-puml.yaml index 23e8621..e320d42 100644 --- a/.github/workflows/render-puml.yaml +++ b/.github/workflows/render-puml.yaml @@ -13,7 +13,7 @@ jobs: steps: - name: Generate PUML diagrams - uses: uclahs-cds/tool-PlantUML-action@v1.0.0 + uses: uclahs-cds/tool-PlantUML-action@v1.0.1 with: github-token: ${{ secrets.GITHUB_TOKEN }} ghcr-username: ${{ github.actor }} diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd158c..135a650 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm --- ## [Unreleased] +### Changed +- Replace workflow diagram with PlantUML version +- Update PlantUML action to v1.0.1 --- diff --git a/README.md b/README.md index e124249..999151c 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ python submit_nextflow_pipeline.py \ ## Flow Diagram -![call-gSNP flow diagram](call-gSNP-DSL2.png) +![call-gSNP flow diagram](docs/call-gsnp-flow.svg) --- diff --git a/call-gSNP-DSL2.png b/call-gSNP-DSL2.png deleted file mode 100644 index 0c2ee4f..0000000 Binary files a/call-gSNP-DSL2.png and /dev/null differ diff --git a/docs/call-gsnp-flow.puml b/docs/call-gsnp-flow.puml new file mode 100644 index 0000000..f7ab3ca --- /dev/null +++ b/docs/call-gsnp-flow.puml @@ -0,0 +1,87 @@ +@startuml + +skinparam ConditionEndStyle hline + +start + +if (Explicit intervals?) is (Yes) then + :==run_SplitIntervals_GATK + ---- + Split reference genome into up + to **scatter_count** interval lists, + without subdividing any of the + input intervals; +else (No) + :==run_SplitIntervals_GATK + ---- + Split reference genome into + **scatter_count** interval lists; +endif + +split + +:==run_HaplotypeCallerVCF_GATK +---- +Generate VCFs for each split interval +using HaplotypeCaller; + +:==run_MergeVcfs_Picard_VCF +---- +Merge raw variants into whole VCF file; + +#palegreen:Per-sample raw VCF + index files> + +partition "Recalibrate Variants" { + +:==run_VariantRecalibratorSNP_GATK +---- +Generate Variant Quality Score Recalibration +(VQSR) table for SNPs; + +:==run_ApplyVQSR_GATK_SNP +---- +Filter SNPs based on VQSR table; + +:==run_VariantRecalibratorINDEL_GATK +---- +Generate VQSR table for INDELs; + +:==run_ApplyVQSR_GATK_INDEL +---- +Filter INDELs based on VQSR table; + +} + +#palegreen:SNP and INDEL recalibrated +variants + index files> + +:==filter_gSNP_GATK +---- +Filter ambiguous variants with +customized Perl script; + +#palegreen:Filtered germline +variants + index files> + +detach + +split again + +:==run_HaplotypeCallerGVCF_GATK +---- +Generate GVCFs for each split interval +using HaplotypeCaller; + +:==run_MergeVcfs_Picard_GVCF +---- +Merge raw variants into whole GVCF file; + +#palegreen:Per-sample GVCF + index files> + +detach + +endsplit + + +@enduml + diff --git a/docs/call-gsnp-flow.svg b/docs/call-gsnp-flow.svg new file mode 100644 index 0000000..1de001c --- /dev/null +++ b/docs/call-gsnp-flow.svg @@ -0,0 +1,96 @@ +Explicit intervals?YesNorun_SplitIntervals_GATKSplit reference genome into uptoscatter_countinterval lists,without subdividing any of theinput intervalsrun_SplitIntervals_GATKSplit reference genome intoscatter_countinterval listsrun_HaplotypeCallerVCF_GATKGenerate VCFs for each split intervalusing HaplotypeCallerrun_MergeVcfs_Picard_VCFMerge raw variants into whole VCF filePer-sample raw VCF + index filesRecalibrate Variantsrun_VariantRecalibratorSNP_GATKGenerate Variant Quality Score Recalibration(VQSR) table for SNPsrun_ApplyVQSR_GATK_SNPFilter SNPs based on VQSR tablerun_VariantRecalibratorINDEL_GATKGenerate VQSR table for INDELsrun_ApplyVQSR_GATK_INDELFilter INDELs based on VQSR tableSNP and INDEL recalibratedvariants + index filesfilter_gSNP_GATKFilter ambiguous variants withcustomized Perl scriptFiltered germlinevariants + index filesrun_HaplotypeCallerGVCF_GATKGenerate GVCFs for each split intervalusing HaplotypeCallerrun_MergeVcfs_Picard_GVCFMerge raw variants into whole GVCF filePer-sample GVCF + index files \ No newline at end of file