From 8223a3969e39c3256fd5b1a22a7f0b8570b96f84 Mon Sep 17 00:00:00 2001 From: rnaidu Date: Tue, 1 Oct 2024 11:12:12 -0400 Subject: [PATCH 1/5] updated traceback subworkflow and associated modules with changes for nucleovar version 1 --- modules/msk/genotypevariants/all/main.nf | 1 - modules/msk/pvmaf/concat/main.nf | 4 ++-- modules/msk/pvmaf/tagtraceback/main.nf | 9 +++++---- modules/msk/pvmaf/tagtraceback/meta.yml | 12 ++++-------- subworkflows/msk/traceback/main.nf | 3 ++- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/modules/msk/genotypevariants/all/main.nf b/modules/msk/genotypevariants/all/main.nf index 40d3a081..05ab0665 100644 --- a/modules/msk/genotypevariants/all/main.nf +++ b/modules/msk/genotypevariants/all/main.nf @@ -39,7 +39,6 @@ process GENOTYPEVARIANTS_ALL { $bams_standard \\ $bam_liquid \\ $sample \\ - -t $task.cpus \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/msk/pvmaf/concat/main.nf b/modules/msk/pvmaf/concat/main.nf index bb12148f..225751be 100644 --- a/modules/msk/pvmaf/concat/main.nf +++ b/modules/msk/pvmaf/concat/main.nf @@ -4,8 +4,8 @@ process PVMAF_CONCAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'ghcr.io/msk-access/postprocessing_variant_calls:0.3.0': - 'ghcr.io/msk-access/postprocessing_variant_calls:0.3.0' }" + 'ghcr.io/msk-access/postprocessing_variant_calls:0.2.6': + 'ghcr.io/msk-access/postprocessing_variant_calls:0.2.6' }" input: tuple val(meta), path(maf_files, stageAs: "*?-ORG-SIMPLEX-DUPLEX_genotyped.maf")// [ id:'sample1', patient:'patient1' ], [maf_1, ... maf_n] diff --git a/modules/msk/pvmaf/tagtraceback/main.nf b/modules/msk/pvmaf/tagtraceback/main.nf index 27f4c164..bee0144e 100644 --- a/modules/msk/pvmaf/tagtraceback/main.nf +++ b/modules/msk/pvmaf/tagtraceback/main.nf @@ -1,14 +1,15 @@ process PVMAF_TAGTRACEBACK { tag "$meta.id" label 'process_single' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'ghcr.io/msk-access/postprocessing_variant_calls:type_traceback_0.0.7': - 'ghcr.io/msk-access/postprocessing_variant_calls:type_traceback_0.0.7' }" + 'ghcr.io/msk-access/postprocessing_variant_calls:0.2.6': + 'ghcr.io/msk-access/postprocessing_variant_calls:0.2.6' }" input: - tuple val(meta), path(maf) // [ id:'sample1', patient:'patient1' ], *.maf - path(sample_sheets) // [samplesheet_1, ..., samplesheet_n] + tuple val(meta), path(maf) + path(sample_sheets) output: tuple val(meta), path("*.maf"), emit: maf diff --git a/modules/msk/pvmaf/tagtraceback/meta.yml b/modules/msk/pvmaf/tagtraceback/meta.yml index 7e5fa336..4a300cb5 100644 --- a/modules/msk/pvmaf/tagtraceback/meta.yml +++ b/modules/msk/pvmaf/tagtraceback/meta.yml @@ -25,13 +25,9 @@ input: type: file description: Maf file with columns required for selected tagging type. pattern: "*.{maf}" - - - path(sample_sheets): - type: file - description: | - Samplesheet with `sample_id` and `type` columns. - Used to add fillout type information to provided maf. - See Nucleovar for more info: https://github.com/mskcc-omics-workflows/nucleovar/blob/main/README.md. + - type: + type: string + description: The type of tagging to be performed. output: #Only when we have meta @@ -46,7 +42,7 @@ output: pattern: "versions.yml" - maf: type: file - description: tagged traceback maf. + description: tagged maf file. pattern: "*.{maf}" authors: diff --git a/subworkflows/msk/traceback/main.nf b/subworkflows/msk/traceback/main.nf index f51b5c6f..4f597ead 100644 --- a/subworkflows/msk/traceback/main.nf +++ b/subworkflows/msk/traceback/main.nf @@ -27,12 +27,13 @@ workflow TRACEBACK { .map {it -> [it[0].subMap('patient')[0], *it[1..-1]] } .set{concat_maf} + + bams .map { it -> [it[0].subMap('patient')[0], it[0], *it[1..-1]] } .combine(concat_maf, by:0) .map { it[1..-1] } .set{bam_list_maf} - // genotype each bam combined maf, per patient if provided GENOTYPEVARIANTS_ALL(bam_list_maf, reference, reference_fai) ch_versions = ch_versions.mix(GENOTYPEVARIANTS_ALL.out.versions.first()) From b206828edb6f6330586643768f8b9ace9fb27a8b Mon Sep 17 00:00:00 2001 From: rnaidu Date: Tue, 1 Oct 2024 12:05:41 -0400 Subject: [PATCH 2/5] updated meta and environment.ymls for traceback associated modulees to pass updated nf-core linting schema checks. --- modules/msk/genotypevariants/all/meta.yml | 48 +++++++++------- modules/msk/pvmaf/concat/meta.yml | 39 +++++++------ .../msk/pvmaf/tagtraceback/environment.yml | 4 +- modules/msk/pvmaf/tagtraceback/meta.yml | 55 +++++++++---------- 4 files changed, 74 insertions(+), 72 deletions(-) diff --git a/modules/msk/genotypevariants/all/meta.yml b/modules/msk/genotypevariants/all/meta.yml index a5e7d9e0..bc00b36b 100644 --- a/modules/msk/genotypevariants/all/meta.yml +++ b/modules/msk/genotypevariants/all/meta.yml @@ -1,20 +1,20 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "genotypevariants_all" description: write your description here keywords: - - genotype - - bams - - maf +- genotype +- bams +- maf tools: - - "genotypevariants": - description: "module supports genotyping and merging small variants (SNV and INDELS)." - documentation: "https://genotype-variants.readthedocs.io/en/latest/" - licence: ["MIT"] +- "genotypevariants": + description: "module supports genotyping and merging small variants (SNV and INDELS)." + documentation: "https://genotype-variants.readthedocs.io/en/latest/" + licence: ["MIT"] + identifier: '' input: # Only when we have meta - - meta: +- - meta: type: map description: | Groovy Map containing sample information @@ -25,7 +25,8 @@ input: pattern: "*.{bam}" - bai_standard: type: file - description: Requires the standard .bai file is present at same location as the bam file. + description: Requires the standard .bai file is present at same location as + the bam file. pattern: "*.{bai}" - bam_duplex: type: file @@ -33,7 +34,8 @@ input: pattern: "*.{bam}" - bai_duplex: type: file - description: Requires the duplex .bai file is present at same location as the bam file. + description: Requires the duplex .bai file is present at same location as the + bam file. pattern: "*.{bai}" - bam_simplex: type: file @@ -41,38 +43,42 @@ input: pattern: "*.{bam}" - bai_simplex: type: file - description: Requires the simplex .bai file is present at same location as the bam file. + description: Requires the simplex .bai file is present at same location as the + bam file. pattern: "*.{bai}" - maf: type: file description: Full path to small variants input file in MAF format pattern: "*.{maf}" - - fasta: +- - fasta: type: file description: The reference fasta file pattern: "*.fasta" - - fai: +- - fai: type: file description: Index of reference fasta file pattern: "*.fasta.fai" - output: #Only when we have meta +- maf: - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', patient:'patient_1' ]` - - maf: + - '*.maf': type: file - description: Genotyped maf for each bam provided and a merged genotyped maf. The mafs will be labelled with patient identifier or sample identifier as the prefix, and end with the type of bam (duplex, simplex, or standard). The sample identifier is prioritized. + description: Genotyped maf for each bam provided and a merged genotyped maf. + The mafs will be labelled with patient identifier or sample identifier as + the prefix, and end with the type of bam (duplex, simplex, or standard). The + sample identifier is prioritized. pattern: "*.{mafs}" - - versions: +- versions: + - versions.yml: type: file description: File containing software versions pattern: "versions.yml" - authors: - - "@buehlere" +- "@buehlere" maintainers: - - "@buehlere" +- "@buehlere" diff --git a/modules/msk/pvmaf/concat/meta.yml b/modules/msk/pvmaf/concat/meta.yml index f676983e..96595e3a 100644 --- a/modules/msk/pvmaf/concat/meta.yml +++ b/modules/msk/pvmaf/concat/meta.yml @@ -1,21 +1,21 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pvmaf_concat" description: a flexible command for concatenating maf files keywords: - - sort - - example - - genomics +- sort +- example +- genomics tools: - - "pvmaf": - description: "provides a variety of commands for manipulating mafs." - homepage: "https://github.com/msk-access/postprocessing_variant_calls" - documentation: "https://cmo-ci.gitbook.io/postprocessing_variant_calls/" - licence: ["MIT"] +- "pvmaf": + description: "provides a variety of commands for manipulating mafs." + homepage: "https://github.com/msk-access/postprocessing_variant_calls" + documentation: "https://cmo-ci.gitbook.io/postprocessing_variant_calls/" + licence: ["MIT"] + identifier: '' input: # Only when we have meta - - meta: +- - meta: type: map description: | Groovy Map containing sample information @@ -24,25 +24,24 @@ input: type: file description: list of maf files to concatenate pattern: "*.{maf}" - output: #Only when we have meta +- maf: - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', patient:'patient1' ]` - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - maf: + - '*.maf': type: file description: concatenated maf file pattern: "*.{maf}" - +- versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@buehlere" +- "@buehlere" maintainers: - - "@buehlere" +- "@buehlere" diff --git a/modules/msk/pvmaf/tagtraceback/environment.yml b/modules/msk/pvmaf/tagtraceback/environment.yml index b461aeda..4c59b932 100644 --- a/modules/msk/pvmaf/tagtraceback/environment.yml +++ b/modules/msk/pvmaf/tagtraceback/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "pvmaf_tagtraceback" channels: - conda-forge - bioconda - - defaults dependencies: - - "YOUR-TOOL-HERE" + - "YOUR-TOOL=HERE" diff --git a/modules/msk/pvmaf/tagtraceback/meta.yml b/modules/msk/pvmaf/tagtraceback/meta.yml index 4a300cb5..a6c74526 100644 --- a/modules/msk/pvmaf/tagtraceback/meta.yml +++ b/modules/msk/pvmaf/tagtraceback/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pvmaf_tagtraceback" description: a flexible command for tagging maf files @@ -12,39 +11,39 @@ tools: homepage: "https://github.com/msk-access/postprocessing_variant_calls" documentation: "https://cmo-ci.gitbook.io/postprocessing_variant_calls/" licence: ["MIT"] + identifier: "" input: # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient1' ]` - - - maf: - type: file - description: Maf file with columns required for selected tagging type. - pattern: "*.{maf}" - - type: - type: string - description: The type of tagging to be performed. - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient1' ]` + - maf: + type: file + description: Maf file with columns required for selected tagging type. + pattern: "*.{maf}" + - - sample_sheets: + type: list + description: array holding the two samplesheets mandatory for running nucleovar, (pipeline_input and aux_bams) output: #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient1' ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - maf: - type: file - description: tagged maf file. - pattern: "*.{maf}" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient1' ]` + - "*.maf": + type: file + description: tagged maf file. + pattern: "*.{maf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@buehlere" maintainers: From b4e81212549acb92743565aa080f5c3851f7e478 Mon Sep 17 00:00:00 2001 From: rnaidu Date: Tue, 1 Oct 2024 12:09:52 -0400 Subject: [PATCH 3/5] passing prettier and editorconfig checks --- modules/msk/genotypevariants/all/meta.yml | 148 +++++++++++----------- modules/msk/pvmaf/concat/meta.yml | 70 +++++----- subworkflows/msk/traceback/main.nf | 1 - 3 files changed, 111 insertions(+), 108 deletions(-) diff --git a/modules/msk/genotypevariants/all/meta.yml b/modules/msk/genotypevariants/all/meta.yml index bc00b36b..72f0548a 100644 --- a/modules/msk/genotypevariants/all/meta.yml +++ b/modules/msk/genotypevariants/all/meta.yml @@ -2,83 +2,87 @@ name: "genotypevariants_all" description: write your description here keywords: -- genotype -- bams -- maf + - genotype + - bams + - maf tools: -- "genotypevariants": - description: "module supports genotyping and merging small variants (SNV and INDELS)." - documentation: "https://genotype-variants.readthedocs.io/en/latest/" - licence: ["MIT"] - identifier: '' + - "genotypevariants": + description: "module supports genotyping and merging small variants (SNV and INDELS)." + documentation: "https://genotype-variants.readthedocs.io/en/latest/" + licence: ["MIT"] + identifier: "" input: # Only when we have meta -- - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient_1' ]` - - bam_standard: - type: file - description: Full path to standard bam file. - pattern: "*.{bam}" - - bai_standard: - type: file - description: Requires the standard .bai file is present at same location as - the bam file. - pattern: "*.{bai}" - - bam_duplex: - type: file - description: Full path to duplex bam file. - pattern: "*.{bam}" - - bai_duplex: - type: file - description: Requires the duplex .bai file is present at same location as the - bam file. - pattern: "*.{bai}" - - bam_simplex: - type: file - description: Full path to simplex bam file. - pattern: "*.{bam}" - - bai_simplex: - type: file - description: Requires the simplex .bai file is present at same location as the - bam file. - pattern: "*.{bai}" - - maf: - type: file - description: Full path to small variants input file in MAF format - pattern: "*.{maf}" -- - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" -- - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient_1' ]` + - bam_standard: + type: file + description: Full path to standard bam file. + pattern: "*.{bam}" + - bai_standard: + type: file + description: + Requires the standard .bai file is present at same location as + the bam file. + pattern: "*.{bai}" + - bam_duplex: + type: file + description: Full path to duplex bam file. + pattern: "*.{bam}" + - bai_duplex: + type: file + description: + Requires the duplex .bai file is present at same location as the + bam file. + pattern: "*.{bai}" + - bam_simplex: + type: file + description: Full path to simplex bam file. + pattern: "*.{bam}" + - bai_simplex: + type: file + description: + Requires the simplex .bai file is present at same location as the + bam file. + pattern: "*.{bai}" + - maf: + type: file + description: Full path to small variants input file in MAF format + pattern: "*.{maf}" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" output: #Only when we have meta -- maf: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient_1' ]` - - '*.maf': - type: file - description: Genotyped maf for each bam provided and a merged genotyped maf. - The mafs will be labelled with patient identifier or sample identifier as - the prefix, and end with the type of bam (duplex, simplex, or standard). The - sample identifier is prioritized. - pattern: "*.{mafs}" -- versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + - maf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient_1' ]` + - "*.maf": + type: file + description: + Genotyped maf for each bam provided and a merged genotyped maf. + The mafs will be labelled with patient identifier or sample identifier as + the prefix, and end with the type of bam (duplex, simplex, or standard). The + sample identifier is prioritized. + pattern: "*.{mafs}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: -- "@buehlere" + - "@buehlere" maintainers: -- "@buehlere" + - "@buehlere" diff --git a/modules/msk/pvmaf/concat/meta.yml b/modules/msk/pvmaf/concat/meta.yml index 96595e3a..a34fdb6d 100644 --- a/modules/msk/pvmaf/concat/meta.yml +++ b/modules/msk/pvmaf/concat/meta.yml @@ -2,46 +2,46 @@ name: "pvmaf_concat" description: a flexible command for concatenating maf files keywords: -- sort -- example -- genomics + - sort + - example + - genomics tools: -- "pvmaf": - description: "provides a variety of commands for manipulating mafs." - homepage: "https://github.com/msk-access/postprocessing_variant_calls" - documentation: "https://cmo-ci.gitbook.io/postprocessing_variant_calls/" - licence: ["MIT"] - identifier: '' + - "pvmaf": + description: "provides a variety of commands for manipulating mafs." + homepage: "https://github.com/msk-access/postprocessing_variant_calls" + documentation: "https://cmo-ci.gitbook.io/postprocessing_variant_calls/" + licence: ["MIT"] + identifier: "" input: # Only when we have meta -- - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient1' ]` - - maf_files: - type: file - description: list of maf files to concatenate - pattern: "*.{maf}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient1' ]` + - maf_files: + type: file + description: list of maf files to concatenate + pattern: "*.{maf}" output: #Only when we have meta -- maf: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', patient:'patient1' ]` - - '*.maf': - type: file - description: concatenated maf file - pattern: "*.{maf}" -- versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + - maf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', patient:'patient1' ]` + - "*.maf": + type: file + description: concatenated maf file + pattern: "*.{maf}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: -- "@buehlere" + - "@buehlere" maintainers: -- "@buehlere" + - "@buehlere" diff --git a/subworkflows/msk/traceback/main.nf b/subworkflows/msk/traceback/main.nf index 4f597ead..7123932a 100644 --- a/subworkflows/msk/traceback/main.nf +++ b/subworkflows/msk/traceback/main.nf @@ -27,7 +27,6 @@ workflow TRACEBACK { .map {it -> [it[0].subMap('patient')[0], *it[1..-1]] } .set{concat_maf} - bams .map { it -> [it[0].subMap('patient')[0], it[0], *it[1..-1]] } From 7c41c266b9e0d9e7b51b37d727e2a524a0e7e319 Mon Sep 17 00:00:00 2001 From: rnaidu Date: Tue, 1 Oct 2024 12:19:38 -0400 Subject: [PATCH 4/5] updated snapshot for pvmaf/concat module --- modules/msk/pvmaf/concat/tests/main.nf.test.snap | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/msk/pvmaf/concat/tests/main.nf.test.snap b/modules/msk/pvmaf/concat/tests/main.nf.test.snap index a6626e47..71ed98cc 100644 --- a/modules/msk/pvmaf/concat/tests/main.nf.test.snap +++ b/modules/msk/pvmaf/concat/tests/main.nf.test.snap @@ -28,7 +28,11 @@ ] } ], - "timestamp": "2024-02-23T11:31:39.015292" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.2" + }, + "timestamp": "2024-10-01T12:18:30.061693329" }, "chr22maf - msk": { "content": [ @@ -39,7 +43,7 @@ "id": "chr22", "patient": "test" }, - "test_combined.maf:md5,7b36c0fda7ebb28b27ddf51b7d2a09e8" + "test_combined.maf:md5,e1d7d2ecaf53ce75908ad49b25289f32" ] ], "1": [ @@ -51,7 +55,7 @@ "id": "chr22", "patient": "test" }, - "test_combined.maf:md5,7b36c0fda7ebb28b27ddf51b7d2a09e8" + "test_combined.maf:md5,e1d7d2ecaf53ce75908ad49b25289f32" ] ], "versions": [ @@ -59,6 +63,10 @@ ] } ], - "timestamp": "2024-02-23T11:31:28.145005" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.2" + }, + "timestamp": "2024-10-01T12:18:20.94747798" } } \ No newline at end of file From 3e07cba1d17d4a0bd4a56d1b4d4a41695cf58f99 Mon Sep 17 00:00:00 2001 From: rnaidu Date: Tue, 1 Oct 2024 16:34:51 -0400 Subject: [PATCH 5/5] updating meta.yml so it is in sync with local copy in nucleovar --- modules/msk/genotypevariants/all/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/msk/genotypevariants/all/meta.yml b/modules/msk/genotypevariants/all/meta.yml index 72f0548a..a8f3c373 100644 --- a/modules/msk/genotypevariants/all/meta.yml +++ b/modules/msk/genotypevariants/all/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +## yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "genotypevariants_all" description: write your description here keywords: