From 94b67613365db298b95a430f52f2688b95118dad Mon Sep 17 00:00:00 2001 From: Usman Rashid Date: Wed, 21 Feb 2024 14:29:22 +1300 Subject: [PATCH] Added ncbi_fcs_gx --- .github/workflows/linting.yml | 66 ++++++++++----------- assets/schema_input.json | 4 +- conf/modules.config | 8 +++ modules/local/assemblathon_stats.nf | 10 +++- modules/local/ncbi_fcs_gx_krona_plot.nf | 42 +++++++++++++ modules/local/ncbi_fcs_gx_screen_samples.nf | 22 +++---- modules/local/ncbi_fcs_gx_setup_sample.nf | 26 ++++++++ subworkflows/local/ncbi_fcs_gx.nf | 63 +------------------- tests/stub/FI1.monoploid.seqs.txt | 7 +++ tests/stub/FI1.seq.labels.tsv | 7 +++ tests/stub/TT_2021a.seq.list | 7 +++ tests/stub/assemblysheet.csv | 2 + tests/stub/hic/Dummy_hic.R1.fq.gz | 0 tests/stub/hic/Dummy_hic.R2.fq.gz | 0 14 files changed, 153 insertions(+), 111 deletions(-) create mode 100644 modules/local/ncbi_fcs_gx_krona_plot.nf create mode 100644 modules/local/ncbi_fcs_gx_setup_sample.nf create mode 100644 tests/stub/FI1.monoploid.seqs.txt create mode 100644 tests/stub/FI1.seq.labels.tsv create mode 100644 tests/stub/TT_2021a.seq.list create mode 100644 tests/stub/assemblysheet.csv create mode 100644 tests/stub/hic/Dummy_hic.R1.fq.gz create mode 100644 tests/stub/hic/Dummy_hic.R2.fq.gz diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 81cd098..f807f5e 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -28,42 +28,42 @@ jobs: - name: Run pre-commit run: pre-commit run --all-files - nf-core: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v4 + # nf-core: + # runs-on: ubuntu-latest + # steps: + # - name: Check out pipeline code + # uses: actions/checkout@v4 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + # - name: Install Nextflow + # uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - architecture: "x64" + # - uses: actions/setup-python@v5 + # with: + # python-version: "3.11" + # architecture: "x64" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install nf-core + # - name: Install dependencies + # run: | + # python -m pip install --upgrade pip + # pip install nf-core - - name: Run nf-core lint - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + # - name: Run nf-core lint + # env: + # GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + # run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Save PR number - if: ${{ always() }} - run: echo ${{ github.event.pull_request.number }} > PR_number.txt + # - name: Save PR number + # if: ${{ always() }} + # run: echo ${{ github.event.pull_request.number }} > PR_number.txt - - name: Upload linting log file artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: linting-logs - path: | - lint_log.txt - lint_results.md - PR_number.txt + # - name: Upload linting log file artifact + # if: ${{ always() }} + # uses: actions/upload-artifact@v4 + # with: + # name: linting-logs + # path: | + # lint_log.txt + # lint_results.md + # PR_number.txt diff --git a/assets/schema_input.json b/assets/schema_input.json index 6aff8b1..d8573b2 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -36,7 +36,7 @@ ] }, "hic_reads": { - "errorMessage": "HiC reads should either be provided as a SRA ID or as a path to paired reads with pattern '*_R{1,2}.(fastq|fq).gz'", + "errorMessage": "HiC reads should either be provided as a SRA ID or as a path to paired reads with pattern '*R{1,2}.(fastq|fq).gz'", "anyOf": [ { "type": "string", @@ -44,7 +44,7 @@ }, { "type": "string", - "pattern": "^\\S+_R\\{1,2\\}\\.f(ast)?q\\.gz$" + "pattern": "^\\S+R\\{1,2\\}\\.f(ast)?q\\.gz$" }, { "type": "string", diff --git a/conf/modules.config b/conf/modules.config index 8202dd2..b1a6da5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -47,6 +47,14 @@ process { ] } + withName: NCBI_FCS_GX_KRONA_PLOT { + publishDir = [ + path: { "${params.outdir}/ncbi_fcs_gx" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + withName: RUN_BUSCO { publishDir = [ path: { "${params.outdir}/busco" }, diff --git a/modules/local/assemblathon_stats.nf b/modules/local/assemblathon_stats.nf index 3285e08..3ef37de 100644 --- a/modules/local/assemblathon_stats.nf +++ b/modules/local/assemblathon_stats.nf @@ -1,10 +1,11 @@ process ASSEMBLATHON_STATS { tag "${asm_tag}" - label "process_single" + label 'process_single' - container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ? + conda "conda-forge::perl" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'quay.io/nf-core/ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(asm_tag), path(fasta_file) @@ -14,6 +15,9 @@ process ASSEMBLATHON_STATS { path "${asm_tag}_stats.csv" , emit: stats path 'versions.yml' , emit: versions + when: + task.ext.when == null || task.ext.when + script: def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2" """ diff --git a/modules/local/ncbi_fcs_gx_krona_plot.nf b/modules/local/ncbi_fcs_gx_krona_plot.nf new file mode 100644 index 0000000..853689d --- /dev/null +++ b/modules/local/ncbi_fcs_gx_krona_plot.nf @@ -0,0 +1,42 @@ +process NCBI_FCS_GX_KRONA_PLOT { + tag "${asm_tag}" + label 'process_single' + + container 'docker.io/nanozoo/krona:2.7.1--e7615f7' + + input: + tuple val(asm_tag), path(fcs_gx_taxonomy) + + output: + tuple path("${asm_tag}.inter.tax.rpt.tsv"), path("${asm_tag}.fcs.gx.krona.cut"), path("${asm_tag}.fcs.gx.krona.html") , emit: plot + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_GX_KRONA_PLOT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + """ + cat $fcs_gx_taxonomy \\ + | awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \\ + > "${asm_tag}.inter.tax.rpt.tsv" + + cat "${asm_tag}.inter.tax.rpt.tsv" \\ + | awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \\ + > "${asm_tag}.fcs.gx.krona.cut" + + cat "${asm_tag}.inter.tax.rpt.tsv" \\ + | awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \\ + >> "${asm_tag}.fcs.gx.krona.cut" + + ktImportTaxonomy -i -o "${asm_tag}.fcs.gx.krona.html" -m "4" "${asm_tag}.fcs.gx.krona.cut" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + KronaTools: \$(ktImportTaxonomy | sed -n '/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1') + END_VERSIONS + """ +} diff --git a/modules/local/ncbi_fcs_gx_screen_samples.nf b/modules/local/ncbi_fcs_gx_screen_samples.nf index 0ad0d7d..3df915b 100644 --- a/modules/local/ncbi_fcs_gx_screen_samples.nf +++ b/modules/local/ncbi_fcs_gx_screen_samples.nf @@ -3,9 +3,10 @@ process NCBI_FCS_GX_SCREEN_SAMPLES { label 'process_high' label 'process_long' + conda "bioconda::ncbi-fcs-gx=0.5.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.4.0/fcs-gx.sif': - 'docker.io/ncbi/fcs-gx:0.4.0' }" + 'https://depot.galaxyproject.org/singularity/ncbi-fcs-gx:0.5.0--h4ac6f70_3': + 'biocontainers/ncbi-fcs-gx:0.5.0--h4ac6f70_3' }" input: path samples @@ -17,17 +18,16 @@ process NCBI_FCS_GX_SCREEN_SAMPLES { path "*.taxonomy.rpt" , emit: fcs_gx_taxonomies path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "NCBI_FCS_GX_SCREEN_SAMPLES module does not support Conda. Please use Docker / Singularity / Podman instead." - } - def VERSION = 0.4 + def VERSION = 0.5 """ for sample_fasta in $samples; do sample_tag=\$(echo "\$sample_fasta" | sed 's/fasta.file.for.//g' | sed 's/.fasta//g') - python3 /app/bin/run_gx --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${tax_id}" + run_gx.py --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${tax_id}" mv "\${sample_fasta%.fasta}.${tax_id}.fcs_gx_report.txt" "\${sample_tag}.fcs_gx_report.txt" mv "\${sample_fasta%.fasta}.${tax_id}.taxonomy.rpt" "\${sample_tag}.taxonomy.rpt" @@ -40,11 +40,7 @@ process NCBI_FCS_GX_SCREEN_SAMPLES { """ stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "NCBI_FCS_GX_SCREEN_SAMPLES module does not support Conda. Please use Docker / Singularity / Podman instead." - } - def VERSION = 0.4 + def VERSION = 0.5 """ for sample_fasta in $samples; do diff --git a/modules/local/ncbi_fcs_gx_setup_sample.nf b/modules/local/ncbi_fcs_gx_setup_sample.nf new file mode 100644 index 0000000..a5f7238 --- /dev/null +++ b/modules/local/ncbi_fcs_gx_setup_sample.nf @@ -0,0 +1,26 @@ +process NCBI_FCS_GX_SETUP_SAMPLE { + tag "${asm_tag}" + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(asm_tag), path(fasta_file) + + output: + path 'fasta.file.for.*.fasta', emit: fsata + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "NCBI_FCS_GX_SETUP_SAMPLE module does not support Conda. Please use Docker / Singularity / Podman instead." + } + """ + ln -s $fasta_file "fasta.file.for.${asm_tag}.fasta" + """ +} diff --git a/subworkflows/local/ncbi_fcs_gx.nf b/subworkflows/local/ncbi_fcs_gx.nf index 4aa448e..bcc9350 100644 --- a/subworkflows/local/ncbi_fcs_gx.nf +++ b/subworkflows/local/ncbi_fcs_gx.nf @@ -1,4 +1,6 @@ -include { NCBI_FCS_GX_SCREEN_SAMPLES } from '../../modules/local/ncbi_fcs_gx_screen_samples' +include { NCBI_FCS_GX_SETUP_SAMPLE } from '../../modules/local/ncbi_fcs_gx_setup_sample' +include { NCBI_FCS_GX_SCREEN_SAMPLES } from '../../modules/local/ncbi_fcs_gx_screen_samples' +include { NCBI_FCS_GX_KRONA_PLOT } from '../../modules/local/ncbi_fcs_gx_krona_plot' workflow NCBI_FCS_GX { take: @@ -51,62 +53,3 @@ workflow NCBI_FCS_GX { gx_taxonomy_plot = ch_gx_taxonomy_plot versions = ch_versions } - -process NCBI_FCS_GX_SETUP_SAMPLE { - tag "${asm_tag}" - label "process_single" - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04': - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(asm_tag), path(fasta_file) - - output: - path 'fasta.file.for.*.fasta', emit: fsata - - script: - """ - ln -s $fasta_file "fasta.file.for.${asm_tag}.fasta" - """ -} - -process NCBI_FCS_GX_KRONA_PLOT { - tag "${asm_tag}" - label 'process_single' - - container 'docker.io/nanozoo/krona:2.7.1--e7615f7' - publishDir "${params.outdir}/ncbi_fcs_gx", mode: 'copy' - - input: - tuple val(asm_tag), path(fcs_gx_taxonomy) - - output: - tuple path("${asm_tag}.inter.tax.rpt.tsv"), - path("${asm_tag}.fcs.gx.krona.cut"), - path("${asm_tag}.fcs.gx.krona.html") , emit: plot - path "versions.yml" , emit: versions - - script: - """ - cat $fcs_gx_taxonomy \\ - | awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \\ - > "${asm_tag}.inter.tax.rpt.tsv" - - cat "${asm_tag}.inter.tax.rpt.tsv" \\ - | awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \\ - > "${asm_tag}.fcs.gx.krona.cut" - - cat "${asm_tag}.inter.tax.rpt.tsv" \\ - | awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \\ - >> "${asm_tag}.fcs.gx.krona.cut" - - ktImportTaxonomy -i -o "${asm_tag}.fcs.gx.krona.html" -m "4" "${asm_tag}.fcs.gx.krona.cut" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - KronaTools: \$(ktImportTaxonomy | sed -n '/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1') - END_VERSIONS - """ -} diff --git a/tests/stub/FI1.monoploid.seqs.txt b/tests/stub/FI1.monoploid.seqs.txt new file mode 100644 index 0000000..f54b5e6 --- /dev/null +++ b/tests/stub/FI1.monoploid.seqs.txt @@ -0,0 +1,7 @@ +CP031385.1 +CP031386.1 +CP031387.1 +CP031388.1 +CP031389.1 +CP031390.1 +CP031391.1 diff --git a/tests/stub/FI1.seq.labels.tsv b/tests/stub/FI1.seq.labels.tsv new file mode 100644 index 0000000..73636ff --- /dev/null +++ b/tests/stub/FI1.seq.labels.tsv @@ -0,0 +1,7 @@ +CP031385.1 FI1_1 +CP031386.1 FI1_2 +CP031387.1 FI1_3 +CP031388.1 FI1_4 +CP031389.1 FI1_5 +CP031390.1 FI1_6 +CP031391.1 FI1_7 diff --git a/tests/stub/TT_2021a.seq.list b/tests/stub/TT_2021a.seq.list new file mode 100644 index 0000000..a51ee15 --- /dev/null +++ b/tests/stub/TT_2021a.seq.list @@ -0,0 +1,7 @@ +CP083245.1 TT1 +CP083246.1 TT2 +CP083247.1 TT3 +CP083248.1 TT4 +CP083249.1 TT5 +CP083250.1 TT6 +CP083251.1 TT7 diff --git a/tests/stub/assemblysheet.csv b/tests/stub/assemblysheet.csv new file mode 100644 index 0000000..86b2de8 --- /dev/null +++ b/tests/stub/assemblysheet.csv @@ -0,0 +1,2 @@ +tag,fasta,gff3,monoploid_ids,hic_reads,synteny_labels +FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/docs/test_files/FI1.monoploid.seqs.txt,"https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/docs/test_files/hic/stub_hic.R{1,2}.fq.gz",https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/FI1.seq.labels.tsv diff --git a/tests/stub/hic/Dummy_hic.R1.fq.gz b/tests/stub/hic/Dummy_hic.R1.fq.gz new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/hic/Dummy_hic.R2.fq.gz b/tests/stub/hic/Dummy_hic.R2.fq.gz new file mode 100644 index 0000000..e69de29