Skip to content

Commit

Permalink
Added ncbi_fcs_gx
Browse files Browse the repository at this point in the history
  • Loading branch information
GallVp committed Feb 21, 2024
1 parent 2717953 commit 94b6761
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 111 deletions.
66 changes: 33 additions & 33 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,42 +28,42 @@ jobs:
- name: Run pre-commit
run: pre-commit run --all-files

nf-core:
runs-on: ubuntu-latest
steps:
- name: Check out pipeline code
uses: actions/checkout@v4
# nf-core:
# runs-on: ubuntu-latest
# steps:
# - name: Check out pipeline code
# uses: actions/checkout@v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
# - name: Install Nextflow
# uses: nf-core/setup-nextflow@v1

- uses: actions/setup-python@v5
with:
python-version: "3.11"
architecture: "x64"
# - uses: actions/setup-python@v5
# with:
# python-version: "3.11"
# architecture: "x64"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install nf-core
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install nf-core

- name: Run nf-core lint
env:
GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md
# - name: Run nf-core lint
# env:
# GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
# run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md

- name: Save PR number
if: ${{ always() }}
run: echo ${{ github.event.pull_request.number }} > PR_number.txt
# - name: Save PR number
# if: ${{ always() }}
# run: echo ${{ github.event.pull_request.number }} > PR_number.txt

- name: Upload linting log file artifact
if: ${{ always() }}
uses: actions/upload-artifact@v4
with:
name: linting-logs
path: |
lint_log.txt
lint_results.md
PR_number.txt
# - name: Upload linting log file artifact
# if: ${{ always() }}
# uses: actions/upload-artifact@v4
# with:
# name: linting-logs
# path: |
# lint_log.txt
# lint_results.md
# PR_number.txt
4 changes: 2 additions & 2 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@
]
},
"hic_reads": {
"errorMessage": "HiC reads should either be provided as a SRA ID or as a path to paired reads with pattern '*_R{1,2}.(fastq|fq).gz'",
"errorMessage": "HiC reads should either be provided as a SRA ID or as a path to paired reads with pattern '*R{1,2}.(fastq|fq).gz'",
"anyOf": [
{
"type": "string",
"pattern": "^SR\\w+$"
},
{
"type": "string",
"pattern": "^\\S+_R\\{1,2\\}\\.f(ast)?q\\.gz$"
"pattern": "^\\S+R\\{1,2\\}\\.f(ast)?q\\.gz$"
},
{
"type": "string",
Expand Down
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ process {
]
}

// Publishing rule for the NCBI_FCS_GX_KRONA_PLOT process:
//   - outputs are published under "<params.outdir>/ncbi_fcs_gx"
//   - the publish mode is taken from params.publish_dir_mode
//   - versions.yml is not published (saveAs returns null for it); every other
//     file keeps its original name
withName: NCBI_FCS_GX_KRONA_PLOT {
publishDir = [
path: { "${params.outdir}/ncbi_fcs_gx" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals("versions.yml") ? null : filename }
]
}

withName: RUN_BUSCO {
publishDir = [
path: { "${params.outdir}/busco" },
Expand Down
10 changes: 7 additions & 3 deletions modules/local/assemblathon_stats.nf
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
process ASSEMBLATHON_STATS {
tag "${asm_tag}"
label "process_single"
label 'process_single'

container "${ workflow.containerEngine == 'singularity' || workflow.containerEngine == 'apptainer' ?
conda "conda-forge::perl"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
'quay.io/nf-core/ubuntu:20.04' }"
'nf-core/ubuntu:20.04' }"

input:
tuple val(asm_tag), path(fasta_file)
Expand All @@ -14,6 +15,9 @@ process ASSEMBLATHON_STATS {
path "${asm_tag}_stats.csv" , emit: stats
path 'versions.yml' , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def VERSION = "github/PlantandFoodResearch/assemblathon2-analysis/a93cba2"
"""
Expand Down
42 changes: 42 additions & 0 deletions modules/local/ncbi_fcs_gx_krona_plot.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Renders a Krona HTML plot from a single FCS-GX taxonomy report.
//
// Input:
//   tuple(asm_tag, fcs_gx_taxonomy) - a tag used to name the outputs and the
//                                     taxonomy report file to process.
// Output:
//   plot     - tuple of <asm_tag>.inter.tax.rpt.tsv, <asm_tag>.fcs.gx.krona.cut
//              and <asm_tag>.fcs.gx.krona.html
//   versions - versions.yml holding the KronaTools version string
process NCBI_FCS_GX_KRONA_PLOT {
tag "${asm_tag}"
label 'process_single'

// Only a container image is declared for this module; there is no conda directive.
container 'docker.io/nanozoo/krona:2.7.1--e7615f7'

input:
tuple val(asm_tag), path(fcs_gx_taxonomy)

output:
tuple path("${asm_tag}.inter.tax.rpt.tsv"), path("${asm_tag}.fcs.gx.krona.cut"), path("${asm_tag}.fcs.gx.krona.html") , emit: plot
path "versions.yml" , emit: versions

// The task runs when ext.when is unset, or when it evaluates to true.
when:
task.ext.when == null || task.ext.when

script:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "NCBI_FCS_GX_KRONA_PLOT module does not support Conda. Please use Docker / Singularity / Podman instead."
}
// Shell steps, in order:
//   1. Drop the first line of the report and keep tab-separated columns
//      1, 2, 6, 7, 11 and 32 as <asm_tag>.inter.tax.rpt.tsv.
//   2. Rows of that intermediate file whose 6th column does NOT match
//      bogus|repeat|low-coverage|inconclusive are written to the Krona input
//      table as columns 1, 4, 5, 2.
//   3. Rows whose 6th column DOES match are appended with a literal "0" in
//      place of column 4.
//      NOTE(review): step 3 skips the first line of the intermediate file
//      (NR>1) while step 2 does not - presumably a remaining header line;
//      confirm against the FCS-GX report format.
//   4. ktImportTaxonomy writes the HTML plot from the Krona input table.
//      NOTE(review): -m "4" presumably selects column 4 as the magnitude and
//      -i adds a wedge for queries without hits - confirm against KronaTools usage.
//   5. versions.yml records the text extracted from the line of
//      ktImportTaxonomy's output that mentions "KronaTools".
"""
cat $fcs_gx_taxonomy \\
| awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \\
> "${asm_tag}.inter.tax.rpt.tsv"
cat "${asm_tag}.inter.tax.rpt.tsv" \\
| awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \\
> "${asm_tag}.fcs.gx.krona.cut"
cat "${asm_tag}.inter.tax.rpt.tsv" \\
| awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \\
>> "${asm_tag}.fcs.gx.krona.cut"
ktImportTaxonomy -i -o "${asm_tag}.fcs.gx.krona.html" -m "4" "${asm_tag}.fcs.gx.krona.cut"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
KronaTools: \$(ktImportTaxonomy | sed -n '/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1')
END_VERSIONS
"""
}
22 changes: 9 additions & 13 deletions modules/local/ncbi_fcs_gx_screen_samples.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ process NCBI_FCS_GX_SCREEN_SAMPLES {
label 'process_high'
label 'process_long'

conda "bioconda::ncbi-fcs-gx=0.5.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.4.0/fcs-gx.sif':
'docker.io/ncbi/fcs-gx:0.4.0' }"
'https://depot.galaxyproject.org/singularity/ncbi-fcs-gx:0.5.0--h4ac6f70_3':
'biocontainers/ncbi-fcs-gx:0.5.0--h4ac6f70_3' }"

input:
path samples
Expand All @@ -17,17 +18,16 @@ process NCBI_FCS_GX_SCREEN_SAMPLES {
path "*.taxonomy.rpt" , emit: fcs_gx_taxonomies
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "NCBI_FCS_GX_SCREEN_SAMPLES module does not support Conda. Please use Docker / Singularity / Podman instead."
}
def VERSION = 0.4
def VERSION = 0.5
"""
for sample_fasta in $samples;
do
sample_tag=\$(echo "\$sample_fasta" | sed 's/fasta.file.for.//g' | sed 's/.fasta//g')
python3 /app/bin/run_gx --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${tax_id}"
run_gx.py --fasta ./\$sample_fasta --out-dir ./ --gx-db $db_path --tax-id "${tax_id}"
mv "\${sample_fasta%.fasta}.${tax_id}.fcs_gx_report.txt" "\${sample_tag}.fcs_gx_report.txt"
mv "\${sample_fasta%.fasta}.${tax_id}.taxonomy.rpt" "\${sample_tag}.taxonomy.rpt"
Expand All @@ -40,11 +40,7 @@ process NCBI_FCS_GX_SCREEN_SAMPLES {
"""

stub:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "NCBI_FCS_GX_SCREEN_SAMPLES module does not support Conda. Please use Docker / Singularity / Podman instead."
}
def VERSION = 0.4
def VERSION = 0.5
"""
for sample_fasta in $samples;
do
Expand Down
26 changes: 26 additions & 0 deletions modules/local/ncbi_fcs_gx_setup_sample.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Exposes an assembly FASTA under a tag-derived file name by symlinking it.
//
// Input:
//   tuple(asm_tag, fasta_file) - the assembly tag and its FASTA file.
// Output:
//   fsata - the symlink named fasta.file.for.<asm_tag>.fasta
process NCBI_FCS_GX_SETUP_SAMPLE {
tag "${asm_tag}"
label 'process_single'

// Image selection: the Galaxy depot Singularity image is used when the engine is
// singularity and ext.singularity_pull_docker_container is not set; otherwise
// the nf-core/ubuntu:20.04 image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
'nf-core/ubuntu:20.04' }"

input:
tuple val(asm_tag), path(fasta_file)

output:
// NOTE(review): the emit name `fsata` looks like a typo for `fasta`. It is part of
// this process's interface, so renaming it means updating any consumer that
// references it - confirm before changing.
path 'fasta.file.for.*.fasta', emit: fsata

// The task runs when ext.when is unset, or when it evaluates to true.
when:
task.ext.when == null || task.ext.when

script:
// Exit if running this module with -profile conda / -profile mamba
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
error "NCBI_FCS_GX_SETUP_SAMPLE module does not support Conda. Please use Docker / Singularity / Podman instead."
}
// The symlink name embeds the tag so that it matches the declared output glob.
"""
ln -s $fasta_file "fasta.file.for.${asm_tag}.fasta"
"""
}
63 changes: 3 additions & 60 deletions subworkflows/local/ncbi_fcs_gx.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
include { NCBI_FCS_GX_SCREEN_SAMPLES } from '../../modules/local/ncbi_fcs_gx_screen_samples'
include { NCBI_FCS_GX_SETUP_SAMPLE } from '../../modules/local/ncbi_fcs_gx_setup_sample'
include { NCBI_FCS_GX_SCREEN_SAMPLES } from '../../modules/local/ncbi_fcs_gx_screen_samples'
include { NCBI_FCS_GX_KRONA_PLOT } from '../../modules/local/ncbi_fcs_gx_krona_plot'

workflow NCBI_FCS_GX {
take:
Expand Down Expand Up @@ -51,62 +53,3 @@ workflow NCBI_FCS_GX {
gx_taxonomy_plot = ch_gx_taxonomy_plot
versions = ch_versions
}

// Exposes an assembly FASTA under a tag-derived file name by symlinking it.
//
// Input:
//   tuple(asm_tag, fasta_file) - the assembly tag and its FASTA file.
// Output:
//   fsata - the symlink named fasta.file.for.<asm_tag>.fasta
process NCBI_FCS_GX_SETUP_SAMPLE {
tag "${asm_tag}"
label "process_single"

// Image selection: the Galaxy depot Singularity image is used when the engine is
// singularity and ext.singularity_pull_docker_container is not set; otherwise
// the nf-core/ubuntu:20.04 image is used.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04':
'nf-core/ubuntu:20.04' }"

input:
tuple val(asm_tag), path(fasta_file)

output:
// NOTE(review): the emit name `fsata` looks like a typo for `fasta`. It is part of
// this process's interface, so renaming it means updating any consumer that
// references it - confirm before changing.
path 'fasta.file.for.*.fasta', emit: fsata

script:
// The symlink name embeds the tag so that it matches the declared output glob.
"""
ln -s $fasta_file "fasta.file.for.${asm_tag}.fasta"
"""
}

// Renders a Krona HTML plot from a single FCS-GX taxonomy report.
//
// Input:
//   tuple(asm_tag, fcs_gx_taxonomy) - a tag used to name the outputs and the
//                                     taxonomy report file to process.
// Output:
//   plot     - tuple of <asm_tag>.inter.tax.rpt.tsv, <asm_tag>.fcs.gx.krona.cut
//              and <asm_tag>.fcs.gx.krona.html
//   versions - versions.yml holding the KronaTools version string
process NCBI_FCS_GX_KRONA_PLOT {
tag "${asm_tag}"
label 'process_single'

container 'docker.io/nanozoo/krona:2.7.1--e7615f7'
// Publishing is configured on the process itself: every declared output,
// including versions.yml, is copied to "<params.outdir>/ncbi_fcs_gx".
publishDir "${params.outdir}/ncbi_fcs_gx", mode: 'copy'

input:
tuple val(asm_tag), path(fcs_gx_taxonomy)

output:
tuple path("${asm_tag}.inter.tax.rpt.tsv"),
path("${asm_tag}.fcs.gx.krona.cut"),
path("${asm_tag}.fcs.gx.krona.html") , emit: plot
path "versions.yml" , emit: versions

script:
// Shell steps, in order:
//   1. Drop the first line of the report and keep tab-separated columns
//      1, 2, 6, 7, 11 and 32 as <asm_tag>.inter.tax.rpt.tsv.
//   2. Rows of that intermediate file whose 6th column does NOT match
//      bogus|repeat|low-coverage|inconclusive are written to the Krona input
//      table as columns 1, 4, 5, 2.
//   3. Rows whose 6th column DOES match are appended with a literal "0" in
//      place of column 4.
//      NOTE(review): step 3 skips the first line of the intermediate file
//      (NR>1) while step 2 does not - presumably a remaining header line;
//      confirm against the FCS-GX report format.
//   4. ktImportTaxonomy writes the HTML plot from the Krona input table.
//      NOTE(review): -m "4" presumably selects column 4 as the magnitude and
//      -i adds a wedge for queries without hits - confirm against KronaTools usage.
//   5. versions.yml records the text extracted from the line of
//      ktImportTaxonomy's output that mentions "KronaTools".
"""
cat $fcs_gx_taxonomy \\
| awk 'NR>1 {print \$1,\$2,\$6,\$7,\$11,\$32}' FS="\\t" OFS="\\t" \\
> "${asm_tag}.inter.tax.rpt.tsv"
cat "${asm_tag}.inter.tax.rpt.tsv" \\
| awk '\$6 !~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,\$4,\$5,\$2}' FS="\\t" OFS="\\t" \\
> "${asm_tag}.fcs.gx.krona.cut"
cat "${asm_tag}.inter.tax.rpt.tsv" \\
| awk 'NR>1 && \$6 ~ /(bogus|repeat|low-coverage|inconclusive)/ {print \$1,"0",\$5,\$2}' FS="\\t" OFS="\\t" \\
>> "${asm_tag}.fcs.gx.krona.cut"
ktImportTaxonomy -i -o "${asm_tag}.fcs.gx.krona.html" -m "4" "${asm_tag}.fcs.gx.krona.cut"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
KronaTools: \$(ktImportTaxonomy | sed -n '/KronaTools/s/KronaTools//p' | tr -d ' _/[:space:]' | sed 's/-ktImportTaxonomy\\\\//1')
END_VERSIONS
"""
}
7 changes: 7 additions & 0 deletions tests/stub/FI1.monoploid.seqs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CP031385.1
CP031386.1
CP031387.1
CP031388.1
CP031389.1
CP031390.1
CP031391.1
7 changes: 7 additions & 0 deletions tests/stub/FI1.seq.labels.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CP031385.1 FI1_1
CP031386.1 FI1_2
CP031387.1 FI1_3
CP031388.1 FI1_4
CP031389.1 FI1_5
CP031390.1 FI1_6
CP031391.1 FI1_7
7 changes: 7 additions & 0 deletions tests/stub/TT_2021a.seq.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CP083245.1 TT1
CP083246.1 TT2
CP083247.1 TT3
CP083248.1 TT4
CP083249.1 TT5
CP083250.1 TT6
CP083251.1 TT7
2 changes: 2 additions & 0 deletions tests/stub/assemblysheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
tag,fasta,gff3,monoploid_ids,hic_reads,synteny_labels
FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/docs/test_files/FI1.monoploid.seqs.txt,"https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/docs/test_files/hic/stub_hic.R{1,2}.fq.gz",https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/FI1.seq.labels.tsv
Empty file.
Empty file.

0 comments on commit 94b6761

Please sign in to comment.