nf-core · lrauschning · Feb 15, 2024 · Dec 8, 2023 · Dec 8, 2023 · Dec 8, 2023
diff --git a/modules/nf-core/clustalo/align/environment.yml b/modules/nf-core/clustalo/align/environment.yml
@@ -5,3 +5,4 @@ channels:
   - defaults
 dependencies:
   - bioconda::clustalo=1.2.4
+  - conda-forge::pigz=2.8
diff --git a/modules/nf-core/clustalo/align/main.nf b/modules/nf-core/clustalo/align/main.nf
@@ -4,45 +4,54 @@ process CLUSTALO_ALIGN {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/clustalo:1.2.4--h87f3376_5':
-        'biocontainers/clustalo:1.2.4--h87f3376_5' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0':
+        'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }"
 
     input:
-    tuple val(meta),  path(fasta)
+    tuple val(meta) , path(fasta)
     tuple val(meta2), path(tree)
+    val(compress)
 
     output:
-    tuple val(meta), path("*.aln"), emit: alignment
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path("*.aln{.gz,}"), emit: alignment
+    path "versions.yml"                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    // Compressed output is written through a bash process substitution, >(pigz ...), not a pipe,
+    // so the task exit status stays clustalo's and nextflow displays an error when it failed.
+    // clustalo expands its commandline input and would treat a pipe as a parameter and fail,
+    // so the substitution (which expands to /dev/fd/<id>) is passed via -o instead;
+    // --force is required because that path is an already existing file.
+    def write_output = compress ? "--force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "> ${prefix}.aln"
     """
-    clustalo \\
-        -i ${fasta} \\
-        --threads=${task.cpus} \\
-        $args \\
-        -o ${prefix}.aln
+    clustalo \
+        -i ${fasta} \
+        --threads=${task.cpus} \
+        $args \
+        $write_output
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         clustalo: \$( clustalo --version )
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
     END_VERSIONS
     """
 
     stub:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.aln
+    touch ${prefix}.aln${compress ? '.gz' : ''}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         clustalo: \$( clustalo --version )
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
     END_VERSIONS
     """
 }
diff --git a/modules/nf-core/clustalo/align/meta.yml b/modules/nf-core/clustalo/align/meta.yml
@@ -12,6 +12,10 @@ tools:
       tool_dev_url: "http://www.clustal.org/omega/"
       doi: "10.1038/msb.2011.75"
       licence: ["GPL v2"]
+  - "pigz":
+      description: "Parallel implementation of the gzip algorithm."
+      homepage: "https://zlib.net/pigz/"
+      documentation: "https://zlib.net/pigz/pigz.pdf"
 input:
   - meta:
       type: map
@@ -31,6 +35,9 @@ input:
       type: file
       description: Input guide tree in Newick format
       pattern: "*.{dnd}"
+  - compress:
+      type: boolean
+      description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded.
 output:
   - meta:
       type: map
@@ -39,8 +46,8 @@ output:
         e.g. `[ id:'test']`
   - alignment:
       type: file
-      description: Alignment file.
-      pattern: "*.{aln}"
+      description: Alignment file, in FASTA format. May be gzipped or uncompressed, depending on if compress is set to true or false
+      pattern: "*.aln{.gz,}"
   - versions:
       type: file
       description: File containing software versions
@@ -51,3 +58,4 @@ authors:
 maintainers:
   - "@luisas"
   - "@joseespinosa"
+  - "@lrauschning"
diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test b/modules/nf-core/clustalo/align/tests/main.nf.test
@@ -4,13 +4,38 @@ nextflow_process {
     script "../main.nf"
     process "CLUSTALO_ALIGN"
     config "./nextflow.config"
-    
+
     tag "modules"
     tag "modules_nfcore"
     tag "clustalo"
     tag "clustalo/align"
+    tag "clustalo/guidetree"
+
+    test("sarscov2 - contigs-fasta - uncompressed") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                             file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
+                           ]
+                input[1] = [[:],[]]
+                input[2] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.alignment).match("alignment - uncompressed")},
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
 
-    test("sarscov2 - contigs-fasta") {
+    test("sarscov2 - contigs-fasta - compressed") {
 
         when {
             process {
@@ -19,14 +44,15 @@ nextflow_process {
                              file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
                            ]
                 input[1] = [[:],[]]
+                input[2] = true
                 """
             }
         }
 
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.alignment).match("alignment")},
+                { assert snapshot(process.out.alignment).match("alignment - compressed")},
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -56,6 +82,7 @@ nextflow_process {
                              file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
                            ]
                 input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]}
+                input[2] = true
                 """
             }
         }
@@ -68,4 +95,4 @@ nextflow_process {
             )
         }
     }
-}
+}
diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test.snap b/modules/nf-core/clustalo/align/tests/main.nf.test.snap
diff --git a/modules/nf-core/famsa/align/main.nf b/modules/nf-core/famsa/align/main.nf
@@ -10,26 +10,29 @@ process FAMSA_ALIGN {
         'biocontainers/famsa:2.2.2--h9f5acd7_0' }"
 
     input:
-    tuple val(meta),  path(fasta)
+    tuple val(meta) , path(fasta)
     tuple val(meta2), path(tree)
+    val(compress)
 
     output:
-    tuple val(meta), path("*.aln"), emit: alignment
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path("*.aln{.gz,}"), emit: alignment
+    path "versions.yml"                 , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
+    def compress_args = compress ? '-gz' : ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def options_tree = tree ? "-gt import $tree" : ""
     """
     famsa $options_tree \\
+        $compress_args \\
         $args \\
         -t ${task.cpus} \\
         ${fasta} \\
-        ${prefix}.aln
+        ${prefix}.aln${compress ? '.gz':''}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -40,7 +43,7 @@ process FAMSA_ALIGN {
     stub:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
-    touch ${prefix}.aln
+    touch ${prefix}.aln${compress ? '.gz' : ''}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/nf-core/famsa/align/meta.yml b/modules/nf-core/famsa/align/meta.yml
@@ -33,6 +33,9 @@ input:
       type: file
       description: Input guide tree in Newick format
       pattern: "*.{dnd}"
+  - compress:
+      type: boolean
+      description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is handled by passing '-gz' to FAMSA along with any other options specified in task.ext.args.
 output:
   - meta:
       type: map
@@ -41,8 +44,8 @@ output:
         e.g. `[ id:'test']`
   - alignment:
       type: file
-      description: Alignment file.
-      pattern: "*.{aln}"
+      description: Alignment file, in FASTA format. May be gzipped or uncompressed, depending on if compress is set to true or false
+      pattern: "*.aln{.gz,}"
   - versions:
       type: file
       description: File containing software versions

diff --git a/modules/nf-core/famsa/align/tests/main.nf.test b/modules/nf-core/famsa/align/tests/main.nf.test
@@ -8,8 +8,9 @@ nextflow_process {
     tag "modules_nfcore"
     tag "famsa"
     tag "famsa/align"
+    tag "famsa/guidetree"
 
-    test("sarscov2 - fasta") {
+    test("sarscov2 - fasta - uncompressed") {
 
         when {
             process {
@@ -18,14 +19,39 @@ nextflow_process {
                              file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
                            ]
                 input[1] = [[:],[]]
+                input[2] = false
                 """
             }
         }
 
         then {
             assertAll(
                 { assert process.success },
-                { assert snapshot(process.out.alignment).match("alignment")},
+                { assert snapshot(process.out.alignment).match("alignment_uncompressed")},
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("sarscov2 - fasta - compressed") {
+
+        when {
+            process {
+                """
+                input[0] = [ [ id:'test' ], // meta map
+                             file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
+                           ]
+                input[1] = [[:],[]]
+                input[2] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.alignment).match("alignment_compressed")},
                 { assert snapshot(process.out.versions).match("versions") }
             )
         }
@@ -54,6 +80,7 @@ nextflow_process {
                              file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true)
                            ]
                 input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]}
+                input[2] = true
                 """
             }
         }
@@ -66,4 +93,4 @@ nextflow_process {
             )
         }
     }
-}
+}