diff --git a/modules/nf-core/limma/differential/environment.yml b/modules/nf-core/limma/differential/environment.yml index f70a20eff5b..bce4ef9cbe1 100644 --- a/modules/nf-core/limma/differential/environment.yml +++ b/modules/nf-core/limma/differential/environment.yml @@ -2,4 +2,8 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::bioconductor-limma=3.54.0 + - bioconda::bioconductor-edger=4.0.16 + - bioconda::bioconductor-ihw=1.28.0 + - bioconda::bioconductor-limma=3.58.1 + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-readr=2.1.5 diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index 384e4649871..b3f304aac4c 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -17,6 +17,7 @@ process LIMMA_DIFFERENTIAL { tuple val(meta), path("*.MArrayLM.limma.rds") , emit: rdata tuple val(meta), path("*.limma.model.txt") , emit: model tuple val(meta), path("*.R_sessionInfo.log") , emit: session_info + tuple val(meta), path("*.normalised_counts.tsv") , emit: normalised_counts, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 1d0195694df..a4171db9ac6 100755 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -77,6 +77,9 @@ opt <- list( subset_to_contrast_samples = FALSE, exclude_samples_col = NULL, exclude_samples_values = NULL, + use_voom = TRUE, + IHW_correction = TRUE, + number = Inf, ndups = NULL, # lmFit spacing = NULL, # lmFit block = NULL, # lmFit @@ -142,6 +145,11 @@ opt[vector_opt] = lapply(strsplit(unlist(opt[vector_opt]), ','), as.numeric) ################################################ library(limma) +library(edgeR) +library(readr) +library(dplyr) +library(tibble) +library(IHW) ################################################ ################################################ @@ -287,11 +295,38 @@ colnames(design) <- sub( paste0(contrast_variable, '.'), colnames(design) ) +# Perform voom normalisation for RNA-seq data +if (!is.null(opt\$use_voom) && opt\$use_voom) { + # Create a DGEList object for RNA-seq data + dge <- DGEList(counts = intensities.table) + + # Normalize counts using TMM + dge <- calcNormFactors(dge, method = "TMM") + + # Run voom to transform the data + voom_result <- voom(dge, design) + data_for_fit <- voom_result + + # Write the normalized counts matrix to a TSV file + normalized_counts <- voom_result\$E + normalized_counts_with_genes <- data.frame(Gene = rownames(normalized_counts), normalized_counts, row.names = NULL) + colnames(normalized_counts_with_genes)[1] <- opt\$probe_id_col + write.table(normalized_counts_with_genes, + file = paste(opt\$output_prefix, "normalised_counts.tsv", sep = '.'), + sep = "\t", + quote = FALSE, + row.names = FALSE) + +} else { + # Use as.matrix for regular microarray analysis + data_for_fit <- as.matrix(intensities.table) +} + # Prepare for and run lmFit() lmfit_args = c( list( - object = as.matrix(intensities.table), + object = data_for_fit, design = design ), opt[c('ndups', 'spacing', 'block', 'method')] @@ -322,18 +357,54 @@ ebayes_args = c( fit2 <- do.call(eBayes, ebayes_args) -# Run topTable() to generate a results data frame +if ((!is.null(opt\$use_voom) && opt\$use_voom) && (!is.null(opt\$IHW_correction) && opt\$IHW_correction)) { -toptable_args = c( - list( - fit = fit2, - sort.by = 'none', - number = nrow(intensities.table) - ), - opt[c('adjust.method', 'p.value', 'lfc', 'confint')] -) + # Define the column name for the group comparison + name_con_ref <- glue::glue("{contrast_variable}.{opt\$target_level}-{contrast_variable}.{opt\$reference_level}") + + # Calculate the median gene expression + median_expression <- apply(dge\$counts, 1, median) + # Apply IHW for multiple testing correction using median expression as covariate + ihw_result <- ihw(fit2\$p.value[, name_con_ref] ~ median_expression, alpha = 0.05) + ihw_result@df <- as_tibble(ihw_result@df, rownames = opt\$probe_id_col) + + # Convert fit\$p.value to a tibble, including rownames as a new column 'Gene' + fit_df <- as_tibble(fit2\$p.value, rownames = opt\$probe_id_col) + + fit_df <- fit_df %>% + left_join(ihw_result@df %>% select(!!sym(opt\$probe_id_col),adj_pvalue),by=opt\$probe_id_col) %>% + rename(ihw.adj.p.value = adj_pvalue) -comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),] + # Convert back to a matrix if necessary (not needed for merging, but for consistency later) + fit\$p.value <- as.matrix(fit_df %>% select(-!!sym(opt\$probe_id_col))) # Keep the original structure without probe_id_col column + + # Generate the topTable including the new adjusted p-values + toptable_args <- list(fit = fit2, coef = name_con_ref, number = opt\$number, adjust.method = opt\$adjust.method, + p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint) + + comp.results <- do.call(topTable, toptable_args) + # Add the IHW-adjusted p-values to the topTable results by merging based on probe_id_col + comp.results <- comp.results %>% + rownames_to_column(opt\$probe_id_col) %>% # Ensure probe_id_col column exists in comp.results + left_join(fit_df, by = opt\$probe_id_col) %>% # Merge based on probe_id_col column + column_to_rownames(var=opt\$probe_id_col) %>% + select(-sym(name_con_ref)) + +} else { + + # Run topTable() to generate a results data frame + + toptable_args = c( + list( + fit = fit2, + sort.by = 'none', + number = nrow(intensities.table) + ), + opt[c('adjust.method', 'p.value', 'lfc', 'confint')] + ) + + comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),] +} ################################################ ################################################ diff --git a/modules/nf-core/limma/differentialvoom/environment.yml b/modules/nf-core/limma/differentialvoom/environment.yml new file mode 100644 index 00000000000..bce4ef9cbe1 --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bioconductor-edger=4.0.16 + - bioconda::bioconductor-ihw=1.28.0 + - bioconda::bioconductor-limma=3.58.1 + - conda-forge::r-dplyr=1.1.4 + - conda-forge::r-readr=2.1.5 diff --git a/modules/nf-core/limma/differentialvoom/main.nf b/modules/nf-core/limma/differentialvoom/main.nf new file mode 100644 index 00000000000..bbd0c3607b3 --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/main.nf @@ -0,0 +1,61 @@ +process LIMMA_DIFFERENTIALVOOM { + tag "$meta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/bioconductor-edger_bioconductor-ihw_bioconductor-limma_r-dplyr_r-readr:7fc48564d286c1c6' : + 'community.wave.seqera.io/library/bioconductor-edger_bioconductor-ihw_bioconductor-limma_r-dplyr_r-readr:edea0f9fbaeba3a0' }" + + input: + tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta2), path(samplesheet), path(intensities) + + output: + tuple val(meta), path("*.limma.results.tsv") , emit: results + tuple val(meta), path("*.limma.mean_difference.png") , emit: md_plot + tuple val(meta), path("*.MArrayLM.limma.rds") , emit: rdata + tuple val(meta), path("*.normalised_counts.tsv") , emit: normalised_counts, optional: true + tuple val(meta), path("*.limma.model.txt") , emit: model + tuple val(meta), path("*.R_sessionInfo.log") , emit: session_info + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template 'limma_de.R' + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + #!/usr/bin/env Rscript + library(limma) + + a <- file("${prefix}.limma.results.tsv", "w") + close(a) + a <- file("${prefix}.limma.mean_difference.png", "w") + close(a) + a <- file("${prefix}.MArrayLM.limma.rds", "w") + close(a) + a <- file("${prefix}.normalised_counts.tsv", "w") + close(a) + a <- file("${prefix}.limma.model.txt", "w") + close(a) + a <- file("${prefix}.R_sessionInfo.log", "w") + close(a) + + ## VERSIONS FILE + r.version <- strsplit(version[['version.string']], ' ')[[1]][3] + limma.version <- as.character(packageVersion('limma')) + + writeLines( + c( + '"task.process":', + paste(' r-base:', r.version), + paste(' bioconductor-limma:', limma.version) + ), + 'versions.yml' + ) + """ +} diff --git a/modules/nf-core/limma/differentialvoom/meta.yml b/modules/nf-core/limma/differentialvoom/meta.yml new file mode 100644 index 00000000000..962b5dffb0d --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/meta.yml @@ -0,0 +1,86 @@ +name: "limma_differential" +description: runs a differential expression analysis with Limma +keywords: + - differential + - expression + - microarray + - limma +tools: + - "limma": + description: "Linear Models for Microarray Data" + homepage: "https://bioconductor.org/packages/release/bioc/html/limma.html" + documentation: "https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf" + tool_dev_url: https://github.com/cran/limma"" + doi: "10.18129/B9.bioc.limma" + licence: ["LGPL >=3"] +input: + - meta: + type: map + description: | + Groovy Map containing contrast information. This can be used at the + workflow level to pass optional parameters to the module, e.g. + [ id:'contrast1', blocking:'patient' ] passed in as ext.args like: + '--blocking_variable $meta.blocking'. + - contrast_variable: + type: string + description: | + The column in the sample sheet that should be used to define groups for + comparison + - reference: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the reference samples + - target: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the target samples + - meta2: + type: map + description: | + Groovy map containing study-wide metadata related to the sample sheet + and matrix + - samplesheeet: + type: file + description: | + CSV or TSV format sample sheet with sample metadata + - intensities: + type: file + description: | + Raw TSV or CSV format expression matrix with probes by row and samples + by column + - type: + type: string + description: | + Analysis type to be performed determines template which should be used for analysis + +output: + - results: + type: file + description: TSV-format table of differential expression information as output by Limma + pattern: "*.limma.results.tsv" + - md_plot: + type: file + description: Limma mean difference plot + pattern: "*.mean_difference.png" + - rdata: + type: file + description: Serialised MArrayLM object + pattern: "*.MArrayLM.limma.rds" + - model: + type: file + description: TXT-format limma model + pattern: "*.limma.model.tsv" + - session_info: + type: file + description: dump of R SessionInfo + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@KamilMaliszArdigen" +maintainers: + - "@KamilMaliszArdigen" diff --git a/modules/nf-core/limma/differentialvoom/templates/limma_de.R b/modules/nf-core/limma/differentialvoom/templates/limma_de.R new file mode 100644 index 00000000000..565373fe437 --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/templates/limma_de.R @@ -0,0 +1,411 @@ +#!/usr/bin/env Rscript +library(limma) +library(edgeR) +library(readr) +library(dplyr) +library(tibble) + +library(IHW) + +parse_args <- function(x){ + args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1] + args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE)) + + # Ensure the option vectors are length 2 (key/ value) to catch empty ones + args_vals <- lapply(args_vals, function(z){ length(z) <- 2; z}) + + parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1])) + parsed_args[ ( ! parsed_args %in% c('', 'null')) & ! is.na(parsed_args)] +} + +#' Flexibly read CSV or TSV files +#' +#' @param file Input file +#' @param header Passed to read.delim() +#' @param row.names Passed to read.delim() +#' +#' @return output Data frame + +read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.names = TRUE){ + + ext <- tolower(tail(strsplit(basename(file), split = "\\\\.")[[1]], 1)) + + if (ext == "tsv" || ext == "txt") { + separator <- "\t" + } else if (ext == "csv") { + separator <- "," + } else { + stop(paste("Unknown separator for", ext)) + } + + read.delim( + file, + sep = separator, + header = header, + row.names = row.names, + check.names = check.names + ) +} + +# Validates the input parameters against the provided sample sheet. +# Ensures the contrast variable exists in the sample sheet, +# verifies that the reference and target are present within that variable, +# and checks if blocking variables, if provided, are valid columns in the sample sheet. +validate_sample_sheet <- function(sample_sheet, contrast_variable, reference_level, target_level, blocking_variables) { + # Check if the contrast variable is in the sample sheet + if (!contrast_variable %in% colnames(sample_sheet)) { + stop( + paste0( + 'Chosen contrast variable \"', + contrast_variable, + '\" not in sample sheet' + ) + ) + } + + # Check if the reference and target levels are in the contrast variable column + if (any(!c(reference_level, target_level) %in% sample_sheet[[contrast_variable]])) { + stop( + paste( + 'Please choose reference and target levels that are present in the', + contrast_variable, + 'column of the sample sheet' + ) + ) + } + + # If blocking variables are provided, check if they are in the sample sheet + if (!is.null(blocking_variables)) { + blocking_vars <- make.names(unlist(strsplit(blocking_variables, split = ';'))) + if (!all(blocking_vars %in% colnames(sample_sheet))) { + missing_block <- paste(blocking_vars[!blocking_vars %in% colnames(sample_sheet)], collapse = ',') + stop( + paste( + 'Blocking variables', missing_block, + 'do not correspond to sample sheet columns.' + ) + ) + } + } +} + +# Set defaults and classes +opt <- list( + output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'), + count_file = '$intensities', + pair_id_col = 'pair_id', + sample_file = '$samplesheet', + contrast_variable = '$contrast_variable', + reference_level = '$reference', + target_level = '$target', + blocking_variables = NULL, + probe_id_col = "probe_id", + sample_id_col = "sample", + analysis_type = NULL, + subset_to_contrast_samples = FALSE, + exclude_samples_col = NULL, + exclude_samples_values = NULL, + geneFilter = 10, + IHW_correction = TRUE, + do_contrast = TRUE, + ndups = NULL, # lmFit + spacing = NULL, # lmFit + block = NULL, # lmFit + correlation = NULL, # lmFit + method = 'ls', # lmFit + proportion = 0.01, # eBayes + stdev.coef.lim = '0.1,4', # eBayes + trend = FALSE, # eBayes + robust = FALSE, # eBayes + winsor.tail.p = '0.05,0.1', # eBayes + adjust.method = "BH", # topTable + p.value = 1, # topTable + lfc = 0, # topTable + confint = FALSE, # topTable + number = Inf # topTable +) +opt_types <- lapply(opt, class) + +args_opt <- parse_args('$task.ext.args') + +# Update opt based on parsed_args +opt <- modifyList(opt, args_opt) + +print(opt) + +# Set correct types for options +for (name in names(opt)) { + if (!is.null(opt[[name]])) { + class(opt[[name]]) <- opt_types[[name]] + } +} + +## CHECKS +# Check if required parameters have been provided +required_opts <- c('contrast_variable', 'reference_level', 'target_level', 'output_prefix') +missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] + +if (length(missing) > 0){ + stop(paste("Missing required options:", paste(missing, collapse=', '))) +} + +# Check file inputs are valid +for (file_input in c('count_file', 'sample_file')){ + if (is.null(opt[[file_input]])) { + stop(paste("Please provide", file_input), call. = FALSE) + } + + if (! file.exists(opt[[file_input]])){ + stop(paste0('Value of ', file_input, ': ', opt[[file_input]], ' is not a valid file')) + } +} + +# Convert things to vectors where needed +vector_opt <- c('stdev.coef.lim', 'winsor.tail.p') +opt[vector_opt] = lapply(strsplit(unlist(opt[vector_opt]), ','), as.numeric) + +# READ IN COUNTS FILE AND SAMPLE METADATA +count.table <- + read_delim_flexible( + file = opt\$count_file, + header = TRUE, + row.names = 1, + check.names = FALSE + ) +sample.sheet <- read_delim_flexible(file = opt\$sample_file) + +# Correct sample.sheet colnames +colnames(sample.sheet) <- make.names(colnames(sample.sheet)) + +# Deal with spaces that may be in sample column +opt\$sample_id_col <- make.names(opt\$sample_id_col) + +if (! opt\$sample_id_col %in% colnames(sample.sheet)){ + stop(paste0("Specified sample ID column '", opt\$sample_id_col, "' is not in the sample sheet")) +} + +# Sample sheet can have duplicate rows for multiple sequencing runs, so uniqify +# before assigning row names +sample.sheet <- sample.sheet[! duplicated(sample.sheet[[opt\$sample_id_col]]), ] +rownames(sample.sheet) <- sample.sheet[[opt\$sample_id_col]] + +# Check that all samples specified in the input sheet are present in the +# counts table. Assuming they are, subset and sort the count table to +# match the sample sheet +missing_samples <- + sample.sheet[!rownames(sample.sheet) %in% colnames(count.table), opt\$sample_id_col] + +if (length(missing_samples) > 0) { + stop(paste( + length(missing_samples), + 'specified samples missing from count table:', + paste(missing_samples, collapse = ',') + )) +} else{ + # Save any non-count data, will gene metadata etc we might need later + noncount.table <- + count.table[, !colnames(count.table) %in% rownames(sample.sheet), drop = FALSE] + count.table <- count.table[, rownames(sample.sheet)] +} + +########## START ANALYSIS +DGE <- DGEList(count.table) +dim(DGE) + +## Filter genes +keep <- which(apply(DGE\$counts, 1, max) >= opt\$geneFilter) +filtered_counts <- DGE\$counts[keep, ] +DGE\$counts <- filtered_counts +dim(DGE) + +# Calculate normalization factors +DGE <- calcNormFactors(DGE) + +if (opt\$analysis_type == "pairwise") { + # Pairwise analysis setup + contrast_variable <- make.names(opt\$contrast_variable) + reference_level <- make.names(opt\$reference_level) + target_level <- make.names(opt\$target_level) + + validate_sample_sheet(sample.sheet, contrast_variable, reference_level, target_level, opt\$blocking_variables) + + sample.sheet\$pair_IDs <- as.integer(as.factor(sample.sheet[[opt\$pair_id_col]])) + model <- paste('~ 0 + pair_IDs +', contrast_variable) + + design <- model.matrix(as.formula(model), data=sample.sheet) + colnames(design) <- sub(contrast_variable, paste0(contrast_variable, '.'), colnames(design)) + + voom_args = list(counts = DGE, design = design, plot = FALSE) + voom_1 <- do.call(voom, voom_args) + # Write the normalized counts matrix to a TSV file + normalized_counts <- voom_1\$E + normalized_counts_with_genes <- data.frame(Gene = rownames(normalized_counts), normalized_counts, row.names = NULL) + colnames(normalized_counts_with_genes)[1] <- opt\$probe_id_col + write.table(normalized_counts_with_genes, + file = paste(opt\$output_prefix, "normalised_counts.tsv", sep = '.'), + sep = "\t", + quote = FALSE, + row.names = FALSE) + + lmfit_args = list(object = voom_1, design = design) + fit <- do.call(lmFit, lmfit_args) + + if (opt\$do_contrast) { + contrast <- paste(paste(contrast_variable, c(opt\$reference_level, opt\$target_level), sep='.'), collapse='-') + contrast_matrix <- makeContrasts(contrasts=contrast, levels=design) + fit <- contrasts.fit(fit, contrast_matrix) + } + + ebayes_args = list(fit = fit) + fit <- do.call(eBayes, ebayes_args) + +} else if (opt\$analysis_type == "mixedmodel") { + + ## CHECK CONTRAST SPECIFICATION + contrast_variable <- make.names(opt\$contrast_variable) + reference_level <- make.names(opt\$reference_level) + target_level <- make.names(opt\$target_level) + sample.sheet[[contrast_variable]] = paste(sample.sheet[[strsplit2(contrast_variable, split = "\\\\.")[1]]], + sample.sheet[[strsplit2(contrast_variable, split = "\\\\.")[2]]], sep = ".") + + validate_sample_sheet(sample.sheet, contrast_variable, reference_level, target_level, opt\$blocking_variables) + + blocking.vars <- sample.sheet[[opt\$blocking_variables]] + + # Mixed model analysis setup + model <- paste('~ 0 +', contrast_variable) + design <- model.matrix(as.formula(model), data=sample.sheet) + colnames(design) <- sub(contrast_variable, paste0(contrast_variable, '.'), colnames(design)) + + voom_args = list(counts = DGE, design = design, plot = FALSE) + voom_1 <- do.call(voom, voom_args) + # Write the normalized counts matrix to a TSV file + normalized_counts <- voom_1\$E + normalized_counts_with_genes <- data.frame(Gene = rownames(normalized_counts), normalized_counts, row.names = NULL) + colnames(normalized_counts_with_genes)[1] <- opt\$probe_id_col + write.table(normalized_counts_with_genes, + file = paste(opt\$output_prefix, "normalised_counts.tsv", sep = '.'), + sep = "\t", + quote = FALSE, + row.names = FALSE) + + corfit_args <- list(object = voom_1, design = design, block = blocking.vars) + corfit = do.call(duplicateCorrelation, corfit_args) + + voom_args <- list(counts = DGE, design = design, plot = FALSE, correlation = corfit\$consensus.correlation) + voom_1 <- do.call(voom, voom_args) + + corfit_args <- list(object = voom_1, design = design, block = blocking.vars) + corfit = do.call(duplicateCorrelation, corfit_args) + + lmfit_args = list(object = voom_1, design = design, block = blocking.vars, correlation = corfit\$consensus.correlation) + fit <- do.call(lmFit, lmfit_args) + + if (opt\$do_contrast) { + contrast <- paste(paste(contrast_variable, c(reference_level, target_level), sep='.'), collapse='-') + contrast_matrix <- makeContrasts(contrasts=contrast, levels=design) + fit <- contrasts.fit(fit, contrast_matrix) + } + + ebayes_args = list(fit = fit) + fit <- do.call(eBayes, ebayes_args) + +} else { + stop( + paste("Bad '--analysis_type' parameter. It can be either 'paired' or 'mixedmodel'. ") + ) +} + +## Generate outputs +# Differential expression table + +if (opt\$IHW_correction) { + + # Define the column name for the group comparison + name_con_ref <- glue::glue("{contrast_variable}.{reference_level}-{contrast_variable}.{target_level}") + + # Calculate the median gene expression + median_expression <- apply(DGE\$counts, 1, median) + + # Apply IHW for multiple testing correction using median expression as covariate + ihw_result <- ihw(fit\$p.value[, name_con_ref] ~ median_expression, alpha = 0.05) + ihw_result@df <- as_tibble(ihw_result@df, rownames = "Gene") + + # Convert fit\$p.value to a tibble, including rownames as a new column 'Gene' + fit_df <- as_tibble(fit\$p.value, rownames = "Gene") + + fit_df <- fit_df %>% + left_join(ihw_result@df %>% select(Gene,adj_pvalue),by="Gene") %>% + rename(ihw.adj.p.value = adj_pvalue) + + # Convert back to a matrix if necessary (not needed for merging, but for consistency later) + fit\$p.value <- as.matrix(fit_df %>% select(-Gene)) # Keep the original structure without 'Gene' column + + # Generate the topTable including the new adjusted p-values + toptable_args <- list(fit = fit, coef = name_con_ref, number = opt\$number, adjust.method = opt\$adjust.method, + p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint) + + comp.results <- do.call(topTable,toptable_args) + + # Add the IHW-adjusted p-values to the topTable results by merging based on 'Gene' + comp.results <- comp.results %>% + rownames_to_column("Gene") %>% # Ensure 'Gene' column exists in comp.results + left_join(fit_df, by = "Gene") %>% # Merge based on 'Gene' column + select(-sym(name_con_ref)) + + # Optional: Rearrange columns if necessary + comp.results_test <- comp.results %>% + select(Gene, everything()) # Move 'Gene' column to the front + +} else { + toptable_args <- list(fit = fit, number = opt\$number, adjust.method = opt\$adjust.method, + p.value = opt\$p.value, lfc = opt\$lfc, confint = opt\$confint) + + comp.results <- do.call(topTable,toptable_args) +} + +names(comp.results)[names(comp.results) == 'Gene'] <- opt\$probe_id_col + +write.table( + comp.results, + file = paste(opt\$output_prefix, 'limma.results.tsv', sep = '.'), + col.names = TRUE, + row.names = FALSE, + sep = '\t', + quote = FALSE +) + +# Dispersion plot +png( + file = paste(opt\$output_prefix, 'limma.mean_difference.png', sep = '.'), + width = 600, + height = 600 +) +plotMD(fit) +dev.off() + +# R object for other processes to use +saveRDS(fit, file = paste(opt\$output_prefix, 'MArrayLM.limma.rds', sep = '.')) + +# Save model to file +write(model, file = paste(opt\$output_prefix, 'limma.model.txt', sep = '.')) + + +## R SESSION INFO +sink(paste(opt\$output_prefix, "R_sessionInfo.log", sep = '.')) +print(sessionInfo()) +sink() + + +## VERSIONS FILE +r.version <- strsplit(version[['version.string']], ' ')[[1]][3] +limma.version <- as.character(packageVersion('limma')) + +writeLines( + c( + '"task.process":', + paste(' r-base:', r.version), + paste(' bioconductor-limma:', limma.version) + ), + 'versions.yml' +) diff --git a/modules/nf-core/limma/differentialvoom/tests/main.nf.test b/modules/nf-core/limma/differentialvoom/tests/main.nf.test new file mode 100644 index 00000000000..0871ee689df --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/tests/main.nf.test @@ -0,0 +1,62 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core modules test limma/differentialvoom +nextflow_process { + + name "Test Process LIMMA_DIFFERENTIALVOOM" + script "../main.nf" + process "LIMMA_DIFFERENTIALVOOM" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "limma" + tag "limma/differentialvoom" + + test("test_limma_differential") { + + when { + process { + """ + input[0] = Channel.of(['id': 'test', 'variable': 'treatment', 'reference': 'hND6', 'target': 'mCherry']) + .map{tuple(it, it.variable, it.reference, it.target)} + input[1] = Channel.of([[id:'test'], + file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_limma_differential - stub") { + + options "-stub" + + when { + process { """ + input[0] = Channel.of(['id': 'test', 'variable': 'treatment', 'reference': 'hND6', 'target': 'mCherry']) + .map{tuple(it, it.variable, it.reference, it.target)} + input[1] = Channel.of([[id:'test'], + file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/limma/differentialvoom/tests/main.nf.test.snap b/modules/nf-core/limma/differentialvoom/tests/main.nf.test.snap new file mode 100644 index 00000000000..5895ddda9a2 --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/tests/main.nf.test.snap @@ -0,0 +1,300 @@ +{ + "test_limma_differential - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.results.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.mean_difference.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.MArrayLM.limma.rds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.normalised_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.model.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.R_sessionInfo.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,c90309ffa2653f7204690ea14064d2e9" + ], + "md_plot": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.mean_difference.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "model": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.model.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "normalised_counts": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.normalised_counts.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "rdata": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.MArrayLM.limma.rds:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.results.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "session_info": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.R_sessionInfo.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c90309ffa2653f7204690ea14064d2e9" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-14T23:12:18.543218" + }, + "test_limma_differential": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.results.tsv:md5,5ac6b133d00f25a8623dec9f38af98e4" + ] + ], + "1": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.mean_difference.png:md5,5dd9b0185b917dd6f9e0506d71705421" + ] + ], + "2": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.MArrayLM.limma.rds:md5,23543e8796931ec2fce8600c4f5a4606" + ] + ], + "3": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.normalised_counts.tsv:md5,a9c81ca72e246e78505e4bf4ca1d1df4" + ] + ], + "4": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.model.txt:md5,37fb2d77cd222b9cddaa38e8e6dddaf8" + ] + ], + "5": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.R_sessionInfo.log:md5,e51f5a47c37d58cf4e8a0c6766759736" + ] + ], + "6": [ + "versions.yml:md5,c90309ffa2653f7204690ea14064d2e9" + ], + "md_plot": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.mean_difference.png:md5,5dd9b0185b917dd6f9e0506d71705421" + ] + ], + "model": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.model.txt:md5,37fb2d77cd222b9cddaa38e8e6dddaf8" + ] + ], + "normalised_counts": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.normalised_counts.tsv:md5,a9c81ca72e246e78505e4bf4ca1d1df4" + ] + ], + "rdata": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.MArrayLM.limma.rds:md5,23543e8796931ec2fce8600c4f5a4606" + ] + ], + "results": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.limma.results.tsv:md5,5ac6b133d00f25a8623dec9f38af98e4" + ] + ], + "session_info": [ + [ + { + "id": "test", + "variable": "treatment", + "reference": "hND6", + "target": "mCherry" + }, + "test.R_sessionInfo.log:md5,e51f5a47c37d58cf4e8a0c6766759736" + ] + ], + "versions": [ + "versions.yml:md5,c90309ffa2653f7204690ea14064d2e9" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-14T23:12:08.333183" + } +} \ No newline at end of file diff --git a/modules/nf-core/limma/differentialvoom/tests/nextflow.config b/modules/nf-core/limma/differentialvoom/tests/nextflow.config new file mode 100644 index 00000000000..8f6151c8ee7 --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'LIMMA_DIFFERENTIALVOOM' { + ext.args = { [ + "--sample_id_col sample", + "--probe_id_col gene_id", + "--pair_id_col sample_number", + "--analysis_type pairwise" + + ].join(' ').trim() } + } + +} diff --git a/modules/nf-core/limma/differentialvoom/tests/tags.yml b/modules/nf-core/limma/differentialvoom/tests/tags.yml new file mode 100644 index 00000000000..8d2914df28f --- /dev/null +++ b/modules/nf-core/limma/differentialvoom/tests/tags.yml @@ -0,0 +1,2 @@ +limma/differentialvoom: + - "modules/nf-core/limma/differentialvoom/**"