Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Jul 31, 2023
1 parent 2baf503 commit 6bc879e
Show file tree
Hide file tree
Showing 24 changed files with 139 additions and 278 deletions.
6 changes: 2 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ Imports:
docopt (>= 0.7.1),
dplyr (>= 1.1.2),
httr (>= 1.4.6),
future (>= 1.33.0),
future.apply (>= 1.11.0),
igraph (>= 1.5.0.1),
jsonlite (>= 1.8.7),
MsBackendMgf (>= 1.8.0),
msentropy (>= 0.1.3),
progressr (>= 0.13.0),
pbapply (>= 1.7.2),
R.utils (>= 2.12.2),
rotl (>= 3.1.0),
Spectra (>= 1.10.1),
Expand Down Expand Up @@ -81,7 +79,7 @@ Collate:
'create_adducts_neg.R'
'create_adducts_pos.R'
'create_components.R'
'create_edges_parallel.R'
'create_edges.R'
'keep_peaks.R'
'sanitize_spectra.R'
'remove_above_precursor.R'
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export(create_adducts_neg)
export(create_adducts_pos)
export(create_components)
export(create_dir)
export(create_edges_parallel)
export(create_edges)
export(create_edges_spectra)
export(decorate_bio)
export(decorate_chemo)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

# timaR 2.8.2

* Change from pbmclapply to future.lapply
* Change from pbmclapply to pblapply
* Added spectral entropy
* Fix empty chemical classes
* [renv](https://rstudio.github.io/renv/index.html) removal
* Performance improvement by replacing the [tidyverse](https://www.tidyverse.org) by the [fastverse](https://fastverse.github.io/fastverse) (in progress)
Expand Down
78 changes: 22 additions & 56 deletions R/annotate_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ utils::globalVariables(
#'
#' @details It takes two files as input.
#' A query file that will be matched against a library file.
#' Parallel processing is also made available.
#'
#' @include export_output.R
#' @include export_params.R
Expand All @@ -44,7 +43,6 @@ utils::globalVariables(
#' @param condition Condition to be fulfilled.
#' Either 'OR' or 'AND' (mass and peaks minima).
#' @param qutoff Intensity under which ms2 fragments will be removed.
#' @param parallel Boolean. Process in parallel
#' @param approx Perform matching without precursor match
#' @param parameters Params
#'
Expand All @@ -62,7 +60,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
ppm = params$ms$tolerances$mass$ppm$ms2,
dalton = params$ms$tolerances$mass$dalton$ms2,
qutoff = params$ms$intensity$thresholds$ms2,
parallel = params$options$parallel,
approx = params$annotations$ms2$approx,
parameters = params) {
stopifnot("Your input file does not exist." = file.exists(input))
Expand Down Expand Up @@ -235,65 +232,34 @@ annotate_spectra <- function(input = params$files$spectral$raw,
)
}
}
)
) |>
Filter(f = Negate(is.null)) |>
dplyr::bind_rows()

inner_list <- Filter(Negate(is.null), inner_list)
inner_list <- dplyr::bind_rows(inner_list)
return(inner_list)
}

log_debug("Performing spectral comparison")
## TODO investigate issue with Windows
if (parallel && Sys.info()[["sysname"]] != "Windows") {
options(future.globals.onReference = "error")
future::plan(future::multisession)
progressr::handlers(list(
progressr::handler_progress(
format = ":current/:total [:bar] :percent in :elapsed ETA: :eta"
)
))
outer_list <-
future.apply::future_lapply(
X = seq_along(spectra),
p = progressr::progressor(along = seq_along(spectra)),
future.seed = TRUE,
FUN = function(spectrum, qp = query_precursors, p) {
p(sprintf("spectra=%g", length(qp)))
precursor <- qp[spectrum]
calculate_score_and_create_inner_list(
spectrum = spectrum,
precursor = precursor,
spectral_lib = lib_spectra,
query_spectra = query_spectra,
query_rts = query_rts,
lib_id = lib_id,
minimal = minimal,
maximal = maximal
)
}
) |>
progressr::with_progress()
} else {
outer_list <-
lapply(
X = seq_along(spectra),
FUN = function(spectrum, qp = query_precursors) {
precursor <- qp[spectrum]
calculate_score_and_create_inner_list(
spectrum = spectrum,
precursor = precursor,
spectral_lib = lib_spectra,
query_spectra = query_spectra,
query_rts = query_rts,
lib_id = lib_id,
minimal = minimal,
maximal = maximal
)
}
)
}
outer_list <-
pbapply::pblapply(
X = seq_along(spectra),
FUN = function(spectrum, qp = query_precursors) {
precursor <- qp[spectrum]
calculate_score_and_create_inner_list(
spectrum = spectrum,
precursor = precursor,
spectral_lib = lib_spectra,
query_spectra = query_spectra,
query_rts = query_rts,
lib_id = lib_id,
minimal = minimal,
maximal = maximal
)
}
) |>
dplyr::bind_rows()

return(dplyr::bind_rows(outer_list))
return(outer_list)
}

df_final <-
Expand Down
6 changes: 4 additions & 2 deletions R/calculate_entropy.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' @param frags Fragments
#' @param ms2_tolerance MS2 tolerance
#' @param ppm_tolerance ppm tolerance
#' @param threshold Threshold
#'
#' @return NULL
#'
Expand All @@ -17,7 +18,8 @@ calculate_entropy <- function(index,
target,
frags,
ms2_tolerance,
ppm_tolerance) {
ppm_tolerance,
threshold = 0.1) {
score <-
msentropy::calculate_entropy_similarity(
frags[[index]],
Expand All @@ -31,7 +33,7 @@ calculate_entropy <- function(index,
clean_spectra = TRUE
)

if (score >= 0.1) {
if (score >= threshold) {
return(
list(
"feature_id" = index,
Expand Down
48 changes: 48 additions & 0 deletions R/create_edges.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
utils::globalVariables(
c(
"params"
)
)

#' @title Create edges
#'
#' @description This function applies similarity calculation to a list of
#' spectra to create edges
#'
#' @details The spectrum at position `index` is compared, through
#' `calculate_entropy()`, with every spectrum located after it
#' (positions `index + 1` to `nspecs`). When `index` is the last
#' position or beyond, there is nothing left to compare and an empty
#' table is returned.
#'
#' @include calculate_entropy.R
#'
#' @param index Position of the query spectrum
#' @param frags Fragments. Indexed by spectrum position
#' @param precs Precursors. Currently not used by this function,
#' kept in the signature for the callers
#' @param nspecs Number of spectra
#' @param ms2_tolerance MS2 tolerance
#' @param ppm_tolerance ppm tolerance
#' @param threshold Threshold passed on to `calculate_entropy()`.
#' Defaults to 0.1, the default used by `calculate_entropy()`
#'
#' @return A data frame binding the results returned by
#' `calculate_entropy()` for each compared pair (possibly empty)
#'
#' @export
#'
#' @examples NULL
create_edges <- function(index,
                         frags,
                         precs,
                         nspecs,
                         ms2_tolerance,
                         ppm_tolerance,
                         threshold = 0.1) {
  # `(index + 1):nspecs` counts backwards as soon as index >= nspecs,
  # which would compare the spectrum with itself and with a position
  # that does not exist. Use an empty set of targets in that case.
  targets <- if (index < nspecs) {
    (index + 1):nspecs
  } else {
    integer(0)
  }

  # One similarity calculation per target spectrum
  inner_list <- lapply(
    X = targets,
    FUN = function(target) {
      calculate_entropy(
        index = index,
        target = target,
        frags = frags,
        ms2_tolerance = ms2_tolerance,
        ppm_tolerance = ppm_tolerance,
        threshold = threshold
      )
    }
  )

  # NOTE(review): pairs for which calculate_entropy() yields NULL are
  # presumably the ones below the threshold; bind_rows() ignores them.
  dplyr::bind_rows(inner_list)
}
44 changes: 0 additions & 44 deletions R/create_edges_parallel.R

This file was deleted.

68 changes: 15 additions & 53 deletions R/create_edges_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ utils::globalVariables(c(
#' @description This function creates edges
#' based on fragmentation spectra similarity
#'
#' @include create_edges_parallel.R
#' @include create_edges.R
#' @include import_spectra.R
#' @include normalize_peaks.R
#' @include remove_above_precursor.R
Expand All @@ -25,7 +25,6 @@ utils::globalVariables(c(
#' @param ppm Relative ppm tolerance to be used
#' @param dalton Absolute Dalton tolerance to be used
#' @param qutoff Intensity under which ms2 fragments will be removed.
#' @param parallel Boolean. Process in parallel
#' @param parameters Params
#'
#' @return NULL
Expand All @@ -42,7 +41,6 @@ create_edges_spectra <- function(
ppm = params$ms$tolerances$mass$ppm$ms2,
dalton = params$ms$tolerances$mass$dalton$ms2,
qutoff = params$ms$intensity$thresholds$ms2,
parallel = params$options$parallel,
parameters = params) {
stopifnot("Your input file does not exist." = file.exists(input))
## Not checking for ppm and Da limits, everyone is free.
Expand All @@ -62,62 +60,26 @@ create_edges_spectra <- function(

log_debug("Performing spectral comparison")
log_debug(
"As we do not bin the spectra,
nor limit the precursors delta,
expect a long processing time."
"As we do not limit the precursors delta,
expect a (relatively) long processing time."
)
log_debug("Take yourself a break, you deserve it.")
nspecz <- length(spectra)
precz <- spectra$precursorMz
fragz <- spectra@backend@peaksData

if (parallel) {
options(future.globals.onReference = "error")
future::plan(future::multisession)
progressr::handlers(
list(
progressr::handler_progress(
format = ":current/:total [:bar] :percent in :elapsed ETA: :eta"
)
)
)
result_list <-
future.apply::future_lapply(
X = 1:(nspecz - 1),
FUN = create_edges_parallel,
p = progressr::progressor(along = 1:(nspecz - 1)),
future.seed = TRUE,
future.chunk.size = structure(TRUE, ordering = "random"),
frags = fragz,
precs = precz,
nspecs = nspecz,
ms2_tolerance = dalton,
ppm_tolerance = ppm,
parallel = parallel
) |>
progressr::with_progress()
} else {
result_list <-
lapply(
X = 1:(nspecz - 1),
FUN = create_edges_parallel,
frags = fragz,
precs = precz,
nspecs = nspecz,
ms2_tolerance = dalton,
ppm_tolerance = ppm,
p = NA,
parallel = parallel
)
}
edges <- do.call(
what = rbind,
args = unlist(result_list, recursive = FALSE)
) |>
data.frame()

log_debug("Collecting garbage ...")
gc()
edges <-
pbapply::pblapply(
X = 1:(nspecz - 1),
FUN = create_edges,
frags = fragz,
precs = precz,
nspecs = nspecz,
ms2_tolerance = dalton,
ppm_tolerance = ppm,
threshold = threshold
) |>
dplyr::bind_rows()

edges <- edges |>
tidytable::select(
Expand Down
3 changes: 0 additions & 3 deletions R/parse_cli_params.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,6 @@ parse_cli_params <- function() {
if (!is.null(arguments$force)) {
params$options$force <- as.logical(arguments$force)
}
if (!is.null(arguments$parallel)) {
params$options$parallel <- as.logical(arguments$parallel)
}
if (!is.null(arguments$summarise)) {
params$options$summarise <- as.logical(arguments$summarise)
}
Expand Down
Loading

0 comments on commit 6bc879e

Please sign in to comment.