From 882ecd44409baa5443e728920cbea68f6a84040e Mon Sep 17 00:00:00 2001
From: Adriano Rutz <adriano.rutz@hotmail.com>
Date: Mon, 5 Aug 2024 08:38:46 +0200
Subject: [PATCH 1/5] Even simpler example files

---
 DESCRIPTION                         |  2 +-
 R/get_example_files.R               | 50 ++++++++++++++++++-----------
 codemeta.json                       |  4 +--
 man/get_example_files.Rd            |  5 ++-
 vignettes/articles/II-preparing.Rmd |  7 ++--
 5 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7fb21a5a..bcc2c9e1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tima
 Title: Taxonomically Informed Metabolite Annotation
-Version: 2.10.0
+Version: 2.10.1
 Authors@R: c(
     person("Adriano", "Rutz", , "rutz@imsb.biol.ethz.ch", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0443-9902")),
diff --git a/R/get_example_files.R b/R/get_example_files.R
index f6f98ba2..04e285ac 100644
--- a/R/get_example_files.R
+++ b/R/get_example_files.R
@@ -7,28 +7,42 @@
 #' @include go_to_cache.R
 #' @include parse_yaml_paths.R
 #'
+#' @param example The example(s) you want to download
+#'
 #' @return Example files.
 #'
 #' @export
 #'
 #' @examples NULL
-get_example_files <- function() {
-  go_to_cache()
-  message("Features")
-  get_file(
-    url = tima::parse_yaml_paths()$urls$examples$features,
-    export = tima::parse_yaml_paths()$data$source$features
-  )
-  message("Metadata")
-  get_file(
-    url = tima::parse_yaml_paths()$urls$examples$metadata,
-    export = tima::parse_yaml_paths()$data$source$metadata
-  )
-  message("Sirius")
-  get_example_sirius()
-  message("Spectra")
-  get_file(
-    url = tima::parse_yaml_paths()$urls$examples$spectra,
-    export = tima::parse_yaml_paths()$data$source$spectra
+get_example_files <- function(example = c("features", "metadata", "sirius", "spectra")) {
+  stopifnot(
+    "Example files available are `features`, `metadata`, `sirius` and `spectra`." =
+      example %in% c("features", "metadata", "sirius", "spectra")
   )
+  go_to_cache()
+  if ("features" %in% example) {
+    message("Features")
+    get_file(
+      url = tima::parse_yaml_paths()$urls$examples$features,
+      export = tima::parse_yaml_paths()$data$source$features
+    )
+  }
+  if ("metadata" %in% example) {
+    message("Metadata")
+    get_file(
+      url = tima::parse_yaml_paths()$urls$examples$metadata,
+      export = tima::parse_yaml_paths()$data$source$metadata
+    )
+  }
+  if ("sirius" %in% example) {
+    message("Sirius")
+    get_example_sirius()
+  }
+  if ("spectra" %in% example) {
+    message("Spectra")
+    get_file(
+      url = tima::parse_yaml_paths()$urls$examples$spectra,
+      export = tima::parse_yaml_paths()$data$source$spectra
+    )
+  }
 }
diff --git a/codemeta.json b/codemeta.json
index b4ee08a4..108b7995 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -8,7 +8,7 @@
   "codeRepository": "https://github.com/taxonomicallyinformedannotation/tima",
   "issueTracker": "https://github.com/taxonomicallyinformedannotation/tima/issues",
   "license": "https://spdx.org/licenses/GPL-3.0",
-  "version": "2.10.0",
+  "version": "2.10.1",
   "programmingLanguage": {
     "@type": "ComputerLanguage",
     "name": "R",
@@ -644,7 +644,7 @@
     "SystemRequirements": null
   },
   "keywords": ["metaboliteannotation", "chemotaxonomy", "scoringsystem", "naturalproducts", "computationalmetabolomics", "taxonomicdistance", "specializedmetabolome"],
-  "fileSize": "3169.746KB",
+  "fileSize": "3170.327KB",
   "citation": [
     {
       "@type": "ScholarlyArticle",
diff --git a/man/get_example_files.Rd b/man/get_example_files.Rd
index b32c58cd..f76e4e77 100644
--- a/man/get_example_files.Rd
+++ b/man/get_example_files.Rd
@@ -4,7 +4,10 @@
 \alias{get_example_files}
 \title{Get example files}
 \usage{
-get_example_files()
+get_example_files(example = c("features", "metadata", "sirius", "spectra"))
+}
+\arguments{
+\item{example}{The example(s) you want to download}
 }
 \value{
 Example files.
diff --git a/vignettes/articles/II-preparing.Rmd b/vignettes/articles/II-preparing.Rmd
index 0eb51299..0d422e53 100644
--- a/vignettes/articles/II-preparing.Rmd
+++ b/vignettes/articles/II-preparing.Rmd
@@ -175,13 +175,10 @@ The edges are created based on the `spectral entropy similarity` calculated betw
 targets::tar_make(names = matches("fea_edg_spe"))
 ```
 
-If needed, you can get an example of what your minimal feature table should look like by running (no parameters needed):
+If needed, you can get an example of what your minimal feature table should look like by running:
 
 ```{r get-features, results="hide", message=FALSE, warning=FALSE}
-tima::get_file(
-  url = tima::parse_yaml_paths()$urls$examples$features,
-  export = tima::parse_yaml_paths()$data$source$features
-)
+tima::get_example_files(example = "features")
 ```
 
 ```{r prepare-features, results="hide", message=FALSE, warning=FALSE, include = FALSE}

From a87375a04db948ca8cf50b6fe3b13398d5e06700 Mon Sep 17 00:00:00 2001
From: Adriano Rutz <adriano.rutz@hotmail.com>
Date: Tue, 6 Aug 2024 17:42:55 +0200
Subject: [PATCH 2/5] Fix #166

---
 DESCRIPTION                     |  4 --
 NAMESPACE                       |  7 ++--
 NEWS.md                         |  4 ++
 R/annotate_spectra.R            | 46 +++++++-------------
 R/cleanup_spectra.R             | 37 -----------------
 R/create_edges_spectra.R        | 21 +++-------
 R/import_spectra.R              | 33 ++++++++++++---
 R/keep_peaks.R                  | 15 -------
 R/normalize_peaks.R             | 15 -------
 R/remove_above_precursor.R      | 19 ---------
 R/sanitize_spectra.R            | 74 +++++++++++++++++++++++----------
 codemeta.json                   |  2 +-
 inst/pipelines/_targets.R       | 14 +------
 man/cleanup_spectra.Rd          | 20 ---------
 man/import_spectra.Rd           | 10 ++++-
 man/keep_peaks.Rd               | 19 ---------
 man/normalize_peaks.Rd          | 14 -------
 man/remove_above_precursor.Rd   | 21 ----------
 man/sanitize_spectra.Rd         | 19 +++++++--
 tests/testthat/test-functions.R |  5 ---
 20 files changed, 134 insertions(+), 265 deletions(-)
 delete mode 100644 R/cleanup_spectra.R
 delete mode 100644 R/keep_peaks.R
 delete mode 100644 R/normalize_peaks.R
 delete mode 100644 R/remove_above_precursor.R
 delete mode 100644 man/cleanup_spectra.Rd
 delete mode 100644 man/keep_peaks.Rd
 delete mode 100644 man/normalize_peaks.Rd
 delete mode 100644 man/remove_above_precursor.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index bcc2c9e1..56b4b0ff 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -93,11 +93,7 @@ Collate:
     'parse_adduct.R'
     'calculate_mass_of_m.R'
     'annotate_masses.R'
-    'keep_peaks.R'
     'sanitize_spectra.R'
-    'remove_above_precursor.R'
-    'normalize_peaks.R'
-    'cleanup_spectra.R'
     'import_spectra.R'
     'annotate_spectra.R'
     'benchmark_taxize_spectra.R'
diff --git a/NAMESPACE b/NAMESPACE
index 329eb1bc..391f9e39 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,7 +8,6 @@ export(calculate_mass_of_m)
 export(clean_bio)
 export(clean_chemo)
 export(clean_collapse)
-export(cleanup_spectra)
 export(columns_model)
 export(complement_metadata_structures)
 export(create_components)
@@ -46,11 +45,9 @@ export(harmonize_names_sirius)
 export(harmonize_spectra)
 export(import_spectra)
 export(install)
-export(keep_peaks)
 export(load_yaml_files)
 export(log_debug)
 export(log_pipe)
-export(normalize_peaks)
 export(parse_adduct)
 export(parse_cli_params)
 export(parse_yaml_params)
@@ -72,7 +69,6 @@ export(prepare_libraries_spectra)
 export(prepare_params)
 export(prepare_taxa)
 export(read_from_sirius_zip)
-export(remove_above_precursor)
 export(replace_id)
 export(round_reals)
 export(run_app)
@@ -101,8 +97,11 @@ importFrom(Spectra,dropNaSpectraVariables)
 importFrom(Spectra,filterEmptySpectra)
 importFrom(Spectra,filterFourierTransformArtefacts)
 importFrom(Spectra,filterIntensity)
+importFrom(Spectra,filterMsLevel)
 importFrom(Spectra,filterPrecursorCharge)
+importFrom(Spectra,filterPrecursorPeaks)
 importFrom(Spectra,reduceSpectra)
+importFrom(Spectra,scalePeaks)
 importFrom(crayon,blue)
 importFrom(crayon,cyan)
 importFrom(crayon,green)
diff --git a/NEWS.md b/NEWS.md
index e2d2ffff..550b251a 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # tima
 
+# tima 2.10.1
+
+* Replaced internal functions with `Spectra` equivalents (#166)
+
 # tima 2.10.0
 
 * Added alt text to vignettes
diff --git a/R/annotate_spectra.R b/R/annotate_spectra.R
index deb34015..04a71e9b 100644
--- a/R/annotate_spectra.R
+++ b/R/annotate_spectra.R
@@ -2,11 +2,8 @@ import::from(dplyr, join_by, .into = environment())
 import::from(msentropy, calculate_entropy_similarity, .into = environment())
 import::from(msentropy, calculate_spectral_entropy, .into = environment())
 import::from(pbapply, pblapply, .into = environment())
-import::from(Spectra, addProcessing, .into = environment())
 import::from(Spectra, applyProcessing, .into = environment())
 import::from(Spectra, concatenateSpectra, .into = environment())
-import::from(Spectra, filterIntensity, .into = environment())
-import::from(Spectra, filterPrecursorCharge, .into = environment())
 import::from(tidytable, any_of, .into = environment())
 import::from(tidytable, arrange, .into = environment())
 import::from(tidytable, as_tidytable, .into = environment())
@@ -31,11 +28,8 @@ import::from(tidytable, tidytable, .into = environment())
 #' @importFrom msentropy calculate_entropy_similarity
 #' @importFrom msentropy calculate_spectral_entropy
 #' @importFrom pbapply pblapply
-#' @importFrom Spectra addProcessing
 #' @importFrom Spectra applyProcessing
 #' @importFrom Spectra concatenateSpectra
-#' @importFrom Spectra filterIntensity
-#' @importFrom Spectra filterPrecursorCharge
 #' @importFrom tidytable any_of
 #' @importFrom tidytable arrange
 #' @importFrom tidytable as_tidytable
@@ -52,9 +46,6 @@ import::from(tidytable, tidytable, .into = environment())
 #' @include get_params.R
 #' @include harmonize_adducts.R
 #' @include import_spectra.R
-#' @include normalize_peaks.R
-#' @include remove_above_precursor.R
-#' @include sanitize_spectra.R
 #'
 #' @param input Query file containing spectra. Currently an '.mgf' file
 #' @param library Library containing spectra to match against.
@@ -94,12 +85,12 @@ annotate_spectra <- function(input = get_params(step = "annotate_spectra")$files
 
   log_debug("Loading spectra...")
   spectra <- input |>
-    import_spectra() |>
-    filterPrecursorCharge(z = if (polarity == "pos") {
-      c(1, 2, 3)
-    } else {
-      c(-1, -2, -3)
-    })
+    import_spectra(
+      cutoff = qutoff,
+      dalton = dalton,
+      polarity = polarity,
+      ppm = ppm
+    )
 
   df_empty <- data.frame(
     feature_id = NA,
@@ -120,24 +111,15 @@ annotate_spectra <- function(input = get_params(step = "annotate_spectra")$files
   if (length(spectra) > 0) {
     log_debug("Loading spectral library")
     spectral_library <- unlist(library) |>
-      lapply(FUN = import_spectra) |>
-      concatenateSpectra() |>
-      sanitize_spectra() |>
-      addProcessing(remove_above_precursor(),
-        spectraVariables = c("precursorMz")
-      ) |>
-      addProcessing(normalize_peaks()) |>
-      applyProcessing()
-
-    log_debug("Applying initial intensity filter to query spectra")
-    spectra <- spectra |>
-      sanitize_spectra() |>
-      filterIntensity(intensity = c(qutoff, Inf)) |>
-      addProcessing(remove_above_precursor(),
-        spectraVariables = c("precursorMz")
+      lapply(
+        FUN = import_spectra,
+        cutoff = qutoff,
+        dalton = dalton,
+        polarity = polarity,
+        ppm = ppm
       ) |>
-      addProcessing(normalize_peaks()) |>
-      applyProcessing()
+      lapply(FUN = applyProcessing) |>
+      concatenateSpectra()
 
     query_precursors <- spectra@backend@spectraData$precursorMz
     query_spectra <- spectra@backend@peaksData
diff --git a/R/cleanup_spectra.R b/R/cleanup_spectra.R
deleted file mode 100644
index 7e614eb1..00000000
--- a/R/cleanup_spectra.R
+++ /dev/null
@@ -1,37 +0,0 @@
-import::from(Spectra, applyProcessing, .into = environment())
-import::from(Spectra, combineSpectra, .into = environment())
-import::from(Spectra, filterEmptySpectra, .into = environment())
-import::from(Spectra, reduceSpectra, .into = environment())
-
-#' @title Cleanup spectra
-#'
-#' @description This function cleans up spectra (keeps only merged MS2)
-#'
-#' @importFrom Spectra applyProcessing
-#' @importFrom Spectra combineSpectra
-#' @importFrom Spectra filterEmptySpectra
-#' @importFrom Spectra reduceSpectra
-#'
-#' @param spectra Spectra object to clean
-#'
-#' @return Spectra object containing the imported spectra
-#'
-#' @export
-#'
-#' @examples NULL
-cleanup_spectra <- function(spectra) {
-  spectra <- spectra |>
-    reduceSpectra() |>
-    applyProcessing()
-  if ("MSLEVEL" %in% colnames(spectra@backend@spectraData)) {
-    spectra <- spectra[spectra$MSLEVEL == 2]
-  }
-  if ("FEATURE_ID" %in% colnames(spectra@backend@spectraData)) {
-    message("Combining spectra in case...")
-    spectra <- spectra |>
-      combineSpectra(f = spectra$FEATURE_ID)
-  }
-  spectra <- spectra |>
-    filterEmptySpectra()
-  return(spectra)
-}
diff --git a/R/create_edges_spectra.R b/R/create_edges_spectra.R
index d6ffa372..3a19979c 100644
--- a/R/create_edges_spectra.R
+++ b/R/create_edges_spectra.R
@@ -1,7 +1,5 @@
 import::from(msentropy, calculate_spectral_entropy, .into = environment())
 import::from(pbapply, pblapply, .into = environment())
-import::from(Spectra, addProcessing, .into = environment())
-import::from(Spectra, applyProcessing, .into = environment())
 import::from(tidyfst, rn_col, .into = environment())
 import::from(tidytable, any_of, .into = environment())
 import::from(tidytable, bind_rows, .into = environment())
@@ -21,8 +19,6 @@ import::from(tidytable, tidytable, .into = environment())
 #'
 #' @importFrom msentropy calculate_spectral_entropy
 #' @importFrom pbapply pblapply
-#' @importFrom Spectra addProcessing
-#' @importFrom Spectra applyProcessing
 #' @importFrom tidyfst rn_col
 #' @importFrom tidytable any_of
 #' @importFrom tidytable bind_rows
@@ -38,9 +34,6 @@ import::from(tidytable, tidytable, .into = environment())
 #' @include create_edges.R
 #' @include get_params.R
 #' @include import_spectra.R
-#' @include normalize_peaks.R
-#' @include remove_above_precursor.R
-#' @include sanitize_spectra.R
 #'
 #' @param input Query file containing spectra. Currently an '.mgf' file
 #' @param output Output file.
@@ -69,16 +62,12 @@ create_edges_spectra <- function(input = get_params(step = "create_edges_spectra
 
   log_debug("Loading spectra...")
   spectra <- input |>
-    import_spectra()
+    import_spectra(
+      cutoff = qutoff,
+      dalton = dalton,
+      ppm = ppm
+    )
   if (length(spectra) > 1) {
-    spectra <- spectra |>
-      sanitize_spectra(cutoff = qutoff) |>
-      # addProcessing(remove_above_precursor(),
-      #   spectraVariables = c("precursorMz")
-      # ) |>
-      addProcessing(normalize_peaks()) |>
-      applyProcessing()
-
     log_debug("Performing spectral comparison")
     log_debug("As we do not limit the precursors delta,
       expect a (relatively) long processing time.")
diff --git a/R/import_spectra.R b/R/import_spectra.R
index 40cb51f3..b83abcaf 100644
--- a/R/import_spectra.R
+++ b/R/import_spectra.R
@@ -14,16 +14,24 @@ import::from(stringi, stri_replace_all_regex, .into = environment())
 #' @importFrom Spectra Spectra
 #' @importFrom stringi stri_replace_all_regex
 #'
-#' @include cleanup_spectra.R
+#' @include sanitize_spectra.R
 #'
 #' @param file File path of the spectrum file to be imported
+#' @param cutoff Absolute minimal intensity
+#' @param dalton Dalton tolerance
+#' @param polarity Polarity
+#' @param ppm PPM tolerance
 #'
 #' @return Spectra object containing the imported spectra
 #'
 #' @export
 #'
 #' @examples NULL
-import_spectra <- function(file) {
+import_spectra <- function(file,
+                           cutoff = 0,
+                           dalton = 0.01,
+                           polarity = NA,
+                           ppm = 10) {
   file_ext <-
     stri_replace_all_regex(
       str = file,
@@ -39,12 +47,22 @@ import_spectra <- function(file) {
         # TODO Change as soon as R 4.4.0 becomes oldrel
         # readMgfSplit(f = file) |>
         Spectra() |>
-        cleanup_spectra()
+        sanitize_spectra(
+          cutoff = cutoff,
+          dalton = dalton,
+          polarity = polarity,
+          ppm = ppm
+        )
     },
     "msp" = {
       readMsp(f = file) |>
         Spectra() |>
-        cleanup_spectra()
+        sanitize_spectra(
+          cutoff = cutoff,
+          dalton = dalton,
+          polarity = polarity,
+          ppm = ppm
+        )
     },
     # "sqlite" = {
     #   CompDb(x = file) |>
@@ -55,7 +73,12 @@ import_spectra <- function(file) {
       readRDS(file = file) |>
         data.frame() |>
         Spectra() |>
-        cleanup_spectra()
+        sanitize_spectra(
+          cutoff = cutoff,
+          dalton = dalton,
+          polarity = polarity,
+          ppm = ppm
+        )
     }
   )
 }
diff --git a/R/keep_peaks.R b/R/keep_peaks.R
deleted file mode 100644
index b4254936..00000000
--- a/R/keep_peaks.R
+++ /dev/null
@@ -1,15 +0,0 @@
-#' @title Keep peaks
-#'
-#' @description This function keeps a proportion of peaks
-#'
-#' @param x PeakData
-#' @param prop Minimal ratio to the max peak
-#'
-#' @return NULL
-#'
-#' @export
-#'
-#' @examples NULL
-keep_peaks <- function(x, prop) {
-  x > max(x, na.rm = TRUE) / prop
-}
diff --git a/R/normalize_peaks.R b/R/normalize_peaks.R
deleted file mode 100644
index a9beb514..00000000
--- a/R/normalize_peaks.R
+++ /dev/null
@@ -1,15 +0,0 @@
-#' @title Normalize peaks
-#'
-#' @description This function normalizes peaks
-#'
-#' @return NULL
-#'
-#' @export
-#'
-#' @examples NULL
-normalize_peaks <- function() {
-  function(x, ...) {
-    x[, 2] <- 100 * x[, 2] / max(x[, 2])
-    return(x)
-  }
-}
diff --git a/R/remove_above_precursor.R b/R/remove_above_precursor.R
deleted file mode 100644
index f7291e66..00000000
--- a/R/remove_above_precursor.R
+++ /dev/null
@@ -1,19 +0,0 @@
-#' @title Remove peaks above precursor in MS2 spectra
-#'
-#' @description This function remove peaks above precursor in MS2 spectra
-#'
-#' @details Credit goes to Carolin Huber (0000-0002-9355-8948)
-#'    With fine tuning of Michele Stravs (0000-0002-1426-8572)
-#'
-#' @param tol_mz m/z tolerance
-#'
-#' @return NULL
-#'
-#' @export
-#'
-#' @examples NULL
-remove_above_precursor <- function(tol_mz = 0.5) {
-  function(x, precursorMz, ...) {
-    x[!(x[, 1] >= precursorMz - tol_mz), , drop = FALSE]
-  }
-}
diff --git a/R/sanitize_spectra.R b/R/sanitize_spectra.R
index 6e00169d..c9279d2a 100644
--- a/R/sanitize_spectra.R
+++ b/R/sanitize_spectra.R
@@ -1,23 +1,39 @@
+import::from(Spectra, addProcessing, .into = environment())
 import::from(Spectra, applyProcessing, .into = environment())
+import::from(Spectra, combineSpectra, .into = environment())
 import::from(Spectra, dropNaSpectraVariables, .into = environment())
+import::from(Spectra, filterEmptySpectra, .into = environment())
 import::from(Spectra, filterFourierTransformArtefacts, .into = environment())
 import::from(Spectra, filterIntensity, .into = environment())
+import::from(Spectra, filterMsLevel, .into = environment())
+import::from(Spectra, filterPrecursorCharge, .into = environment())
+import::from(Spectra, filterPrecursorPeaks, .into = environment())
+import::from(Spectra, reduceSpectra, .into = environment())
+import::from(Spectra, scalePeaks, .into = environment())
 
 #' @title Sanitize spectra
 #'
 #' @description This function sanitizes spectra
 #'
+#' @importFrom Spectra addProcessing
 #' @importFrom Spectra applyProcessing
+#' @importFrom Spectra combineSpectra
 #' @importFrom Spectra dropNaSpectraVariables
+#' @importFrom Spectra filterEmptySpectra
 #' @importFrom Spectra filterFourierTransformArtefacts
 #' @importFrom Spectra filterIntensity
-#'
-#' @include keep_peaks.R
+#' @importFrom Spectra filterMsLevel
+#' @importFrom Spectra filterPrecursorCharge
+#' @importFrom Spectra filterPrecursorPeaks
+#' @importFrom Spectra reduceSpectra
+#' @importFrom Spectra scalePeaks
 #'
 #' @param spectra Spectra object
-#' @param ratio Minimal ratio to the max peak
 #' @param cutoff Absolute minimal intensity
-#' @param fragments Minimal number of fragments
+#' @param dalton Dalton tolerance
+#' @param polarity Polarity
+#' @param ppm PPM tolerance
+#' @param ratio Minimal ratio to the max peak
 #'
 #' @return NULL
 #'
@@ -26,33 +42,49 @@ import::from(Spectra, filterIntensity, .into = environment())
 #' @examples NULL
 sanitize_spectra <-
   function(spectra,
-           ratio = 10000,
            cutoff = 0,
-           fragments = 3) {
+           dalton = 0.01,
+           polarity = NA,
+           ppm = 10,
+           ratio = 10000) {
     log_debug("Applying sanitization of the spectra")
 
-    ## Not needed anymore (fixed in Spectra 1.10.3)
-    ## see https://github.com/rformassspectrometry/Spectra/issues/302
-    # spectra@backend@peaksData <- spectra@backend@peaksData |>
-    # lapply(FUN = Spectra:::.peaks_remove_fft_artifact)
+    if ("msLevel" %in% colnames(spectra@backend@spectraData)) {
+      message("Filtering MS2 only")
+      spectra <- spectra |>
+        filterMsLevel(2L)
+    }
+
+    if (!is.na(polarity)) {
+      spectra <- spectra |>
+        filterPrecursorCharge(z = if (polarity == "pos") {
+          c(1, 2, 3)
+        } else {
+          c(-1, -2, -3)
+        })
+    }
 
     spectra <- spectra |>
       dropNaSpectraVariables() |>
+      reduceSpectra(tolerance = dalton, ppm = ppm) |>
       filterFourierTransformArtefacts() |> # fixed in Spectra 1.10.3
       filterIntensity(intensity = c(cutoff, Inf)) |>
-      filterIntensity(intensity = keep_peaks, prop = ratio) |>
-      applyProcessing()
+      filterPrecursorPeaks(
+        tolerance = dalton,
+        ppm = ppm,
+        mz = c(">=")
+      ) |>
+      scalePeaks() |>
+      filterIntensity(intensity = c(1 / ratio, 1))
 
-    # spectra <- spectra |>
-    #   filterIntensity(
-    #     intensity = function(x) {
-    #       ## eventually go to 25%
-    #       x <- x > quantile(x)[1]
-    #     }
-    #   ) |>
-    #   applyProcessing()
+    if ("FEATURE_ID" %in% colnames(spectra@backend@spectraData)) {
+      message("Combining spectra in case...")
+      spectra <- spectra |>
+        combineSpectra(f = spectra$FEATURE_ID)
+    }
 
-    spectra <- spectra[lapply(X = spectra@backend@peaksData, FUN = length) >= fragments * 2]
+    spectra <- spectra |>
+      filterEmptySpectra()
 
     return(spectra)
   }
diff --git a/codemeta.json b/codemeta.json
index 108b7995..7216b15f 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -644,7 +644,7 @@
     "SystemRequirements": null
   },
   "keywords": ["metaboliteannotation", "chemotaxonomy", "scoringsystem", "naturalproducts", "computationalmetabolomics", "taxonomicdistance", "specializedmetabolome"],
-  "fileSize": "3170.327KB",
+  "fileSize": "3166.925KB",
   "citation": [
     {
       "@type": "ScholarlyArticle",
diff --git a/inst/pipelines/_targets.R b/inst/pipelines/_targets.R
index a61cc0d4..45ce6be0 100644
--- a/inst/pipelines/_targets.R
+++ b/inst/pipelines/_targets.R
@@ -1904,24 +1904,14 @@ list(
       format = "file",
       command = {
         sp <- benchmark_converted |>
-          import_spectra() |>
-          sanitize_spectra(
-            cutoff = 0,
-            ratio = 10000,
-            fragments = 5
-          )
+          import_spectra()
 
         sp@backend@spectraData$precursorMz <-
           sp@backend@spectraData$PRECURSOR_MZ |>
           as.numeric()
 
         log_debug("Imported")
-        sp_clean <- sp |>
-          Spectra::addProcessing(remove_above_precursor(),
-            spectraVariables = c("precursorMz")
-          ) |>
-          Spectra::addProcessing(normalize_peaks()) |>
-          Spectra::applyProcessing()
+        sp_clean <- sp
 
         log_debug("Cleaned")
         df_meta <- tidytable::tidytable(
diff --git a/man/cleanup_spectra.Rd b/man/cleanup_spectra.Rd
deleted file mode 100644
index 48cada4e..00000000
--- a/man/cleanup_spectra.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cleanup_spectra.R
-\name{cleanup_spectra}
-\alias{cleanup_spectra}
-\title{Cleanup spectra}
-\usage{
-cleanup_spectra(spectra)
-}
-\arguments{
-\item{spectra}{Spectra object to clean}
-}
-\value{
-Spectra object containing the imported spectra
-}
-\description{
-This function cleans up spectra (keeps only merged MS2)
-}
-\examples{
-NULL
-}
diff --git a/man/import_spectra.Rd b/man/import_spectra.Rd
index accf2957..eadf27b5 100644
--- a/man/import_spectra.Rd
+++ b/man/import_spectra.Rd
@@ -4,10 +4,18 @@
 \alias{import_spectra}
 \title{Import spectra}
 \usage{
-import_spectra(file)
+import_spectra(file, cutoff = 0, dalton = 0.01, polarity = NA, ppm = 10)
 }
 \arguments{
 \item{file}{File path of the spectrum file to be imported}
+
+\item{cutoff}{Absolute minimal intensity}
+
+\item{dalton}{Dalton tolerance}
+
+\item{polarity}{Polarity}
+
+\item{ppm}{PPM tolerance}
 }
 \value{
 Spectra object containing the imported spectra
diff --git a/man/keep_peaks.Rd b/man/keep_peaks.Rd
deleted file mode 100644
index 40ccc153..00000000
--- a/man/keep_peaks.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/keep_peaks.R
-\name{keep_peaks}
-\alias{keep_peaks}
-\title{Keep peaks}
-\usage{
-keep_peaks(x, prop)
-}
-\arguments{
-\item{x}{PeakData}
-
-\item{prop}{Minimal ratio to the max peak}
-}
-\description{
-This function keeps a proportion of peaks
-}
-\examples{
-NULL
-}
diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd
deleted file mode 100644
index 2bb5375b..00000000
--- a/man/normalize_peaks.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/normalize_peaks.R
-\name{normalize_peaks}
-\alias{normalize_peaks}
-\title{Normalize peaks}
-\usage{
-normalize_peaks()
-}
-\description{
-This function normalizes peaks
-}
-\examples{
-NULL
-}
diff --git a/man/remove_above_precursor.Rd b/man/remove_above_precursor.Rd
deleted file mode 100644
index 9eb0a62f..00000000
--- a/man/remove_above_precursor.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/remove_above_precursor.R
-\name{remove_above_precursor}
-\alias{remove_above_precursor}
-\title{Remove peaks above precursor in MS2 spectra}
-\usage{
-remove_above_precursor(tol_mz = 0.5)
-}
-\arguments{
-\item{tol_mz}{m/z tolerance}
-}
-\description{
-This function remove peaks above precursor in MS2 spectra
-}
-\details{
-Credit goes to Carolin Huber (0000-0002-9355-8948)
-With fine tuning of Michele Stravs (0000-0002-1426-8572)
-}
-\examples{
-NULL
-}
diff --git a/man/sanitize_spectra.Rd b/man/sanitize_spectra.Rd
index 79e5a7bd..f3cd32de 100644
--- a/man/sanitize_spectra.Rd
+++ b/man/sanitize_spectra.Rd
@@ -4,16 +4,27 @@
 \alias{sanitize_spectra}
 \title{Sanitize spectra}
 \usage{
-sanitize_spectra(spectra, ratio = 10000, cutoff = 0, fragments = 3)
+sanitize_spectra(
+  spectra,
+  cutoff = 0,
+  dalton = 0.01,
+  polarity = NA,
+  ppm = 10,
+  ratio = 10000
+)
 }
 \arguments{
 \item{spectra}{Spectra object}
 
-\item{ratio}{Minimal ratio to the max peak}
-
 \item{cutoff}{Absolute minimal intensity}
 
-\item{fragments}{Minimal number of fragments}
+\item{dalton}{Dalton tolerance}
+
+\item{polarity}{Polarity}
+
+\item{ppm}{PPM tolerance}
+
+\item{ratio}{Minimal ratio to the max peak}
 }
 \description{
 This function sanitizes spectra
diff --git a/tests/testthat/test-functions.R b/tests/testthat/test-functions.R
index 0e414410..892fc3cf 100644
--- a/tests/testthat/test-functions.R
+++ b/tests/testthat/test-functions.R
@@ -617,11 +617,6 @@ test_that(desc = "Test functions", code = {
   arguments$remove_ties <- TRUE
   arguments$summarise <- TRUE
 
-  # Useless
-  keep_peaks(x = 0, prop = 0)
-  normalize_peaks()
-  remove_above_precursor()
-
   parse_cli_params(arguments = arguments, parameters = params)
 
   tima_full()

From 85843a16553f07785741bfcce8c5195c30758c56 Mon Sep 17 00:00:00 2001
From: Adriano Rutz <adriano.rutz@hotmail.com>
Date: Tue, 6 Aug 2024 19:48:13 +0200
Subject: [PATCH 3/5] Fix #166

---
 R/annotate_spectra.R          |  3 ++-
 R/import_spectra.R            | 40 +++++++++++++++--------------------
 R/prepare_libraries_spectra.R |  9 ++------
 R/sanitize_spectra.R          |  6 ++----
 codemeta.json                 |  2 +-
 man/import_spectra.Rd         | 11 +++++++++-
 man/sanitize_spectra.Rd       |  9 +-------
 7 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/R/annotate_spectra.R b/R/annotate_spectra.R
index 04a71e9b..b53abab8 100644
--- a/R/annotate_spectra.R
+++ b/R/annotate_spectra.R
@@ -116,7 +116,8 @@ annotate_spectra <- function(input = get_params(step = "annotate_spectra")$files
         cutoff = qutoff,
         dalton = dalton,
         polarity = polarity,
-        ppm = ppm
+        ppm = ppm,
+        sanitize = FALSE
       ) |>
       lapply(FUN = applyProcessing) |>
       concatenateSpectra()
diff --git a/R/import_spectra.R b/R/import_spectra.R
index b83abcaf..0d523f6c 100644
--- a/R/import_spectra.R
+++ b/R/import_spectra.R
@@ -21,6 +21,7 @@ import::from(stringi, stri_replace_all_regex, .into = environment())
 #' @param dalton Dalton tolerance
 #' @param polarity Polarity
 #' @param ppm PPM tolerance
+#' @param sanitize Flag indicating whether to sanitize. Default TRUE
 #'
 #' @return Spectra object containing the imported spectra
 #'
@@ -31,7 +32,8 @@ import_spectra <- function(file,
                            cutoff = 0,
                            dalton = 0.01,
                            polarity = NA,
-                           ppm = 10) {
+                           ppm = 10,
+                           sanitize = TRUE) {
   file_ext <-
     stri_replace_all_regex(
       str = file,
@@ -40,29 +42,17 @@ import_spectra <- function(file,
       vectorize_all = FALSE
     )
 
-  switch(
+  spectra <- switch(
     EXPR = file_ext,
     "mgf" = {
       readMgf(f = file) |>
         # TODO Change as soon as R 4.4.0 becomes oldrel
         # readMgfSplit(f = file) |>
-        Spectra() |>
-        sanitize_spectra(
-          cutoff = cutoff,
-          dalton = dalton,
-          polarity = polarity,
-          ppm = ppm
-        )
+        Spectra()
     },
     "msp" = {
       readMsp(f = file) |>
-        Spectra() |>
-        sanitize_spectra(
-          cutoff = cutoff,
-          dalton = dalton,
-          polarity = polarity,
-          ppm = ppm
-        )
+        Spectra()
     },
     # "sqlite" = {
     #   CompDb(x = file) |>
@@ -72,13 +62,17 @@ import_spectra <- function(file,
     "rds" = {
       readRDS(file = file) |>
         data.frame() |>
-        Spectra() |>
-        sanitize_spectra(
-          cutoff = cutoff,
-          dalton = dalton,
-          polarity = polarity,
-          ppm = ppm
-        )
+        Spectra()
     }
   )
+  if (sanitize) {
+    spectra <- spectra |>
+      sanitize_spectra(
+        cutoff = cutoff,
+        dalton = dalton,
+        polarity = polarity,
+        ppm = ppm
+      )
+  }
+  return(spectra)
 }
diff --git a/R/prepare_libraries_spectra.R b/R/prepare_libraries_spectra.R
index 838b09d3..375bcc7f 100644
--- a/R/prepare_libraries_spectra.R
+++ b/R/prepare_libraries_spectra.R
@@ -23,7 +23,6 @@ import::from(tidytable, tidytable, .into = environment())
 #' @include harmonize_spectra.R
 #' @include import_spectra.R
 #' @include parse_yaml_paths.R
-#' @include sanitize_spectra.R
 #'
 #' @param input File containing spectra
 #' @param polarity MS polarity
@@ -91,13 +90,9 @@ prepare_libraries_spectra <-
         log_debug("Importing")
         spectra <- lapply(X = input, FUN = import_spectra)
 
-        log_debug("Sanitizing")
-        spectra_sanitized <- lapply(X = spectra, FUN = sanitize_spectra)
-        rm(spectra)
-
         log_debug("Extracting")
-        spectra_extracted <- lapply(X = spectra_sanitized, FUN = extract_spectra)
-        rm(spectra_sanitized)
+        spectra_extracted <- lapply(X = spectra, FUN = extract_spectra)
+        rm(spectra)
 
         log_debug("Harmonizing ...")
         log_debug("... pos")
diff --git a/R/sanitize_spectra.R b/R/sanitize_spectra.R
index c9279d2a..8015b11d 100644
--- a/R/sanitize_spectra.R
+++ b/R/sanitize_spectra.R
@@ -45,8 +45,7 @@ sanitize_spectra <-
            cutoff = 0,
            dalton = 0.01,
            polarity = NA,
-           ppm = 10,
-           ratio = 10000) {
+           ppm = 10) {
     log_debug("Applying sanitization of the spectra")
 
     if ("msLevel" %in% colnames(spectra@backend@spectraData)) {
@@ -74,8 +73,7 @@ sanitize_spectra <-
         ppm = ppm,
         mz = c(">=")
       ) |>
-      scalePeaks() |>
-      filterIntensity(intensity = c(1 / ratio, 1))
+      scalePeaks()
 
     if ("FEATURE_ID" %in% colnames(spectra@backend@spectraData)) {
       message("Combining spectra in case...")
diff --git a/codemeta.json b/codemeta.json
index 7216b15f..7f89772d 100644
--- a/codemeta.json
+++ b/codemeta.json
@@ -644,7 +644,7 @@
     "SystemRequirements": null
   },
   "keywords": ["metaboliteannotation", "chemotaxonomy", "scoringsystem", "naturalproducts", "computationalmetabolomics", "taxonomicdistance", "specializedmetabolome"],
-  "fileSize": "3166.925KB",
+  "fileSize": "3166.651KB",
   "citation": [
     {
       "@type": "ScholarlyArticle",
diff --git a/man/import_spectra.Rd b/man/import_spectra.Rd
index eadf27b5..65031a3f 100644
--- a/man/import_spectra.Rd
+++ b/man/import_spectra.Rd
@@ -4,7 +4,14 @@
 \alias{import_spectra}
 \title{Import spectra}
 \usage{
-import_spectra(file, cutoff = 0, dalton = 0.01, polarity = NA, ppm = 10)
+import_spectra(
+  file,
+  cutoff = 0,
+  dalton = 0.01,
+  polarity = NA,
+  ppm = 10,
+  sanitize = TRUE
+)
 }
 \arguments{
 \item{file}{File path of the spectrum file to be imported}
@@ -16,6 +23,8 @@ import_spectra(file, cutoff = 0, dalton = 0.01, polarity = NA, ppm = 10)
 \item{polarity}{Polarity}
 
 \item{ppm}{PPM tolerance}
+
+\item{sanitize}{Flag indicating whether to sanitize. Default TRUE}
 }
 \value{
 Spectra object containing the imported spectra
diff --git a/man/sanitize_spectra.Rd b/man/sanitize_spectra.Rd
index f3cd32de..8e0fecf2 100644
--- a/man/sanitize_spectra.Rd
+++ b/man/sanitize_spectra.Rd
@@ -4,14 +4,7 @@
 \alias{sanitize_spectra}
 \title{Sanitize spectra}
 \usage{
-sanitize_spectra(
-  spectra,
-  cutoff = 0,
-  dalton = 0.01,
-  polarity = NA,
-  ppm = 10,
-  ratio = 10000
-)
+sanitize_spectra(spectra, cutoff = 0, dalton = 0.01, polarity = NA, ppm = 10)
 }
 \arguments{
 \item{spectra}{Spectra object}

From faa6e223abc8c8be75f65f98f0c2ae141b0d05b5 Mon Sep 17 00:00:00 2001
From: Adriano Rutz <adriano.rutz@hotmail.com>
Date: Tue, 6 Aug 2024 21:44:14 +0200
Subject: [PATCH 4/5] lint

---
 R/run_app.R | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/R/run_app.R b/R/run_app.R
index 8f72749b..48c53596 100644
--- a/R/run_app.R
+++ b/R/run_app.R
@@ -20,24 +20,18 @@ import::from(shiny, runApp, .into = environment())
 run_app <- function(host = "127.0.0.1",
                     port = 3838,
                     browser = TRUE) {
-  # Check if runs in Docker environment or not
   if (file.exists("/.dockerenv")) {
     system('echo "I\'m inside the matrix ;("')
     browser <- FALSE
     host <- "0.0.0.0"
   } else {
-    # Check if running latest version
-    tryCatch(
-      expr = {
-        install()
-      },
-      error = function(e) {
-        message(e)
-      }
-    )
+    tryCatch(expr = {
+      install()
+    }, error = function(e) {
+      message(e)
+    })
     system('echo "I\'m living in the real world!"')
   }
-
   shiny::runApp(
     appDir = system.file(package = "tima"),
     port = port,

From 293364943e0a5b069a757ad30bb31ebe37c28cc0 Mon Sep 17 00:00:00 2001
From: Adriano Rutz <adriano.rutz@hotmail.com>
Date: Tue, 6 Aug 2024 21:44:29 +0200
Subject: [PATCH 5/5] tests

---
 tests/testthat/test-functions.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/testthat/test-functions.R b/tests/testthat/test-functions.R
index 892fc3cf..23d8c5f8 100644
--- a/tests/testthat/test-functions.R
+++ b/tests/testthat/test-functions.R
@@ -13,6 +13,9 @@ test_that(desc = "Test functions", code = {
   ## Go to cache
   go_to_cache()
 
+  ## Get example files
+  get_example_files()
+
   ## Prepare parameters
   paths <- parse_yaml_paths()
   params <- get_params(step = "prepare_params")