Skip to content

Commit

Permalink
benchmark update
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Jul 23, 2023
1 parent 14a9a29 commit 48d4a72
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 238 deletions.
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ Collate:
'prepare_params.R'
'prepare_taxa.R'
'replace_id.R'
'sanitize_spectra_benchmark.R'
'select_sop_columns.R'
'taxize_spectra_benchmark.R'
'weight_chemo.R'
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ export(remove_above_precursor)
export(replace_id)
export(round_reals)
export(sanitize_spectra)
export(sanitize_spectra_benchmark)
export(select_annotations_columns)
export(select_sirius_columns)
export(select_sirius_columns_2)
Expand Down
199 changes: 0 additions & 199 deletions R/sanitize_spectra_benchmark.R

This file was deleted.

172 changes: 169 additions & 3 deletions inst/pipelines/_targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -1867,9 +1867,175 @@ list(
Spectra::Spectra(source = MsBackendMsp::MsBackendMsp()) |>
Spectra::setBackend(Spectra::MsBackendMemory())

sp |> sanitize_spectra_benchmark(
mgf_pos_path = benchmark_path_mgf_pos,
mgf_neg_path = benchmark_path_mgf_neg
## Prepare the benchmark set from the imported library spectra `sp`:
## clean the peaks, filter the metadata, split the entries by polarity and
## export both the spectra (MGF) and the matching metadata tables.

## Output locations. `benchmark_path_mgf_pos` / `benchmark_path_mgf_neg` are
## defined outside this block; the metadata paths are the ones reported in
## the value returned at the end. They are bound here because the export
## calls below would otherwise refer to undefined symbols.
mgf_pos_path <- benchmark_path_mgf_pos
mgf_neg_path <- benchmark_path_mgf_neg
meta_pos_path <- "data/interim/benchmark/benchmark_meta_pos.tsv"
meta_neg_path <- "data/interim/benchmark/benchmark_meta_neg.tsv"

## Expose the MSP fields under the variable names used by the processing
## step below (`precursorMz`) and by the polarity split (`precursorCharge`).
sp$precursorMz <- as.numeric(sp$PRECURSOR_MZ)
sp$precursorCharge <- as.integer(sp$CHARGE)

sp_clean <- sp |>
  Spectra::addProcessing(remove_above_precursor(),
    spectraVariables = c("precursorMz")
  ) |>
  Spectra::addProcessing(normalize_peaks()) |>
  Spectra::applyProcessing()

adduct <- sp_clean$ADDUCT
inchikey <- sp_clean$inchikey
instrument <- sp_clean$SOURCE_INSTRUMENT
# fragments <- sp_clean$NUM.PEAKS
## Each peak contributes two values (hence the division by 2) to the
## per-spectrum peak data stored in the backend.
fragments <- lengths(sp_clean@backend@peaksData) / 2
# pepmass <- gsub("\\[|\\]", "", sp_clean$PARENT_MASS)
pepmass <- sp_clean$PEPMASS
smiles <- sp_clean$smiles
ccmslib <- sp_clean$SPECTRUMID
charge <- sp_clean$precursorCharge

## One metadata row per spectrum; empty strings are turned into NA.
df_meta <- tidytable::tidytable(
  adduct,
  inchikey,
  instrument,
  fragments,
  pepmass,
  smiles,
  ccmslib,
  charge
) |>
  tidyft::mutate_vars(
    is.character,
    .func = function(x) {
      tidytable::na_if(x, "")
    }
  )

df_clean <- df_meta |>
  dplyr::filter(!is.na(inchikey)) |>
  ## Keep spectra with more than 5 and at most 1000 peaks
  dplyr::filter(fragments > 5) |>
  dplyr::filter(fragments <= 1000) |>
  dplyr::filter(!grepl(
    pattern = "QQQ",
    x = instrument,
    fixed = TRUE
  )) |>
  ## Keep only precursor masses reported with at least two decimals
  dplyr::mutate(mass = pepmass) |>
  tidyr::separate(
    col = mass,
    sep = "\\.",
    into = c("a", "b")
  ) |>
  dplyr::filter(!is.na(b)) |>
  dplyr::filter(stringr::str_length(b) > 1) |>
  dplyr::select(-a, -b) |>
  ## Everything before the first "-" of the InChIKey
  dplyr::mutate(inchikey_2D = gsub(
    pattern = "-.*",
    replacement = "",
    x = inchikey
  )) |>
  ## One entry per (2D structure, adduct) pair
  dplyr::distinct(inchikey_2D, adduct, .keep_all = TRUE) |>
  dplyr::mutate(mz = pepmass) |>
  dplyr::group_by(inchikey_2D) |>
  ## Weird way to have some kind of retention time
  dplyr::mutate(rt = dplyr::cur_group_id()) |>
  dplyr::ungroup()

## Negative mode: the charge contains a minus sign
df_clean_neg <- df_clean |>
  dplyr::filter(grepl(
    pattern = "-",
    x = charge,
    fixed = TRUE
  )) |>
  dplyr::mutate(feature_id = dplyr::row_number())

## Positive mode: everything that is not in the negative set
df_clean_pos <- df_clean |>
  dplyr::anti_join(df_clean_neg) |>
  dplyr::mutate(feature_id = dplyr::row_number())

## Pick exactly one spectrum per metadata row, in the row order of the
## metadata, so that the ids assigned below line up even if a SPECTRUMID
## occurs more than once in `sp_clean`.
sp_pos <- sp_clean[match(df_clean_pos$ccmslib, sp_clean$SPECTRUMID)]
sp_neg <- sp_clean[match(df_clean_neg$ccmslib, sp_clean$SPECTRUMID)]
sp_pos$feature_id <- df_clean_pos$feature_id
sp_neg$feature_id <- df_clean_neg$feature_id
sp_pos$spectrum_id <- df_clean_pos$feature_id
sp_neg$spectrum_id <- df_clean_neg$feature_id

spectra_harmonized_pos <- sp_pos |>
  extract_spectra() |>
  dplyr::mutate(polarity = "pos") |>
  harmonize_spectra(
    col_ce = NA,
    col_ci = NA,
    col_em = "PARENT_MASS",
    col_in = "inchi",
    col_io = NA,
    col_ik = "inchikey",
    col_il = NA,
    # col_mf = "formula",
    col_mf = NA,
    col_na = "name",
    col_po = "polarity",
    col_sm = "smiles",
    col_sn = NA,
    col_si = "spectrum_id",
    col_sp = NA,
    col_sy = NA,
    col_xl = NA,
    mode = "pos"
  )

spectra_harmonized_neg <- sp_neg |>
  extract_spectra() |>
  dplyr::mutate(polarity = "neg") |>
  harmonize_spectra(
    col_ce = NA,
    col_ci = NA,
    col_em = "PARENT_MASS",
    col_in = "inchi",
    col_io = NA,
    col_ik = "inchikey",
    col_il = NA,
    # col_mf = "formula",
    col_mf = NA,
    col_na = "name",
    col_po = "polarity",
    col_sm = "smiles",
    col_sn = NA,
    col_si = "spectrum_id",
    col_sp = NA,
    col_sy = NA,
    col_xl = NA,
    mode = "neg"
  )

## Reuse the spectrum id as the acquisition number of the exported spectra
spectra_harmonized_pos$acquisitionNum <-
  as.integer(spectra_harmonized_pos$spectrum_id)
spectra_harmonized_neg$acquisitionNum <-
  as.integer(spectra_harmonized_neg$spectrum_id)

log_debug("Exporting")
spectra_harmonized_pos |>
  Spectra::Spectra() |>
  Spectra::export(
    backend = MsBackendMgf::MsBackendMgf(),
    file = mgf_pos_path
  )
spectra_harmonized_neg |>
  Spectra::Spectra() |>
  Spectra::export(
    backend = MsBackendMgf::MsBackendMgf(),
    file = mgf_neg_path
  )
df_clean_pos |>
  export_output(meta_pos_path)
df_clean_neg |>
  export_output(meta_neg_path)

## Paths of the four files written above
return(
  c(
    "spectra_pos" = mgf_pos_path,
    "spectra_neg" = mgf_neg_path,
    "meta_pos" = meta_pos_path,
    "meta_neg" = meta_neg_path
  )
)
}
),
Expand Down
Loading

0 comments on commit 48d4a72

Please sign in to comment.