From bcbf9d0df1f9db3108203aea78e7bc18b415f44d Mon Sep 17 00:00:00 2001 From: orichters Date: Thu, 19 Oct 2023 17:38:17 +0200 Subject: [PATCH 1/2] allow to use read.snapshot without filtering if grep etc. not present --- .buildlibrary | 2 +- CITATION.cff | 2 +- DESCRIPTION | 2 +- R/read.snapshot.R | 17 +++++++++-------- README.md | 6 +++--- tests/testthat/test-read.snapshot.R | 12 +++++++++--- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/.buildlibrary b/.buildlibrary index ee51f18..2ace8c0 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '614424230' +ValidationKey: '614443879' AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' - 'Warning: namespace ''.*'' is not available and has been replaced' diff --git a/CITATION.cff b/CITATION.cff index 828c4fb..1e09e03 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,7 +2,7 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'quitte: Bits and pieces of code to use with quitte-style data frames' -version: 0.3127.0 +version: 0.3127.1 date-released: '2023-10-19' abstract: A collection of functions for easily dealing with quitte-style data frames, doing multi-model comparisons and plots. diff --git a/DESCRIPTION b/DESCRIPTION index 0a2cca1..c77b075 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: quitte Title: Bits and pieces of code to use with quitte-style data frames -Version: 0.3127.0 +Version: 0.3127.1 Date: 2023-10-19 Authors@R: c( person("Michaja", "Pehl", , "pehl@pik-potsdam.de", role = c("aut", "cre")), diff --git a/R/read.snapshot.R b/R/read.snapshot.R index f22dccc..db3996a 100644 --- a/R/read.snapshot.R +++ b/R/read.snapshot.R @@ -25,18 +25,19 @@ read.snapshot <- function(file, keep = list()) { if (length(unknowntype) > 0) { stop("Unknown types to be kept: ", toString(unknowntype)) } - testcommand <- c("grep", "head", "tail", "sed") - exitcodes <- suppressWarnings( - sapply(paste(testcommand, '--version'), system, - ignore.stdout = TRUE, ignore.stderr = TRUE)) - if (any(0 != exitcodes)) { - stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '), - "are not available system commands, please use 'read.quitte'.")) - } # temporary file tmpfile <- tempfile(pattern = "data", fileext = ".csv") if (length(setdiff(names(keep), "period")) > 0) { + # check whether system commands are supported + testcommand <- c("grep", "head", "tail", "sed") + exitcodes <- suppressWarnings( + sapply(paste(testcommand, '--version'), system, + ignore.stdout = TRUE, ignore.stderr = TRUE)) + if (any(0 != exitcodes)) { + stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '), + "are not available system commands, please use 'read.quitte'.")) + } # always keep first lines of original file (comments, colnames), grep in the rest alwayskeep <- 20 system(paste("head -n", alwayskeep, file, ">", tmpfile)) diff --git a/README.md b/README.md index 3ba4aff..e60b236 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Bits and pieces of code to use with quitte-style data frames -R package **quitte**, version **0.3127.0** +R package **quitte**, version **0.3127.1** [![CRAN status](https://www.r-pkg.org/badges/version/quitte)](https://cran.r-project.org/package=quitte) [![R build status](https://github.com/pik-piam/quitte/workflows/check/badge.svg)](https://github.com/pik-piam/quitte/actions) [![codecov](https://codecov.io/gh/pik-piam/quitte/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/quitte) [![r-universe](https://pik-piam.r-universe.dev/badges/quitte)](https://pik-piam.r-universe.dev/builds) @@ -47,7 +47,7 @@ In case of questions / problems please contact Michaja Pehl . +Pehl M, Bauer N, Hilaire J, Levesque A, Luderer G, Schultes A, Dietrich J, Richters O (2023). _quitte: Bits and pieces of code to use with quitte-style data frames_. R package version 0.3127.1, . A BibTeX entry for LaTeX users is @@ -56,7 +56,7 @@ A BibTeX entry for LaTeX users is title = {quitte: Bits and pieces of code to use with quitte-style data frames}, author = {Michaja Pehl and Nico Bauer and Jérôme Hilaire and Antoine Levesque and Gunnar Luderer and Anselm Schultes and Jan Philipp Dietrich and Oliver Richters}, year = {2023}, - note = {R package version 0.3127.0}, + note = {R package version 0.3127.1}, url = {https://github.com/pik-piam/quitte}, } ``` diff --git a/tests/testthat/test-read.snapshot.R b/tests/testthat/test-read.snapshot.R index bc3806c..ddc999b 100644 --- a/tests/testthat/test-read.snapshot.R +++ b/tests/testthat/test-read.snapshot.R @@ -1,17 +1,23 @@ test_that("read.snapshot works", { - qe <- droplevels(quitteSort(as.quitte(quitte_example_dataAR6, na.rm = TRUE))) + qe <- quitte_example_dataAR6 %>% + as.quitte(na.rm = TRUE) %>% + filter(! variable == "Temperature|Global Mean") %>% + quitteSort() %>% + droplevels() tmpfile <- tempfile(pattern = "data", fileext = ".csv") write.table(pivot_wider(qe, names_from = "period"), file = tmpfile, append = FALSE, quote = FALSE, sep = ",", eol = "\n", na = "", dec = ".", row.names = FALSE, col.names = TRUE) # mimick IIASA snapshot format - fails <- tryCatch(read.snapshot(tmpfile), error = function(e) { paste(e) }) + expect_equal(qe, read.snapshot(tmpfile)) + fails <- tryCatch(read.snapshot(tmpfile, list(region = head(levels(qe$region), 1))), + error = function(e) { paste(e) }) if (is.character(fails) && length(fails) == 1 && grepl("not available system commands", fails)) { skip(paste0(gsub("Error in ", "", gsub(", pleas.*", "", fails)), ", skipping tests.")) } system(paste("sed -i 's/GCAM/\"GCAM\"/g;'", tmpfile)) system(paste("sed -i 's/Delayed transition/\"Delayed transition\"/g;'", tmpfile)) - rtests <- list(head(levels(qe$region), 1), head(levels(qe$region), 2)) + rtests <- list(head(levels(qe$region), 2)) for (r in rtests) { expect_equal(droplevels(dplyr::filter(qe, region %in% r)), read.snapshot(tmpfile, list(region = r))) From 8c20db54df855dbb6821343bdd1d5598dc54dfbf Mon Sep 17 00:00:00 2001 From: orichters Date: Fri, 20 Oct 2023 08:52:16 +0200 Subject: [PATCH 2/2] allow to pass through filter.function in read.snapshot, treat duplicated names in list --- R/read.snapshot.R | 28 +++++++++++++++++++++------- tests/testthat/test-read.snapshot.R | 10 +++++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/R/read.snapshot.R b/R/read.snapshot.R index db3996a..b6c1476 100644 --- a/R/read.snapshot.R +++ b/R/read.snapshot.R @@ -5,7 +5,10 @@ #' #' @md #' @param file Path of single IAMC-style .csv/.mif file -#' @param keep list with quitte columns as names and data points that should be kept. +#' @param keep list with quitte columns as names and data points that should be kept. Using 'grep', +#' this list is used to extract the data before reading it into R. The more you restrict the data here, +#' the faster the data is read. +#' @param filter.function A function used to filter data during read, see read.quitte description. #' #' @return A quitte data frame. #' @@ -17,14 +20,18 @@ #' } #' #' @importFrom dplyr filter +#' @importFrom stats setNames #' #' @export -read.snapshot <- function(file, keep = list()) { +read.snapshot <- function(file, keep = list(), filter.function = NULL) { unknowntype <- setdiff(names(keep), c("model", "scenario", "region", "variable", "unit", "period")) if (length(unknowntype) > 0) { - stop("Unknown types to be kept: ", toString(unknowntype)) + stop("Unknown types in 'keep': ", toString(unknowntype)) } + # join if multiple elements with same name exist in list + joinelements <- function(v, list) return(setNames(list(unique(unname(unlist(list[names(list) == v])))), v)) + keep <- do.call(c, lapply(unique(names(keep)), joinelements, list = keep)) # temporary file tmpfile <- tempfile(pattern = "data", fileext = ".csv") @@ -59,14 +66,21 @@ read.snapshot <- function(file, keep = list()) { } else { file.copy(file, tmpfile, overwrite = TRUE) } + joinedfilter <- function(data) { + for (t in names(keep)) { + data <- droplevels(filter(data, .data[[t]] %in% keep[[t]])) + } + if (is.function(filter.function)) { + data <- filter.function(data) + } + return(data) + } # read file and do correct filtering data <- read.quitte(tmpfile, na.strings = c("UNDF", "NA", "N/A", "n_a", ""), quote = '"', - drop.na = TRUE) + drop.na = TRUE, + filter.function = joinedfilter) unlink(tmpfile) - for (t in names(keep)) { - data <- droplevels(filter(data, .data[[t]] %in% keep[[t]])) - } return(data) } diff --git a/tests/testthat/test-read.snapshot.R b/tests/testthat/test-read.snapshot.R index ddc999b..1990506 100644 --- a/tests/testthat/test-read.snapshot.R +++ b/tests/testthat/test-read.snapshot.R @@ -37,7 +37,11 @@ test_that("read.snapshot works", { expect_equal(droplevels(dplyr::filter(qe, period %in% p)), read.snapshot(tmpfile, list(period = p))) } - # test all jointly with last setting - expect_equal(droplevels(dplyr::filter(qe, period %in% p, variable %in% v, region %in% r, scenario %in% s)), - read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s))) + # test all jointly with last setting, test passing of filter.function + filter.function <- function(x) droplevels(dplyr::filter(x, period %in% p, variable %in% v, region %in% r, scenario %in% s)) + snapshotdata <- read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s)) + expect_equal(filter.function(qe), + snapshotdata) + expect_equal(read.snapshot(tmpfile, filter.function = filter.function), + snapshotdata) })