From bcbf9d0df1f9db3108203aea78e7bc18b415f44d Mon Sep 17 00:00:00 2001
From: orichters <oliverr@pik-potsdam.de>
Date: Thu, 19 Oct 2023 17:38:17 +0200
Subject: [PATCH 1/2] allow to use read.snapshot without filtering if grep etc.
 not present

---
 .buildlibrary                       |  2 +-
 CITATION.cff                        |  2 +-
 DESCRIPTION                         |  2 +-
 R/read.snapshot.R                   | 17 +++++++++--------
 README.md                           |  6 +++---
 tests/testthat/test-read.snapshot.R | 12 +++++++++---
 6 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/.buildlibrary b/.buildlibrary
index ee51f18..2ace8c0 100644
--- a/.buildlibrary
+++ b/.buildlibrary
@@ -1,4 +1,4 @@
-ValidationKey: '614424230'
+ValidationKey: '614443879'
 AcceptedWarnings:
 - 'Warning: package ''.*'' was built under R version'
 - 'Warning: namespace ''.*'' is not available and has been replaced'
diff --git a/CITATION.cff b/CITATION.cff
index 828c4fb..1e09e03 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -2,7 +2,7 @@ cff-version: 1.2.0
 message: If you use this software, please cite it using the metadata from this file.
 type: software
 title: 'quitte: Bits and pieces of code to use with quitte-style data frames'
-version: 0.3127.0
+version: 0.3127.1
 date-released: '2023-10-19'
 abstract: A collection of functions for easily dealing with quitte-style data frames,
   doing multi-model comparisons and plots.
diff --git a/DESCRIPTION b/DESCRIPTION
index 0a2cca1..c77b075 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: quitte
 Title: Bits and pieces of code to use with quitte-style data frames
-Version: 0.3127.0
+Version: 0.3127.1
 Date: 2023-10-19
 Authors@R: c(
     person("Michaja", "Pehl", , "pehl@pik-potsdam.de", role = c("aut", "cre")),
diff --git a/R/read.snapshot.R b/R/read.snapshot.R
index f22dccc..db3996a 100644
--- a/R/read.snapshot.R
+++ b/R/read.snapshot.R
@@ -25,18 +25,19 @@ read.snapshot <- function(file, keep = list()) {
   if (length(unknowntype) > 0) {
     stop("Unknown types to be kept: ", toString(unknowntype))
   }
-  testcommand <- c("grep", "head", "tail", "sed")
-  exitcodes <- suppressWarnings(
-      sapply(paste(testcommand, '--version'), system,
-             ignore.stdout = TRUE, ignore.stderr = TRUE))
-  if (any(0 != exitcodes)) {
-      stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '),
-                "are not available system commands, please use 'read.quitte'."))
-  }
 
   # temporary file
   tmpfile <- tempfile(pattern = "data", fileext = ".csv")
   if (length(setdiff(names(keep), "period")) > 0) {
+    # check whether system commands are supported
+    testcommand <- c("grep", "head", "tail", "sed")
+    exitcodes <- suppressWarnings(
+        sapply(paste(testcommand, '--version'), system,
+               ignore.stdout = TRUE, ignore.stderr = TRUE))
+    if (any(0 != exitcodes)) {
+        stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '),
+                  "are not available system commands, please use 'read.quitte'."))
+    }
     # always keep first lines of original file (comments, colnames), grep in the rest
     alwayskeep <- 20
     system(paste("head -n", alwayskeep, file, ">", tmpfile))
diff --git a/README.md b/README.md
index 3ba4aff..e60b236 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Bits and pieces of code to use with quitte-style data frames
 
-R package **quitte**, version **0.3127.0**
+R package **quitte**, version **0.3127.1**
 
 [![CRAN status](https://www.r-pkg.org/badges/version/quitte)](https://cran.r-project.org/package=quitte)  [![R build status](https://github.com/pik-piam/quitte/workflows/check/badge.svg)](https://github.com/pik-piam/quitte/actions) [![codecov](https://codecov.io/gh/pik-piam/quitte/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/quitte) [![r-universe](https://pik-piam.r-universe.dev/badges/quitte)](https://pik-piam.r-universe.dev/builds)
 
@@ -47,7 +47,7 @@ In case of questions / problems please contact Michaja Pehl <michaja.pehl@pik-po
 
 To cite package **quitte** in publications use:
 
-Pehl M, Bauer N, Hilaire J, Levesque A, Luderer G, Schultes A, Dietrich J, Richters O (2023). _quitte: Bits and pieces of code to use with quitte-style data frames_. R package version 0.3127.0, <URL: https://github.com/pik-piam/quitte>.
+Pehl M, Bauer N, Hilaire J, Levesque A, Luderer G, Schultes A, Dietrich J, Richters O (2023). _quitte: Bits and pieces of code to use with quitte-style data frames_. R package version 0.3127.1, <URL: https://github.com/pik-piam/quitte>.
 
 A BibTeX entry for LaTeX users is
 
@@ -56,7 +56,7 @@ A BibTeX entry for LaTeX users is
   title = {quitte: Bits and pieces of code to use with quitte-style data frames},
   author = {Michaja Pehl and Nico Bauer and Jérôme Hilaire and Antoine Levesque and Gunnar Luderer and Anselm Schultes and Jan Philipp Dietrich and Oliver Richters},
   year = {2023},
-  note = {R package version 0.3127.0},
+  note = {R package version 0.3127.1},
   url = {https://github.com/pik-piam/quitte},
 }
 ```
diff --git a/tests/testthat/test-read.snapshot.R b/tests/testthat/test-read.snapshot.R
index bc3806c..ddc999b 100644
--- a/tests/testthat/test-read.snapshot.R
+++ b/tests/testthat/test-read.snapshot.R
@@ -1,17 +1,23 @@
 test_that("read.snapshot works", {
-  qe <- droplevels(quitteSort(as.quitte(quitte_example_dataAR6, na.rm = TRUE)))
+  qe <- quitte_example_dataAR6 %>%
+    as.quitte(na.rm = TRUE) %>%
+    filter(! variable == "Temperature|Global Mean") %>%
+    quitteSort() %>%
+    droplevels()
   tmpfile <- tempfile(pattern = "data", fileext = ".csv")
   write.table(pivot_wider(qe, names_from = "period"), 
               file = tmpfile, append = FALSE, quote = FALSE, sep = ",",
               eol = "\n", na = "", dec = ".", row.names = FALSE,
               col.names = TRUE) # mimick IIASA snapshot format
-  fails <- tryCatch(read.snapshot(tmpfile), error = function(e) { paste(e) })
+  expect_equal(qe, read.snapshot(tmpfile))
+  fails <- tryCatch(read.snapshot(tmpfile, list(region = head(levels(qe$region), 1))),
+                    error = function(e) { paste(e) })
   if (is.character(fails) && length(fails) == 1 && grepl("not available system commands", fails)) {
     skip(paste0(gsub("Error in ", "", gsub(", pleas.*", "", fails)), ", skipping tests."))
   }
   system(paste("sed -i 's/GCAM/\"GCAM\"/g;'", tmpfile))
   system(paste("sed -i 's/Delayed transition/\"Delayed transition\"/g;'", tmpfile))
-  rtests <- list(head(levels(qe$region), 1), head(levels(qe$region), 2))
+  rtests <- list(head(levels(qe$region), 2))
   for (r in rtests) {
     expect_equal(droplevels(dplyr::filter(qe, region %in% r)),
                  read.snapshot(tmpfile, list(region = r)))

From 8c20db54df855dbb6821343bdd1d5598dc54dfbf Mon Sep 17 00:00:00 2001
From: orichters <oliverr@pik-potsdam.de>
Date: Fri, 20 Oct 2023 08:52:16 +0200
Subject: [PATCH 2/2] allow to pass through filter.function in read.snapshot,
 treat duplicated names in list

---
 R/read.snapshot.R                   | 28 +++++++++++++++++++++-------
 tests/testthat/test-read.snapshot.R | 10 +++++++---
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/R/read.snapshot.R b/R/read.snapshot.R
index db3996a..b6c1476 100644
--- a/R/read.snapshot.R
+++ b/R/read.snapshot.R
@@ -5,7 +5,10 @@
 #'
 #' @md
 #' @param file Path of single IAMC-style .csv/.mif file
-#' @param keep list with quitte columns as names and data points that should be kept.
+#' @param keep list with quitte columns as names and data points that should be kept. Using 'grep',
+#' this list is used to extract the data before reading it into R. The more you restrict the data here,
+#' the faster the data is read.
+#' @param filter.function A function used to filter data during read, see read.quitte description.
 #'
 #' @return A quitte data frame.
 #'
@@ -17,14 +20,18 @@
 #' }
 #'
 #' @importFrom dplyr filter
+#' @importFrom stats setNames
 #'
 #' @export
 
-read.snapshot <- function(file, keep = list()) {
+read.snapshot <- function(file, keep = list(), filter.function = NULL) {
   unknowntype <- setdiff(names(keep), c("model", "scenario", "region", "variable", "unit", "period"))
   if (length(unknowntype) > 0) {
-    stop("Unknown types to be kept: ", toString(unknowntype))
+    stop("Unknown types in 'keep': ", toString(unknowntype))
   }
+  # join if multiple elements with same name exist in list
+  joinelements <- function(v, list) return(setNames(list(unique(unname(unlist(list[names(list) == v])))), v))
+  keep <- do.call(c, lapply(unique(names(keep)), joinelements, list = keep))
 
   # temporary file
   tmpfile <- tempfile(pattern = "data", fileext = ".csv")
@@ -59,14 +66,21 @@ read.snapshot <- function(file, keep = list()) {
   } else {
     file.copy(file, tmpfile, overwrite = TRUE)
   }
+  joinedfilter <- function(data) {
+    for (t in names(keep)) {
+      data <- droplevels(filter(data, .data[[t]] %in% keep[[t]]))
+    }
+    if (is.function(filter.function)) {
+      data <- filter.function(data)
+    }
+    return(data)
+  }
   # read file and do correct filtering
   data <- read.quitte(tmpfile,
                       na.strings = c("UNDF", "NA", "N/A", "n_a", ""),
                       quote = '"',
-                      drop.na = TRUE)
+                      drop.na = TRUE,
+                      filter.function = joinedfilter)
   unlink(tmpfile)
-  for (t in names(keep)) {
-    data <- droplevels(filter(data, .data[[t]] %in% keep[[t]]))
-  }
   return(data)
 }
diff --git a/tests/testthat/test-read.snapshot.R b/tests/testthat/test-read.snapshot.R
index ddc999b..1990506 100644
--- a/tests/testthat/test-read.snapshot.R
+++ b/tests/testthat/test-read.snapshot.R
@@ -37,7 +37,11 @@ test_that("read.snapshot works", {
     expect_equal(droplevels(dplyr::filter(qe, period %in% p)),
                  read.snapshot(tmpfile, list(period = p)))
   }
-  # test all jointly with last setting
-  expect_equal(droplevels(dplyr::filter(qe, period %in% p, variable %in% v, region %in% r, scenario %in% s)),
-               read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s)))
+  # test all jointly with last setting, test passing of filter.function
+  filter.function <- function(x) droplevels(dplyr::filter(x, period %in% p, variable %in% v, region %in% r, scenario %in% s))
+  snapshotdata <- read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s))
+  expect_equal(filter.function(qe),
+               snapshotdata)
+  expect_equal(read.snapshot(tmpfile, filter.function = filter.function),
+               snapshotdata)
 })