allow to pass through filter.function in read.snapshot, treat duplica…

…ted names in list
pik-piam · Oct 20, 2023 · 8c20db5 · 8c20db5
1 parent bcbf9d0
commit 8c20db5
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 10 deletions.
diff --git a/R/read.snapshot.R b/R/read.snapshot.R
@@ -5,7 +5,10 @@
 #'
 #' @md
 #' @param file Path of single IAMC-style .csv/.mif file
-#' @param keep list with quitte columns as names and data points that should be kept.
+#' @param keep list with quitte columns as names and data points that should be kept. Using 'grep',
+#' this list is used to extract the data before reading it into R. The more you restrict the data here,
+#' the faster the data is read.
+#' @param filter.function A function used to filter data during read, see read.quitte description.
 #'
 #' @return A quitte data frame.
 #'
@@ -17,14 +20,18 @@
 #' }
 #'
 #' @importFrom dplyr filter
+#' @importFrom stats setNames
 #'
 #' @export
 
-read.snapshot <- function(file, keep = list()) {
+read.snapshot <- function(file, keep = list(), filter.function = NULL) {
   unknowntype <- setdiff(names(keep), c("model", "scenario", "region", "variable", "unit", "period"))
   if (length(unknowntype) > 0) {
-    stop("Unknown types to be kept: ", toString(unknowntype))
+    stop("Unknown types in 'keep': ", toString(unknowntype))
   }
+  # join if multiple elements with same name exist in list
+  joinelements <- function(v, list) return(setNames(list(unique(unname(unlist(list[names(list) == v])))), v))
+  keep <- do.call(c, lapply(unique(names(keep)), joinelements, list = keep))
 
   # temporary file
   tmpfile <- tempfile(pattern = "data", fileext = ".csv")
@@ -59,14 +66,21 @@ read.snapshot <- function(file, keep = list()) {
   } else {
     file.copy(file, tmpfile, overwrite = TRUE)
   }
+  joinedfilter <- function(data) {
+    for (t in names(keep)) {
+      data <- droplevels(filter(data, .data[[t]] %in% keep[[t]]))
+    }
+    if (is.function(filter.function)) {
+      data <- filter.function(data)
+    }
+    return(data)
+  }
   # read file and do correct filtering
   data <- read.quitte(tmpfile,
                       na.strings = c("UNDF", "NA", "N/A", "n_a", ""),
                       quote = '"',
-                      drop.na = TRUE)
+                      drop.na = TRUE,
+                      filter.function = joinedfilter)
   unlink(tmpfile)
-  for (t in names(keep)) {
-    data <- droplevels(filter(data, .data[[t]] %in% keep[[t]]))
-  }
   return(data)
 }
diff --git a/tests/testthat/test-read.snapshot.R b/tests/testthat/test-read.snapshot.R
@@ -37,7 +37,11 @@ test_that("read.snapshot works", {
     expect_equal(droplevels(dplyr::filter(qe, period %in% p)),
                  read.snapshot(tmpfile, list(period = p)))
   }
-  # test all jointly with last setting
-  expect_equal(droplevels(dplyr::filter(qe, period %in% p, variable %in% v, region %in% r, scenario %in% s)),
-               read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s)))
+  # test all jointly with last setting, test passing of filter.function
+  filter.function <- function(x) droplevels(dplyr::filter(x, period %in% p, variable %in% v, region %in% r, scenario %in% s))
+  snapshotdata <- read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s))
+  expect_equal(filter.function(qe),
+               snapshotdata)
+  expect_equal(read.snapshot(tmpfile, filter.function = filter.function),
+               snapshotdata)
 })