-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #78 from orichters/read.snapshot
Add read.snapshot to load IIASA database snapshots
- Loading branch information
Showing
11 changed files
with
214 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
Type: Package | ||
Package: quitte | ||
Title: Bits and pieces of code to use with quitte-style data frames | ||
Version: 0.3126.1 | ||
Date: 2023-10-18 | ||
Version: 0.3127.0 | ||
Date: 2023-10-19 | ||
Authors@R: c( | ||
person("Michaja", "Pehl", , "[email protected]", role = c("aut", "cre")), | ||
person("Nico", "Bauer", , "[email protected]", role = "aut"), | ||
|
@@ -53,7 +53,7 @@ Suggests: | |
knitr, | ||
mip, | ||
rmarkdown, | ||
testthat, | ||
testthat (>= 3.2.0), | ||
tidyverse, | ||
VignetteBuilder: | ||
knitr | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#' Reads IAMC-style .csv files obtained as a IIASA snapshot into a quitte data frame, | ||
#' filtering the data. This function is helpful if the csv file is large and R runs out | ||
#' of memory loading it completely. This function requires head, tail and grep on your system. | ||
#' If not supported, use read.quitte(). | ||
#' | ||
#' @md | ||
#' @param file Path of single IAMC-style .csv/.mif file | ||
#' @param keep list with quitte columns as names and data points that should be kept. | ||
#' | ||
#' @return A quitte data frame. | ||
#' | ||
#' @author Oliver Richters | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' read.filter.snapshot("snapshot.csv", list(scenario = c("CurPol", "NDC"), region = "World")) | ||
#' } | ||
#' | ||
#' @importFrom dplyr filter | ||
#' | ||
#' @export | ||
|
||
read.snapshot <- function(file, keep = list()) { | ||
unknowntype <- setdiff(names(keep), c("model", "scenario", "region", "variable", "unit", "period")) | ||
if (length(unknowntype) > 0) { | ||
stop("Unknown types to be kept: ", toString(unknowntype)) | ||
} | ||
testcommand <- c("grep", "head", "tail", "sed") | ||
exitcodes <- suppressWarnings( | ||
sapply(paste(testcommand, '--version'), system, | ||
ignore.stdout = TRUE, ignore.stderr = TRUE)) | ||
if (any(0 != exitcodes)) { | ||
stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '), | ||
"are not available system commands, please use 'read.quitte'.")) | ||
} | ||
|
||
# temporary file | ||
tmpfile <- tempfile(pattern = "data", fileext = ".csv") | ||
if (length(setdiff(names(keep), "period")) > 0) { | ||
# always keep first lines of original file (comments, colnames), grep in the rest | ||
alwayskeep <- 20 | ||
system(paste("head -n", alwayskeep, file, ">", tmpfile)) | ||
# the goal of the next lines is to grep one after the other through the elements of keep | ||
# keep = list(variable = "GDP|PPP", region = c("World", "FRA")) should get you | ||
# | grep -E '(GDP\|PPP)' | grep -E '(World|FRA)' | ||
# 1. escape | in variable names and do not grep for period | ||
cleanup <- function(x) { | ||
x <- gsub("[^A-Za-z0-9\\| ]", ".", x) | ||
x <- gsub("|", "\\|", x, fixed = TRUE) | ||
} | ||
keepescaped <- lapply(keep[setdiff(names(keep), "period")], cleanup) | ||
# 2. collapse each element with a | | ||
keepcollapsed <- unlist(lapply(keepescaped, paste0, collapse = "|")) | ||
# generate a grep -E statement for each element of keep list | ||
greptext <- paste0(" | grep -E '(", keepcollapsed, ")'", collapse = "") | ||
command <- paste0("tail -n +", (alwayskeep + 1), " ", file, greptext, " >> ", tmpfile) | ||
system(command) | ||
} else { | ||
file.copy(file, tmpfile, overwrite = TRUE) | ||
} | ||
# read file and do correct filtering | ||
data <- read.quitte(tmpfile, | ||
na.strings = c("UNDF", "NA", "N/A", "n_a", ""), | ||
quote = '"', | ||
drop.na = TRUE) | ||
unlink(tmpfile) | ||
for (t in names(keep)) { | ||
data <- droplevels(filter(data, .data[[t]] %in% keep[[t]])) | ||
} | ||
return(data) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
test_that("read.snapshot works", { | ||
qe <- droplevels(quitteSort(as.quitte(quitte_example_dataAR6, na.rm = TRUE))) | ||
tmpfile <- tempfile(pattern = "data", fileext = ".csv") | ||
write.table(pivot_wider(qe, names_from = "period"), | ||
file = tmpfile, append = FALSE, quote = FALSE, sep = ",", | ||
eol = "\n", na = "", dec = ".", row.names = FALSE, | ||
col.names = TRUE) # mimick IIASA snapshot format | ||
fails <- tryCatch(read.snapshot(tmpfile), error = function(e) { paste(e) }) | ||
if (is.character(fails) && length(fails) == 1 && grepl("not available system commands", fails)) { | ||
skip(paste0(gsub("Error in ", "", gsub(", pleas.*", "", fails)), ", skipping tests.")) | ||
} | ||
system(paste("sed -i 's/GCAM/\"GCAM\"/g;'", tmpfile)) | ||
system(paste("sed -i 's/Delayed transition/\"Delayed transition\"/g;'", tmpfile)) | ||
rtests <- list(head(levels(qe$region), 1), head(levels(qe$region), 2)) | ||
for (r in rtests) { | ||
expect_equal(droplevels(dplyr::filter(qe, region %in% r)), | ||
read.snapshot(tmpfile, list(region = r))) | ||
} | ||
stests <- list(head(levels(qe$scenario), 1), head(levels(qe$scenario), 2)) | ||
for (s in stests) { | ||
expect_equal(droplevels(dplyr::filter(qe, scenario %in% s)), | ||
read.snapshot(tmpfile, list(scenario = s))) | ||
} | ||
vtests <- list(head(levels(qe$variable), 1), head(levels(qe$variable), 2), levels(qe$variable)) | ||
for (v in vtests) { | ||
expect_equal(droplevels(dplyr::filter(qe, variable %in% v)), | ||
read.snapshot(tmpfile, list(variable = v))) | ||
} | ||
ptests <- list(head(unique(qe$period), 1), head(unique(qe$period, 2)), unique(qe$period)) | ||
for (p in ptests) { | ||
expect_equal(droplevels(dplyr::filter(qe, period %in% p)), | ||
read.snapshot(tmpfile, list(period = p))) | ||
} | ||
# test all jointly with last setting | ||
expect_equal(droplevels(dplyr::filter(qe, period %in% p, variable %in% v, region %in% r, scenario %in% s)), | ||
read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s))) | ||
}) |