Skip to content

Commit

Permalink
Merge pull request #78 from orichters/read.snapshot
Browse files Browse the repository at this point in the history
Add read.snapshot to load IIASA database snapshots
  • Loading branch information
orichters authored Oct 19, 2023
2 parents ab88750 + 1843108 commit f6f6147
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .buildlibrary
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ValidationKey: '614216128'
ValidationKey: '614424230'
AcceptedWarnings:
- 'Warning: package ''.*'' was built under R version'
- 'Warning: namespace ''.*'' is not available and has been replaced'
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- id: mixed-line-ending

- repo: https://github.com/lorenzwalthert/precommit
rev: v0.3.2.9019
rev: v0.3.2.9021
hooks:
- id: parsable-R
- id: deps-in-desc
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ cff-version: 1.2.0
message: If you use this software, please cite it using the metadata from this file.
type: software
title: 'quitte: Bits and pieces of code to use with quitte-style data frames'
version: 0.3126.1
date-released: '2023-10-18'
version: 0.3127.0
date-released: '2023-10-19'
abstract: A collection of functions for easily dealing with quitte-style data frames,
doing multi-model comparisons and plots.
authors:
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: quitte
Title: Bits and pieces of code to use with quitte-style data frames
Version: 0.3126.1
Date: 2023-10-18
Version: 0.3127.0
Date: 2023-10-19
Authors@R: c(
person("Michaja", "Pehl", , "[email protected]", role = c("aut", "cre")),
person("Nico", "Bauer", , "[email protected]", role = "aut"),
Expand Down Expand Up @@ -53,7 +53,7 @@ Suggests:
knitr,
mip,
rmarkdown,
testthat,
testthat (>= 3.2.0),
tidyverse,
VignetteBuilder:
knitr
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export(quitte2quantiles)
export(quitteSort)
export(read.gdx)
export(read.quitte)
export(read.snapshot)
export(removeColNa)
export(replace_column)
export(replace_column_)
Expand Down
71 changes: 71 additions & 0 deletions R/read.snapshot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#' Reads IAMC-style .csv files obtained as a IIASA snapshot into a quitte data frame,
#' filtering the data. This function is helpful if the csv file is large and R runs out
#' of memory loading it completely. This function requires head, tail and grep on your system.
#' If not supported, use read.quitte().
#'
#' @md
#' @param file Path of single IAMC-style .csv/.mif file
#' @param keep list with quitte columns as names and data points that should be kept.
#'
#' @return A quitte data frame.
#'
#' @author Oliver Richters
#'
#' @examples
#' \dontrun{
#' read.filter.snapshot("snapshot.csv", list(scenario = c("CurPol", "NDC"), region = "World"))
#' }
#'
#' @importFrom dplyr filter
#'
#' @export

read.snapshot <- function(file, keep = list()) {
unknowntype <- setdiff(names(keep), c("model", "scenario", "region", "variable", "unit", "period"))
if (length(unknowntype) > 0) {
stop("Unknown types to be kept: ", toString(unknowntype))
}
testcommand <- c("grep", "head", "tail", "sed")
exitcodes <- suppressWarnings(
sapply(paste(testcommand, '--version'), system,
ignore.stdout = TRUE, ignore.stderr = TRUE))
if (any(0 != exitcodes)) {
stop(paste(paste0('`', testcommand[0 != exitcodes], '`', collapse = ', '),
"are not available system commands, please use 'read.quitte'."))
}

# temporary file
tmpfile <- tempfile(pattern = "data", fileext = ".csv")
if (length(setdiff(names(keep), "period")) > 0) {
# always keep first lines of original file (comments, colnames), grep in the rest
alwayskeep <- 20
system(paste("head -n", alwayskeep, file, ">", tmpfile))
# the goal of the next lines is to grep one after the other through the elements of keep
# keep = list(variable = "GDP|PPP", region = c("World", "FRA")) should get you
# | grep -E '(GDP\|PPP)' | grep -E '(World|FRA)'
# 1. escape | in variable names and do not grep for period
cleanup <- function(x) {
x <- gsub("[^A-Za-z0-9\\| ]", ".", x)
x <- gsub("|", "\\|", x, fixed = TRUE)
}
keepescaped <- lapply(keep[setdiff(names(keep), "period")], cleanup)
# 2. collapse each element with a |
keepcollapsed <- unlist(lapply(keepescaped, paste0, collapse = "|"))
# generate a grep -E statement for each element of keep list
greptext <- paste0(" | grep -E '(", keepcollapsed, ")'", collapse = "")
command <- paste0("tail -n +", (alwayskeep + 1), " ", file, greptext, " >> ", tmpfile)
system(command)
} else {
file.copy(file, tmpfile, overwrite = TRUE)
}
# read file and do correct filtering
data <- read.quitte(tmpfile,
na.strings = c("UNDF", "NA", "N/A", "n_a", ""),
quote = '"',
drop.na = TRUE)
unlink(tmpfile)
for (t in names(keep)) {
data <- droplevels(filter(data, .data[[t]] %in% keep[[t]]))
}
return(data)
}
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Bits and pieces of code to use with quitte-style data frames

R package **quitte**, version **0.3126.1**
R package **quitte**, version **0.3127.0**

[![CRAN status](https://www.r-pkg.org/badges/version/quitte)](https://cran.r-project.org/package=quitte) [![R build status](https://github.com/pik-piam/quitte/workflows/check/badge.svg)](https://github.com/pik-piam/quitte/actions) [![codecov](https://codecov.io/gh/pik-piam/quitte/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/quitte) [![r-universe](https://pik-piam.r-universe.dev/badges/quitte)](https://pik-piam.r-universe.dev/builds)

Expand Down Expand Up @@ -47,7 +47,7 @@ In case of questions / problems please contact Michaja Pehl <michaja.pehl@pik-po

To cite package **quitte** in publications use:

Pehl M, Bauer N, Hilaire J, Levesque A, Luderer G, Schultes A, Dietrich J, Richters O (2023). _quitte: Bits and pieces of code to use with quitte-style data frames_. R package version 0.3126.1, <https://github.com/pik-piam/quitte>.
Pehl M, Bauer N, Hilaire J, Levesque A, Luderer G, Schultes A, Dietrich J, Richters O (2023). _quitte: Bits and pieces of code to use with quitte-style data frames_. R package version 0.3127.0, <URL: https://github.com/pik-piam/quitte>.

A BibTeX entry for LaTeX users is

Expand All @@ -56,7 +56,7 @@ A BibTeX entry for LaTeX users is
title = {quitte: Bits and pieces of code to use with quitte-style data frames},
author = {Michaja Pehl and Nico Bauer and Jérôme Hilaire and Antoine Levesque and Gunnar Luderer and Anselm Schultes and Jan Philipp Dietrich and Oliver Richters},
year = {2023},
note = {R package version 0.3126.1},
note = {R package version 0.3127.0},
url = {https://github.com/pik-piam/quitte},
}
```
23 changes: 23 additions & 0 deletions man/df_variation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 38 additions & 1 deletion man/read.quitte.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/read.snapshot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions tests/testthat/test-read.snapshot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
test_that("read.snapshot works", {
qe <- droplevels(quitteSort(as.quitte(quitte_example_dataAR6, na.rm = TRUE)))
tmpfile <- tempfile(pattern = "data", fileext = ".csv")
write.table(pivot_wider(qe, names_from = "period"),
file = tmpfile, append = FALSE, quote = FALSE, sep = ",",
eol = "\n", na = "", dec = ".", row.names = FALSE,
col.names = TRUE) # mimick IIASA snapshot format
fails <- tryCatch(read.snapshot(tmpfile), error = function(e) { paste(e) })
if (is.character(fails) && length(fails) == 1 && grepl("not available system commands", fails)) {
skip(paste0(gsub("Error in ", "", gsub(", pleas.*", "", fails)), ", skipping tests."))
}
system(paste("sed -i 's/GCAM/\"GCAM\"/g;'", tmpfile))
system(paste("sed -i 's/Delayed transition/\"Delayed transition\"/g;'", tmpfile))
rtests <- list(head(levels(qe$region), 1), head(levels(qe$region), 2))
for (r in rtests) {
expect_equal(droplevels(dplyr::filter(qe, region %in% r)),
read.snapshot(tmpfile, list(region = r)))
}
stests <- list(head(levels(qe$scenario), 1), head(levels(qe$scenario), 2))
for (s in stests) {
expect_equal(droplevels(dplyr::filter(qe, scenario %in% s)),
read.snapshot(tmpfile, list(scenario = s)))
}
vtests <- list(head(levels(qe$variable), 1), head(levels(qe$variable), 2), levels(qe$variable))
for (v in vtests) {
expect_equal(droplevels(dplyr::filter(qe, variable %in% v)),
read.snapshot(tmpfile, list(variable = v)))
}
ptests <- list(head(unique(qe$period), 1), head(unique(qe$period, 2)), unique(qe$period))
for (p in ptests) {
expect_equal(droplevels(dplyr::filter(qe, period %in% p)),
read.snapshot(tmpfile, list(period = p)))
}
# test all jointly with last setting
expect_equal(droplevels(dplyr::filter(qe, period %in% p, variable %in% v, region %in% r, scenario %in% s)),
read.snapshot(tmpfile, list(period = p, variable = v, region = r, scenario = s)))
})

0 comments on commit f6f6147

Please sign in to comment.