From f499175ed09a4753f02cdd42a55d9a8e298a92c1 Mon Sep 17 00:00:00 2001 From: orichters Date: Wed, 28 Feb 2024 17:08:13 +0100 Subject: [PATCH 1/5] automatically check export summations --- CHANGELOG.md | 2 + config/default.cfg | 3 +- .../output/single/checkProjectSummations.R | 37 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 scripts/output/single/checkProjectSummations.R diff --git a/CHANGELOG.md b/CHANGELOG.md index 346ba00b7..a8b18c320 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - **37_industry** add process-based steel model as alternative to CES-tree branch - **47_regipol** add support for delaying quantity targets and improving regional emission tax convergence - **core** change of preference parameters and associated computation of interest rates/mark ups +- **scripts** add script to automatically check project summations from piamInterfaces + [[#1587](https://github.com/remindmodel/remind/pull/1587)] ### fixed - fixed weights of energy carriers in `pm_IndstCO2Captured` diff --git a/config/default.cfg b/config/default.cfg index 7c5879096..a30643c8e 100644 --- a/config/default.cfg +++ b/config/default.cfg @@ -116,7 +116,8 @@ cfg$logoption <- 2 # Just list the name of the output scripts that should be used by output.R # At the moment there are several R-scripts located in scripts/output/ -cfg$output <- c("reporting","reportCEScalib","rds_report","fixOnRef") #"ar6Climate","emulator" +cfg$output <- c("reporting","reportCEScalib","rds_report","fixOnRef","checkProjectSummations") +# "ar6Climate","validation","emulator","reportCEScalib","validationSummary","dashboard" # Set the format for the results folder, type string :date: in order to use the current time stamp in the folder name (e.g. "results:date:") use :title: to use the current title in the folder name cfg$results_folder <- "output/:title::date:" diff --git a/scripts/output/single/checkProjectSummations.R b/scripts/output/single/checkProjectSummations.R new file mode 100644 index 000000000..86f0ed797 --- /dev/null +++ b/scripts/output/single/checkProjectSummations.R @@ -0,0 +1,37 @@ +library(piamInterfaces) +library(quitte) +suppressPackageStartupMessages(library(tidyverse)) + +if(! exists("source_include")) { + # Define arguments that can be read from command line + outputdir <- "." + lucode2::readArgs("outputdir") +} + +scen <- lucode2::getScenNames(outputdir) +mif <- file.path(outputdir, paste0("REMIND_generic_", scen, ".mif")) + +stopmessage <- NULL + +absDiff <- 0.00001 +relDiff <- 0.01 + +for (template in c("AR6", "NAVIGATE")) { + + d <- generateIIASASubmission(mif, outputDirectory = NULL, logFile = NULL, mapping = template, checkSummation = FALSE) + failing <- d %>% + checkSummations(template = template, summationsFile = template, logFile = NULL, dataDumpFile = NULL, + absDiff = absDiff, relDiff = relDiff) %>% + filter(abs(diff) >= absDiff, abs(reldiff) >= relDiff) %>% + df_variation() %>% + droplevels() + + if (nrow(failing) > 0) { + stopmessage <- c(stopmessage, + paste0("\nThe following variables do not satisfy the ", template, " summation checks:"), + paste("\n-", unique(failing$variable), collapse = "")) + } +} +if (length(stopmessage) > 0) { + stop("Failing summation checks, see above.", stopmessage) +} From e28b0927a0463f583c08c130ff36d76a0fc90716 Mon Sep 17 00:00:00 2001 From: orichters Date: Tue, 5 Mar 2024 17:07:09 +0100 Subject: [PATCH 2/5] fix output script bugs, cleanup default.cfg description of cfgoutput --- DESCRIPTION | 2 +- config/default.cfg | 9 +++-- modules/21_tax/on/input/files | 1 - output.R | 24 +++++++++----- scripts/output/single/ar6Climate.R | 2 ++ .../output/single/checkProjectSummations.R | 19 +++++++---- scripts/output/single/fixOnRef.R | 33 ++++++++++++------- 7 files changed, 57 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 56ea819de..66c1152ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Imports: nleqslv, optparse, piamenv (>= 0.4.0), - piamInterfaces (>= 0.12.18), + piamInterfaces (>= 0.13.12), plotly, purrr, quitte (>= 0.3128.4), diff --git a/config/default.cfg b/config/default.cfg index a30643c8e..de99bbcf3 100644 --- a/config/default.cfg +++ b/config/default.cfg @@ -114,10 +114,13 @@ cfg$pythonEnabled <- "off" # def <- off # Should REMIND output be shown in console? (0:no, 2:logfile, 3:yes) cfg$logoption <- 2 -# Just list the name of the output scripts that should be used by output.R -# At the moment there are several R-scripts located in scripts/output/ +# List the name of output scripts from ./scripts/output/ to be used by output.R +# You can adjust that via an 'output' column in your scenario_config. +# To overwrite, enter 'reporting,whatever'. To add, enter 'cfg$output,additionalreporting'. cfg$output <- c("reporting","reportCEScalib","rds_report","fixOnRef","checkProjectSummations") -# "ar6Climate","validation","emulator","reportCEScalib","validationSummary","dashboard" + +# automatically fix remaining differences between run and path_ref +cfg$fixOnRefAuto <- FALSE # Set the format for the results folder, type string :date: in order to use the current time stamp in the folder name (e.g. "results:date:") use :title: to use the current title in the folder name cfg$results_folder <- "output/:title::date:" diff --git a/modules/21_tax/on/input/files b/modules/21_tax/on/input/files index 27d1b6ae5..14cf390f2 100644 --- a/modules/21_tax/on/input/files +++ b/modules/21_tax/on/input/files @@ -6,4 +6,3 @@ f21_tau_pe_sub.cs4r f21_max_pe_sub.cs4r f21_tax_convergence.cs4r p21_tau_xpres_tax.cs4r -f21_vehiclesSubsidies.cs4r diff --git a/output.R b/output.R index 946901025..f853820b9 100755 --- a/output.R +++ b/output.R @@ -92,10 +92,14 @@ choose_slurmConfig_output <- function(output) { if (!isSlurmAvailable()) return("direct") - # Modify slurm options for ar6 reporting, since we want to run MAGICC in parallel and we'll need a lot of memory - if ("ar6Climate" %in% output) slurm_options <- paste(slurm_options[1:3], "--tasks-per-node=12 --mem=32000") - # reporting.R, in particular remind2::convGDX2MIF, requires at least --mem=8000 of memory - if ("reporting" %in% output) slurm_options <- grep("--mem=[0-9]*[0-9]{3}", slurm_options, value = TRUE) + # Modify slurm options for reporting options that run in parallel (MAGICC) or need more memory + if ("ar6Climate" %in% output) { + slurm_options <- paste(slurm_options[1:3], "--tasks-per-node=12 --mem=32000") + } else if ("nashAnalysis" %in% output) { + slurm_options <- paste(slurm_options[1:3], "--mem=32000") + } else if ("reporting" %in% output) { + slurm_options <- grep("--mem=[0-9]*[0-9]{3}", slurm_options, value = TRUE) + } if (length(slurm_options) == 1) { return(slurm_options[[1]]) @@ -275,12 +279,13 @@ if (comp %in% c("comparison", "export")) { } else { message("\nStarting output generation for ", outputdir, "\n") name <- paste0(output, ".R") + scriptsfound <- file.exists(paste0("scripts/output/single/", name)) if ("--test" %in% flags) { message("Test mode, not executing scripts/output/single/", paste(name, collapse = ", ")) - } else if (all(file.exists(paste0("scripts/output/single/", name)))) { + } else { if (slurmConfig == "direct") { # execute output script directly (without sending it to slurm) - for (n in name) { + for (n in name[scriptsfound]) { message("Executing ", n) tmp.env <- new.env() tmp.error <- try(sys.source(paste0("scripts/output/single/", n), envir = tmp.env)) @@ -302,9 +307,10 @@ if (comp %in% c("comparison", "export")) { } # finished message("\nFinished ", ifelse(slurmConfig == "direct", "", "starting job for "), "output generation for ", outputdir, "!\n") - } else { - warning("Skipping ", outputdir, " because some output script selected could not be found ", - "in scripts/output/single: ", name[! name %in% dir("scripts/output/single")]) + } + if (any(! scriptsfound)) { + warning("Skipping those output script selected that could not be found in scripts/output/single: ", + name[! scriptsfound]) } } diff --git a/scripts/output/single/ar6Climate.R b/scripts/output/single/ar6Climate.R index ad1ddd5d9..5ddd4feb2 100644 --- a/scripts/output/single/ar6Climate.R +++ b/scripts/output/single/ar6Climate.R @@ -224,6 +224,8 @@ climateAssessmentData <- read.quitte(climateAssessmentOutput) %>% interpolate_missing_periods(usePeriods, expand.values = FALSE) %>% write.mif(remindReportingFile, append = TRUE) +deletePlus(remind_reporting_file, writemif = TRUE) + logmsg <- paste0( date(), " postprocessing done! Results appended to REMIND mif '", remindReportingFile, "'\n", "ar6Climate.R finished\n" diff --git a/scripts/output/single/checkProjectSummations.R b/scripts/output/single/checkProjectSummations.R index 86f0ed797..4cea52626 100644 --- a/scripts/output/single/checkProjectSummations.R +++ b/scripts/output/single/checkProjectSummations.R @@ -16,8 +16,16 @@ stopmessage <- NULL absDiff <- 0.00001 relDiff <- 0.01 -for (template in c("AR6", "NAVIGATE")) { +# failing <- mif %>% +# checkSummations(dataDumpFile = NULL, outputDirectory = NULL, summationsFile = "extractVariableGroups", +# absDiff = 5e-7, relDiff = 1e-8) %>% +# filter(abs(diff) >= 5e-7, abs(reldiff) >= 1e-8) %>% +# df_variation() %>% +# droplevels() +# if (nrow(failing) > 0) stopmessage <- c(stopmessage, "extractVariableGroups") +for (template in c("AR6", "NAVIGATE")) { + message("\n### Check project summations for ", template) d <- generateIIASASubmission(mif, outputDirectory = NULL, logFile = NULL, mapping = template, checkSummation = FALSE) failing <- d %>% checkSummations(template = template, summationsFile = template, logFile = NULL, dataDumpFile = NULL, @@ -26,12 +34,9 @@ for (template in c("AR6", "NAVIGATE")) { df_variation() %>% droplevels() - if (nrow(failing) > 0) { - stopmessage <- c(stopmessage, - paste0("\nThe following variables do not satisfy the ", template, " summation checks:"), - paste("\n-", unique(failing$variable), collapse = "")) - } + if (nrow(failing) > 0) stopmessage <- c(stopmessage, template) } + if (length(stopmessage) > 0) { - stop("Failing summation checks, see above.", stopmessage) + stop("Failing summation checks for ", paste(stopmessage, collapse = ", "), ", see above.") } diff --git a/scripts/output/single/fixOnRef.R b/scripts/output/single/fixOnRef.R index e5c00ac4e..f73d21231 100644 --- a/scripts/output/single/fixOnRef.R +++ b/scripts/output/single/fixOnRef.R @@ -37,7 +37,7 @@ findRefMif <- function(outputdir, envi) { return(refmif) } -fixMAGICC <- function(d, dref, startyear) { +fixMAGICC <- function(d, dref, startyear, scenario) { magiccgrep <- "^Forcing|^Temperature|^Concentration" message("Fixing MAGICC6 data before ", startyear) dnew <- @@ -46,7 +46,9 @@ fixMAGICC <- function(d, dref, startyear) { .data$period < startyear), filter(d, ! grepl(magiccgrep, .data$variable) | .data$period >= startyear) - ) + ) %>% + mutate(scenario = factor(scenario)) %>% + droplevels() return(dnew) } @@ -77,21 +79,28 @@ fixOnMif <- function(outputdir) { refname <- basename(dirname(refmif)) d <- quitte::as.quitte(mifs) dref <- quitte::as.quitte(refmif) - d <- fixMAGICC(d, dref, startyear) + d <- fixMAGICC(d, dref, startyear, title) failfile <- file.path(outputdir, "log_fixOnRef.csv") - fixeddata <- piamInterfaces::fixOnRef(d, dref, ret = "fixed", startyear = startyear, failfile = failfile) + fixeddata <- piamInterfaces::fixOnRef(d, dref, ret = "TRUE_or_fixed", startyear = startyear, failfile = failfile) - if (exists("flags") && isTRUE("--interactive" %in% flags)) { - message("\nDo you want to fix that by overwriting ", title, " mif with reference run ", refname, " for t < ", startyear, "?\nType: y/N") + update <- paste0("MAGICC data. ", if (! isTRUE(fixeddata)) "Run output.R -> single -> fixOnRef to fix the rest.") + if (! isTRUE(fixeddata) && isTRUE(envi$cfg$fixOnRefAuto)) { + d <- fixeddata + update <- "data from reference run because cfg$fixOnRefAuto=TRUE." + } else if (! isTRUE(fixeddata) && exists("flags") && isTRUE("--interactive" %in% flags)) { + message("\nDo you want to fix that by overwriting ", title, " mif with reference run ", + refname, " for t < ", startyear, "?\nType: y/N") if (tolower(gms::getLine()) %in% c("y", "yes")) { - message("Updating ", mifs[[1]]) - tmpfile <- paste0(mifs[[1]], "fixOnMif") - quitte::write.mif(fixeddata, tmpfile) - file.rename(tmpfile, mifs[[1]]) - remind2::deletePlus(mifs[[1]], writemif = TRUE) - message("Keep in mind to update the runs that use this as `path_gdx_ref` as well.") + d <- fixeddata + update <- "data from reference run." } } + message("Updating ", mifs[[1]], " with ", update) + tmpfile <- paste0(mifs[[1]], "fixOnMif") + quitte::write.mif(d, tmpfile) + file.rename(tmpfile, mifs[[1]]) + remind2::deletePlus(mifs[[1]], writemif = TRUE) + message("Keep in mind to update the runs that use this as `path_gdx_ref` as well.") return(NULL) } From a4e40d488d13cfbc39b40f631e194f2c11abe18e Mon Sep 17 00:00:00 2001 From: orichters Date: Wed, 6 Mar 2024 17:58:42 +0100 Subject: [PATCH 3/5] add regional summation check for AR6 + NAVIGATE variables --- DESCRIPTION | 2 +- .../output/single/checkProjectSummations.R | 30 +++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 66c1152ba..b7932806f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,7 +51,7 @@ Imports: nleqslv, optparse, piamenv (>= 0.4.0), - piamInterfaces (>= 0.13.12), + piamInterfaces (>= 0.13.14), plotly, purrr, quitte (>= 0.3128.4), diff --git a/scripts/output/single/checkProjectSummations.R b/scripts/output/single/checkProjectSummations.R index 4cea52626..9d645fd6b 100644 --- a/scripts/output/single/checkProjectSummations.R +++ b/scripts/output/single/checkProjectSummations.R @@ -10,12 +10,20 @@ if(! exists("source_include")) { scen <- lucode2::getScenNames(outputdir) mif <- file.path(outputdir, paste0("REMIND_generic_", scen, ".mif")) +mifdata <- as.quitte(mif) stopmessage <- NULL absDiff <- 0.00001 relDiff <- 0.01 +# to be skipped for regional aggregation as they are no extensive variables +varGrep <- paste0("^Tech|CES Price|^Price|^Internal|[Pp]er[- ][Cc]apita|per-GDP|Specific|Interest Rate|", + "Intensity|Productivity|Average Extraction Costs|^PVP|Other Fossil Adjusted|Projected|[Ss]hare") +unitList <- c("%", "Percent", "percent", "% pa", "1", "share", "USD/capita", "index", "kcal/cap/day", + "cm/capita", "kcal/capita/day", "unitless", "kcal/kcal", "m3/ha", "tC/tC", "tC/ha", "years", + "share of total land", "tDM/capita/yr", "US$05 PPP/cap/yr", "t DM/ha/yr", "US$2010/kW", "US$2010/kW/yr") + # failing <- mif %>% # checkSummations(dataDumpFile = NULL, outputDirectory = NULL, summationsFile = "extractVariableGroups", # absDiff = 5e-7, relDiff = 1e-8) %>% @@ -26,15 +34,31 @@ relDiff <- 0.01 for (template in c("AR6", "NAVIGATE")) { message("\n### Check project summations for ", template) - d <- generateIIASASubmission(mif, outputDirectory = NULL, logFile = NULL, mapping = template, checkSummation = FALSE) - failing <- d %>% + d <- generateIIASASubmission(mifdata, outputDirectory = NULL, logFile = NULL, + mapping = template, checkSummation = FALSE) + failvars <- d %>% checkSummations(template = template, summationsFile = template, logFile = NULL, dataDumpFile = NULL, absDiff = absDiff, relDiff = relDiff) %>% filter(abs(diff) >= absDiff, abs(reldiff) >= relDiff) %>% df_variation() %>% droplevels() - if (nrow(failing) > 0) stopmessage <- c(stopmessage, template) + csregi <- d %>% + filter(! .data$unit %in% unitList, ! grepl(varGrep, .data$variable)) %>% + checkSummationsRegional() %>% + rename(World = "total") %>% + droplevels() + checkyear <- 2050 + failregi <- csregi %>% + filter(abs(.data$reldiff) > 0.5, abs(.data$diff) > 0.00015, period == checkyear) %>% + filter(! grepl("^Emissions\\|", .data$variable)) %>% # because World includes bunkers, but regions not + select(-"model", -"scenario") + if (nrow(failregi) > 0) { + message("For those variables, the sum of regional values does not match the World value in 2050:") + failregi %>% piamInterfaces::niceround() %>% print(n = 1000) + } + + if (nrow(failvars) > 0 || nrow(failregi) > 0) stopmessage <- c(stopmessage, template) } if (length(stopmessage) > 0) { From e6d47b68fbeba84255d4db5f21a3b4974abaec7a Mon Sep 17 00:00:00 2001 From: orichters Date: Wed, 6 Mar 2024 18:37:43 +0100 Subject: [PATCH 4/5] improve matching of Emi vars --- scripts/output/single/checkProjectSummations.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/output/single/checkProjectSummations.R b/scripts/output/single/checkProjectSummations.R index 9d645fd6b..421caee68 100644 --- a/scripts/output/single/checkProjectSummations.R +++ b/scripts/output/single/checkProjectSummations.R @@ -24,6 +24,14 @@ unitList <- c("%", "Percent", "percent", "% pa", "1", "share", "USD/capita", "in "cm/capita", "kcal/capita/day", "unitless", "kcal/kcal", "m3/ha", "tC/tC", "tC/ha", "years", "share of total land", "tDM/capita/yr", "US$05 PPP/cap/yr", "t DM/ha/yr", "US$2010/kW", "US$2010/kW/yr") +# emi variables where bunkers are added only to the World level +gases <- c("BC", "CO", "CO2", "Kyoto Gases", "NOx", "OC", "Sulfur", "VOC") +vars <- c("", "|Energy", "|Energy Demand|Transportation", "|Energy and Industrial Processes", + "|Energy|Demand", "|Energy|Demand|Transportation") +gasvars <- expand.grid(gases, vars, stringsAsFactors = FALSE) +bunkervars <- unique(sort(paste0("Emissions|", gasvars$Var1, gasvars$Var2))) + + # failing <- mif %>% # checkSummations(dataDumpFile = NULL, outputDirectory = NULL, summationsFile = "extractVariableGroups", # absDiff = 5e-7, relDiff = 1e-8) %>% @@ -51,11 +59,14 @@ for (template in c("AR6", "NAVIGATE")) { checkyear <- 2050 failregi <- csregi %>% filter(abs(.data$reldiff) > 0.5, abs(.data$diff) > 0.00015, period == checkyear) %>% - filter(! grepl("^Emissions\\|", .data$variable)) %>% # because World includes bunkers, but regions not + filter(! .data$variable %in% bunkervars) %>% select(-"model", -"scenario") if (nrow(failregi) > 0) { - message("For those variables, the sum of regional values does not match the World value in 2050:") + message("For those ", template, " variables, the sum of regional values does not match the World value in 2050:") failregi %>% piamInterfaces::niceround() %>% print(n = 1000) + print(paste0(failregi$variable, collapse = ", ")) + } else { + message("Regional summation checks are fine.") } if (nrow(failvars) > 0 || nrow(failregi) > 0) stopmessage <- c(stopmessage, template) From 4a2d4211b0d06f906caf83fa5f0d12aa44d80ca0 Mon Sep 17 00:00:00 2001 From: orichters Date: Thu, 7 Mar 2024 10:07:11 +0100 Subject: [PATCH 5/5] fix policyCosts reporting after remind2 PR#465 --- scripts/output/comparison/policyCosts.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/output/comparison/policyCosts.R b/scripts/output/comparison/policyCosts.R index 2c7df9c2c..ca166c594 100644 --- a/scripts/output/comparison/policyCosts.R +++ b/scripts/output/comparison/policyCosts.R @@ -297,7 +297,7 @@ if (!"3" %in% special_requests) { tmp_policy_costs <- tmp_policy_costs_magpie %>% lapply(quitte::as.quitte) %>% - lapply(select, region, period, data, value) + lapply(select, "region", "period", "variable", "value") # Combine results in single tibble, with names like "Pol_w.r.t_Ref" policy_costs <- rename(tmp_policy_costs[[1]], !!sym(paste0(pol_names[1], "_w.r.t_",ref_names[1])):=value) @@ -305,13 +305,13 @@ if (!"3" %in% special_requests) { for (i in 2:length(tmp_policy_costs)) { policy_costs <- tmp_policy_costs[[i]] %>% rename(!!sym(paste0(pol_names[i], "_w.r.t_", ref_names[i])) := value) %>% - left_join(policy_costs, tmp_policy_costs[[i]], by = c("region", "period", "data")) + left_join(policy_costs, tmp_policy_costs[[i]], by = c("region", "period", "variable")) } } # and do some pivotting policy_costs <- policy_costs %>% pivot_longer(cols = matches(".*w\\.r\\.t.*"), names_to = "Model Output") %>% - pivot_wider(names_from = data) + pivot_wider(names_from = "variable") # By default, plots are only created until 2100 if (!"4" %in% special_requests) {