From 2f8853e3718f04b732619b0fbbfa8c0d3fe85c6c Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Thu, 20 Jun 2024 16:32:53 +0200 Subject: [PATCH 1/7] seperate function get_data --- R/get_data.R | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 R/get_data.R diff --git a/R/get_data.R b/R/get_data.R new file mode 100644 index 0000000..9e372c7 --- /dev/null +++ b/R/get_data.R @@ -0,0 +1,96 @@ +#' @title get_data +#' +#' @description +#' This package is intended to be used inside a docker container to parse +#' data from "/in/input.json" with the help of +#' the input configuration in "/src/tool.yml". +#' The data can be parsed by file extension. +#' If return_data_paths is set to FALSE, the function will return the paths +#' to the data files. If set to TRUE, the function will try to parse the data. +#' +#' @return A list of the parsed data specified in "/in/input.json" +#' @examples +#' \dontrun{ +#' data <- get_data() +#' } +#' @export +#' @importFrom jsonlite read_json +#' @importFrom yaml read_yaml +#' @importFrom tools file_ext +#' @importFrom utils read.csv read.table + +get_data <- function(return_data_paths = FALSE) { + # get the input file env variable + PARAM_FILE <- Sys.getenv(x = "PARAM_FILE") + if (PARAM_FILE == "") { + PARAM_FILE <- "/in/input.json" + } + + # get the config file env variable + CONF_FILE <- Sys.getenv(x = "CONF_FILE") + if (CONF_FILE == "") { + CONF_FILE <- "/src/tool.yml" + } + + # get the tool name + TOOL <- tolower(Sys.getenv(x = "TOOL_RUN")) + + # parse the json + data <- jsonlite::read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]][["data"]] + + # parse the config yaml, access data section + config <- yaml::read_yaml(CONF_FILE) + data_config <- config$tools[[TOOL]]$data + + # initialize list for data to return + data_return <- list() + + # parse data + for (name in names(data)) { + # get the file path + path <- data[[name]] + + # get the file extension + ext <- tolower(tools::file_ext(path)) + # add a dot at the beginning + ext <- paste(".", ext, sep = "") + + # check if the dataset is given with the correct extension + if ("extension" %in% names(data_config[[name]])) { + if (!(ext %in% data_config[[name]]$extension)) { + stop(paste("The file extension '", ext, "' is not contained in [", paste(data_config[[name]]$extension, collapse = " "), "]", sep = "")) + } + } + + # check if the file exists + if (!file.exists(path)) { + stop(paste("The file '", path, "' does not exist.")) + } + + # do not parse files by extension if parse_files is FALSE + if (return_data_paths) { + data_return[[name]] <- path + next + } + + # .dat + if (ext == ".dat") { + parsed <- as.matrix(read.table(path)) # matrix files: no header, no index + dimnames(parsed) <- NULL + } + + # .csv + else if (ext == ".csv") { + parsed <- read.csv(path) + } + + # not supported to parse + else { + parsed <- path + } + + # append value to parsed_params + data_return[[name]] <- parsed + } + return(data_return) +} From 4baf6235b580b75e25df5caa33e07c3ced4517af Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Thu, 20 Jun 2024 16:35:18 +0200 Subject: [PATCH 2/7] small refactoring --- R/get_data.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/get_data.R b/R/get_data.R index 9e372c7..3e8dfd0 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -35,12 +35,11 @@ get_data <- function(return_data_paths = FALSE) { # get the tool name TOOL <- tolower(Sys.getenv(x = "TOOL_RUN")) - # parse the json + # parse the json, access data section data <- jsonlite::read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]][["data"]] # parse the config yaml, access data section - config <- yaml::read_yaml(CONF_FILE) - data_config <- config$tools[[TOOL]]$data + data_config <- yaml::read_yaml(CONF_FILE)$tools[[TOOL]]$data # initialize list for data to return data_return <- list() From 1ff62511fc5115e7cef9834505afb7b6a92bcd38 Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Fri, 21 Jun 2024 10:10:58 +0200 Subject: [PATCH 3/7] seperate parameters section --- R/get_parameters.R | 77 +++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/R/get_parameters.R b/R/get_parameters.R index 2742ffa..62254a8 100644 --- a/R/get_parameters.R +++ b/R/get_parameters.R @@ -17,10 +17,10 @@ #' @importFrom utils read.csv read.table get_parameters <- function() { - # get the parameter file env variable + # get the input file env variable PARAM_FILE <- Sys.getenv(x = "PARAM_FILE") if (PARAM_FILE == "") { - PARAM_FILE <- "/in/parameters.json" + PARAM_FILE <- "/in/input.json" } # get the config file env variable @@ -32,13 +32,11 @@ get_parameters <- function() { # get the tool name TOOL <- tolower(Sys.getenv(x = "TOOL_RUN")) - # parse the json - params <- read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]] - params_names <- names(params) + # parse the json, access parameters section + params <- jsonlite::read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]][["parameters"]] - # parse the config yaml, directly access parameters section - config <- read_yaml(CONF_FILE) - params_config <- config$tools[[TOOL]]$parameters + # parse the config yaml, access parameters section + params_config <- yaml::read_yaml(CONF_FILE)$tools[[TOOL]]$parameters # get all names from params_config that have a default value and are not optional to parse default values filtered_config_names <- sapply(names(params_config), function(name) { @@ -54,7 +52,7 @@ get_parameters <- function() { filtered_config_names <- filtered_config_names[!sapply(filtered_config_names, is.null)] # combine the two lists of parameter names - params2parse <- unique(c(params_names, names(filtered_config_names))) + params2parse <- unique(c(names(params), names(filtered_config_names))) # initiate list to save parsed parameters parsed_params <- list() @@ -62,36 +60,40 @@ get_parameters <- function() { # parse parameters for (name in params2parse) { # type of the parameter - t <- params_config[[name]][["type"]] + type <- params_config[[name]][["type"]] # get the value from parameters.json if (name %in% names(params)) { - val <- params[[name]] + val <- params[[name]] # if parameter is not included in parameters.json, go for default value in config } else { - val <- params_config[[name]]$default + val <- params_config[[name]]$default } # handle value specific types - if (t == "enum") { + + # enumeration + if (type == "enum") { if (!(val %in% params_config[[name]]$values)) { stop(paste("The value '", val, "' is not contained in [", paste(params_config[[name]]$values, collapse = " "), "]", sep = "")) } - } else if (t %in% c("datetime", "date", "time")) { + } + + # datetime + else if (type %in% c("datetime", "date", "time")) { val <- as.POSIXct(val) - } else if (t == "file") { - # get the ext and use the corresponding reader - ext <- tolower(file_ext(val)) - if (ext == "dat") { - # matrix files: no header, no index - val <- as.matrix(read.table(val)) - dimnames(val) <- NULL - } else if (ext == "csv") { - val <- read.csv(val) + } + + # boolean + else if (type %in% c("boolean", "bool")) { + if (!isTRUE(val) && !isFALSE(val)) { + stop(paste("The value '", val, "' is not a boolean value.")) } - } else if (tolower(t) %in% c("integer", "float")) { - print(t) + } + + # integer and float + else if (tolower(type) %in% c("integer", "float")) { # check for min in params_config if ("min" %in% names(params_config[[name]])) { min <- params_config[[name]]$min @@ -105,25 +107,30 @@ get_parameters <- function() { max <- NULL } - # check whether val is in min and max range + # check if val is in min and max range if (!is.null(min) && !is.null(max)) { # check if min is smaller than or equal to max if (max <= min) { stop(sprintf("There is an error in your parameter configuration / tool.yml, as the given minimum value (%s) for parameter '%s' is higher than or equal to the maximum number (%s).", min, name, max)) + # check if val is between min and max } else if (!(min <= val && val <= max)) { stop(sprintf("%s is %s, but it must be between %s and %s.", name, val, min, max)) } - # check if val is greater than or equal to min - } else if (!is.null(min) && !(min <= val)) { - stop(sprintf("%s is %s, but must be higher than or equal to %s.", name, val, min)) - # check if val is smaller than or equal to max - } else if (!is.null(max) && !(val <= max)) { - stop(sprintf("%s is %s, but must be smaller than or equal to %s.", name, val, max)) - # return val if not violating min-max constraints - } else { - parsed_params[[name]] <- val + # check if val is greater than or equal to min + } else if (!is.null(min) && !(min <= val)) { + stop(sprintf("%s is %s, but must be higher than or equal to %s.", name, val, min)) + # check if val is smaller than or equal to max + } else if (!is.null(max) && !(val <= max)) { + stop(sprintf("%s is %s, but must be smaller than or equal to %s.", name, val, max)) + # return val if not violating min-max constraints + } else { + if (tolower(type) == "integer") { + val <- as.integer(val) + } else if (tolower(type) == "float") { + val <- as.numeric(val) } + } } # parse default values for parameters that are still NULL; a default value exists, as the parameter comes from the filtered_config_names From b4884dd62e4e20b0dce3a6627601c3dce20c28af Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Fri, 21 Jun 2024 11:05:52 +0200 Subject: [PATCH 4/7] adapt tests --- tests/testthat/test-get_data.R | 22 ++++++++++++++++++++++ tests/testthat/test-get_parameters.R | 4 +--- tests/testthat/testdata/in/parameters.json | 18 +++++++++++------- tests/testthat/testdata/src/tool.yml | 3 ++- 4 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 tests/testthat/test-get_data.R diff --git a/tests/testthat/test-get_data.R b/tests/testthat/test-get_data.R new file mode 100644 index 0000000..7f7dab3 --- /dev/null +++ b/tests/testthat/test-get_data.R @@ -0,0 +1,22 @@ +test_that("parse data works", { + # set environment variables for input files and the TOOL to use + Sys.setenv("PARAM_FILE" = testthat::test_path("testdata/in", "parameters.json")) + Sys.setenv("CONF_FILE" = testthat::test_path("testdata/src", "tool.yml")) + Sys.setenv("TOOL_RUN" = "foobar") + + # get the data + data <- get_data() + + # test parsed data + testthat::expect_length(data, 2) + testthat::expect_equal(data$foo_matrix[3, 2], 6.1) + testthat::expect_equal(colnames(data$foo_csv), c("A", "B", "C", "D")) + + # get data paths + data_paths <- get_data(return_data_paths = TRUE) + + # test that data paths are returned + testthat::expect_length(data_paths, 2) + testthat::expect_equal(data_paths$foo_matrix, "testdata/in/foo_matrix.dat") + testthat::expect_equal(data_paths$foo_csv, "testdata/in/foo_csv.csv") +}) diff --git a/tests/testthat/test-get_parameters.R b/tests/testthat/test-get_parameters.R index 89fd441..5892960 100644 --- a/tests/testthat/test-get_parameters.R +++ b/tests/testthat/test-get_parameters.R @@ -8,9 +8,7 @@ test_that("parse parameters works", { params <- get_parameters() # test parsed parameters - testthat::expect_length(params, 7) + testthat::expect_length(params, 5) testthat::expect_equal(params$foo_int, 42) testthat::expect_length(params$foo_array, 5) - testthat::expect_equal(params$foo_matrix[3, 2], 6.1) - testthat::expect_equal(colnames(params$foo_csv), c("A", "B", "C", "D")) }) diff --git a/tests/testthat/testdata/in/parameters.json b/tests/testthat/testdata/in/parameters.json index e99c2e8..eea7f3a 100644 --- a/tests/testthat/testdata/in/parameters.json +++ b/tests/testthat/testdata/in/parameters.json @@ -1,11 +1,15 @@ { "foobar": { - "foo_int": 42, - "foo_float": 13.37, - "foo_string": "Never eat yellow snow", - "foo_enum": "bar", - "foo_array": [34, 55, 23, 43, 23], - "foo_matrix": "testdata/in/foo_matrix.dat", - "foo_csv": "testdata/in/foo_csv.csv" + "parameters": { + "foo_int": 42, + "foo_float": 13.37, + "foo_string": "Never eat yellow snow", + "foo_enum": "bar", + "foo_array": [34, 55, 23, 43, 23] + }, + "data": { + "foo_matrix": "testdata/in/foo_matrix.dat", + "foo_csv": "testdata/in/foo_csv.csv" + } } } diff --git a/tests/testthat/testdata/src/tool.yml b/tests/testthat/testdata/src/tool.yml index d205901..fecf691 100644 --- a/tests/testthat/testdata/src/tool.yml +++ b/tests/testthat/testdata/src/tool.yml @@ -18,7 +18,8 @@ tools: - baz foo_array: type: float - array: true + array: true + data: foo_matrix: type: file foo_csv: From b528469ced60b588ccdf86a475ef98f8349484fd Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Fri, 21 Jun 2024 11:21:47 +0200 Subject: [PATCH 5/7] test boolean --- tests/testthat/test-get_parameters.R | 6 +++++- tests/testthat/testdata/in/parameters.json | 3 ++- tests/testthat/testdata/src/tool.yml | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-get_parameters.R b/tests/testthat/test-get_parameters.R index 5892960..cf33813 100644 --- a/tests/testthat/test-get_parameters.R +++ b/tests/testthat/test-get_parameters.R @@ -8,7 +8,11 @@ test_that("parse parameters works", { params <- get_parameters() # test parsed parameters - testthat::expect_length(params, 5) + testthat::expect_length(params, 6) testthat::expect_equal(params$foo_int, 42) + testthat::expect_type(params$foo_int, "integer") + testthat::expect_type(params$foo_string, "character") + testthat::expect_type(params$foo_float, "double") testthat::expect_length(params$foo_array, 5) + testthat::expect_type(params$foo_boolean, "logical") }) diff --git a/tests/testthat/testdata/in/parameters.json b/tests/testthat/testdata/in/parameters.json index eea7f3a..255f755 100644 --- a/tests/testthat/testdata/in/parameters.json +++ b/tests/testthat/testdata/in/parameters.json @@ -5,7 +5,8 @@ "foo_float": 13.37, "foo_string": "Never eat yellow snow", "foo_enum": "bar", - "foo_array": [34, 55, 23, 43, 23] + "foo_array": [34, 55, 23, 43, 23], + "foo_boolean": true }, "data": { "foo_matrix": "testdata/in/foo_matrix.dat", diff --git a/tests/testthat/testdata/src/tool.yml b/tests/testthat/testdata/src/tool.yml index fecf691..8adf6ae 100644 --- a/tests/testthat/testdata/src/tool.yml +++ b/tests/testthat/testdata/src/tool.yml @@ -19,6 +19,8 @@ tools: foo_array: type: float array: true + foo_boolean: + type: boolean data: foo_matrix: type: file From 87c50a71c37ce94164acbd426123c0505323467e Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Fri, 21 Jun 2024 12:03:38 +0200 Subject: [PATCH 6/7] small refactoring --- R/get_parameters.R | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/R/get_parameters.R b/R/get_parameters.R index 62254a8..6f22a06 100644 --- a/R/get_parameters.R +++ b/R/get_parameters.R @@ -74,19 +74,19 @@ get_parameters <- function() { # handle value specific types # enumeration - if (type == "enum") { + if (tolower(type) == "enum") { if (!(val %in% params_config[[name]]$values)) { stop(paste("The value '", val, "' is not contained in [", paste(params_config[[name]]$values, collapse = " "), "]", sep = "")) } } # datetime - else if (type %in% c("datetime", "date", "time")) { + else if (tolower(type) %in% c("datetime", "date", "time")) { val <- as.POSIXct(val) } # boolean - else if (type %in% c("boolean", "bool")) { + else if (tolower(type) %in% c("boolean", "bool")) { if (!isTRUE(val) && !isFALSE(val)) { stop(paste("The value '", val, "' is not a boolean value.")) } @@ -94,6 +94,7 @@ get_parameters <- function() { # integer and float else if (tolower(type) %in% c("integer", "float")) { + # check for min in params_config if ("min" %in% names(params_config[[name]])) { min <- params_config[[name]]$min @@ -133,13 +134,9 @@ get_parameters <- function() { } } - # parse default values for parameters that are still NULL; a default value exists, as the parameter comes from the filtered_config_names - if (is.null(val)) { - val <- params_config[[name]]$default - } - # append value to parsed_params parsed_params[[name]] <- val } + return(parsed_params) } From 452d8e6b9aba837cf9a99def53d69cb464c7d169 Mon Sep 17 00:00:00 2001 From: AlexDo1 Date: Fri, 21 Jun 2024 12:03:52 +0200 Subject: [PATCH 7/7] test for defaults --- tests/testthat/test-get_parameters.R | 3 ++- tests/testthat/testdata/src/tool.yml | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-get_parameters.R b/tests/testthat/test-get_parameters.R index cf33813..ca461fe 100644 --- a/tests/testthat/test-get_parameters.R +++ b/tests/testthat/test-get_parameters.R @@ -8,11 +8,12 @@ test_that("parse parameters works", { params <- get_parameters() # test parsed parameters - testthat::expect_length(params, 6) + testthat::expect_length(params, 7) testthat::expect_equal(params$foo_int, 42) testthat::expect_type(params$foo_int, "integer") testthat::expect_type(params$foo_string, "character") testthat::expect_type(params$foo_float, "double") testthat::expect_length(params$foo_array, 5) testthat::expect_type(params$foo_boolean, "logical") + testthat::expect_equal(params$foo_default_int, 3) }) diff --git a/tests/testthat/testdata/src/tool.yml b/tests/testthat/testdata/src/tool.yml index 8adf6ae..8cfb94c 100644 --- a/tests/testthat/testdata/src/tool.yml +++ b/tests/testthat/testdata/src/tool.yml @@ -21,6 +21,10 @@ tools: array: true foo_boolean: type: boolean + foo_default_int: + type: integer + default: 3 + optional: false data: foo_matrix: type: file