Skip to content

Commit

Permalink
Merge pull request #3 from VForWaTer/seperate_parameters_and_data
Browse files Browse the repository at this point in the history
Seperate parameters and data
  • Loading branch information
AlexDo1 authored Jun 21, 2024
2 parents 22fafaf + 452d8e6 commit 93ee390
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 50 deletions.
95 changes: 95 additions & 0 deletions R/get_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#' @title get_data
#'
#' @description
#' This package is intended to be used inside a docker container to parse
#' data from "/in/input.json" with the help of
#' the input configuration in "/src/tool.yml".
#' The data can be parsed by file extension.
#' If return_data_paths is set to FALSE, the function will return the paths
#' to the data files. If set to TRUE, the function will try to parse the data.
#'
#' @return A list of the parsed data specified in "/in/input.json"
#' @examples
#' \dontrun{
#' data <- get_data()
#' }
#' @export
#' @importFrom jsonlite read_json
#' @importFrom yaml read_yaml
#' @importFrom tools file_ext
#' @importFrom utils read.csv read.table

get_data <- function(return_data_paths = FALSE) {
# get the input file env variable
PARAM_FILE <- Sys.getenv(x = "PARAM_FILE")
if (PARAM_FILE == "") {
PARAM_FILE <- "/in/input.json"
}

# get the config file env variable
CONF_FILE <- Sys.getenv(x = "CONF_FILE")
if (CONF_FILE == "") {
CONF_FILE <- "/src/tool.yml"
}

# get the tool name
TOOL <- tolower(Sys.getenv(x = "TOOL_RUN"))

# parse the json, access data section
data <- jsonlite::read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]][["data"]]

# parse the config yaml, access data section
data_config <- yaml::read_yaml(CONF_FILE)$tools[[TOOL]]$data

# initialize list for data to return
data_return <- list()

# parse data
for (name in names(data)) {
# get the file path
path <- data[[name]]

# get the file extension
ext <- tolower(tools::file_ext(path))
# add a dot at the beginning
ext <- paste(".", ext, sep = "")

# check if the dataset is given with the correct extension
if ("extension" %in% names(data_config[[name]])) {
if (!(ext %in% data_config[[name]]$extension)) {
stop(paste("The file extension '", ext, "' is not contained in [", paste(data_config[[name]]$extension, collapse = " "), "]", sep = ""))
}
}

# check if the file exists
if (!file.exists(path)) {
stop(paste("The file '", path, "' does not exist."))
}

# do not parse files by extension if parse_files is FALSE
if (return_data_paths) {
data_return[[name]] <- path
next
}

# .dat
if (ext == ".dat") {
parsed <- as.matrix(read.table(path)) # matrix files: no header, no index
dimnames(parsed) <- NULL
}

# .csv
else if (ext == ".csv") {
parsed <- read.csv(path)
}

# not supported to parse
else {
parsed <- path
}

# append value to parsed_params
data_return[[name]] <- parsed
}
return(data_return)
}
84 changes: 44 additions & 40 deletions R/get_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
#' @importFrom utils read.csv read.table

get_parameters <- function() {
# get the parameter file env variable
# get the input file env variable
PARAM_FILE <- Sys.getenv(x = "PARAM_FILE")
if (PARAM_FILE == "") {
PARAM_FILE <- "/in/parameters.json"
PARAM_FILE <- "/in/input.json"
}

# get the config file env variable
Expand All @@ -32,13 +32,11 @@ get_parameters <- function() {
# get the tool name
TOOL <- tolower(Sys.getenv(x = "TOOL_RUN"))

# parse the json
params <- read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]]
params_names <- names(params)
# parse the json, access parameters section
params <- jsonlite::read_json(path = PARAM_FILE, simplifyVector = TRUE)[[TOOL]][["parameters"]]

# parse the config yaml, directly access parameters section
config <- read_yaml(CONF_FILE)
params_config <- config$tools[[TOOL]]$parameters
# parse the config yaml, access parameters section
params_config <- yaml::read_yaml(CONF_FILE)$tools[[TOOL]]$parameters

# get all names from params_config that have a default value and are not optional to parse default values
filtered_config_names <- sapply(names(params_config), function(name) {
Expand All @@ -54,44 +52,49 @@ get_parameters <- function() {
filtered_config_names <- filtered_config_names[!sapply(filtered_config_names, is.null)]

# combine the two lists of parameter names
params2parse <- unique(c(params_names, names(filtered_config_names)))
params2parse <- unique(c(names(params), names(filtered_config_names)))

# initiate list to save parsed parameters
parsed_params <- list()

# parse parameters
for (name in params2parse) {
# type of the parameter
t <- params_config[[name]][["type"]]
type <- params_config[[name]][["type"]]

# get the value from parameters.json
if (name %in% names(params)) {
val <- params[[name]]
val <- params[[name]]

# if parameter is not included in parameters.json, go for default value in config
} else {
val <- params_config[[name]]$default
val <- params_config[[name]]$default
}

# handle value specific types
if (t == "enum") {

# enumeration
if (tolower(type) == "enum") {
if (!(val %in% params_config[[name]]$values)) {
stop(paste("The value '", val, "' is not contained in [", paste(params_config[[name]]$values, collapse = " "), "]", sep = ""))
}
} else if (t %in% c("datetime", "date", "time")) {
}

# datetime
else if (tolower(type) %in% c("datetime", "date", "time")) {
val <- as.POSIXct(val)
} else if (t == "file") {
# get the ext and use the corresponding reader
ext <- tolower(file_ext(val))
if (ext == "dat") {
# matrix files: no header, no index
val <- as.matrix(read.table(val))
dimnames(val) <- NULL
} else if (ext == "csv") {
val <- read.csv(val)
}

# boolean
else if (tolower(type) %in% c("boolean", "bool")) {
if (!isTRUE(val) && !isFALSE(val)) {
stop(paste("The value '", val, "' is not a boolean value."))
}
} else if (tolower(t) %in% c("integer", "float")) {
print(t)
}

# integer and float
else if (tolower(type) %in% c("integer", "float")) {

# check for min in params_config
if ("min" %in% names(params_config[[name]])) {
min <- params_config[[name]]$min
Expand All @@ -105,34 +108,35 @@ get_parameters <- function() {
max <- NULL
}

# check whether val is in min and max range
# check if val is in min and max range
if (!is.null(min) && !is.null(max)) {
# check if min is smaller than or equal to max
if (max <= min) {
stop(sprintf("There is an error in your parameter configuration / tool.yml, as the given minimum value (%s) for parameter '%s' is higher than or equal to the maximum number (%s).", min, name, max))

# check if val is between min and max
} else if (!(min <= val && val <= max)) {
stop(sprintf("%s is %s, but it must be between %s and %s.", name, val, min, max))
}
# check if val is greater than or equal to min
} else if (!is.null(min) && !(min <= val)) {
stop(sprintf("%s is %s, but must be higher than or equal to %s.", name, val, min))
# check if val is smaller than or equal to max
} else if (!is.null(max) && !(val <= max)) {
stop(sprintf("%s is %s, but must be smaller than or equal to %s.", name, val, max))
# return val if not violating min-max constraints
} else {
parsed_params[[name]] <- val
# check if val is greater than or equal to min
} else if (!is.null(min) && !(min <= val)) {
stop(sprintf("%s is %s, but must be higher than or equal to %s.", name, val, min))
# check if val is smaller than or equal to max
} else if (!is.null(max) && !(val <= max)) {
stop(sprintf("%s is %s, but must be smaller than or equal to %s.", name, val, max))
# return val if not violating min-max constraints
} else {
if (tolower(type) == "integer") {
val <- as.integer(val)
} else if (tolower(type) == "float") {
val <- as.numeric(val)
}
}

# parse default values for parameters that are still NULL; a default value exists, as the parameter comes from the filtered_config_names
if (is.null(val)) {
val <- params_config[[name]]$default
}
}

# append value to parsed_params
parsed_params[[name]] <- val
}

return(parsed_params)
}
22 changes: 22 additions & 0 deletions tests/testthat/test-get_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
test_that("parse data works", {
# set environment variables for input files and the TOOL to use
Sys.setenv("PARAM_FILE" = testthat::test_path("testdata/in", "parameters.json"))
Sys.setenv("CONF_FILE" = testthat::test_path("testdata/src", "tool.yml"))
Sys.setenv("TOOL_RUN" = "foobar")

# get the data
data <- get_data()

# test parsed data
testthat::expect_length(data, 2)
testthat::expect_equal(data$foo_matrix[3, 2], 6.1)
testthat::expect_equal(colnames(data$foo_csv), c("A", "B", "C", "D"))

# get data paths
data_paths <- get_data(return_data_paths = TRUE)

# test that data paths are returned
testthat::expect_length(data_paths, 2)
testthat::expect_equal(data_paths$foo_matrix, "testdata/in/foo_matrix.dat")
testthat::expect_equal(data_paths$foo_csv, "testdata/in/foo_csv.csv")
})
7 changes: 5 additions & 2 deletions tests/testthat/test-get_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ test_that("parse parameters works", {
# test parsed parameters
testthat::expect_length(params, 7)
testthat::expect_equal(params$foo_int, 42)
testthat::expect_type(params$foo_int, "integer")
testthat::expect_type(params$foo_string, "character")
testthat::expect_type(params$foo_float, "double")
testthat::expect_length(params$foo_array, 5)
testthat::expect_equal(params$foo_matrix[3, 2], 6.1)
testthat::expect_equal(colnames(params$foo_csv), c("A", "B", "C", "D"))
testthat::expect_type(params$foo_boolean, "logical")
testthat::expect_equal(params$foo_default_int, 3)
})
19 changes: 12 additions & 7 deletions tests/testthat/testdata/in/parameters.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
{
"foobar": {
"foo_int": 42,
"foo_float": 13.37,
"foo_string": "Never eat yellow snow",
"foo_enum": "bar",
"foo_array": [34, 55, 23, 43, 23],
"foo_matrix": "testdata/in/foo_matrix.dat",
"foo_csv": "testdata/in/foo_csv.csv"
"parameters": {
"foo_int": 42,
"foo_float": 13.37,
"foo_string": "Never eat yellow snow",
"foo_enum": "bar",
"foo_array": [34, 55, 23, 43, 23],
"foo_boolean": true
},
"data": {
"foo_matrix": "testdata/in/foo_matrix.dat",
"foo_csv": "testdata/in/foo_csv.csv"
}
}
}
9 changes: 8 additions & 1 deletion tests/testthat/testdata/src/tool.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ tools:
- baz
foo_array:
type: float
array: true
array: true
foo_boolean:
type: boolean
foo_default_int:
type: integer
default: 3
optional: false
data:
foo_matrix:
type: file
foo_csv:
Expand Down

0 comments on commit 93ee390

Please sign in to comment.