diff --git a/R/api.R b/R/api.R index 85122ce..6e06e8c 100644 --- a/R/api.R +++ b/R/api.R @@ -82,44 +82,55 @@ target_get_datasets <- function() { list.files("uploads") } -target_get_trace <- function(name, biomarker, facet = NULL, trace = NULL) { +target_get_trace <- function(name, + biomarker, + filter = NULL, + disaggregate = NULL) { + str(disaggregate) logger::log_info(paste("Requesting data from", name, "with biomarker", biomarker)) - logger::log_info(paste("Filtering by facet variables", facet)) - dat <- read_dataset(name)$dat + logger::log_info(paste("Filtering by variables:", filter)) + dataset <- read_dataset(name) + dat <- dataset$data + xcol <- dataset$xcol cols <- colnames(dat) - # facet_def <- strsplit(facet, ":") - # facet_var <- facet_def[[1]][1] - # facet_level <- facet_def[[1]][2] - # if (!(facet_var %in% cols)) { - # porcelain::porcelain_stop(paste("Column", facet_var, "not found in data"), - # code = "BAD_REQUEST", status_code = 400L) - # } - # dat <- dat[dat[facet_var] == facet_level & dat["biomarker"] == biomarker,] + if (!is.null(filter)) { + filter_def <- strsplit(filter, ":") + filter_var <- filter_def[[1]][1] + filter_level <- filter_def[[1]][2] + if (!(filter_var %in% cols)) { + porcelain::porcelain_stop(paste("Column", filter_var, "not found in data"), + code = "BAD_REQUEST", status_code = 400L) + } + dat <- dat[dat[filter_var] == filter_level,] + } dat <- dat[dat["biomarker"] == biomarker,] - dat$value <- log(dat$value) - if (length(trace) > 0) { - logger::log_info(paste("Disaggregating by trace variables", trace)) - groups <- split(dat, eval(parse(text = paste("~", trace)))) + if (length(disaggregate) > 0) { + logger::log_info(paste("Disaggregating by variables:", disaggregate)) + groups <- split(dat, eval(parse(text = paste("~", disaggregate)))) nms <- names(groups) return(lapply(seq_along(groups), function(i) { + model <- withWarnings(model_out(groups[[i]], xcol)) list(name = jsonlite::unbox(nms[[i]]), - model = model_out(groups[[i]]), - raw = data_out(groups[[i]])) + model = model$output, + raw = data_out(groups[[i]], xcol), + warnings = model$warnings) })) } else { logger::log_info("Returning single trace") + model <- withWarnings(model_out(dat, xcol)) return(list(list(name = jsonlite::unbox("all"), - model = model_out(dat), - raw = data_out(dat)))) + model = model$output, + raw = data_out(dat, xcol), + warnings = model$warnings))) } } read_dataset <- function(name) { path <- file.path("uploads", name) if (!file.exists(path)) { - porcelain::porcelain_stop(paste("Did not find dataset with name ", name), - code = "BAD_REQUEST", status_code = 404L) + porcelain::porcelain_stop(paste("Did not find dataset with name:", name), + code = "DATASET_NOT_FOUND", status_code = 404L) } dat <- utils::read.csv(file.path(path, "data")) dat$value <- as.numeric(dat$value) @@ -127,21 +138,24 @@ read_dataset <- function(name) { list(data = dat, xcol = xcol) } -model_out <- function(dat) { +model_out <- function(dat, xcol) { n <- nrow(dat) if (n == 0) { return(list(x = list(), y = list())) } if (n > 1000) { - m <- mgcv::gam(value ~ s(day, bs = "cs"), data = dat, method = "REML") + m <- mgcv::gam(value ~ s(eval(parse(text = xcol)), bs = "cs"), + data = dat, method = "REML") } else { - m <- stats::loess(value ~ day, data = dat, span = 0.75) + m <- stats::loess(value ~ eval(parse(text = xcol)), data = dat, span = 0.75) } - range <- range(dat$day, na.rm = TRUE) + range <- range(dat[, xcol], na.rm = TRUE) xseq <- range[1]:range[2] - list(x = xseq, y = stats::predict(m, tibble::data_frame(day = xseq))) + xdf <- tibble::data_frame(xcol = xseq) + names(xdf) <- xcol + list(x = xseq, y = stats::predict(m, xdf)) } -data_out <- function(dat) { - list(x = dat$day, y = dat$value) +data_out <- function(dat, xcol) { + list(x = dat[, xcol], y = dat$value) } diff --git a/R/router.R b/R/router.R index d3383b8..90d1a1d 100644 --- a/R/router.R +++ b/R/router.R @@ -50,8 +50,9 @@ get_datasets <- function() { get_trace <- function() { porcelain::porcelain_endpoint$new( "GET", - "/dataset//", + "/dataset//trace/", target_get_trace, - porcelain::porcelain_input_query(facet = "string", trace = "string"), + porcelain::porcelain_input_query(disaggregate = "string", + filter = "string"), returning = porcelain::porcelain_returning_json("DataSeries")) } diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..d28adf5 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,10 @@ +withWarnings <- function(expr) { + myWarnings <- NULL + wHandler <- function(w) { + myWarnings <<- c(myWarnings, list(w)) + invokeRestart("muffleWarning") + } + val <- withCallingHandlers(expr, warning = wHandler) + list(output = val, warnings = lapply(myWarnings, + function(w) jsonlite::unbox(w$message))) +} diff --git a/inst/schema/DataSeries.schema.json b/inst/schema/DataSeries.schema.json index 7ca4d20..15d17f9 100644 --- a/inst/schema/DataSeries.schema.json +++ b/inst/schema/DataSeries.schema.json @@ -42,11 +42,17 @@ } } }, + "warnings": { + "type": "array", + "items": { + "type": "string" + } + }, "required": ["x", "y"], "additionalProperties": false } }, - "required": ["model", "raw"], - "additionalProperties": false + "required": ["model", "raw", "warnings"], + "additionalProperties": true } } diff --git a/inst/schema/DatasetMetadata.schema.json b/inst/schema/DatasetMetadata.schema.json index c233425..e78b19d 100644 --- a/inst/schema/DatasetMetadata.schema.json +++ b/inst/schema/DatasetMetadata.schema.json @@ -5,7 +5,7 @@ "variables": { "type": "array", "items": { - "type": "object" + "$ref": "Variable.schema.json" } }, "biomarkers": { diff --git a/inst/schema/Variable.schema.json b/inst/schema/Variable.schema.json new file mode 100644 index 0000000..b963a79 --- /dev/null +++ b/inst/schema/Variable.schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "name" : { + "type": "string" + }, + "levels": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ "name", "levels" ] +} diff --git a/tests/testthat/test-read.R b/tests/testthat/test-read.R new file mode 100644 index 0000000..2b5096b --- /dev/null +++ b/tests/testthat/test-read.R @@ -0,0 +1,123 @@ +test_that("GET /dataset returns 404 if dataset not found", { + router <- build_routes() + res <- router$request("GET", "/dataset/testdataset") + expect_equal(res$status, 404) + validate_failure_schema(res$body) + body <- jsonlite::fromJSON(res$body) + expect_equal(body$errors[1, "detail"], + "Did not find dataset with name: testdataset") +}) + +test_that("GET /trace/ returns 404 if dataset not found", { + router <- build_routes() + res <- router$request("GET", "/dataset/testdataset/trace/ab") + expect_equal(res$status, 404) + validate_failure_schema(res$body) + body <- jsonlite::fromJSON(res$body) + expect_equal(body$errors[1, "detail"], + "Did not find dataset with name: testdataset") +}) + +test_that("can get trace for uploaded dataset with xcol", { + dat <- data.frame(biomarker = c("ab", "ba"), + value = 1, + time = 1:10, + age = "0-5", + sex = c("M", "F")) + request <- local_POST_dataset_request(dat, + "testdata", + xcol = "time") + router <- build_routes() + res <- router$call(request) + expect_equal(res$status, 200) + res <- router$request("GET", "/dataset/testdata/trace/ab") + expect_equal(res$status, 200) + expected_warnings <- list("span too small. fewer data values than degrees of freedom.", + "pseudoinverse used at 0.96", + "neighborhood radius 4.04", + "reciprocal condition number 0", + "There are other near singularities as well. 16.322") + expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"), + model = list( + x = 1:9, + y = rep(1, 9) + ), + raw = list( + x = c(1, 3, 5, 7, 9), + y = c(1, 1, 1, 1, 1) + ), + warnings = lapply(expected_warnings, + jsonlite::unbox)) + )) + body <- jsonlite::fromJSON(res$body) + expect_equal(body$data, jsonlite::fromJSON(expected)) +}) + +test_that("can get disgagregated traces", { + dat <- data.frame(biomarker = "ab", + value = 1, + day = 1:10, + age = "0-5", + sex = c("M", "F")) + local_add_dataset(dat, + "testdataset") + router <- build_routes() + res <- router$request("GET", "/dataset/testdataset/trace/ab", + query = list(disaggregate = "sex")) + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(nrow(data), 2) + expect_equal(data$name, c("F", "M")) + expect_equal(data$raw[1, "x"], list(c(2, 4, 6, 8, 10))) + expect_equal(data$raw[1, "y"], list(c(1, 1, 1, 1, 1))) + expect_equal(data$raw[2, "x"], list(c(1, 3, 5, 7, 9))) + expect_equal(data$raw[2, "y"], list(c(1, 1, 1, 1, 1))) + + expect_equal(data$model[1, "x"], list(2:10)) + expect_equal(data$model[1, "y"], list(rep(1, 9))) + expect_equal(data$model[2, "x"], list(1:9)) + expect_equal(data$model[2, "y"], list(rep(1, 9))) +}) + +test_that("can get filtered traces", { + dat <- data.frame(biomarker = "ab", + value = rep(c(0, 1, 2, 3), 5), + day = 1:20, + age = rep(c("0-5", "0-5", "5+", "5+"), 5), + sex = c("M", "F")) + local_add_dataset(dat, + "testdataset") + router <- build_routes() + res <- router$request("GET", "/dataset/testdataset/trace/ab", + query = list(filter = "sex:M")) + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(nrow(data), 1) + expect_equal(data$name, "all") + expect_equal(data$raw[1, "x"], list(c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19))) + expect_equal(data$raw[1, "y"], list(c(0, 2, 0, 2, 0, 2, 0, 2, 0, 2))) +}) + +test_that("can get disgagregated and filtered traces", { + dat <- data.frame(biomarker = "ab", + value = rep(c(0, 1, 2, 3), 5), + day = 1:20, + age = rep(c("0-5", "0-5", "5+", "5+"), 5), + sex = c("M", "F")) + local_add_dataset(dat, + "testdataset") + router <- build_routes() + res <- router$request("GET", "/dataset/testdataset/trace/ab", + query = list(disaggregate = "age", filter = "sex:M")) + expect_equal(res$status, 200) + body <- jsonlite::fromJSON(res$body) + data <- body$data + expect_equal(nrow(data), 2) + expect_equal(data$name, c("0-5", "5+")) + expect_equal(data$raw[1, "x"], list(c(1, 5, 9, 13, 17))) + expect_equal(data$raw[1, "y"], list(c(0, 0, 0, 0, 0))) + expect_equal(data$raw[2, "x"], list(c(3, 7, 11, 15, 19))) + expect_equal(data$raw[2, "y"], list(c(2, 2, 2, 2, 2))) +}) diff --git a/tests/testthat/test-router.R b/tests/testthat/test-router.R index 6048ee6..1d35933 100644 --- a/tests/testthat/test-router.R +++ b/tests/testthat/test-router.R @@ -57,7 +57,7 @@ test_that("GET /datasets", { expect_equal(body$data, c("anotherdataset", "testdataset")) }) -test_that("GET /dataset", { +test_that("GET /dataset", { local_add_dataset(data.frame(biomarker = c("ab", "ba"), value = 1, day = 1:10, @@ -68,9 +68,42 @@ test_that("GET /dataset", { res <- router$request("GET", "/dataset/testdataset") expect_equal(res$status, 200) body <- jsonlite::fromJSON(res$body) - str(body$data$variables) expect_equal(body$data$variables$name, c("age", "sex")) expect_equal(body$data$variables$levels, list(c("0-5"), c("M", "F"))) expect_equal(body$data$biomarkers, c("ab", "ba")) expect_equal(body$data$xcol, "day") }) + +test_that("GET /dataset//trace/", { + dat <- data.frame(biomarker = c("ab", "ba"), + value = c(1, 1.5, 2, 3, 3.2, 4, 5, 6, 6.1, 7), + day = 1:10, + age = "0-5", + sex = c("M", "F")) + local_add_dataset(dat, + "testdataset") + set.seed(1) + suppressWarnings({ + m <- stats::loess(value ~ day, data = dat[dat["biomarker"] == "ab",], span = 0.75) + model <- list(x = 1:9, y = stats::predict(m, tibble::data_frame(day = 1:9))) + }) + router <- build_routes() + set.seed(1) + res <- router$request("GET", "/dataset/testdataset/trace/ab") + expect_equal(res$status, 200) + expected_warnings <- list("span too small. fewer data values than degrees of freedom.", + "pseudoinverse used at 0.96", + "neighborhood radius 4.04", + "reciprocal condition number 0", + "There are other near singularities as well. 16.322") + expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"), + model = model, + raw = list( + x = c(1, 3, 5, 7, 9), + y = c(1, 2, 3.2, 5, 6.1)), + warnings = lapply(expected_warnings, + jsonlite::unbox)) + )) + body <- jsonlite::fromJSON(res$body) + expect_equal(body$data, jsonlite::fromJSON(expected)) +})