implement and test filtering
hillalex committed Aug 21, 2024
1 parent 17911d0 commit b74f794
Showing 8 changed files with 239 additions and 35 deletions.
70 changes: 42 additions & 28 deletions R/api.R
@@ -82,66 +82,80 @@ target_get_datasets <- function() {
list.files("uploads")
}

target_get_trace <- function(name, biomarker, facet = NULL, trace = NULL) {
target_get_trace <- function(name,
biomarker,
filter = NULL,
disaggregate = NULL) {
str(disaggregate)
logger::log_info(paste("Requesting data from", name,
"with biomarker", biomarker))
logger::log_info(paste("Filtering by facet variables", facet))
dat <- read_dataset(name)$dat
logger::log_info(paste("Filtering by variables:", filter))
dataset <- read_dataset(name)
dat <- dataset$data
xcol <- dataset$xcol
cols <- colnames(dat)
# facet_def <- strsplit(facet, ":")
# facet_var <- facet_def[[1]][1]
# facet_level <- facet_def[[1]][2]
# if (!(facet_var %in% cols)) {
# porcelain::porcelain_stop(paste("Column", facet_var, "not found in data"),
# code = "BAD_REQUEST", status_code = 400L)
# }
# dat <- dat[dat[facet_var] == facet_level & dat["biomarker"] == biomarker,]
if (!is.null(filter)) {
filter_def <- strsplit(filter, ":")
filter_var <- filter_def[[1]][1]
filter_level <- filter_def[[1]][2]
if (!(filter_var %in% cols)) {
porcelain::porcelain_stop(paste("Column", filter_var, "not found in data"),
code = "BAD_REQUEST", status_code = 400L)
}
dat <- dat[dat[filter_var] == filter_level,]
}
dat <- dat[dat["biomarker"] == biomarker,]

Check warning on line 107 in R/api.R

View workflow job for this annotation

GitHub Actions / lint

file=R/api.R,line=107,col=44,[commas_linter] Commas should always have a space after.
dat$value <- log(dat$value)
if (length(trace) > 0) {
logger::log_info(paste("Disaggregating by trace variables", trace))
groups <- split(dat, eval(parse(text = paste("~", trace))))
if (length(disaggregate) > 0) {
logger::log_info(paste("Disaggregating by variables:", disaggregate))
groups <- split(dat, eval(parse(text = paste("~", disaggregate))))
nms <- names(groups)
return(lapply(seq_along(groups), function(i) {
model <- withWarnings(model_out(groups[[i]], xcol))
list(name = jsonlite::unbox(nms[[i]]),
model = model_out(groups[[i]]),
raw = data_out(groups[[i]]))
model = model$output,
raw = data_out(groups[[i]], xcol),
warnings = model$warnings)
}))
} else {
logger::log_info("Returning single trace")
model <- withWarnings(model_out(dat, xcol))
return(list(list(name = jsonlite::unbox("all"),
model = model_out(dat),
raw = data_out(dat))))
model = model$output,
raw = data_out(dat, xcol),
warnings = model$warnings)))
}
}
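As a point of reference, a minimal standalone sketch of the new "variable:level" filter handling introduced above; the query value and toy data frame here are made up:

    # Minimal sketch of the filter parsing; "sex:M" and dat are illustrative
    dat <- data.frame(biomarker = "ab", value = 1:4, sex = c("M", "F", "M", "F"))
    filter <- "sex:M"
    filter_def <- strsplit(filter, ":")
    filter_var <- filter_def[[1]][1]    # "sex"
    filter_level <- filter_def[[1]][2]  # "M"
    dat <- dat[dat[filter_var] == filter_level, ]  # keeps only rows where sex == "M"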

read_dataset <- function(name) {
path <- file.path("uploads", name)
if (!file.exists(path)) {
porcelain::porcelain_stop(paste("Did not find dataset with name ", name),
code = "BAD_REQUEST", status_code = 404L)
porcelain::porcelain_stop(paste("Did not find dataset with name:", name),
code = "DATASET_NOT_FOUND", status_code = 404L)
}
dat <- utils::read.csv(file.path(path, "data"))
dat$value <- as.numeric(dat$value)
xcol <- readLines(file.path(path, "xcol"))
list(data = dat, xcol = xcol)
}
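For reference, read_dataset expects each upload to live in a directory under uploads/ holding a CSV file named data plus an xcol file containing the x-axis column name. A sketch of that layout (the dataset name and values are illustrative):

    # Illustrative upload layout that read_dataset() reads back
    dir.create(file.path("uploads", "mydataset"), recursive = TRUE)
    utils::write.csv(data.frame(biomarker = "ab", value = 1:5, day = 1:5),
                     file.path("uploads", "mydataset", "data"), row.names = FALSE)
    writeLines("day", file.path("uploads", "mydataset", "xcol"))
    read_dataset("mydataset")$xcol  # "day"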

model_out <- function(dat) {
model_out <- function(dat, xcol) {
n <- nrow(dat)
if (n == 0) {
return(list(x = list(), y = list()))
}
if (n > 1000) {
m <- mgcv::gam(value ~ s(day, bs = "cs"), data = dat, method = "REML")
m <- mgcv::gam(value ~ s(eval(parse(text = xcol)), bs = "cs"),
data = dat, method = "REML")
} else {
m <- stats::loess(value ~ day, data = dat, span = 0.75)
m <- stats::loess(value ~ eval(parse(text = xcol)), data = dat, span = 0.75)
}
range <- range(dat$day, na.rm = TRUE)
range <- range(dat[, xcol], na.rm = TRUE)
xseq <- range[1]:range[2]
list(x = xseq, y = stats::predict(m, tibble::data_frame(day = xseq)))
xdf <- tibble::data_frame(xcol = xseq)
names(xdf) <- xcol
list(x = xseq, y = stats::predict(m, xdf))
}
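A self-contained sketch of the dynamic-column pattern model_out now relies on: the x column name arrives as a string, is injected into the formula via eval(parse(...)), and the prediction frame is renamed to match. The time column and values here are invented:

    xcol <- "time"  # hypothetical x column name read from the dataset
    dat <- data.frame(time = 1:10, value = log(1:10))
    m <- stats::loess(value ~ eval(parse(text = xcol)), data = dat, span = 0.75)
    xdf <- data.frame(x = 1:10)
    names(xdf) <- xcol      # prediction frame must carry the same column name
    stats::predict(m, xdf)  # fitted values at x = 1..10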

data_out <- function(dat) {
list(x = dat$day, y = dat$value)
data_out <- function(dat, xcol) {
list(x = dat[, xcol], y = dat$value)
}
5 changes: 3 additions & 2 deletions R/router.R
@@ -50,8 +50,9 @@ get_datasets <- function() {
get_trace <- function() {
porcelain::porcelain_endpoint$new(
"GET",
"/dataset/<name>/<biomarker>",
"/dataset/<name>/trace/<biomarker>",
target_get_trace,
porcelain::porcelain_input_query(facet = "string", trace = "string"),
porcelain::porcelain_input_query(disaggregate = "string",
filter = "string"),
returning = porcelain::porcelain_returning_json("DataSeries"))
}
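With the route renamed and the query parameters registered, a request exercising both might look like this (the dataset name is hypothetical):

    router <- build_routes()
    res <- router$request("GET", "/dataset/mydataset/trace/ab",
                          query = list(filter = "sex:M", disaggregate = "age"))
    jsonlite::fromJSON(res$body)$data  # one trace per age group, restricted to sex == "M"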
10 changes: 10 additions & 0 deletions R/utils.R
@@ -0,0 +1,10 @@
withWarnings <- function(expr) {
myWarnings <- NULL
wHandler <- function(w) {
myWarnings <<- c(myWarnings, list(w))
invokeRestart("muffleWarning")
}
val <- withCallingHandlers(expr, warning = wHandler)
list(output = val, warnings = lapply(myWarnings,
function(w) jsonlite::unbox(w$message)))
}
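A quick illustration of what withWarnings returns — the evaluated value plus any warning messages, already unboxed for JSON serialisation (the warning text is whatever base R happens to emit):

    res <- withWarnings(log(-1))  # produces NaN and a warning
    res$output                    # NaN
    res$warnings                  # e.g. a list with one unboxed "NaNs produced"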
10 changes: 8 additions & 2 deletions inst/schema/DataSeries.schema.json
@@ -42,11 +42,17 @@
}
}
},
"warnings": {
"type": "array",
"items": {
"type": "string"
}
},
"required": ["x", "y"],
"additionalProperties": false
}
},
"required": ["model", "raw"],
"additionalProperties": false
"required": ["model", "raw", "warnings"],
"additionalProperties": true
}
}
2 changes: 1 addition & 1 deletion inst/schema/DatasetMetadata.schema.json
@@ -5,7 +5,7 @@
"variables": {
"type": "array",
"items": {
"type": "object"
"$ref": "Variable.schema.json"
}
},
"biomarkers": {
17 changes: 17 additions & 0 deletions inst/schema/Variable.schema.json
@@ -0,0 +1,17 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"name" : {
"type": "string"
},
"levels": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [ "name", "levels" ]
}
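For example, each entry of the variables array returned by GET /dataset/<name> should now serialise to a document of this shape (values are illustrative):

    jsonlite::toJSON(list(name = jsonlite::unbox("sex"), levels = c("M", "F")))
    # {"name":"sex","levels":["M","F"]}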
123 changes: 123 additions & 0 deletions tests/testthat/test-read.R
@@ -0,0 +1,123 @@
test_that("GET /dataset<name> returns 404 if dataset not found", {
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset")
expect_equal(res$status, 404)
validate_failure_schema(res$body)
body <- jsonlite::fromJSON(res$body)
expect_equal(body$errors[1, "detail"],
"Did not find dataset with name: testdataset")
})

test_that("GET /trace/<biomarker> returns 404 if dataset not found", {
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab")
expect_equal(res$status, 404)
validate_failure_schema(res$body)
body <- jsonlite::fromJSON(res$body)
expect_equal(body$errors[1, "detail"],
"Did not find dataset with name: testdataset")
})

test_that("can get trace for uploaded dataset with xcol", {
dat <- data.frame(biomarker = c("ab", "ba"),
value = 1,
time = 1:10,
age = "0-5",
sex = c("M", "F"))
request <- local_POST_dataset_request(dat,
"testdata",
xcol = "time")
router <- build_routes()
res <- router$call(request)
expect_equal(res$status, 200)
res <- router$request("GET", "/dataset/testdata/trace/ab")
expect_equal(res$status, 200)
expected_warnings <- list("span too small. fewer data values than degrees of freedom.",
"pseudoinverse used at 0.96",
"neighborhood radius 4.04",
"reciprocal condition number 0",
"There are other near singularities as well. 16.322")
expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"),
model = list(
x = 1:9,
y = rep(1, 9)
),
raw = list(
x = c(1, 3, 5, 7, 9),
y = c(1, 1, 1, 1, 1)
),
warnings = lapply(expected_warnings,
jsonlite::unbox))
))
body <- jsonlite::fromJSON(res$body)
expect_equal(body$data, jsonlite::fromJSON(expected))
})

test_that("can get disgagregated traces", {
dat <- data.frame(biomarker = "ab",
value = 1,
day = 1:10,
age = "0-5",
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(disaggregate = "sex"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 2)
expect_equal(data$name, c("F", "M"))
expect_equal(data$raw[1, "x"], list(c(2, 4, 6, 8, 10)))
expect_equal(data$raw[1, "y"], list(c(1, 1, 1, 1, 1)))
expect_equal(data$raw[2, "x"], list(c(1, 3, 5, 7, 9)))
expect_equal(data$raw[2, "y"], list(c(1, 1, 1, 1, 1)))

expect_equal(data$model[1, "x"], list(2:10))
expect_equal(data$model[1, "y"], list(rep(1, 9)))
expect_equal(data$model[2, "x"], list(1:9))
expect_equal(data$model[2, "y"], list(rep(1, 9)))
})

test_that("can get filtered traces", {
dat <- data.frame(biomarker = "ab",
value = rep(c(0, 1, 2, 3), 5),
day = 1:20,
age = rep(c("0-5", "0-5", "5+", "5+"), 5),
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(filter = "sex:M"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 1)
expect_equal(data$name, "all")
expect_equal(data$raw[1, "x"], list(c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)))
expect_equal(data$raw[1, "y"], list(c(0, 2, 0, 2, 0, 2, 0, 2, 0, 2)))
})

test_that("can get disgagregated and filtered traces", {
dat <- data.frame(biomarker = "ab",
value = rep(c(0, 1, 2, 3), 5),
day = 1:20,
age = rep(c("0-5", "0-5", "5+", "5+"), 5),
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(disaggregate = "age", filter = "sex:M"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 2)
expect_equal(data$name, c("0-5", "5+"))
expect_equal(data$raw[1, "x"], list(c(1, 5, 9, 13, 17)))
expect_equal(data$raw[1, "y"], list(c(0, 0, 0, 0, 0)))
expect_equal(data$raw[2, "x"], list(c(3, 7, 11, 15, 19)))
expect_equal(data$raw[2, "y"], list(c(2, 2, 2, 2, 2)))
})
37 changes: 35 additions & 2 deletions tests/testthat/test-router.R
@@ -57,7 +57,7 @@ test_that("GET /datasets", {
expect_equal(body$data, c("anotherdataset", "testdataset"))
})

test_that("GET /dataset", {
test_that("GET /dataset<name>", {
local_add_dataset(data.frame(biomarker = c("ab", "ba"),
value = 1,
day = 1:10,
@@ -68,9 +68,42 @@ test_that("GET /dataset", {
res <- router$request("GET", "/dataset/testdataset")
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
str(body$data$variables)
expect_equal(body$data$variables$name, c("age", "sex"))
expect_equal(body$data$variables$levels, list(c("0-5"), c("M", "F")))
expect_equal(body$data$biomarkers, c("ab", "ba"))
expect_equal(body$data$xcol, "day")
})

test_that("GET /dataset/<name>/trace/<biomarker>", {
dat <- data.frame(biomarker = c("ab", "ba"),
value = c(1, 1.5, 2, 3, 3.2, 4, 5, 6, 6.1, 7),
day = 1:10,
age = "0-5",
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
set.seed(1)
suppressWarnings({
m <- stats::loess(value ~ day, data = dat[dat["biomarker"] == "ab",], span = 0.75)
model <- list(x = 1:9, y = stats::predict(m, tibble::data_frame(day = 1:9)))
})
router <- build_routes()
set.seed(1)
res <- router$request("GET", "/dataset/testdataset/trace/ab")
expect_equal(res$status, 200)
expected_warnings <- list("span too small. fewer data values than degrees of freedom.",
"pseudoinverse used at 0.96",
"neighborhood radius 4.04",
"reciprocal condition number 0",
"There are other near singularities as well. 16.322")
expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"),
model = model,
raw = list(
x = c(1, 3, 5, 7, 9),
y = c(1, 2, 3.2, 5, 6.1)),
warnings = lapply(expected_warnings,
jsonlite::unbox))
))
body <- jsonlite::fromJSON(res$body)
expect_equal(body$data, jsonlite::fromJSON(expected))
})
