implement and test filtering
hillalex committed Aug 21, 2024
1 parent 17911d0 commit b74f794
Showing 8 changed files with 239 additions and 35 deletions.
70 changes: 42 additions & 28 deletions R/api.R
@@ -82,66 +82,80 @@ target_get_datasets <- function() {
list.files("uploads")
}

target_get_trace <- function(name, biomarker, facet = NULL, trace = NULL) {
target_get_trace <- function(name,
biomarker,
filter = NULL,
disaggregate = NULL) {
str(disaggregate)
logger::log_info(paste("Requesting data from", name,
"with biomarker", biomarker))
logger::log_info(paste("Filtering by facet variables", facet))
dat <- read_dataset(name)$dat
logger::log_info(paste("Filtering by variables:", filter))
dataset <- read_dataset(name)
dat <- dataset$data
xcol <- dataset$xcol
cols <- colnames(dat)
# facet_def <- strsplit(facet, ":")
# facet_var <- facet_def[[1]][1]
# facet_level <- facet_def[[1]][2]
# if (!(facet_var %in% cols)) {
# porcelain::porcelain_stop(paste("Column", facet_var, "not found in data"),
# code = "BAD_REQUEST", status_code = 400L)
# }
# dat <- dat[dat[facet_var] == facet_level & dat["biomarker"] == biomarker,]
if (!is.null(filter)) {
filter_def <- strsplit(filter, ":")
filter_var <- filter_def[[1]][1]
filter_level <- filter_def[[1]][2]
if (!(filter_var %in% cols)) {
porcelain::porcelain_stop(paste("Column", filter_var, "not found in data"),
code = "BAD_REQUEST", status_code = 400L)
}
dat <- dat[dat[filter_var] == filter_level,]
}
dat <- dat[dat["biomarker"] == biomarker,]

Check warning on line 107 in R/api.R

View workflow job for this annotation

GitHub Actions / lint

file=R/api.R,line=107,col=44,[commas_linter] Commas should always have a space after.
dat$value <- log(dat$value)
if (length(trace) > 0) {
logger::log_info(paste("Disaggregating by trace variables", trace))
groups <- split(dat, eval(parse(text = paste("~", trace))))
if (length(disaggregate) > 0) {
logger::log_info(paste("Disaggregating by variables:", disaggregate))
groups <- split(dat, eval(parse(text = paste("~", disaggregate))))
nms <- names(groups)
return(lapply(seq_along(groups), function(i) {
model <- withWarnings(model_out(groups[[i]], xcol))
list(name = jsonlite::unbox(nms[[i]]),
model = model_out(groups[[i]]),
raw = data_out(groups[[i]]))
model = model$output,
raw = data_out(groups[[i]], xcol),
warnings = model$warnings)
}))
} else {
logger::log_info("Returning single trace")
model <- withWarnings(model_out(dat, xcol))
return(list(list(name = jsonlite::unbox("all"),
model = model_out(dat),
raw = data_out(dat))))
model = model$output,
raw = data_out(dat, xcol),
warnings = model$warnings)))
}
}
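As a point of reference, a minimal standalone sketch of the new "variable:level" filter handling introduced above; the query value and toy data frame here are made up:

    # Minimal sketch of the filter parsing; "sex:M" and dat are illustrative
    dat <- data.frame(biomarker = "ab", value = 1:4, sex = c("M", "F", "M", "F"))
    filter <- "sex:M"
    filter_def <- strsplit(filter, ":")
    filter_var <- filter_def[[1]][1]    # "sex"
    filter_level <- filter_def[[1]][2]  # "M"
    dat <- dat[dat[filter_var] == filter_level, ]  # keeps only rows where sex == "M"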

read_dataset <- function(name) {
path <- file.path("uploads", name)
if (!file.exists(path)) {
porcelain::porcelain_stop(paste("Did not find dataset with name ", name),
code = "BAD_REQUEST", status_code = 404L)
porcelain::porcelain_stop(paste("Did not find dataset with name:", name),
code = "DATASET_NOT_FOUND", status_code = 404L)
}
dat <- utils::read.csv(file.path(path, "data"))
dat$value <- as.numeric(dat$value)
xcol <- readLines(file.path(path, "xcol"))
list(data = dat, xcol = xcol)
}
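For reference, read_dataset expects each upload to live in a directory under uploads/ holding a CSV file named data plus an xcol file containing the x-axis column name. A sketch of that layout (the dataset name and values are illustrative):

    # Illustrative upload layout that read_dataset() reads back
    dir.create(file.path("uploads", "mydataset"), recursive = TRUE)
    utils::write.csv(data.frame(biomarker = "ab", value = 1:5, day = 1:5),
                     file.path("uploads", "mydataset", "data"), row.names = FALSE)
    writeLines("day", file.path("uploads", "mydataset", "xcol"))
    read_dataset("mydataset")$xcol  # "day"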

model_out <- function(dat) {
model_out <- function(dat, xcol) {
n <- nrow(dat)
if (n == 0) {
return(list(x = list(), y = list()))
}
if (n > 1000) {
m <- mgcv::gam(value ~ s(day, bs = "cs"), data = dat, method = "REML")
m <- mgcv::gam(value ~ s(eval(parse(text = xcol)), bs = "cs"),
data = dat, method = "REML")
} else {
m <- stats::loess(value ~ day, data = dat, span = 0.75)
m <- stats::loess(value ~ eval(parse(text = xcol)), data = dat, span = 0.75)
}
range <- range(dat$day, na.rm = TRUE)
range <- range(dat[, xcol], na.rm = TRUE)
xseq <- range[1]:range[2]
list(x = xseq, y = stats::predict(m, tibble::data_frame(day = xseq)))
xdf <- tibble::data_frame(xcol = xseq)
names(xdf) <- xcol
list(x = xseq, y = stats::predict(m, xdf))
}
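A self-contained sketch of the dynamic-column pattern model_out now relies on: the x column name arrives as a string, is injected into the formula via eval(parse(...)), and the prediction frame is renamed to match. The time column and values here are invented:

    xcol <- "time"  # hypothetical x column name read from the dataset
    dat <- data.frame(time = 1:10, value = log(1:10))
    m <- stats::loess(value ~ eval(parse(text = xcol)), data = dat, span = 0.75)
    xdf <- data.frame(x = 1:10)
    names(xdf) <- xcol      # prediction frame must carry the same column name
    stats::predict(m, xdf)  # fitted values at x = 1..10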

data_out <- function(dat) {
list(x = dat$day, y = dat$value)
data_out <- function(dat, xcol) {
list(x = dat[, xcol], y = dat$value)
}
5 changes: 3 additions & 2 deletions R/router.R
@@ -50,8 +50,9 @@ get_datasets <- function() {
get_trace <- function() {
porcelain::porcelain_endpoint$new(
"GET",
"/dataset/<name>/<biomarker>",
"/dataset/<name>/trace/<biomarker>",
target_get_trace,
porcelain::porcelain_input_query(facet = "string", trace = "string"),
porcelain::porcelain_input_query(disaggregate = "string",
filter = "string"),
returning = porcelain::porcelain_returning_json("DataSeries"))
}
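With the route renamed and the query parameters registered, a request exercising both might look like this (the dataset name is hypothetical):

    router <- build_routes()
    res <- router$request("GET", "/dataset/mydataset/trace/ab",
                          query = list(filter = "sex:M", disaggregate = "age"))
    jsonlite::fromJSON(res$body)$data  # one trace per age group, restricted to sex == "M"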
10 changes: 10 additions & 0 deletions R/utils.R
@@ -0,0 +1,10 @@
withWarnings <- function(expr) {
myWarnings <- NULL
wHandler <- function(w) {
myWarnings <<- c(myWarnings, list(w))
invokeRestart("muffleWarning")
}
val <- withCallingHandlers(expr, warning = wHandler)
list(output = val, warnings = lapply(myWarnings,
function(w) jsonlite::unbox(w$message)))
}
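A quick illustration of what withWarnings returns — the evaluated value plus any warning messages, already unboxed for JSON serialisation (the warning text is whatever base R happens to emit):

    res <- withWarnings(log(-1))  # produces NaN and a warning
    res$output                    # NaN
    res$warnings                  # e.g. a list with one unboxed "NaNs produced"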
10 changes: 8 additions & 2 deletions inst/schema/DataSeries.schema.json
@@ -42,11 +42,17 @@
}
}
},
"warnings": {
"type": "array",
"items": {
"type": "string"
}
},
"required": ["x", "y"],
"additionalProperties": false
}
},
"required": ["model", "raw"],
"additionalProperties": false
"required": ["model", "raw", "warnings"],
"additionalProperties": true
}
}
2 changes: 1 addition & 1 deletion inst/schema/DatasetMetadata.schema.json
@@ -5,7 +5,7 @@
"variables": {
"type": "array",
"items": {
"type": "object"
"$ref": "Variable.schema.json"
}
},
"biomarkers": {
17 changes: 17 additions & 0 deletions inst/schema/Variable.schema.json
@@ -0,0 +1,17 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"name" : {
"type": "string"
},
"levels": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [ "name", "levels" ]
}
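For example, each entry of the variables array returned by GET /dataset/<name> should now serialise to a document of this shape (values are illustrative):

    jsonlite::toJSON(list(name = jsonlite::unbox("sex"), levels = c("M", "F")))
    # {"name":"sex","levels":["M","F"]}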
123 changes: 123 additions & 0 deletions tests/testthat/test-read.R
@@ -0,0 +1,123 @@
test_that("GET /dataset<name> returns 404 if dataset not found", {
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset")
expect_equal(res$status, 404)
validate_failure_schema(res$body)
body <- jsonlite::fromJSON(res$body)
expect_equal(body$errors[1, "detail"],
"Did not find dataset with name: testdataset")
})

test_that("GET /trace/<biomarker> returns 404 if dataset not found", {
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab")
expect_equal(res$status, 404)
validate_failure_schema(res$body)
body <- jsonlite::fromJSON(res$body)
expect_equal(body$errors[1, "detail"],
"Did not find dataset with name: testdataset")
})

test_that("can get trace for uploaded dataset with xcol", {
dat <- data.frame(biomarker = c("ab", "ba"),
value = 1,
time = 1:10,
age = "0-5",
sex = c("M", "F"))
request <- local_POST_dataset_request(dat,
"testdata",
xcol = "time")
router <- build_routes()
res <- router$call(request)
expect_equal(res$status, 200)
res <- router$request("GET", "/dataset/testdata/trace/ab")
expect_equal(res$status, 200)
expected_warnings <- list("span too small. fewer data values than degrees of freedom.",
"pseudoinverse used at 0.96",
"neighborhood radius 4.04",
"reciprocal condition number 0",
"There are other near singularities as well. 16.322")
expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"),
model = list(
x = 1:9,
y = rep(1, 9)
),
raw = list(
x = c(1, 3, 5, 7, 9),
y = c(1, 1, 1, 1, 1)
),
warnings = lapply(expected_warnings,
jsonlite::unbox))
))
body <- jsonlite::fromJSON(res$body)
expect_equal(body$data, jsonlite::fromJSON(expected))
})

test_that("can get disgagregated traces", {
dat <- data.frame(biomarker = "ab",
value = 1,
day = 1:10,
age = "0-5",
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(disaggregate = "sex"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 2)
expect_equal(data$name, c("F", "M"))
expect_equal(data$raw[1, "x"], list(c(2, 4, 6, 8, 10)))
expect_equal(data$raw[1, "y"], list(c(1, 1, 1, 1, 1)))
expect_equal(data$raw[2, "x"], list(c(1, 3, 5, 7, 9)))
expect_equal(data$raw[2, "y"], list(c(1, 1, 1, 1, 1)))

expect_equal(data$model[1, "x"], list(2:10))
expect_equal(data$model[1, "y"], list(rep(1, 9)))
expect_equal(data$model[2, "x"], list(1:9))
expect_equal(data$model[2, "y"], list(rep(1, 9)))
})

test_that("can get filtered traces", {
dat <- data.frame(biomarker = "ab",
value = rep(c(0, 1, 2, 3), 5),
day = 1:20,
age = rep(c("0-5", "0-5", "5+", "5+"), 5),
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(filter = "sex:M"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 1)
expect_equal(data$name, "all")
expect_equal(data$raw[1, "x"], list(c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)))
expect_equal(data$raw[1, "y"], list(c(0, 2, 0, 2, 0, 2, 0, 2, 0, 2)))
})

test_that("can get disgagregated and filtered traces", {
dat <- data.frame(biomarker = "ab",
value = rep(c(0, 1, 2, 3), 5),
day = 1:20,
age = rep(c("0-5", "0-5", "5+", "5+"), 5),
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
router <- build_routes()
res <- router$request("GET", "/dataset/testdataset/trace/ab",
query = list(disaggregate = "age", filter = "sex:M"))
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
data <- body$data
expect_equal(nrow(data), 2)
expect_equal(data$name, c("0-5", "5+"))
expect_equal(data$raw[1, "x"], list(c(1, 5, 9, 13, 17)))
expect_equal(data$raw[1, "y"], list(c(0, 0, 0, 0, 0)))
expect_equal(data$raw[2, "x"], list(c(3, 7, 11, 15, 19)))
expect_equal(data$raw[2, "y"], list(c(2, 2, 2, 2, 2)))
})
37 changes: 35 additions & 2 deletions tests/testthat/test-router.R
@@ -57,7 +57,7 @@ test_that("GET /datasets", {
expect_equal(body$data, c("anotherdataset", "testdataset"))
})

test_that("GET /dataset", {
test_that("GET /dataset<name>", {
local_add_dataset(data.frame(biomarker = c("ab", "ba"),
value = 1,
day = 1:10,
@@ -68,9 +68,42 @@ test_that("GET /dataset", {
res <- router$request("GET", "/dataset/testdataset")
expect_equal(res$status, 200)
body <- jsonlite::fromJSON(res$body)
str(body$data$variables)
expect_equal(body$data$variables$name, c("age", "sex"))
expect_equal(body$data$variables$levels, list(c("0-5"), c("M", "F")))
expect_equal(body$data$biomarkers, c("ab", "ba"))
expect_equal(body$data$xcol, "day")
})

test_that("GET /dataset/<name>/trace/<biomarker>", {
dat <- data.frame(biomarker = c("ab", "ba"),
value = c(1, 1.5, 2, 3, 3.2, 4, 5, 6, 6.1, 7),
day = 1:10,
age = "0-5",
sex = c("M", "F"))
local_add_dataset(dat,
"testdataset")
set.seed(1)
suppressWarnings({
m <- stats::loess(value ~ day, data = dat[dat["biomarker"] == "ab",], span = 0.75)
model <- list(x = 1:9, y = stats::predict(m, tibble::data_frame(day = 1:9)))
})
router <- build_routes()
set.seed(1)
res <- router$request("GET", "/dataset/testdataset/trace/ab")
expect_equal(res$status, 200)
expected_warnings <- list("span too small. fewer data values than degrees of freedom.",
"pseudoinverse used at 0.96",
"neighborhood radius 4.04",
"reciprocal condition number 0",
"There are other near singularities as well. 16.322")
expected <- jsonlite::toJSON(list(list(name = jsonlite::unbox("all"),
model = model,
raw = list(
x = c(1, 3, 5, 7, 9),
y = c(1, 2, 3.2, 5, 6.1)),
warnings = lapply(expected_warnings,
jsonlite::unbox))
))
body <- jsonlite::fromJSON(res$body)
expect_equal(body$data, jsonlite::fromJSON(expected))
})
