Skip to content

Commit

Permalink
Merge pull request #127 from voltrondata-labs/edward/publish-results
Browse files Browse the repository at this point in the history
Use benchconnect to publish results to a Conbench server
  • Loading branch information
alistaire47 authored Jan 23, 2023
2 parents bdb7e68 + 5a43548 commit cd0592f
Show file tree
Hide file tree
Showing 18 changed files with 748 additions and 33 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Imports:
distro,
glue,
jsonlite,
processx,
progress,
purrr,
R6,
Expand Down Expand Up @@ -72,8 +73,10 @@ Collate:
'known-sources.R'
'ensure-source.R'
'ensure-tpch-source.R'
'external-dependencies.R'
'measure.R'
'params.R'
'publish.R'
'util.R'
'result.R'
'run.R'
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ export(get_read_function)
export(get_source_attr)
export(get_sql_query_func)
export(get_write_function)
export(install_benchconnect)
export(install_datalogistik)
export(install_pipx)
export(known_compressions)
export(known_formats)
export(known_sources)
Expand Down
110 changes: 110 additions & 0 deletions R/external-dependencies.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Check whether an external command-line tool can be found on $PATH.
#
# Runs `which <cli>` via processx and inspects the exit status. When the tool
# is not found, a warning of class "notInstalledWarning" is signalled with
# installation hints, so callers can selectively suppress it with
# `suppressWarnings(..., classes = "notInstalledWarning")`.
#
# @param cli A string naming the CLI to look for, e.g. "pipx"
#
# @returns `TRUE` if `which` exits with status 0, otherwise `FALSE`
external_cli_available <- function(cli) {
  res <- processx::run("which", cli, error_on_status = FALSE)
  found <- res$status == 0L

  if (!found) {
    # pipx is installed directly; every other tool is installed through pipx,
    # so its hint mentions both installers.
    if (cli == "pipx") {
      install_hint <- glue::glue('It can be installed with `install_pipx()`\n\n')
      already_installed <- 'If already installed, '
    } else {
      install_hint <- glue::glue('It can be installed with `install_pipx(); install_{cli}()`\n\n')
      already_installed <- 'If already installed with pipx, '
    }

    # The pieces are joined with paste0(), so every separating space has to be
    # part of a literal (note the trailing space after "running").
    msg <- paste0(
      cli, ' not installed or on $PATH.\n\n',
      install_hint,
      already_installed,
      'ensure it is on $PATH, e.g. by running ',
      '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron'
    )

    warning(warningCondition(msg, class = "notInstalledWarning"))
  }

  found
}

# Is the `pipx` CLI on $PATH? Delegates to `external_cli_available()`, which
# warns when the tool cannot be found.
pipx_available <- function() {
  external_cli_available("pipx")
}

# Is the `benchconnect` CLI on $PATH? Delegates to `external_cli_available()`,
# which warns when the tool cannot be found.
benchconnect_available <- function() {
  external_cli_available("benchconnect")
}

# Is the `datalogistik` CLI on $PATH? Delegates to `external_cli_available()`,
# which warns when the tool cannot be found.
datalogistik_available <- function() {
  external_cli_available("datalogistik")
}


#' Install pipx
#'
#' Installs [pipx](https://pypa.github.io/pipx/), a pip variant that puts each
#' Python package in its own isolated environment so that it stays available
#' no matter which version of Python is currently on `$PATH`. This is
#' particularly handy for packages that are meant to be used through a CLI.
#'
#' The installation runs `pip install pipx` followed by `pipx ensurepath` in a
#' single `sh -c` invocation.
#'
#' @return The value returned by [processx::run()] for the install command.
#'
#' @export
install_pipx <- function() {
  shell_command <- "pip install pipx && pipx ensurepath"
  processx::run("sh", c("-c", shell_command), echo_cmd = TRUE)
}


#' Install benchconnect
#'
#' Installs [benchconnect](https://github.com/conbench/conbench/tree/main/benchconnect),
#' a utility for sending benchmark results to a Conbench server, using pipx.
#'
#' If benchconnect is already on `$PATH`, interactive sessions are asked
#' whether to update it; non-interactive sessions always reinstall.
#'
#' @return The value returned by [processx::run()] for the pipx call, or
#' `NULL` (invisibly) when an update is declined.
#'
#' @export
install_benchconnect <- function() {
  stopifnot(pipx_available())

  url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect"
  install_args <- c("install", "--include-deps")

  # The "not installed" warning is expected here, so silence only that class.
  already_installed <- suppressWarnings(
    benchconnect_available(),
    classes = "notInstalledWarning"
  )

  # Fresh install: nothing to ask.
  if (!already_installed) {
    return(processx::run("pipx", c(install_args, url), echo_cmd = TRUE))
  }

  # Already present: prompt when possible, otherwise assume "yes".
  ans <- if (interactive()) {
    readline("benchconnect already installed. Update? [Y/n]: ")
  } else {
    "y"
  }

  if (!(tolower(ans) %in% c("y", ""))) {
    return(invisible())
  }

  processx::run("pipx", c(install_args, "--force", url), echo_cmd = TRUE)
}


#' Install datalogistik
#'
#' Install [datalogistik](https://github.com/conbench/datalogistik), a utility
#' for generating, downloading, and converting datasets for benchmarking.
#'
#' The git ref to install is read from the `DATALOGISTIK_BRANCH` environment
#' variable and defaults to `main`.
#'
#' Only for interactive use.
#'
#' @return The value returned by [processx::run()] for the pipx call, or
#' `NULL` (invisibly) when an update is declined.
#'
#' @export
install_datalogistik <- function() {
  # TODO: install pipx?
  stopifnot(pipx_available())

  ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main")
  url <- glue::glue("git+https://github.com/conbench/datalogistik.git@{ref}")

  # we need the extra args to depend on the development version of arrow
  pipx_call <- c("install", "--pip-args=--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary")

  # A missing datalogistik is the normal case when installing, so silence only
  # the "not installed" warning raised by the availability check.
  already_installed <- suppressWarnings(
    datalogistik_available(),
    classes = "notInstalledWarning"
  )

  if (already_installed) {
    # default to yes; in non-interactive sessions readline() returns ""
    # immediately, which is also treated as yes
    ans <- readline("datalogistik already installed. Update? [Y/n]: ")
    if (tolower(ans) %in% c("y", "")) {
      return(processx::run("pipx", c(pipx_call, "--force", url), echo_cmd = TRUE))
    } else {
      return(invisible())
    }
  }

  processx::run("pipx", c(pipx_call, url), echo_cmd = TRUE)
}
38 changes: 38 additions & 0 deletions R/publish.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Invoke the benchconnect CLI
#
# Stops if benchconnect cannot be found on $PATH. The command and its output
# are echoed while it runs, and the captured stderr is re-emitted as a message
# once it completes.
#
# @param args A character vector of arguments to pass to the benchconnect binary
#
# @returns A string of stdout returned by the call
call_benchconnect <- function(args) {
  stopifnot(benchconnect_available())

  proc <- processx::run("benchconnect", args, echo_cmd = TRUE, echo = TRUE)

  message(proc$stderr)
  proc$stdout
}


# Pass a run's JSON through `benchconnect augment run` and rebuild a
# `BenchmarkRun` from the JSON that benchconnect prints to stdout.
augment_run <- function(run) {
  cli_args <- c("augment", "run", "--json", run$json)
  augmented_json <- call_benchconnect(cli_args)
  BenchmarkRun$from_json(augmented_json)
}

# Pass a result's JSON through `benchconnect augment result` and rebuild a
# `BenchmarkResult` from the JSON that benchconnect prints to stdout.
augment_result <- function(result) {
  cli_args <- c("augment", "result", "--json", result$json)
  augmented_json <- call_benchconnect(cli_args)
  BenchmarkResult$from_json(augmented_json)
}


# Call `benchconnect start run` with the run's JSON; returns the call's stdout.
start_run <- function(run) {
  cli_args <- c("start", "run", "--json", run$json)
  call_benchconnect(cli_args)
}

# Call `benchconnect submit result` with the result's JSON; returns the call's
# stdout.
submit_result <- function(result) {
  cli_args <- c("submit", "result", "--json", result$json)
  call_benchconnect(cli_args)
}

# Call `benchconnect finish run` with an empty JSON object; returns the call's
# stdout.
finish_run <- function(run) {
  # Ed note: `run` is currently unused. It is kept in the signature for
  # symmetry with `start_run()` and because there are things we may pass
  # through it later. As it is never evaluated, callers may omit it for now.
  cli_args <- c("finish", "run", "--json", "{}")
  call_benchconnect(cli_args)
}
54 changes: 53 additions & 1 deletion R/result.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ BenchmarkResult <- R6Point1Class(
machine_info = NULL,
cluster_info = NULL,
context = NULL,
github = NULL) {
github = github_info()) {
self$run_name <- run_name
self$run_id <- run_id
self$batch_id <- batch_id
Expand Down Expand Up @@ -329,3 +329,55 @@ as.data.frame.BenchmarkResults <- function(x, row.names = NULL, optional = FALSE
# S3 `as.data.frame()` method for a single `BenchmarkResult`: forwards all
# arguments to the object's own `to_dataframe()` method.
as.data.frame.BenchmarkResult <- function(x,
                                          row.names = NULL,
                                          optional = FALSE,
                                          packages = "arrow",
                                          ...) {
  x$to_dataframe(
    row.names = row.names,
    optional = optional,
    packages = packages,
    ...
  )
}


# Metadata container for a benchmark run
#
# Inherits from `Serializable`, so instances can be written to JSON and
# re-created from it.
#
# Every attribute is exposed as an active binding that routes through
# `private$get_or_set_serializable()`, so that validation can run whenever an
# attribute is set, both at construction time and afterwards.
BenchmarkRun <- R6Point1Class(
  classname = "BenchmarkRun",
  inherit = Serializable,

  public = list(
    # Populate every attribute through its active binding. `github` defaults
    # to the value of `github_info()`; everything else defaults to `NULL`.
    initialize = function(name = NULL,
                          id = NULL,
                          reason = NULL,
                          info = NULL,
                          machine_info = NULL,
                          cluster_info = NULL,
                          github = github_info(),
                          finished_timestamp = NULL,
                          error_type = NULL,
                          error_info = NULL) {
      self$name <- name
      self$id <- id
      self$reason <- reason
      self$info <- info
      self$machine_info <- machine_info
      self$cluster_info <- cluster_info
      self$github <- github
      self$finished_timestamp <- finished_timestamp
      self$error_type <- error_type
      self$error_info <- error_info
    }
  ),

  active = list(
    name = function(name) {
      private$get_or_set_serializable(variable = "name", value = name)
    },
    id = function(id) {
      private$get_or_set_serializable(variable = "id", value = id)
    },
    reason = function(reason) {
      private$get_or_set_serializable(variable = "reason", value = reason)
    },
    info = function(info) {
      private$get_or_set_serializable(variable = "info", value = info)
    },
    machine_info = function(machine_info) {
      private$get_or_set_serializable(variable = "machine_info", value = machine_info)
    },
    cluster_info = function(cluster_info) {
      private$get_or_set_serializable(variable = "cluster_info", value = cluster_info)
    },
    github = function(github) {
      private$get_or_set_serializable(variable = "github", value = github)
    },
    finished_timestamp = function(finished_timestamp) {
      private$get_or_set_serializable(variable = "finished_timestamp", value = finished_timestamp)
    },
    error_type = function(error_type) {
      private$get_or_set_serializable(variable = "error_type", value = error_type)
    },
    error_info = function(error_info) {
      private$get_or_set_serializable(variable = "error_info", value = error_info)
    }
  )
)
Loading

0 comments on commit cd0592f

Please sign in to comment.