From 864fc7933d52e24e455a55a2a601048cdc4be261 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 4 Jan 2023 23:24:52 +0000 Subject: [PATCH 1/8] Allow results to be published to a Conbench server via benchconnect --- DESCRIPTION | 3 + NAMESPACE | 4 ++ R/external-dependencies.R | 108 ++++++++++++++++++++++++++++++++++++ R/publish.R | 21 +++++++ R/result.R | 49 ++++++++++++++++ man/augment_result.Rd | 14 +++++ man/install_benchconnect.Rd | 12 ++++ man/install_datalogistik.Rd | 15 +++++ man/install_pipx.Rd | 17 ++++++ 9 files changed, 243 insertions(+) create mode 100644 R/external-dependencies.R create mode 100644 R/publish.R create mode 100644 man/augment_result.Rd create mode 100644 man/install_benchconnect.Rd create mode 100644 man/install_datalogistik.Rd create mode 100644 man/install_pipx.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 57f3816..0fee302 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: distro, glue, jsonlite, + processx, progress, purrr, R6, @@ -72,8 +73,10 @@ Collate: 'known-sources.R' 'ensure-source.R' 'ensure-tpch-source.R' + 'external-dependencies.R' 'measure.R' 'params.R' + 'publish.R' 'util.R' 'result.R' 'run.R' diff --git a/NAMESPACE b/NAMESPACE index 0eb5832..9f61396 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(BenchmarkDataFrame) export(all_sources) export(array_altrep_materialization) export(array_to_vector) +export(augment_result) export(confirm_mem_alloc) export(dataset_taxi_2013) export(dataset_taxi_parquet) @@ -39,6 +40,9 @@ export(get_read_function) export(get_source_attr) export(get_sql_query_func) export(get_write_function) +export(install_benchconnect) +export(install_datalogistik) +export(install_pipx) export(known_compressions) export(known_formats) export(known_sources) diff --git a/R/external-dependencies.R b/R/external-dependencies.R new file mode 100644 index 0000000..e8317fe --- /dev/null +++ b/R/external-dependencies.R @@ -0,0 +1,108 @@ +pipx_available <- function() { + exit_code <- system("which pipx", ignore.stdout = TRUE, ignore.stderr = TRUE) + exit_code == 0L +} + +external_cli_available <- function(cli) { + exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE) + + if (exit_code != 0L) { + warning( + warningCondition( + paste0( + paste(cli, 'not installed or on $PATH.\n\n'), + glue::glue('It can be installed interactively with `install_pipx(); install_{cli}()`\n\n'), + 'If already installed with pipx, ensure it is on $PATH, e.g. by running', + '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' + ), + class = "notInstalledWarning" + ) + ) + } + + exit_code == 0L +} + +benchconnect_available <- function() { + external_cli_available(cli = "benchconnect") +} + +datalogistik_available <- function() { + external_cli_available(cli = "datalogistik") +} + + +#' Install pipx +#' +#' Install [pipx](https://pypa.github.io/pipx/), a version of pip that installs +#' Python packages in isolated environments where they will always be available +#' regardless of which version of Python is presently on `$PATH`. Especially +#' useful for installing packages designed to be used via CLIs. +#' +#' Only for interactive use. +#' +#' @export +install_pipx <- function() { + stopifnot(interactive()) + system('pip install pipx && pipx ensurepath', intern = TRUE) +} + + +#' Install benchconnect +#' +#' Install [benchconnect](https://github.com/conbench/conbench/tree/main/benchconnect), +#' a utility for sending benchmark results to a Conbench server +#' +#' @export +install_benchconnect <- function() { + stopifnot(pipx_available()) + + url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect" + pipx_call <- "pipx install --include-deps" + + if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) { + if (interactive()) { + ans <- readline("benchconnect already installed. Update? [Y/n]: ") + } else { + ans <- "y" + } + if (tolower(ans) %in% c("y", "")) { + system(paste(pipx_call, "--force", url), intern = TRUE) + } else { + invisible() + } + } else { + system(paste(pipx_call, url), intern = TRUE) + } +} + + +#' Install datalogistik +#' +#' Install [datalogistik](https://github.com/conbench/datalogistik), a utility +#' for generating, downloading, and converting datasets for benchmarking. +#' +#' Only for interactive use. +#' +#' @export +install_datalogistik <- function() { + # TODO: install pipx? + stopifnot(pipx_available()) + + ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main") + url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}") + + pipx_call <- "pipx install --pip-args '--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'" + if (datalogistik_available()) { + # default to yes (and also this will make it work in non-interactive sessions) + ans <- readline("datalogistik already installed. Update? [Y/n]: ") + if (tolower(ans) %in% c("y", "")) { + # we need the extra args to depend on the development version of arrow + return(system(glue("{pipx_call} --force {url}"), intern = TRUE)) + } else { + return(invisible()) + } + } + + system(glue("{pipx_call} {url}"), intern = TRUE) +} diff --git a/R/publish.R b/R/publish.R new file mode 100644 index 0000000..61b9bc5 --- /dev/null +++ b/R/publish.R @@ -0,0 +1,21 @@ +#' Augment a benchmark result with collected defaults +#' +#' @param result An instance of [BenchmarkResult] +#' +#' @export +augment_result <- function(result) { + stopifnot( + benchconnect_available(), + inherits(result, "BenchmarkResult") + ) + + tmp_in <- tempfile(fileext = '.json') + result$write_json(tmp_in) + + command_res <- processx::run( + 'benchconnect', + args = c("augment", "result", "--path", tmp_in) + ) + file.remove(tmp_in) + BenchmarkResult$from_json(command_res$stdout) +} \ No newline at end of file diff --git a/R/result.R b/R/result.R index 31f51d5..6e6153a 100644 --- a/R/result.R +++ b/R/result.R @@ -329,3 +329,52 @@ as.data.frame.BenchmarkResults <- function(x, row.names = NULL, optional = FALSE as.data.frame.BenchmarkResult <- function(x, row.names = NULL, optional = FALSE, packages = "arrow", ...) { x$to_dataframe(row.names = row.names, optional = optional, packages = packages, ...) } + + +# A class for holding metadata on a benchmark run +# +# Because this class inherits from `Serializable`, it can be written to and +# instantiated from JSON forms. +# +# All attributes are active bindings so that validation can be run when they are +# set, whether during or after instantiation. +BenchmarkRun <- R6Point1Class( + classname = "BenchmarkRun", + inherit = Serializable, + + public = list( + initialize = function( + name = NULL, + id = NULL, + reason = NULL, + machine_info = NULL, + cluster_info = NULL, + github = NULL, + finished_timestamp = NULL, + error_type = NULL, + error_info = NULL + ) { + self$name <- name + self$id <- id + self$reason <- reason + self$machine_info <- machine_info + self$cluster_info <- cluster_info + self$github <- github + self$finished_timestamp <- finished_timestamp + self$error_type <- error_type + self$error_info <- error_info + } + ), + + active = list( + name = function(name) private$get_or_set_serializable(variable = "name", value = name), + id = function(id) private$get_or_set_serializable(variable = "id", value = id), + reason = function(reason) private$get_or_set_serializable(variable = "reason", value = reason), + machine_info = function(machine_info) private$get_or_set_serializable(variable = "machine_info", value = machine_info), + cluster_info = function(cluster_info) private$get_or_set_serializable(variable = "cluster_info", value = cluster_info), + github = function(github) private$get_or_set_serializable(variable = "github", value = github), + finished_timestamp = function(finished_timestamp) private$get_or_set_serializable(variable = "finished_timestamp", value = finished_timestamp), + error_type = function(error_type) private$get_or_set_serializable(variable = "error_type", value = error_type), + error_info = function(error_info) private$get_or_set_serializable(variable = "error_info", value = error_info) + ) +) diff --git a/man/augment_result.Rd b/man/augment_result.Rd new file mode 100644 index 0000000..f9bc214 --- /dev/null +++ b/man/augment_result.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publish.R +\name{augment_result} +\alias{augment_result} +\title{Augment a benchmark result with collected defaults} +\usage{ +augment_result(result) +} +\arguments{ +\item{result}{An instance of \link{BenchmarkResult}} +} +\description{ +Augment a benchmark result with collected defaults +} diff --git a/man/install_benchconnect.Rd b/man/install_benchconnect.Rd new file mode 100644 index 0000000..e87fe96 --- /dev/null +++ b/man/install_benchconnect.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/external-dependencies.R +\name{install_benchconnect} +\alias{install_benchconnect} +\title{Install benchconnect} +\usage{ +install_benchconnect() +} +\description{ +Install \href{https://github.com/conbench/conbench/tree/main/benchconnect}{benchconnect}, +a utility for sending benchmark results to a Conbench server +} diff --git a/man/install_datalogistik.Rd b/man/install_datalogistik.Rd new file mode 100644 index 0000000..64e2442 --- /dev/null +++ b/man/install_datalogistik.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/external-dependencies.R +\name{install_datalogistik} +\alias{install_datalogistik} +\title{Install datalogistik} +\usage{ +install_datalogistik() +} +\description{ +Install \href{https://github.com/conbench/datalogistik}{datalogistik}, a utility +for generating, downloading, and converting datasets for benchmarking. +} +\details{ +Only for interactive use. +} diff --git a/man/install_pipx.Rd b/man/install_pipx.Rd new file mode 100644 index 0000000..4764b20 --- /dev/null +++ b/man/install_pipx.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/external-dependencies.R +\name{install_pipx} +\alias{install_pipx} +\title{Install pipx} +\usage{ +install_pipx() +} +\description{ +Install \href{https://pypa.github.io/pipx/}{pipx}, a version of pip that installs +Python packages in isolated environments where they will always be available +regardless of which version of Python is presently on \verb{$PATH}. Especially +useful for installing packages designed to be used via CLIs. +} +\details{ +Only for interactive use. +} From d23a8c141797606d65b6f347ac6be7f4aa7c6d94 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Fri, 13 Jan 2023 22:28:59 +0000 Subject: [PATCH 2/8] integrate benchconnect publishing --- NAMESPACE | 1 - R/external-dependencies.R | 39 +++--- R/publish.R | 53 +++++--- R/result.R | 7 +- R/run.R | 113 +++++++++++++--- man/augment_result.Rd | 14 -- man/install_pipx.Rd | 3 - man/run.Rd | 35 ++++- tests/testthat/setup.R | 11 ++ tests/testthat/test-external-dependencies.R | 11 ++ tests/testthat/test-publish.R | 135 ++++++++++++++++++++ tests/testthat/test-result.R | 40 ++++-- tests/testthat/test-run.R | 127 +++++++++++++++++- 13 files changed, 506 insertions(+), 83 deletions(-) delete mode 100644 man/augment_result.Rd create mode 100644 tests/testthat/setup.R create mode 100644 tests/testthat/test-external-dependencies.R create mode 100644 tests/testthat/test-publish.R diff --git a/NAMESPACE b/NAMESPACE index 9f61396..dd29954 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,7 +17,6 @@ export(BenchmarkDataFrame) export(all_sources) export(array_altrep_materialization) export(array_to_vector) -export(augment_result) export(confirm_mem_alloc) export(dataset_taxi_2013) export(dataset_taxi_parquet) diff --git a/R/external-dependencies.R b/R/external-dependencies.R index e8317fe..5b055ea 100644 --- a/R/external-dependencies.R +++ b/R/external-dependencies.R @@ -1,28 +1,34 @@ -pipx_available <- function() { - exit_code <- system("which pipx", ignore.stdout = TRUE, ignore.stderr = TRUE) - exit_code == 0L -} - external_cli_available <- function(cli) { exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE) if (exit_code != 0L) { - warning( - warningCondition( - paste0( - paste(cli, 'not installed or on $PATH.\n\n'), - glue::glue('It can be installed interactively with `install_pipx(); install_{cli}()`\n\n'), - 'If already installed with pipx, ensure it is on $PATH, e.g. by running', - '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' - ), - class = "notInstalledWarning" + msg <- paste(cli, 'not installed or on $PATH.\n\n') + if (cli == "pipx") { + msg <- paste0( + msg, + glue::glue('It can be installed with `install_pipx()`\n\n'), + 'If already installed, ensure it is on $PATH, e.g. by running', + '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' ) - ) + } else { + msg <- paste0( + msg, + glue::glue('It can be installed with `install_pipx(); install_{cli}()`\n\n'), + 'If already installed with pipx, ensure it is on $PATH, e.g. by running', + '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' + ) + } + + warning(warningCondition(msg, class = "notInstalledWarning")) } exit_code == 0L } +pipx_available <- function() { + external_cli_available(cli = "pipx") +} + benchconnect_available <- function() { external_cli_available(cli = "benchconnect") } @@ -39,11 +45,8 @@ datalogistik_available <- function() { #' regardless of which version of Python is presently on `$PATH`. Especially #' useful for installing packages designed to be used via CLIs. #' -#' Only for interactive use. -#' #' @export install_pipx <- function() { - stopifnot(interactive()) system('pip install pipx && pipx ensurepath', intern = TRUE) } diff --git a/R/publish.R b/R/publish.R index 61b9bc5..08396f5 100644 --- a/R/publish.R +++ b/R/publish.R @@ -1,21 +1,38 @@ -#' Augment a benchmark result with collected defaults -#' -#' @param result An instance of [BenchmarkResult] -#' -#' @export +# Call benchconnect +# +# @param args A character vector of arguments to pass to the benchconnect binary +# +# @returns A string of stdout returned by the call +call_benchconnect <- function(args) { + stopifnot(benchconnect_available()) + res <- processx::run(command = "benchconnect", args = args, echo_cmd = TRUE, echo = TRUE) + message(res$stderr) + res$stdout +} + + +augment_run <- function(run) { + stdout <- call_benchconnect(c("augment", "run", "--json", run$json)) + BenchmarkRun$from_json(stdout) +} + augment_result <- function(result) { - stopifnot( - benchconnect_available(), - inherits(result, "BenchmarkResult") - ) + stdout <- call_benchconnect(c("augment", "result", "--json", result$json)) + BenchmarkResult$from_json(stdout) +} + + +start_run <- function(run) { + call_benchconnect(c("start", "run", "--json", run$json)) +} - tmp_in <- tempfile(fileext = '.json') - result$write_json(tmp_in) +submit_result <- function(result) { + call_benchconnect(c("submit", "result", "--json", result$json)) +} - command_res <- processx::run( - 'benchconnect', - args = c("augment", "result", "--path", tmp_in) - ) - file.remove(tmp_in) - BenchmarkResult$from_json(command_res$stdout) -} \ No newline at end of file +finish_run <- function(run) { + # Ed note: `run` is not used right now, but there are some things we can pass + # here in the future, so I put it here for parallelism for now. Since it is + # not evaluated, it doesn't need to be specified for now. + call_benchconnect(c("finish", "run", "--json", "{}")) +} diff --git a/R/result.R b/R/result.R index 6e6153a..a767e7d 100644 --- a/R/result.R +++ b/R/result.R @@ -186,7 +186,7 @@ BenchmarkResult <- R6Point1Class( machine_info = NULL, cluster_info = NULL, context = NULL, - github = NULL) { + github = github_info()) { self$run_name <- run_name self$run_id <- run_id self$batch_id <- batch_id @@ -347,9 +347,10 @@ BenchmarkRun <- R6Point1Class( name = NULL, id = NULL, reason = NULL, + info = NULL, machine_info = NULL, cluster_info = NULL, - github = NULL, + github = github_info(), finished_timestamp = NULL, error_type = NULL, error_info = NULL @@ -357,6 +358,7 @@ BenchmarkRun <- R6Point1Class( self$name <- name self$id <- id self$reason <- reason + self$info <- info self$machine_info <- machine_info self$cluster_info <- cluster_info self$github <- github @@ -370,6 +372,7 @@ BenchmarkRun <- R6Point1Class( name = function(name) private$get_or_set_serializable(variable = "name", value = name), id = function(id) private$get_or_set_serializable(variable = "id", value = id), reason = function(reason) private$get_or_set_serializable(variable = "reason", value = reason), + info = function(info) private$get_or_set_serializable(variable = "info", value = info), machine_info = function(machine_info) private$get_or_set_serializable(variable = "machine_info", value = machine_info), cluster_info = function(cluster_info) private$get_or_set_serializable(variable = "cluster_info", value = cluster_info), github = function(github) private$get_or_set_serializable(variable = "github", value = github), diff --git a/R/run.R b/R/run.R index ed2ed33..a05f0b8 100644 --- a/R/run.R +++ b/R/run.R @@ -20,16 +20,68 @@ run.default <- function(x, ...) { } +#' @param publish Flag for whether to publish results to a Conbench server. See +#' "Environment Variables" section for how to specify server details. Requires +#' the benchconnect CLI is installed; see [install_benchconnect()]. +#' @param run_name Name for the run. If not specified, will use `{run_reason}: {commit hash}` +#' @param run_reason Required. Low-cardinality reason for the run, e.g. "commit" or "test" +#' +#' @section Environment Variables: +#' +#' - `CONBENCH_URL`: Required. The URL of the Conbench server with no trailing +#' slash. For arrow, should be `https://conbench.ursa.dev`. +#' - `CONBENCH_EMAIL`: The email to use for Conbench login. Only required if the +#' server is private. +#' - `CONBENCH_PASSWORD`: The password to use for Conbench login. Only required +#' if the server is private. +#' - `CONBENCH_PROJECT_REPOSITORY`: The repository name (in the format +#' `org/repo`) or the URL (in the format `https://github.com/org/repo`). +#' Defaults to `"https://github.com/apache/arrow"` if unset. +#' - `CONBENCH_PROJECT_PR_NUMBER`: Recommended. The number of the GitHub pull +#' request that is running this benchmark, or `NULL` if it's a run on the +#' default branch +#' - `CONBENCH_PROJECT_COMMIT`: The 40-character commit SHA of the repo being +#' benchmarked. If missing, will attempt to obtain it from +#' `arrow::arrow_info()$build_info$git_id`, though this may not be populated +#' depending on how Arrow was built. +#' - `CONBENCH_MACHINE_INFO_NAME`: Will override detected machine host name sent +#' in `machine_info.name` when posting runs and results. Needed for cases where +#' the actual host name can vary, like CI and cloud runners. +#' #' @rdname run #' @export -run.BenchmarkDataFrame <- function(x, ...) { +run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run_reason = NULL) { # if already run (so no elements of `parameters` are NULL), is no-op x <- get_default_parameters(x, ...) + if (publish) { + github <- github_info() + if (is.null(run_name)) { + run_name <- paste(run_reason, github$commit, sep = ": ") + } + stopifnot( + "Results cannot be published without a `run_reason`!" = !is.null(run_reason) + ) + bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github) + start_run(run = bm_run) + } + x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { - run_benchmark(bm = bm, params = params, ...) + withr::with_envvar( + c(CONBENCH_RUN_NAME = run_name, CONBENCH_RUN_REASON = run_reason), + { ress <- run_benchmark(bm = bm, params = params, ...) } + ) + if (publish) { + ress$results <- lapply(ress$results, augment_result) + lapply(ress$results, submit_result) + } + ress }) + if (publish) { + finish_run(run = bm_run) + } + x } @@ -185,6 +237,13 @@ run_one <- function(bm, return(script) } + metadata <- assemble_metadata( + name = bm$name, + params = bm$tags_fun(params), + cpu_count = global_params$cpu_count, + n_iter = n_iter + ) + # construct the `run_script()` arguments out of all of the params as well as a # few other arguments. We need all of the parameters here so that the # cached result file names are right. Then run the script. @@ -193,6 +252,7 @@ run_one <- function(bm, list( lines = script, name = bm$name, + metadata = metadata, progress_bar = progress_bar, read_only = read_only ), @@ -256,11 +316,14 @@ run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, glob n_iter = n_iter ) + env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON"))) + env_vars <- lapply(env_vars, function(x) if (nzchar(x)) x else NULL) + out <- BenchmarkResult$new( - run_name = NULL, - run_id = NULL, + run_name = env_vars$CONBENCH_RUN_NAME, + run_id = env_vars$CONBENCH_RUN_ID, batch_id = batch_id, - run_reason = NULL, + run_reason = env_vars$CONBENCH_RUN_REASON, # let default populate # timestamp = utc_now_iso_format(), stats = list( @@ -365,13 +428,6 @@ assemble_metadata <- function(name, params, cpu_count, n_iter) { benchmark_language = "R" ) - pr_number_env <- Sys.getenv("BENCHMARKABLE_PR_NUMBER") - github <- list( - repository = "https://github.com/apache/arrow", - commit = arrow_info$build_info$git_id, - pr_number = if (pr_number_env == "") NULL else as.integer(pr_number_env) - ) - options <- list( iterations = n_iter, drop_caches = FALSE, # TODO: parameterize when implemented @@ -383,14 +439,27 @@ assemble_metadata <- function(name, params, cpu_count, n_iter) { tags = tags, info = info, context = context, - github = github, + github = github_info(), options = options ) } + +github_info <- function() { + repo_env <- Sys.getenv("CONBENCH_PROJECT_REPOSITORY") + pr_number_env <- Sys.getenv("CONBENCH_PROJECT_PR_NUMBER") + commit_env <- Sys.getenv("CONBENCH_PROJECT_COMMIT") + github <- list( + repository = if (repo_env != "") repo_env else "https://github.com/apache/arrow", + commit = if (commit_env != "") commit_env else arrow::arrow_info()$build_info$git_id, + pr_number = if (pr_number_env != "") as.integer(pr_number_env) else NULL + ) +} + + #' @importFrom jsonlite fromJSON toJSON #' @importFrom withr with_envvar -run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FALSE) { +run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE) { # cmd may need to vary by platform; possibly also a param for this fn? result_dir <- file.path(local_dir(), "results") @@ -424,7 +493,7 @@ run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FAL progress_bar$message(msg, set_width = FALSE) } - env_vars <- list() + env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON"))) if (!is.na.null(dots$mem_alloc)) { env_vars <- c(env_vars, ARROW_DEFAULT_MEMORY_POOL = dots$mem_alloc) } @@ -466,8 +535,20 @@ run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FAL # This means the script errored. message(paste(result, collapse = "\n")) result <- BenchmarkResult$new( + run_name = if (nzchar(env_vars$CONBENCH_RUN_NAME)) env_vars$CONBENCH_RUN_NAME else NULL, + run_id = if (nzchar(env_vars$CONBENCH_RUN_ID)) env_vars$CONBENCH_RUN_ID else NULL, + run_reason = if (nzchar(env_vars$CONBENCH_RUN_REASON)) env_vars$CONBENCH_RUN_REASON else NULL, error = list(log = result), - optional_benchmark_info = list(params = list(...)) + tags = metadata$tags, + info = metadata$info, + optional_benchmark_info = list( + params = list(...), + options = metadata$options, + output = NULL, + rscript = lines + ), + context = metadata$context, + github = metadata$github ) } diff --git a/man/augment_result.Rd b/man/augment_result.Rd deleted file mode 100644 index f9bc214..0000000 --- a/man/augment_result.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/publish.R -\name{augment_result} -\alias{augment_result} -\title{Augment a benchmark result with collected defaults} -\usage{ -augment_result(result) -} -\arguments{ -\item{result}{An instance of \link{BenchmarkResult}} -} -\description{ -Augment a benchmark result with collected defaults -} diff --git a/man/install_pipx.Rd b/man/install_pipx.Rd index 4764b20..069445a 100644 --- a/man/install_pipx.Rd +++ b/man/install_pipx.Rd @@ -12,6 +12,3 @@ Python packages in isolated environments where they will always be available regardless of which version of Python is presently on \verb{$PATH}. Especially useful for installing packages designed to be used via CLIs. } -\details{ -Only for interactive use. -} diff --git a/man/run.Rd b/man/run.Rd index 5f64ee3..277fe75 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -7,7 +7,7 @@ \usage{ run(x, ...) -\method{run}{BenchmarkDataFrame}(x, ...) +\method{run}{BenchmarkDataFrame}(x, ..., publish = FALSE, run_name = NULL, run_reason = NULL) } \arguments{ \item{x}{An S3 classed object to run} @@ -15,6 +15,14 @@ run(x, ...) \item{...}{Additional arguments passed through to methods. For \code{run.BenchmarkDataFrame}, passed through to \code{\link[=get_default_parameters]{get_default_parameters()}} (when parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.} + +\item{publish}{Flag for whether to publish results to a Conbench server. See +"Environment Variables" section for how to specify server details. Requires +the benchconnect CLI is installed; see \code{\link[=install_benchconnect]{install_benchconnect()}}.} + +\item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} + +\item{run_reason}{Required. Low-cardinality reason for the run, e.g. "commit" or "test"} } \value{ A modified object containing run results. For \code{run.BenchmarkDataFrame}, @@ -23,3 +31,28 @@ a \code{results} list column is appended. \description{ Run an object } +\section{Environment Variables}{ + +\itemize{ +\item \code{CONBENCH_URL}: Required. The URL of the Conbench server with no trailing +slash. For arrow, should be \verb{https://conbench.ursa.dev}. +\item \code{CONBENCH_EMAIL}: The email to use for Conbench login. Only required if the +server is private. +\item \code{CONBENCH_PASSWORD}: The password to use for Conbench login. Only required +if the server is private. +\item \code{CONBENCH_PROJECT_REPOSITORY}: The repository name (in the format +\code{org/repo}) or the URL (in the format \verb{https://github.com/org/repo}). +Defaults to \code{"https://github.com/apache/arrow"} if unset. +\item \code{CONBENCH_PROJECT_PR_NUMBER}: Recommended. The number of the GitHub pull +request that is running this benchmark, or \code{NULL} if it's a run on the +default branch +\item \code{CONBENCH_PROJECT_COMMIT}: The 40-character commit SHA of the repo being +benchmarked. If missing, will attempt to obtain it from +\code{arrow::arrow_info()$build_info$git_id}, though this may not be populated +depending on how Arrow was built. +\item \code{CONBENCH_MACHINE_INFO_NAME}: Will override detected machine host name sent +in \code{machine_info.name} when posting runs and results. Needed for cases where +the actual host name can vary, like CI and cloud runners. +} +} + diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R new file mode 100644 index 0000000..1b39ba7 --- /dev/null +++ b/tests/testthat/setup.R @@ -0,0 +1,11 @@ +if (!pipx_available()) { + install_pipx() +} + +if (!benchconnect_available()) { + install_benchconnect() +} + +if (!datalogistik_available()) { + install_datalogistik() +} diff --git a/tests/testthat/test-external-dependencies.R b/tests/testthat/test-external-dependencies.R new file mode 100644 index 0000000..5d10cf8 --- /dev/null +++ b/tests/testthat/test-external-dependencies.R @@ -0,0 +1,11 @@ +test_that("pipx_available() works", { + expect_equal(pipx_available(), system("which pipx") == 0L) +}) + +test_that("benchconnect_available() works", { + expect_equal(benchconnect_available(), system("which benchconnect") == 0L) +}) + +test_that("datalogistik_available() works", { + expect_equal(datalogistik_available(), system("which datalogistik") == 0L) +}) diff --git a/tests/testthat/test-publish.R b/tests/testthat/test-publish.R new file mode 100644 index 0000000..80d0643 --- /dev/null +++ b/tests/testthat/test-publish.R @@ -0,0 +1,135 @@ +test_that("call benchconnect works", { + expect_match(call_benchconnect("--help"), "Command line utilities for interacting with a Conbench API") +}) + +test_that("augment_run() works", { + reason <- "test" + host_name <- "fake-computer" + github <- list( + commit = "fake-commit", + repository = "https://github.com/conchair/conchair", + pr_number = "47" + ) + + unaugmented_run <- BenchmarkRun$new(reason = reason, github = NULL) + withr::with_envvar( + c( + "CONBENCH_MACHINE_INFO_NAME" = host_name, + "CONBENCH_PROJECT_REPOSITORY" = github$repository, + "CONBENCH_PROJECT_COMMIT" = github$commit, + "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number + ), + { augmented_run <- augment_run(unaugmented_run) } + ) + + expect_equal(unaugmented_run$reason, reason) + expect_equal(augmented_run$reason, reason) + + expect_null(unaugmented_run$id) + expect_type(augmented_run$id, "character") + + expect_null(unaugmented_run$machine_info) + expect_type(augmented_run$machine_info, "list") + expect_type(augmented_run$machine_info$name, "character") + expect_equal(augmented_run$machine_info$name, host_name) + + expect_null(unaugmented_run$github) + expect_equal(augmented_run$github, github) +}) + + +test_that("augment_result() works", { + stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3) + host_name <- "fake-computer" + github <- list( + commit = "fake-commit", + repository = "conchair/conchair", + pr_number = "47" + ) + + unaugmented_result <- BenchmarkResult$new(stats = stats, github = NULL) + withr::with_envvar( + c( + "CONBENCH_MACHINE_INFO_NAME" = host_name, + "CONBENCH_PROJECT_REPOSITORY" = github$repository, + "CONBENCH_PROJECT_COMMIT" = github$commit, + "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number + ), + { augmented_result <- augment_result(unaugmented_result) } + ) + + expect_equal(unaugmented_result$timestamp, augmented_result$timestamp) + + expect_equal(unaugmented_result$stats, stats) + expect_equal(augmented_result$stats, stats) + + expect_null(unaugmented_result$batch_id) + expect_type(augmented_result$batch_id, "character") + + expect_null(unaugmented_result$machine_info) + expect_type(augmented_result$machine_info, "list") + expect_type(augmented_result$machine_info$name, "character") + expect_equal(augmented_result$machine_info$name, host_name) + + expect_null(unaugmented_result$github) + expect_equal(augmented_result$github, github) +}) + + +test_that("start_run() works", { + bm_run <- BenchmarkRun$new( + name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5", + reason = "arrowbench-unit-test", + github = list( + commit = "2z8c9c49a5dc4a179243268e4bb6daa5", + repository = "https://github.com/conchair/conchair", + pr_number = "47" + ) + ) + + mockery::stub( + where = start_run, + what = "call_benchconnect", + how = function(args) { + expect_identical(args, c("start", "run", "--json", bm_run$json)) + } + ) + start_run(run = bm_run) +}) + + +test_that("submit_result() works", { + bm_result <- BenchmarkResult$new( + run_name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5", + run_reason = "arrowbench-unit-test", + github = list( + commit = "2z8c9c49a5dc4a179243268e4bb6daa5", + repository = "https://github.com/conchair/conchair", + pr_number = "47" + ), + stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3) + ) + + mockery::stub( + where = submit_result, + what = "call_benchconnect", + how = function(args) { + expect_identical(args, c("submit", "result", "--json", bm_result$json)) + } + ) + submit_result(result = bm_result) +}) + + +test_that("finish_run() works", { + mockery::stub( + where = finish_run, + what = "call_benchconnect", + how = function(args) { + expect_identical(args, c("finish", "run", "--json", "{}")) + } + ) + finish_run() +}) + +unlink("benchconnect-state.json") \ No newline at end of file diff --git a/tests/testthat/test-result.R b/tests/testthat/test-result.R index 83fcf69..e798ed0 100644 --- a/tests/testthat/test-result.R +++ b/tests/testthat/test-result.R @@ -47,14 +47,34 @@ test_that("inherited serialization/deserialization methods work", { }) test_that("S3 methods work", { - res <- BenchmarkResult$new( - run_name = "fake_run", - tags = c(is_real = FALSE), - optional_benchmark_info = list( - name = "fake", - result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE), - params = list(speed = "lightning") - ) + github <- list( + repository = "https://github.com/conchair/conchair", + commit = "2z8c9c49a5dc4a179243268e4bb6daa5", + pr_number = 47L + ) + run_reason <- "mocked-arrowbench-unit-test" + run_name <- paste(run_reason, github$commit, sep = ": ") + host_name <- "fake-computer" + + withr::with_envvar( + c( + CONBENCH_PROJECT_REPOSITORY = github$repository, + CONBENCH_PROJECT_PR_NUMBER = github$pr_number, + CONBENCH_PROJECT_COMMIT = github$commit, + CONBENCH_MACHINE_INFO_NAME = host_name + ), + { + res <- BenchmarkResult$new( + run_name = run_name, + run_reason = run_reason, + tags = c(is_real = FALSE), + optional_benchmark_info = list( + name = "fake", + result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE), + params = list(speed = "lightning") + ) + ) + } ) expect_equal(as.character(res), res$json) @@ -67,7 +87,9 @@ test_that("S3 methods work", { list(iteration = 1L, time = 0, status = "superfast", speed = "lightning"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"), - run_name = "fake_run", + run_name = run_name, + run_reason = run_reason, + github = github, timestamp = res$timestamp, tags = c(is_real = FALSE) ) diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R index e8abcd4..14158ee 100644 --- a/tests/testthat/test-run.R +++ b/tests/testthat/test-run.R @@ -254,7 +254,13 @@ test_that("run.BenchmarkDataFrame() works", { ) bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) - bm_df_res <- run(bm_df) + # Narrower than `suppressWarnings()`; catches all 5 instances unlike `expect_warning()` + withCallingHandlers( + { bm_df_res <- run(bm_df) }, + warning = function(w) if (conditionMessage(w) == "deparse may be incomplete") { + invokeRestart(findRestart("muffleWarning")) + } + ) assert_benchmark_dataframe( bm_df_res, @@ -290,4 +296,123 @@ test_that("run.BenchmarkDataFrame() works", { }) +test_that("run.BenchmarkDataFrame() with `publish = TRUE` works (with mocking)", { + bm_list <- list(placebo, placebo) + param_list <- list( + get_default_parameters( + placebo, + error = list(NULL, "rlang::abort", "base::stop"), + cpu_count = arrow::cpu_count() + ), + NULL + ) + bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) + + github <- list( + repository = "https://github.com/conchair/conchair", + commit = "2z8c9c49a5dc4a179243268e4bb6daa5", + pr_number = 47L + ) + run_reason <- "mocked-arrowbench-unit-test" + run_name <- paste(run_reason, github$commit, sep = ": ") + host_name <- "fake-computer" + + + + withr::with_envvar( + c( + CONBENCH_PROJECT_REPOSITORY = github$repository, + CONBENCH_PROJECT_PR_NUMBER = github$pr_number, + CONBENCH_PROJECT_COMMIT = github$commit, + CONBENCH_MACHINE_INFO_NAME = host_name + ), + { + # Narrower than `suppressWarnings()`; catches all 5 instances unlike `expect_warning()` + withCallingHandlers( + { + mockery::stub( + where = run.BenchmarkDataFrame, + what = "start_run", + how = function(run) { + run <- augment_run(run) + expect_identical(run$github, github) + expect_identical(run$name, run_name) + expect_identical(run$reason, run_reason) + expect_identical(run$machine_info$name, host_name) + } + ) + + mockery::stub( + where = run.BenchmarkDataFrame, + what = "submit_result", + how = function(result) { + expect_identical(result$github, github) + expect_identical(result$run_name, run_name) + expect_identical(result$run_reason, run_reason) + expect_identical(result$machine_info$name, host_name) + } + ) + + mockery::stub( + where = run.BenchmarkDataFrame, + what = "finish_run", + how = function(run) { + run <- augment_run(run) + expect_identical(run$github, github) + expect_identical(run$name, run_name) + expect_identical(run$reason, run_reason) + expect_identical(run$machine_info$name, host_name) + } + ) + + wipe_results() + bm_df_res <- run( + bm_df, + publish = TRUE, + run_name = run_name, + run_reason = run_reason + + ) + }, + warning = function(w) if (conditionMessage(w) == "deparse may be incomplete") { + invokeRestart(findRestart("muffleWarning")) + } + ) + } + ) + + assert_benchmark_dataframe( + bm_df_res, + benchmarks = bm_list, + parameters = list(param_list[[1]], get_default_parameters(placebo)) + ) + expect_true("results" %in% names(bm_df_res)) + purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { + expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6")) + expect_equal(nrow(parameters), length(results$results)) + if ("error" %in% names(parameters)) { + # param set with some cases that will error + purrr::walk2(parameters$error, results$results, function(err, res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + if (is.null(err)) { + # passing case + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + } else { + # erroring case + expect_false(is.null(res$error)) + } + }) + } else { + # param set with no cases that will error (includes defaults) + purrr::walk(results$results, function(res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + }) + } + }) +}) + +unlink("benchconnect-state.json") wipe_results() From 2b401b508834eb7d24b6cf3782cc1d665ac3daab Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Tue, 17 Jan 2023 22:58:53 +0000 Subject: [PATCH 3/8] stopifnot early --- R/run.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/run.R b/R/run.R index a05f0b8..2e4459e 100644 --- a/R/run.R +++ b/R/run.R @@ -55,13 +55,14 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run x <- get_default_parameters(x, ...) if (publish) { + stopifnot( + "Results cannot be published without a `run_reason`!" = !is.null(run_reason) + ) + github <- github_info() if (is.null(run_name)) { run_name <- paste(run_reason, github$commit, sep = ": ") } - stopifnot( - "Results cannot be published without a `run_reason`!" = !is.null(run_reason) - ) bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github) start_run(run = bm_run) } From dc9887aee574ca84eae48b3c73faaeaf3748f273 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 18 Jan 2023 19:36:50 +0000 Subject: [PATCH 4/8] run `finish_run()` via `on.exit()` --- R/run.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/run.R b/R/run.R index 2e4459e..1c5a53b 100644 --- a/R/run.R +++ b/R/run.R @@ -65,6 +65,11 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run } bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github) start_run(run = bm_run) + + # clean up even if something fails + on.exit({ + finish_run(run = bm_run) + }) } x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { @@ -79,10 +84,6 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run ress }) - if (publish) { - finish_run(run = bm_run) - } - x } From 3da373b248192565b7549094fba05e3e60f281a8 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 18 Jan 2023 19:44:09 +0000 Subject: [PATCH 5/8] comment tests --- tests/testthat/test-run.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R index 14158ee..fc5facc 100644 --- a/tests/testthat/test-run.R +++ b/tests/testthat/test-run.R @@ -387,9 +387,14 @@ test_that("run.BenchmarkDataFrame() with `publish = TRUE` works (with mocking)", parameters = list(param_list[[1]], get_default_parameters(placebo)) ) expect_true("results" %in% names(bm_df_res)) + + # iterate over param and res list cols purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { + # each element should be a `BenchmarkResults` (plural!) instance expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6")) + # each row of the param df should correspond to a result instance expect_equal(nrow(parameters), length(results$results)) + # iterate over results in `BenchmarkResults` object if ("error" %in% names(parameters)) { # param set with some cases that will error purrr::walk2(parameters$error, results$results, function(err, res) { From 4e2438d5f4d222c6a351d58d051294df722a1df3 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 18 Jan 2023 22:59:49 +0000 Subject: [PATCH 6/8] system() -> processx::run() --- R/external-dependencies.R | 19 +++++++++---------- tests/testthat/test-external-dependencies.R | 19 ++++++++++++++++--- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/R/external-dependencies.R b/R/external-dependencies.R index 5b055ea..c152466 100644 --- a/R/external-dependencies.R +++ b/R/external-dependencies.R @@ -1,7 +1,7 @@ external_cli_available <- function(cli) { - exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE) + res <- processx::run("which", cli, error_on_status = FALSE) - if (exit_code != 0L) { + if (res$status != 0L) { msg <- paste(cli, 'not installed or on $PATH.\n\n') if (cli == "pipx") { msg <- paste0( @@ -22,7 +22,7 @@ external_cli_available <- function(cli) { warning(warningCondition(msg, class = "notInstalledWarning")) } - exit_code == 0L + res$status == 0L } pipx_available <- function() { @@ -47,7 +47,7 @@ datalogistik_available <- function() { #' #' @export install_pipx <- function() { - system('pip install pipx && pipx ensurepath', intern = TRUE) + processx::run("sh", c("-c", "pip install pipx && pipx ensurepath")) } @@ -61,7 +61,6 @@ install_benchconnect <- function() { stopifnot(pipx_available()) url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect" - pipx_call <- "pipx install --include-deps" if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) { if (interactive()) { @@ -70,12 +69,12 @@ install_benchconnect <- function() { ans <- "y" } if (tolower(ans) %in% c("y", "")) { - system(paste(pipx_call, "--force", url), intern = TRUE) + processx::run("pipx", c("install", "--include-deps", "--force", url)) } else { invisible() } } else { - system(paste(pipx_call, url), intern = TRUE) + processx::run("pipx", c("install", "--include-deps", url)) } } @@ -95,17 +94,17 @@ install_datalogistik <- function() { ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main") url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}") - pipx_call <- "pipx install --pip-args '--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'" + pipx_call <- c("install", "--pip-args", "'--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'") if (datalogistik_available()) { # default to yes (and also this will make it work in non-interactive sessions) ans <- readline("datalogistik already installed. Update? [Y/n]: ") if (tolower(ans) %in% c("y", "")) { # we need the extra args to depend on the development version of arrow - return(system(glue("{pipx_call} --force {url}"), intern = TRUE)) + return(processx::run("pipx", c(pipx_call, "--force", url))) } else { return(invisible()) } } - system(glue("{pipx_call} {url}"), intern = TRUE) + processx::run("pipx", c(pipx_call, url)) } diff --git a/tests/testthat/test-external-dependencies.R b/tests/testthat/test-external-dependencies.R index 5d10cf8..a6ebd87 100644 --- a/tests/testthat/test-external-dependencies.R +++ b/tests/testthat/test-external-dependencies.R @@ -1,11 +1,24 @@ +test_that("external_cli_available() works", { + fake_uninstalled_cli <- basename(tempfile()) + expect_warning( + expect_false( + external_cli_available(fake_uninstalled_cli) + ), + regexp = paste(fake_uninstalled_cli, "not installed or on $PATH"), + fixed = TRUE + ) + + expect_true(external_cli_available("which")) +}) + test_that("pipx_available() works", { - expect_equal(pipx_available(), system("which pipx") == 0L) + expect_equal(pipx_available(), processx::run("which", "pipx")$status == 0L) }) test_that("benchconnect_available() works", { - expect_equal(benchconnect_available(), system("which benchconnect") == 0L) + expect_equal(benchconnect_available(), processx::run("which", "benchconnect")$status == 0L) }) test_that("datalogistik_available() works", { - expect_equal(datalogistik_available(), system("which datalogistik") == 0L) + expect_equal(datalogistik_available(), processx::run("which", "datalogistik")$status == 0L) }) From 5945866038a77026415b5fc6aa162fad674940a0 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 18 Jan 2023 23:33:21 +0000 Subject: [PATCH 7/8] bash quoting is stupid --- R/external-dependencies.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/external-dependencies.R b/R/external-dependencies.R index c152466..4f0cf45 100644 --- a/R/external-dependencies.R +++ b/R/external-dependencies.R @@ -47,7 +47,7 @@ datalogistik_available <- function() { #' #' @export install_pipx <- function() { - processx::run("sh", c("-c", "pip install pipx && pipx ensurepath")) + processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"), echo_cmd = TRUE) } @@ -69,12 +69,12 @@ install_benchconnect <- function() { ans <- "y" } if (tolower(ans) %in% c("y", "")) { - processx::run("pipx", c("install", "--include-deps", "--force", url)) + processx::run("pipx", c("install", "--include-deps", "--force", url), echo_cmd = TRUE) } else { invisible() } } else { - processx::run("pipx", c("install", "--include-deps", url)) + processx::run("pipx", c("install", "--include-deps", url), echo_cmd = TRUE) } } @@ -94,17 +94,17 @@ install_datalogistik <- function() { ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main") url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}") - pipx_call <- c("install", "--pip-args", "'--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'") + pipx_call <- c("install", "--pip-args=--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary") if (datalogistik_available()) { # default to yes (and also this will make it work in non-interactive sessions) ans <- readline("datalogistik already installed. Update? [Y/n]: ") if (tolower(ans) %in% c("y", "")) { # we need the extra args to depend on the development version of arrow - return(processx::run("pipx", c(pipx_call, "--force", url))) + return(processx::run("pipx", c(pipx_call, "--force", url), echo_cmd = TRUE)) } else { return(invisible()) } } - processx::run("pipx", c(pipx_call, url)) + processx::run("pipx", c(pipx_call, url), echo_cmd = TRUE) } From 5a43548b85db3fa65f494b5bab895b25de13885f Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Fri, 20 Jan 2023 22:10:43 +0000 Subject: [PATCH 8/8] rip out excess env vars --- R/run.R | 52 +++++++++++++++++++++++++++----------------- man/run_benchmark.Rd | 8 ++++++- man/run_bm.Rd | 8 ++++++- man/run_one.Rd | 6 +++++ 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/R/run.R b/R/run.R index 1c5a53b..681c788 100644 --- a/R/run.R +++ b/R/run.R @@ -73,10 +73,8 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run } x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { - withr::with_envvar( - c(CONBENCH_RUN_NAME = run_name, CONBENCH_RUN_REASON = run_reason), - { ress <- run_benchmark(bm = bm, params = params, ...) } - ) + ress <- run_benchmark(bm = bm, params = params, run_name = run_name, run_reason = run_reason, ...) + if (publish) { ress$results <- lapply(ress$results, augment_result) lapply(ress$results, submit_result) @@ -103,6 +101,8 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run #' `profvis::profvis(prof_input = file)`. Default is `FALSE` #' @param read_only this will only attempt to read benchmark files and will not #' run any that it cannot find. +#' @param run_name Name for the run. If not specified, will use `{run_reason}: {commit hash}` +#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test" #' #' @return A `BenchmarkResults` object, containing `results` attribute of a list #' of length `nrow(params)`, each of those a `BenchmarkResult` object. @@ -116,7 +116,9 @@ run_benchmark <- function(bm, n_iter = 1, dry_run = FALSE, profiling = FALSE, - read_only = FALSE) { + read_only = FALSE, + run_name = NULL, + run_reason = NULL) { start <- Sys.time() stopifnot(is.data.frame(params), nrow(params) > 0) message("Running ", nrow(params), " benchmarks with ", n_iter, " iterations:") @@ -145,6 +147,8 @@ run_benchmark <- function(bm, profiling = profiling, progress_bar = progress_bar, read_only = read_only, + run_name = run_name, + run_reason = run_reason, test_packages = unique(bm$packages_used(params)) ) @@ -174,6 +178,8 @@ run_benchmark <- function(bm, #' @inheritParams run_benchmark #' @param batch_id a length 1 character vector to identify the batch #' @param progress_bar a `progress` object to update progress to (default `NULL`) +#' @param run_name Name for the run +#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test" #' @param test_packages a character vector of packages that the benchmarks test (default `NULL`) #' @param ... parameters passed to `bm$setup()`. #' @@ -188,6 +194,8 @@ run_one <- function(bm, profiling = FALSE, progress_bar = NULL, read_only = FALSE, + run_name = NULL, + run_reason = NULL, test_packages = NULL) { all_params <- list(...) @@ -213,7 +221,7 @@ run_one <- function(bm, args <- modifyList( params, list(bm = bm, n_iter = n_iter, batch_id = batch_id, profiling = profiling, - global_params = global_params) + global_params = global_params, run_name = run_name, run_reason = run_reason) ) # transform the arguments into a string representation that can be called in @@ -256,7 +264,9 @@ run_one <- function(bm, name = bm$name, metadata = metadata, progress_bar = progress_bar, - read_only = read_only + read_only = read_only, + run_name = run_name, + run_reason = run_reason ), keep.null = TRUE ) @@ -275,10 +285,13 @@ run_one <- function(bm, #' in a fresh R process that `run_one()` provides. #' @inheritParams run_one #' @param global_params the global parameters that have been set +#' @param run_name Name for the run +#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test" #' @export #' @importFrom utils modifyList #' @importFrom sessioninfo package_info -run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, global_params = list()) { +run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, + global_params = list(), run_name = NULL, run_reason = NULL) { # We *don't* want to use altrep when we are setting up, or we get surprising results withr::with_options( list(arrow.use_altrep = FALSE), @@ -318,14 +331,11 @@ run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, glob n_iter = n_iter ) - env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON"))) - env_vars <- lapply(env_vars, function(x) if (nzchar(x)) x else NULL) - out <- BenchmarkResult$new( - run_name = env_vars$CONBENCH_RUN_NAME, - run_id = env_vars$CONBENCH_RUN_ID, + run_name = run_name, + run_id = NULL, batch_id = batch_id, - run_reason = env_vars$CONBENCH_RUN_REASON, + run_reason = run_reason, # let default populate # timestamp = utc_now_iso_format(), stats = list( @@ -363,7 +373,8 @@ run_iteration <- function(bm, ctx, profiling = FALSE) { out } -global_setup <- function(lib_path = NULL, cpu_count = NULL, mem_alloc = NULL, test_packages = NULL, dry_run = FALSE, read_only = FALSE) { +global_setup <- function(lib_path = NULL, cpu_count = NULL, mem_alloc = NULL, + test_packages = NULL, dry_run = FALSE, read_only = FALSE) { script <- "" if (!dry_run & !read_only) { lib_path <- ensure_lib(lib_path, test_packages = test_packages) @@ -461,7 +472,8 @@ github_info <- function() { #' @importFrom jsonlite fromJSON toJSON #' @importFrom withr with_envvar -run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE) { +run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE, + run_name = run_name, run_reason = run_reason) { # cmd may need to vary by platform; possibly also a param for this fn? result_dir <- file.path(local_dir(), "results") @@ -495,7 +507,7 @@ run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_ progress_bar$message(msg, set_width = FALSE) } - env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON"))) + env_vars <- list() if (!is.na.null(dots$mem_alloc)) { env_vars <- c(env_vars, ARROW_DEFAULT_MEMORY_POOL = dots$mem_alloc) } @@ -537,9 +549,9 @@ run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_ # This means the script errored. message(paste(result, collapse = "\n")) result <- BenchmarkResult$new( - run_name = if (nzchar(env_vars$CONBENCH_RUN_NAME)) env_vars$CONBENCH_RUN_NAME else NULL, - run_id = if (nzchar(env_vars$CONBENCH_RUN_ID)) env_vars$CONBENCH_RUN_ID else NULL, - run_reason = if (nzchar(env_vars$CONBENCH_RUN_REASON)) env_vars$CONBENCH_RUN_REASON else NULL, + run_name = run_name, + run_id = NULL, + run_reason = run_reason, error = list(log = result), tags = metadata$tags, info = metadata$info, diff --git a/man/run_benchmark.Rd b/man/run_benchmark.Rd index 3e603c4..35804ae 100644 --- a/man/run_benchmark.Rd +++ b/man/run_benchmark.Rd @@ -11,7 +11,9 @@ run_benchmark( n_iter = 1, dry_run = FALSE, profiling = FALSE, - read_only = FALSE + read_only = FALSE, + run_name = NULL, + run_reason = NULL ) } \arguments{ @@ -35,6 +37,10 @@ contain a \code{prof_file} field, which you can read in with \item{read_only}{this will only attempt to read benchmark files and will not run any that it cannot find.} + +\item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} + +\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} } \value{ A \code{BenchmarkResults} object, containing \code{results} attribute of a list diff --git a/man/run_bm.Rd b/man/run_bm.Rd index 4f5f633..f00b0db 100644 --- a/man/run_bm.Rd +++ b/man/run_bm.Rd @@ -10,7 +10,9 @@ run_bm( n_iter = 1, batch_id = NULL, profiling = FALSE, - global_params = list() + global_params = list(), + run_name = NULL, + run_reason = NULL ) } \arguments{ @@ -27,6 +29,10 @@ contain a \code{prof_file} field, which you can read in with \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} \item{global_params}{the global parameters that have been set} + +\item{run_name}{Name for the run} + +\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} } \description{ This is the function that gets called in the script that \code{\link[=run_one]{run_one()}} prepares. diff --git a/man/run_one.Rd b/man/run_one.Rd index 5448b28..51aca4d 100644 --- a/man/run_one.Rd +++ b/man/run_one.Rd @@ -13,6 +13,8 @@ run_one( profiling = FALSE, progress_bar = NULL, read_only = FALSE, + run_name = NULL, + run_reason = NULL, test_packages = NULL ) } @@ -37,6 +39,10 @@ contain a \code{prof_file} field, which you can read in with \item{read_only}{this will only attempt to read benchmark files and will not run any that it cannot find.} +\item{run_name}{Name for the run} + +\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} + \item{test_packages}{a character vector of packages that the benchmarks test (default \code{NULL})} } \value{