From 864fc7933d52e24e455a55a2a601048cdc4be261 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Wed, 4 Jan 2023 23:24:52 +0000
Subject: [PATCH 1/8] Allow results to be published to a Conbench server via
 benchconnect

---
 DESCRIPTION                 |   3 +
 NAMESPACE                   |   4 ++
 R/external-dependencies.R   | 108 ++++++++++++++++++++++++++++++++++++
 R/publish.R                 |  21 +++++++
 R/result.R                  |  49 ++++++++++++++++
 man/augment_result.Rd       |  14 +++++
 man/install_benchconnect.Rd |  12 ++++
 man/install_datalogistik.Rd |  15 +++++
 man/install_pipx.Rd         |  17 ++++++
 9 files changed, 243 insertions(+)
 create mode 100644 R/external-dependencies.R
 create mode 100644 R/publish.R
 create mode 100644 man/augment_result.Rd
 create mode 100644 man/install_benchconnect.Rd
 create mode 100644 man/install_datalogistik.Rd
 create mode 100644 man/install_pipx.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 57f3816..0fee302 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,6 +20,7 @@ Imports:
     distro,
     glue, 
     jsonlite,
+    processx,
     progress,
     purrr,
     R6,
@@ -72,8 +73,10 @@ Collate:
     'known-sources.R'
     'ensure-source.R'
     'ensure-tpch-source.R'
+    'external-dependencies.R'
     'measure.R'
     'params.R'
+    'publish.R'
     'util.R'
     'result.R'
     'run.R'
diff --git a/NAMESPACE b/NAMESPACE
index 0eb5832..9f61396 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,6 +17,7 @@ export(BenchmarkDataFrame)
 export(all_sources)
 export(array_altrep_materialization)
 export(array_to_vector)
+export(augment_result)
 export(confirm_mem_alloc)
 export(dataset_taxi_2013)
 export(dataset_taxi_parquet)
@@ -39,6 +40,9 @@ export(get_read_function)
 export(get_source_attr)
 export(get_sql_query_func)
 export(get_write_function)
+export(install_benchconnect)
+export(install_datalogistik)
+export(install_pipx)
 export(known_compressions)
 export(known_formats)
 export(known_sources)
diff --git a/R/external-dependencies.R b/R/external-dependencies.R
new file mode 100644
index 0000000..e8317fe
--- /dev/null
+++ b/R/external-dependencies.R
@@ -0,0 +1,108 @@
+pipx_available <- function() {
+  exit_code <- system("which pipx", ignore.stdout = TRUE, ignore.stderr = TRUE)
+  exit_code == 0L
+}
+
+external_cli_available <- function(cli) {
+  exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE)
+
+  if (exit_code != 0L) {
+    warning(
+      warningCondition(
+        paste0(
+          paste(cli, 'not installed or on $PATH.\n\n'),
+          glue::glue('It can be installed interactively with `install_pipx(); install_{cli}()`\n\n'),
+          'If already installed with pipx, ensure it is on $PATH, e.g. by running',
+          '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron'
+        ),
+        class = "notInstalledWarning"
+      )
+    )
+  }
+
+  exit_code == 0L
+}
+
+benchconnect_available <- function() {
+  external_cli_available(cli = "benchconnect")
+}
+
+datalogistik_available <- function() {
+  external_cli_available(cli = "datalogistik")
+}
+
+
+#' Install pipx
+#'
+#' Install [pipx](https://pypa.github.io/pipx/), a version of pip that installs
+#' Python packages in isolated environments where they will always be available
+#' regardless of which version of Python is presently on `$PATH`. Especially
+#' useful for installing packages designed to be used via CLIs.
+#'
+#' Only for interactive use.
+#'
+#' @export
+install_pipx <- function() {
+  stopifnot(interactive())
+  system('pip install pipx && pipx ensurepath', intern = TRUE)
+}
+
+
+#' Install benchconnect
+#'
+#' Install [benchconnect](https://github.com/conbench/conbench/tree/main/benchconnect),
+#' a utility for sending benchmark results to a Conbench server
+#'
+#' @export
+install_benchconnect <- function() {
+  stopifnot(pipx_available())
+
+  url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect"
+  pipx_call <- "pipx install --include-deps"
+
+  if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) {
+    if (interactive()) {
+      ans <- readline("benchconnect already installed. Update? [Y/n]: ")
+    } else {
+      ans <- "y"
+    }
+    if (tolower(ans) %in% c("y", "")) {
+      system(paste(pipx_call, "--force", url), intern = TRUE)
+    } else {
+      invisible()
+    }
+  } else {
+    system(paste(pipx_call, url), intern = TRUE)
+  }
+}
+
+
+#' Install datalogistik
+#'
+#' Install [datalogistik](https://github.com/conbench/datalogistik), a utility
+#' for generating, downloading, and converting datasets for benchmarking.
+#'
+#' Only for interactive use.
+#'
+#' @export
+install_datalogistik <- function() {
+  # TODO: install pipx?
+  stopifnot(pipx_available())
+
+  ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main")
+  url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}")
+
+  pipx_call <- "pipx install --pip-args '--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'"
+  if (datalogistik_available()) {
+    # default to yes (and also this will make it work in non-interactive sessions)
+    ans <- readline("datalogistik already installed. Update? [Y/n]: ")
+    if (tolower(ans) %in% c("y", "")) {
+      # we need the extra args to depend on the development version of arrow
+      return(system(glue("{pipx_call} --force {url}"), intern = TRUE))
+    } else {
+      return(invisible())
+    }
+  }
+
+  system(glue("{pipx_call} {url}"), intern = TRUE)
+}
diff --git a/R/publish.R b/R/publish.R
new file mode 100644
index 0000000..61b9bc5
--- /dev/null
+++ b/R/publish.R
@@ -0,0 +1,21 @@
+#' Augment a benchmark result with collected defaults
+#'
+#' @param result An instance of [BenchmarkResult]
+#'
+#' @export
+augment_result <- function(result) {
+  stopifnot(
+    benchconnect_available(),
+    inherits(result, "BenchmarkResult")
+  )
+
+  tmp_in <- tempfile(fileext = '.json')
+  result$write_json(tmp_in)
+
+  command_res <-   processx::run(
+    'benchconnect',
+    args = c("augment", "result", "--path", tmp_in)
+  )
+  file.remove(tmp_in)
+  BenchmarkResult$from_json(command_res$stdout)
+}
\ No newline at end of file
diff --git a/R/result.R b/R/result.R
index 31f51d5..6e6153a 100644
--- a/R/result.R
+++ b/R/result.R
@@ -329,3 +329,52 @@ as.data.frame.BenchmarkResults <- function(x, row.names = NULL, optional = FALSE
 as.data.frame.BenchmarkResult <- function(x, row.names = NULL, optional = FALSE, packages = "arrow", ...) {
   x$to_dataframe(row.names = row.names, optional = optional, packages = packages, ...)
 }
+
+
+# A class for holding metadata on a benchmark run
+#
+# Because this class inherits from `Serializable`, it can be written to and
+# instantiated from JSON forms.
+#
+# All attributes are active bindings so that validation can be run when they are
+# set, whether during or after instantiation.
+BenchmarkRun <- R6Point1Class(
+  classname = "BenchmarkRun",
+  inherit = Serializable,
+
+  public = list(
+    initialize = function(
+      name = NULL,
+      id = NULL,
+      reason = NULL,
+      machine_info = NULL,
+      cluster_info = NULL,
+      github = NULL,
+      finished_timestamp = NULL,
+      error_type = NULL,
+      error_info = NULL
+    ) {
+      self$name <- name
+      self$id <- id
+      self$reason <- reason
+      self$machine_info <- machine_info
+      self$cluster_info <- cluster_info
+      self$github <- github
+      self$finished_timestamp <- finished_timestamp
+      self$error_type <- error_type
+      self$error_info <- error_info
+    }
+  ),
+
+  active = list(
+    name = function(name) private$get_or_set_serializable(variable = "name", value = name),
+    id = function(id) private$get_or_set_serializable(variable = "id", value = id),
+    reason = function(reason) private$get_or_set_serializable(variable = "reason", value = reason),
+    machine_info = function(machine_info) private$get_or_set_serializable(variable = "machine_info", value = machine_info),
+    cluster_info = function(cluster_info) private$get_or_set_serializable(variable = "cluster_info", value = cluster_info),
+    github = function(github) private$get_or_set_serializable(variable = "github", value = github),
+    finished_timestamp = function(finished_timestamp) private$get_or_set_serializable(variable = "finished_timestamp", value = finished_timestamp),
+    error_type = function(error_type) private$get_or_set_serializable(variable = "error_type", value = error_type),
+    error_info = function(error_info) private$get_or_set_serializable(variable = "error_info", value = error_info)
+  )
+)
diff --git a/man/augment_result.Rd b/man/augment_result.Rd
new file mode 100644
index 0000000..f9bc214
--- /dev/null
+++ b/man/augment_result.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/publish.R
+\name{augment_result}
+\alias{augment_result}
+\title{Augment a benchmark result with collected defaults}
+\usage{
+augment_result(result)
+}
+\arguments{
+\item{result}{An instance of \link{BenchmarkResult}}
+}
+\description{
+Augment a benchmark result with collected defaults
+}
diff --git a/man/install_benchconnect.Rd b/man/install_benchconnect.Rd
new file mode 100644
index 0000000..e87fe96
--- /dev/null
+++ b/man/install_benchconnect.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/external-dependencies.R
+\name{install_benchconnect}
+\alias{install_benchconnect}
+\title{Install benchconnect}
+\usage{
+install_benchconnect()
+}
+\description{
+Install \href{https://github.com/conbench/conbench/tree/main/benchconnect}{benchconnect},
+a utility for sending benchmark results to a Conbench server
+}
diff --git a/man/install_datalogistik.Rd b/man/install_datalogistik.Rd
new file mode 100644
index 0000000..64e2442
--- /dev/null
+++ b/man/install_datalogistik.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/external-dependencies.R
+\name{install_datalogistik}
+\alias{install_datalogistik}
+\title{Install datalogistik}
+\usage{
+install_datalogistik()
+}
+\description{
+Install \href{https://github.com/conbench/datalogistik}{datalogistik}, a utility
+for generating, downloading, and converting datasets for benchmarking.
+}
+\details{
+Only for interactive use.
+}
diff --git a/man/install_pipx.Rd b/man/install_pipx.Rd
new file mode 100644
index 0000000..4764b20
--- /dev/null
+++ b/man/install_pipx.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/external-dependencies.R
+\name{install_pipx}
+\alias{install_pipx}
+\title{Install pipx}
+\usage{
+install_pipx()
+}
+\description{
+Install \href{https://pypa.github.io/pipx/}{pipx}, a version of pip that installs
+Python packages in isolated environments where they will always be available
+regardless of which version of Python is presently on \verb{$PATH}. Especially
+useful for installing packages designed to be used via CLIs.
+}
+\details{
+Only for interactive use.
+}

From d23a8c141797606d65b6f347ac6be7f4aa7c6d94 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Fri, 13 Jan 2023 22:28:59 +0000
Subject: [PATCH 2/8] integrate benchconnect publishing

---
 NAMESPACE                                   |   1 -
 R/external-dependencies.R                   |  39 +++---
 R/publish.R                                 |  53 +++++---
 R/result.R                                  |   7 +-
 R/run.R                                     | 113 +++++++++++++---
 man/augment_result.Rd                       |  14 --
 man/install_pipx.Rd                         |   3 -
 man/run.Rd                                  |  35 ++++-
 tests/testthat/setup.R                      |  11 ++
 tests/testthat/test-external-dependencies.R |  11 ++
 tests/testthat/test-publish.R               | 135 ++++++++++++++++++++
 tests/testthat/test-result.R                |  40 ++++--
 tests/testthat/test-run.R                   | 127 +++++++++++++++++-
 13 files changed, 506 insertions(+), 83 deletions(-)
 delete mode 100644 man/augment_result.Rd
 create mode 100644 tests/testthat/setup.R
 create mode 100644 tests/testthat/test-external-dependencies.R
 create mode 100644 tests/testthat/test-publish.R

diff --git a/NAMESPACE b/NAMESPACE
index 9f61396..dd29954 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -17,7 +17,6 @@ export(BenchmarkDataFrame)
 export(all_sources)
 export(array_altrep_materialization)
 export(array_to_vector)
-export(augment_result)
 export(confirm_mem_alloc)
 export(dataset_taxi_2013)
 export(dataset_taxi_parquet)
diff --git a/R/external-dependencies.R b/R/external-dependencies.R
index e8317fe..5b055ea 100644
--- a/R/external-dependencies.R
+++ b/R/external-dependencies.R
@@ -1,28 +1,34 @@
-pipx_available <- function() {
-  exit_code <- system("which pipx", ignore.stdout = TRUE, ignore.stderr = TRUE)
-  exit_code == 0L
-}
-
 external_cli_available <- function(cli) {
   exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE)
 
   if (exit_code != 0L) {
-    warning(
-      warningCondition(
-        paste0(
-          paste(cli, 'not installed or on $PATH.\n\n'),
-          glue::glue('It can be installed interactively with `install_pipx(); install_{cli}()`\n\n'),
-          'If already installed with pipx, ensure it is on $PATH, e.g. by running',
-          '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron'
-        ),
-        class = "notInstalledWarning"
+    msg <- paste(cli, 'not installed or on $PATH.\n\n')
+    if (cli == "pipx") {
+      msg <- paste0(
+        msg,
+        glue::glue('It can be installed with `install_pipx()`\n\n'),
+        'If already installed, ensure it is on $PATH, e.g. by running',
+        '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron'
       )
-    )
+    } else {
+      msg <- paste0(
+        msg,
+        glue::glue('It can be installed with `install_pipx(); install_{cli}()`\n\n'),
+        'If already installed with pipx, ensure it is on $PATH, e.g. by running',
+        '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron'
+      )
+    }
+
+    warning(warningCondition(msg, class = "notInstalledWarning"))
   }
 
   exit_code == 0L
 }
 
+pipx_available <- function() {
+  external_cli_available(cli = "pipx")
+}
+
 benchconnect_available <- function() {
   external_cli_available(cli = "benchconnect")
 }
@@ -39,11 +45,8 @@ datalogistik_available <- function() {
 #' regardless of which version of Python is presently on `$PATH`. Especially
 #' useful for installing packages designed to be used via CLIs.
 #'
-#' Only for interactive use.
-#'
 #' @export
 install_pipx <- function() {
-  stopifnot(interactive())
   system('pip install pipx && pipx ensurepath', intern = TRUE)
 }
 
diff --git a/R/publish.R b/R/publish.R
index 61b9bc5..08396f5 100644
--- a/R/publish.R
+++ b/R/publish.R
@@ -1,21 +1,38 @@
-#' Augment a benchmark result with collected defaults
-#'
-#' @param result An instance of [BenchmarkResult]
-#'
-#' @export
+# Call benchconnect
+#
+# @param args A character vector of arguments to pass to the benchconnect binary
+#
+# @returns A string of stdout returned by the call
+call_benchconnect <- function(args) {
+  stopifnot(benchconnect_available())
+  res <- processx::run(command = "benchconnect", args = args, echo_cmd = TRUE, echo = TRUE)
+  message(res$stderr)
+  res$stdout
+}
+
+
+augment_run <- function(run) {
+  stdout <- call_benchconnect(c("augment", "run", "--json", run$json))
+  BenchmarkRun$from_json(stdout)
+}
+
 augment_result <- function(result) {
-  stopifnot(
-    benchconnect_available(),
-    inherits(result, "BenchmarkResult")
-  )
+  stdout <- call_benchconnect(c("augment", "result", "--json", result$json))
+  BenchmarkResult$from_json(stdout)
+}
+
+
+start_run <- function(run) {
+  call_benchconnect(c("start", "run", "--json", run$json))
+}
 
-  tmp_in <- tempfile(fileext = '.json')
-  result$write_json(tmp_in)
+submit_result <- function(result) {
+  call_benchconnect(c("submit", "result", "--json", result$json))
+}
 
-  command_res <-   processx::run(
-    'benchconnect',
-    args = c("augment", "result", "--path", tmp_in)
-  )
-  file.remove(tmp_in)
-  BenchmarkResult$from_json(command_res$stdout)
-}
\ No newline at end of file
+finish_run <- function(run) {
+  # Ed note: `run` is not used right now, but there are some things we can pass
+  # here in the future, so I put it here for parallelism for now. Since it is
+  # not evaluated, it doesn't need to be specified for now.
+  call_benchconnect(c("finish", "run", "--json", "{}"))
+}
diff --git a/R/result.R b/R/result.R
index 6e6153a..a767e7d 100644
--- a/R/result.R
+++ b/R/result.R
@@ -186,7 +186,7 @@ BenchmarkResult <- R6Point1Class(
                           machine_info = NULL,
                           cluster_info = NULL,
                           context = NULL,
-                          github = NULL) {
+                          github = github_info()) {
       self$run_name <- run_name
       self$run_id <- run_id
       self$batch_id <- batch_id
@@ -347,9 +347,10 @@ BenchmarkRun <- R6Point1Class(
       name = NULL,
       id = NULL,
       reason = NULL,
+      info = NULL,
       machine_info = NULL,
       cluster_info = NULL,
-      github = NULL,
+      github = github_info(),
       finished_timestamp = NULL,
       error_type = NULL,
       error_info = NULL
@@ -357,6 +358,7 @@ BenchmarkRun <- R6Point1Class(
       self$name <- name
       self$id <- id
       self$reason <- reason
+      self$info <- info
       self$machine_info <- machine_info
       self$cluster_info <- cluster_info
       self$github <- github
@@ -370,6 +372,7 @@ BenchmarkRun <- R6Point1Class(
     name = function(name) private$get_or_set_serializable(variable = "name", value = name),
     id = function(id) private$get_or_set_serializable(variable = "id", value = id),
     reason = function(reason) private$get_or_set_serializable(variable = "reason", value = reason),
+    info = function(info) private$get_or_set_serializable(variable = "info", value = info),
     machine_info = function(machine_info) private$get_or_set_serializable(variable = "machine_info", value = machine_info),
     cluster_info = function(cluster_info) private$get_or_set_serializable(variable = "cluster_info", value = cluster_info),
     github = function(github) private$get_or_set_serializable(variable = "github", value = github),
diff --git a/R/run.R b/R/run.R
index ed2ed33..a05f0b8 100644
--- a/R/run.R
+++ b/R/run.R
@@ -20,16 +20,68 @@ run.default <- function(x, ...) {
 }
 
 
+#' @param publish Flag for whether to publish results to a Conbench server. See
+#' "Environment Variables" section for how to specify server details. Requires
+#' the benchconnect CLI is installed; see [install_benchconnect()].
+#' @param run_name Name for the run. If not specified, will use `{run_reason}: {commit hash}`
+#' @param run_reason Required. Low-cardinality reason for the run, e.g. "commit" or "test"
+#'
+#' @section Environment Variables:
+#'
+#' - `CONBENCH_URL`: Required. The URL of the Conbench server with no trailing
+#' slash. For arrow, should be `https://conbench.ursa.dev`.
+#' - `CONBENCH_EMAIL`: The email to use for Conbench login. Only required if the
+#' server is private.
+#' - `CONBENCH_PASSWORD`: The password to use for Conbench login. Only required
+#' if the server is private.
+#' - `CONBENCH_PROJECT_REPOSITORY`: The repository name (in the format
+#' `org/repo`) or the URL (in the format `https://github.com/org/repo`).
+#' Defaults to `"https://github.com/apache/arrow"` if unset.
+#' - `CONBENCH_PROJECT_PR_NUMBER`: Recommended. The number of the GitHub pull
+#' request that is running this benchmark, or `NULL` if it's a run on the
+#' default branch
+#' - `CONBENCH_PROJECT_COMMIT`: The 40-character commit SHA of the repo being
+#' benchmarked. If missing, will attempt to obtain it from
+#' `arrow::arrow_info()$build_info$git_id`, though this may not be populated
+#' depending on how Arrow was built.
+#' - `CONBENCH_MACHINE_INFO_NAME`: Will override detected machine host name sent
+#' in `machine_info.name` when posting runs and results. Needed for cases where
+#' the actual host name can vary, like CI and cloud runners.
+#'
 #' @rdname run
 #' @export
-run.BenchmarkDataFrame <- function(x, ...) {
+run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run_reason = NULL) {
   # if already run (so no elements of `parameters` are NULL), is no-op
   x <- get_default_parameters(x, ...)
 
+  if (publish) {
+    github <- github_info()
+    if (is.null(run_name)) {
+      run_name <- paste(run_reason, github$commit, sep = ": ")
+    }
+    stopifnot(
+      "Results cannot be published without a `run_reason`!" = !is.null(run_reason)
+    )
+    bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github)
+    start_run(run = bm_run)
+  }
+
   x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) {
-    run_benchmark(bm = bm, params = params, ...)
+    withr::with_envvar(
+      c(CONBENCH_RUN_NAME = run_name, CONBENCH_RUN_REASON = run_reason),
+      { ress <- run_benchmark(bm = bm, params = params, ...) }
+    )
+    if (publish) {
+      ress$results <- lapply(ress$results, augment_result)
+      lapply(ress$results, submit_result)
+    }
+    ress
   })
 
+  if (publish) {
+    finish_run(run = bm_run)
+  }
+
   x
 }
 
@@ -185,6 +237,13 @@ run_one <- function(bm,
     return(script)
   }
 
+  metadata <- assemble_metadata(
+    name = bm$name,
+    params = bm$tags_fun(params),
+    cpu_count = global_params$cpu_count,
+    n_iter = n_iter
+  )
+
   # construct the `run_script()` arguments out of all of the params as well as a
   # few other arguments. We need all of the parameters here so that the
   # cached result file names are right. Then run the script.
@@ -193,6 +252,7 @@ run_one <- function(bm,
     list(
       lines = script,
       name = bm$name,
+      metadata = metadata,
       progress_bar = progress_bar,
       read_only = read_only
     ),
@@ -256,11 +316,14 @@ run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, glob
     n_iter = n_iter
   )
 
+  env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON")))
+  env_vars <- lapply(env_vars, function(x) if (nzchar(x)) x else NULL)
+
   out <- BenchmarkResult$new(
-    run_name = NULL,
-    run_id = NULL,
+    run_name = env_vars$CONBENCH_RUN_NAME,
+    run_id = env_vars$CONBENCH_RUN_ID,
     batch_id = batch_id,
-    run_reason = NULL,
+    run_reason = env_vars$CONBENCH_RUN_REASON,
     # let default populate
     # timestamp = utc_now_iso_format(),
     stats = list(
@@ -365,13 +428,6 @@ assemble_metadata <- function(name, params, cpu_count, n_iter) {
     benchmark_language = "R"
   )
 
-  pr_number_env <- Sys.getenv("BENCHMARKABLE_PR_NUMBER")
-  github <- list(
-    repository = "https://github.com/apache/arrow",
-    commit = arrow_info$build_info$git_id,
-    pr_number = if (pr_number_env == "") NULL else as.integer(pr_number_env)
-  )
-
   options <- list(
     iterations = n_iter,
     drop_caches = FALSE,  # TODO: parameterize when implemented
@@ -383,14 +439,27 @@ assemble_metadata <- function(name, params, cpu_count, n_iter) {
     tags = tags,
     info = info,
     context = context,
-    github = github,
+    github = github_info(),
     options = options
   )
 }
 
+
+github_info <- function() {
+  repo_env <- Sys.getenv("CONBENCH_PROJECT_REPOSITORY")
+  pr_number_env <- Sys.getenv("CONBENCH_PROJECT_PR_NUMBER")
+  commit_env <- Sys.getenv("CONBENCH_PROJECT_COMMIT")
+  github <- list(
+    repository = if (repo_env != "") repo_env else "https://github.com/apache/arrow",
+    commit = if (commit_env != "") commit_env else arrow::arrow_info()$build_info$git_id,
+    pr_number = if (pr_number_env != "") as.integer(pr_number_env) else NULL
+  )
+}
+
+
 #' @importFrom jsonlite fromJSON toJSON
 #' @importFrom withr with_envvar
-run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FALSE) {
+run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE) {
   # cmd may need to vary by platform; possibly also a param for this fn?
 
   result_dir <- file.path(local_dir(), "results")
@@ -424,7 +493,7 @@ run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FAL
     progress_bar$message(msg, set_width = FALSE)
   }
 
-  env_vars <- list()
+  env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON")))
   if (!is.na.null(dots$mem_alloc)) {
     env_vars <- c(env_vars, ARROW_DEFAULT_MEMORY_POOL = dots$mem_alloc)
   }
@@ -466,8 +535,20 @@ run_script <- function(lines, cmd = find_r(), ..., progress_bar, read_only = FAL
     # This means the script errored.
     message(paste(result, collapse = "\n"))
     result <- BenchmarkResult$new(
+      run_name = if (nzchar(env_vars$CONBENCH_RUN_NAME)) env_vars$CONBENCH_RUN_NAME else NULL,
+      run_id = if (nzchar(env_vars$CONBENCH_RUN_ID)) env_vars$CONBENCH_RUN_ID else NULL,
+      run_reason = if (nzchar(env_vars$CONBENCH_RUN_REASON)) env_vars$CONBENCH_RUN_REASON else NULL,
       error = list(log = result),
-      optional_benchmark_info = list(params = list(...))
+      tags = metadata$tags,
+      info = metadata$info,
+      optional_benchmark_info = list(
+        params = list(...),
+        options = metadata$options,
+        output = NULL,
+        rscript = lines
+      ),
+      context = metadata$context,
+      github = metadata$github
     )
   }
 
diff --git a/man/augment_result.Rd b/man/augment_result.Rd
deleted file mode 100644
index f9bc214..0000000
--- a/man/augment_result.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/publish.R
-\name{augment_result}
-\alias{augment_result}
-\title{Augment a benchmark result with collected defaults}
-\usage{
-augment_result(result)
-}
-\arguments{
-\item{result}{An instance of \link{BenchmarkResult}}
-}
-\description{
-Augment a benchmark result with collected defaults
-}
diff --git a/man/install_pipx.Rd b/man/install_pipx.Rd
index 4764b20..069445a 100644
--- a/man/install_pipx.Rd
+++ b/man/install_pipx.Rd
@@ -12,6 +12,3 @@ Python packages in isolated environments where they will always be available
 regardless of which version of Python is presently on \verb{$PATH}. Especially
 useful for installing packages designed to be used via CLIs.
 }
-\details{
-Only for interactive use.
-}
diff --git a/man/run.Rd b/man/run.Rd
index 5f64ee3..277fe75 100644
--- a/man/run.Rd
+++ b/man/run.Rd
@@ -7,7 +7,7 @@
 \usage{
 run(x, ...)
 
-\method{run}{BenchmarkDataFrame}(x, ...)
+\method{run}{BenchmarkDataFrame}(x, ..., publish = FALSE, run_name = NULL, run_reason = NULL)
 }
 \arguments{
 \item{x}{An S3 classed object to run}
@@ -15,6 +15,14 @@ run(x, ...)
 \item{...}{Additional arguments passed through to methods. For
 \code{run.BenchmarkDataFrame}, passed through to \code{\link[=get_default_parameters]{get_default_parameters()}} (when
 parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.}
+
+\item{publish}{Flag for whether to publish results to a Conbench server. See
+"Environment Variables" section for how to specify server details. Requires
+the benchconnect CLI is installed; see \code{\link[=install_benchconnect]{install_benchconnect()}}.}
+
+\item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}}
+
+\item{run_reason}{Required. Low-cardinality reason for the run, e.g. "commit" or "test"}
 }
 \value{
 A modified object containing run results. For \code{run.BenchmarkDataFrame},
@@ -23,3 +31,28 @@ a \code{results} list column is appended.
 \description{
 Run an object
 }
+\section{Environment Variables}{
+
+\itemize{
+\item \code{CONBENCH_URL}: Required. The URL of the Conbench server with no trailing
+slash. For arrow, should be \verb{https://conbench.ursa.dev}.
+\item \code{CONBENCH_EMAIL}: The email to use for Conbench login. Only required if the
+server is private.
+\item \code{CONBENCH_PASSWORD}: The password to use for Conbench login. Only required
+if the server is private.
+\item \code{CONBENCH_PROJECT_REPOSITORY}: The repository name (in the format
+\code{org/repo}) or the URL (in the format \verb{https://github.com/org/repo}).
+Defaults to \code{"https://github.com/apache/arrow"} if unset.
+\item \code{CONBENCH_PROJECT_PR_NUMBER}: Recommended. The number of the GitHub pull
+request that is running this benchmark, or \code{NULL} if it's a run on the
+default branch
+\item \code{CONBENCH_PROJECT_COMMIT}: The 40-character commit SHA of the repo being
+benchmarked. If missing, will attempt to obtain it from
+\code{arrow::arrow_info()$build_info$git_id}, though this may not be populated
+depending on how Arrow was built.
+\item \code{CONBENCH_MACHINE_INFO_NAME}: Will override detected machine host name sent
+in \code{machine_info.name} when posting runs and results. Needed for cases where
+the actual host name can vary, like CI and cloud runners.
+}
+}
+
diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
new file mode 100644
index 0000000..1b39ba7
--- /dev/null
+++ b/tests/testthat/setup.R
@@ -0,0 +1,11 @@
+if (!pipx_available()) {
+  install_pipx()
+}
+
+if (!benchconnect_available()) {
+  install_benchconnect()
+}
+
+if (!datalogistik_available()) {
+  install_datalogistik()
+}
diff --git a/tests/testthat/test-external-dependencies.R b/tests/testthat/test-external-dependencies.R
new file mode 100644
index 0000000..5d10cf8
--- /dev/null
+++ b/tests/testthat/test-external-dependencies.R
@@ -0,0 +1,11 @@
+test_that("pipx_available() works", {
+  expect_equal(pipx_available(), system("which pipx") == 0L)
+})
+
+test_that("benchconnect_available() works", {
+  expect_equal(benchconnect_available(), system("which benchconnect") == 0L)
+})
+
+test_that("datalogistik_available() works", {
+  expect_equal(datalogistik_available(), system("which datalogistik") == 0L)
+})
diff --git a/tests/testthat/test-publish.R b/tests/testthat/test-publish.R
new file mode 100644
index 0000000..80d0643
--- /dev/null
+++ b/tests/testthat/test-publish.R
@@ -0,0 +1,135 @@
+test_that("call benchconnect works", {
+  expect_match(call_benchconnect("--help"), "Command line utilities for interacting with a Conbench API")
+})
+
+test_that("augment_run() works", {
+  reason <- "test"
+  host_name <- "fake-computer"
+  github <- list(
+    commit = "fake-commit",
+    repository = "https://github.com/conchair/conchair",
+    pr_number = "47"
+  )
+
+  unaugmented_run <- BenchmarkRun$new(reason = reason, github = NULL)
+  withr::with_envvar(
+    c(
+      "CONBENCH_MACHINE_INFO_NAME" = host_name,
+      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
+      "CONBENCH_PROJECT_COMMIT" = github$commit,
+      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
+    ),
+    { augmented_run <- augment_run(unaugmented_run) }
+  )
+
+  expect_equal(unaugmented_run$reason, reason)
+  expect_equal(augmented_run$reason, reason)
+
+  expect_null(unaugmented_run$id)
+  expect_type(augmented_run$id, "character")
+
+  expect_null(unaugmented_run$machine_info)
+  expect_type(augmented_run$machine_info, "list")
+  expect_type(augmented_run$machine_info$name, "character")
+  expect_equal(augmented_run$machine_info$name, host_name)
+
+  expect_null(unaugmented_run$github)
+  expect_equal(augmented_run$github, github)
+})
+
+
+test_that("augment_result() works", {
+  stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
+  host_name <- "fake-computer"
+  github <- list(
+    commit = "fake-commit",
+    repository = "conchair/conchair",
+    pr_number = "47"
+  )
+
+  unaugmented_result <- BenchmarkResult$new(stats = stats, github = NULL)
+  withr::with_envvar(
+    c(
+      "CONBENCH_MACHINE_INFO_NAME" = host_name,
+      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
+      "CONBENCH_PROJECT_COMMIT" = github$commit,
+      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
+    ),
+    { augmented_result <- augment_result(unaugmented_result) }
+  )
+
+  expect_equal(unaugmented_result$timestamp, augmented_result$timestamp)
+
+  expect_equal(unaugmented_result$stats, stats)
+  expect_equal(augmented_result$stats, stats)
+
+  expect_null(unaugmented_result$batch_id)
+  expect_type(augmented_result$batch_id, "character")
+
+  expect_null(unaugmented_result$machine_info)
+  expect_type(augmented_result$machine_info, "list")
+  expect_type(augmented_result$machine_info$name, "character")
+  expect_equal(augmented_result$machine_info$name, host_name)
+
+  expect_null(unaugmented_result$github)
+  expect_equal(augmented_result$github, github)
+})
+
+
+test_that("start_run() works", {
+  bm_run <- BenchmarkRun$new(
+    name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
+    reason = "arrowbench-unit-test",
+    github = list(
+      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
+      repository = "https://github.com/conchair/conchair",
+      pr_number = "47"
+    )
+  )
+
+  mockery::stub(
+    where = start_run,
+    what = "call_benchconnect",
+    how = function(args) {
+      expect_identical(args, c("start", "run", "--json", bm_run$json))
+    }
+  )
+  start_run(run = bm_run)
+})
+
+
+test_that("submit_result() works", {
+  bm_result <- BenchmarkResult$new(
+    run_name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
+    run_reason = "arrowbench-unit-test",
+    github = list(
+      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
+      repository = "https://github.com/conchair/conchair",
+      pr_number = "47"
+    ),
+    stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
+  )
+
+  mockery::stub(
+    where = submit_result,
+    what = "call_benchconnect",
+    how = function(args) {
+      expect_identical(args, c("submit", "result", "--json", bm_result$json))
+    }
+  )
+  submit_result(result = bm_result)
+})
+
+
+test_that("finish_run() works", {
+  mockery::stub(
+    where = finish_run,
+    what = "call_benchconnect",
+    how = function(args) {
+      expect_identical(args, c("finish", "run", "--json", "{}"))
+    }
+  )
+  finish_run()
+})
+
+unlink("benchconnect-state.json")
\ No newline at end of file
diff --git a/tests/testthat/test-result.R b/tests/testthat/test-result.R
index 83fcf69..e798ed0 100644
--- a/tests/testthat/test-result.R
+++ b/tests/testthat/test-result.R
@@ -47,14 +47,34 @@ test_that("inherited serialization/deserialization methods work", {
 })
 
 test_that("S3 methods work", {
-  res <- BenchmarkResult$new(
-    run_name = "fake_run",
-    tags = c(is_real = FALSE),
-    optional_benchmark_info = list(
-      name = "fake",
-      result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE),
-      params = list(speed = "lightning")
-    )
+  github <- list(
+    repository = "https://github.com/conchair/conchair",
+    commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
+    pr_number = 47L
+  )
+  run_reason <- "mocked-arrowbench-unit-test"
+  run_name <- paste(run_reason, github$commit, sep = ": ")
+  host_name <- "fake-computer"
+
+  withr::with_envvar(
+    c(
+      CONBENCH_PROJECT_REPOSITORY = github$repository,
+      CONBENCH_PROJECT_PR_NUMBER = github$pr_number,
+      CONBENCH_PROJECT_COMMIT = github$commit,
+      CONBENCH_MACHINE_INFO_NAME = host_name
+    ),
+    {
+      res <- BenchmarkResult$new(
+        run_name = run_name,
+        run_reason = run_reason,
+        tags = c(is_real = FALSE),
+        optional_benchmark_info = list(
+          name = "fake",
+          result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE),
+          params = list(speed = "lightning")
+        )
+      )
+    }
   )
 
   expect_equal(as.character(res), res$json)
@@ -67,7 +87,9 @@ test_that("S3 methods work", {
       list(iteration = 1L, time = 0, status = "superfast", speed = "lightning"),
       row.names = c(NA, -1L),
       class = c("tbl_df", "tbl", "data.frame"),
-      run_name = "fake_run",
+      run_name = run_name,
+      run_reason = run_reason,
+      github = github,
       timestamp = res$timestamp,
       tags = c(is_real = FALSE)
     )
diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R
index e8abcd4..14158ee 100644
--- a/tests/testthat/test-run.R
+++ b/tests/testthat/test-run.R
@@ -254,7 +254,13 @@ test_that("run.BenchmarkDataFrame() works", {
   )
   bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list)
 
-  bm_df_res <- run(bm_df)
+  # Narrower than `suppressWarnings()`; catches all 5 instances unlike `expect_warning()`
+  withCallingHandlers(
+    { bm_df_res <- run(bm_df) },
+    warning = function(w) if (conditionMessage(w) == "deparse may be incomplete") {
+      invokeRestart(findRestart("muffleWarning"))
+    }
+  )
 
   assert_benchmark_dataframe(
     bm_df_res,
@@ -290,4 +296,123 @@ test_that("run.BenchmarkDataFrame() works", {
 })
 
 
+test_that("run.BenchmarkDataFrame() with `publish = TRUE` works (with mocking)", {
+  bm_list <- list(placebo, placebo)
+  param_list <- list(
+    get_default_parameters(
+      placebo,
+      error = list(NULL, "rlang::abort", "base::stop"),
+      cpu_count = arrow::cpu_count()
+    ),
+    NULL
+  )
+  bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list)
+
+  github <- list(
+    repository = "https://github.com/conchair/conchair",
+    commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
+    pr_number = 47L
+  )
+  run_reason <- "mocked-arrowbench-unit-test"
+  run_name <- paste(run_reason, github$commit, sep = ": ")
+  host_name <- "fake-computer"
+
+
+
+  withr::with_envvar(
+    c(
+      CONBENCH_PROJECT_REPOSITORY = github$repository,
+      CONBENCH_PROJECT_PR_NUMBER = github$pr_number,
+      CONBENCH_PROJECT_COMMIT = github$commit,
+      CONBENCH_MACHINE_INFO_NAME = host_name
+    ),
+    {
+      # Narrower than `suppressWarnings()`; catches all 5 instances unlike `expect_warning()`
+      withCallingHandlers(
+        {
+          mockery::stub(
+            where = run.BenchmarkDataFrame,
+            what = "start_run",
+            how = function(run) {
+              run <- augment_run(run)
+              expect_identical(run$github, github)
+              expect_identical(run$name, run_name)
+              expect_identical(run$reason, run_reason)
+              expect_identical(run$machine_info$name, host_name)
+            }
+          )
+
+          mockery::stub(
+            where = run.BenchmarkDataFrame,
+            what = "submit_result",
+            how = function(result) {
+              expect_identical(result$github, github)
+              expect_identical(result$run_name, run_name)
+              expect_identical(result$run_reason, run_reason)
+              expect_identical(result$machine_info$name, host_name)
+            }
+          )
+
+          mockery::stub(
+            where = run.BenchmarkDataFrame,
+            what = "finish_run",
+            how = function(run) {
+              run <- augment_run(run)
+              expect_identical(run$github, github)
+              expect_identical(run$name, run_name)
+              expect_identical(run$reason, run_reason)
+              expect_identical(run$machine_info$name, host_name)
+            }
+          )
+
+          wipe_results()
+          bm_df_res <- run(
+            bm_df,
+            publish = TRUE,
+            run_name = run_name,
+            run_reason = run_reason
+
+          )
+        },
+        warning = function(w) if (conditionMessage(w) == "deparse may be incomplete") {
+          invokeRestart(findRestart("muffleWarning"))
+        }
+      )
+    }
+  )
+
+  assert_benchmark_dataframe(
+    bm_df_res,
+    benchmarks = bm_list,
+    parameters = list(param_list[[1]], get_default_parameters(placebo))
+  )
+  expect_true("results" %in% names(bm_df_res))
+  purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) {
+    expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6"))
+    expect_equal(nrow(parameters), length(results$results))
+    if ("error" %in% names(parameters)) {
+      # param set with some cases that will error
+      purrr::walk2(parameters$error, results$results, function(err, res) {
+        expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6"))
+        if (is.null(err)) {
+          # passing case
+          expect_null(res$error)
+          expect_gt(res$stats$data[[1]], 0)
+        } else {
+          # erroring case
+          expect_false(is.null(res$error))
+        }
+      })
+    } else {
+      # param set with no cases that will error (includes defaults)
+      purrr::walk(results$results, function(res) {
+        expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6"))
+        expect_null(res$error)
+        expect_gt(res$stats$data[[1]], 0)
+      })
+    }
+  })
+})
+
+unlink("benchconnect-state.json")
 wipe_results()

From 2b401b508834eb7d24b6cf3782cc1d665ac3daab Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Tue, 17 Jan 2023 22:58:53 +0000
Subject: [PATCH 3/8] stopifnot early

---
 R/run.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/run.R b/R/run.R
index a05f0b8..2e4459e 100644
--- a/R/run.R
+++ b/R/run.R
@@ -55,13 +55,14 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run
   x <- get_default_parameters(x, ...)
 
   if (publish) {
+    stopifnot(
+      "Results cannot be published without a `run_reason`!" = !is.null(run_reason)
+    )
+
     github <- github_info()
     if (is.null(run_name)) {
       run_name <- paste(run_reason, github$commit, sep = ": ")
     }
-    stopifnot(
-      "Results cannot be published without a `run_reason`!" = !is.null(run_reason)
-    )
     bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github)
     start_run(run = bm_run)
   }

From dc9887aee574ca84eae48b3c73faaeaf3748f273 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Wed, 18 Jan 2023 19:36:50 +0000
Subject: [PATCH 4/8] run `finish_run()` via `on.exit()`

---
 R/run.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/run.R b/R/run.R
index 2e4459e..1c5a53b 100644
--- a/R/run.R
+++ b/R/run.R
@@ -65,6 +65,11 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run
     }
     bm_run <- BenchmarkRun$new(name = run_name, reason = run_reason, github = github)
     start_run(run = bm_run)
+
+    # clean up even if something fails
+    on.exit({
+      finish_run(run = bm_run)
+    })
   }
 
   x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) {
@@ -79,10 +84,6 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run
     ress
   })
 
-  if (publish) {
-    finish_run(run = bm_run)
-  }
-
   x
 }
 

From 3da373b248192565b7549094fba05e3e60f281a8 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Wed, 18 Jan 2023 19:44:09 +0000
Subject: [PATCH 5/8] comment tests

---
 tests/testthat/test-run.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R
index 14158ee..fc5facc 100644
--- a/tests/testthat/test-run.R
+++ b/tests/testthat/test-run.R
@@ -387,9 +387,14 @@ test_that("run.BenchmarkDataFrame() with `publish = TRUE` works (with mocking)",
     parameters = list(param_list[[1]], get_default_parameters(placebo))
   )
   expect_true("results" %in% names(bm_df_res))
+
+  # iterate over param and res list cols
   purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) {
+    # each element should be a `BenchmarkResults` (plural!) instance
     expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6"))
+    # each row of the param df should correspond to a result instance
     expect_equal(nrow(parameters), length(results$results))
+    # iterate over results in `BenchmarkResults` object
     if ("error" %in% names(parameters)) {
       # param set with some cases that will error
       purrr::walk2(parameters$error, results$results, function(err, res) {

From 4e2438d5f4d222c6a351d58d051294df722a1df3 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Wed, 18 Jan 2023 22:59:49 +0000
Subject: [PATCH 6/8] system() -> processx::run()

---
 R/external-dependencies.R                   | 19 +++++++++----------
 tests/testthat/test-external-dependencies.R | 19 ++++++++++++++++---
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/R/external-dependencies.R b/R/external-dependencies.R
index 5b055ea..c152466 100644
--- a/R/external-dependencies.R
+++ b/R/external-dependencies.R
@@ -1,7 +1,7 @@
 external_cli_available <- function(cli) {
-  exit_code <- system(paste("which", cli), ignore.stdout = TRUE, ignore.stderr = TRUE)
+  res <- processx::run("which", cli, error_on_status = FALSE)
 
-  if (exit_code != 0L) {
+  if (res$status != 0L) {
     msg <- paste(cli, 'not installed or on $PATH.\n\n')
     if (cli == "pipx") {
       msg <- paste0(
@@ -22,7 +22,7 @@ external_cli_available <- function(cli) {
     warning(warningCondition(msg, class = "notInstalledWarning"))
   }
 
-  exit_code == 0L
+  res$status == 0L
 }
 
 pipx_available <- function() {
@@ -47,7 +47,7 @@ datalogistik_available <- function() {
 #'
 #' @export
 install_pipx <- function() {
-  system('pip install pipx && pipx ensurepath', intern = TRUE)
+  processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"))
 }
 
 
@@ -61,7 +61,6 @@ install_benchconnect <- function() {
   stopifnot(pipx_available())
 
   url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect"
-  pipx_call <- "pipx install --include-deps"
 
   if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) {
     if (interactive()) {
@@ -70,12 +69,12 @@ install_benchconnect <- function() {
       ans <- "y"
     }
     if (tolower(ans) %in% c("y", "")) {
-      system(paste(pipx_call, "--force", url), intern = TRUE)
+      processx::run("pipx", c("install", "--include-deps", "--force", url))
     } else {
       invisible()
     }
   } else {
-    system(paste(pipx_call, url), intern = TRUE)
+    processx::run("pipx", c("install", "--include-deps", url))
   }
 }
 
@@ -95,17 +94,17 @@ install_datalogistik <- function() {
   ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main")
   url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}")
 
-  pipx_call <- "pipx install --pip-args '--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'"
+  pipx_call <- c("install", "--pip-args", "'--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'")
   if (datalogistik_available()) {
     # default to yes (and also this will make it work in non-interactive sessions)
     ans <- readline("datalogistik already installed. Update? [Y/n]: ")
     if (tolower(ans) %in% c("y", "")) {
       # we need the extra args to depend on the development version of arrow
-      return(system(glue("{pipx_call} --force {url}"), intern = TRUE))
+      return(processx::run("pipx", c(pipx_call, "--force", url)))
     } else {
       return(invisible())
     }
   }
 
-  system(glue("{pipx_call} {url}"), intern = TRUE)
+  processx::run("pipx", c(pipx_call, url))
 }
diff --git a/tests/testthat/test-external-dependencies.R b/tests/testthat/test-external-dependencies.R
index 5d10cf8..a6ebd87 100644
--- a/tests/testthat/test-external-dependencies.R
+++ b/tests/testthat/test-external-dependencies.R
@@ -1,11 +1,24 @@
+test_that("external_cli_available() works", {
+  fake_uninstalled_cli <- basename(tempfile())
+  expect_warning(
+    expect_false(
+      external_cli_available(fake_uninstalled_cli)
+    ),
+    regexp = paste(fake_uninstalled_cli, "not installed or on $PATH"),
+    fixed = TRUE
+  )
+
+  expect_true(external_cli_available("which"))
+})
+
 test_that("pipx_available() works", {
-  expect_equal(pipx_available(), system("which pipx") == 0L)
+  expect_equal(pipx_available(), processx::run("which", "pipx")$status == 0L)
 })
 
 test_that("benchconnect_available() works", {
-  expect_equal(benchconnect_available(), system("which benchconnect") == 0L)
+  expect_equal(benchconnect_available(), processx::run("which", "benchconnect")$status == 0L)
 })
 
 test_that("datalogistik_available() works", {
-  expect_equal(datalogistik_available(), system("which datalogistik") == 0L)
+  expect_equal(datalogistik_available(), processx::run("which", "datalogistik")$status == 0L)
 })

From 5945866038a77026415b5fc6aa162fad674940a0 Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Wed, 18 Jan 2023 23:33:21 +0000
Subject: [PATCH 7/8] bash quoting is stupid

---
 R/external-dependencies.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/external-dependencies.R b/R/external-dependencies.R
index c152466..4f0cf45 100644
--- a/R/external-dependencies.R
+++ b/R/external-dependencies.R
@@ -47,7 +47,7 @@ datalogistik_available <- function() {
 #'
 #' @export
 install_pipx <- function() {
-  processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"))
+  processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"), echo_cmd = TRUE)
 }
 
 
@@ -69,12 +69,12 @@ install_benchconnect <- function() {
       ans <- "y"
     }
     if (tolower(ans) %in% c("y", "")) {
-      processx::run("pipx", c("install", "--include-deps", "--force", url))
+      processx::run("pipx", c("install", "--include-deps", "--force", url), echo_cmd = TRUE)
     } else {
       invisible()
     }
   } else {
-    processx::run("pipx", c("install", "--include-deps", url))
+    processx::run("pipx", c("install", "--include-deps", url), echo_cmd = TRUE)
   }
 }
 
@@ -94,17 +94,17 @@ install_datalogistik <- function() {
   ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main")
   url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}")
 
-  pipx_call <- c("install", "--pip-args", "'--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary'")
+  pipx_call <- c("install", "--pip-args=--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary")
   if (datalogistik_available()) {
     # default to yes (and also this will make it work in non-interactive sessions)
     ans <- readline("datalogistik already installed. Update? [Y/n]: ")
     if (tolower(ans) %in% c("y", "")) {
       # we need the extra args to depend on the development version of arrow
-      return(processx::run("pipx", c(pipx_call, "--force", url)))
+      return(processx::run("pipx", c(pipx_call, "--force", url), echo_cmd = TRUE))
     } else {
       return(invisible())
     }
   }
 
-  processx::run("pipx", c(pipx_call, url))
+  processx::run("pipx", c(pipx_call, url), echo_cmd = TRUE)
 }

From 5a43548b85db3fa65f494b5bab895b25de13885f Mon Sep 17 00:00:00 2001
From: Edward Visel <1693477+alistaire47@users.noreply.github.com>
Date: Fri, 20 Jan 2023 22:10:43 +0000
Subject: [PATCH 8/8] rip out excess env vars

---
 R/run.R              | 52 +++++++++++++++++++++++++++-----------------
 man/run_benchmark.Rd |  8 ++++++-
 man/run_bm.Rd        |  8 ++++++-
 man/run_one.Rd       |  6 +++++
 4 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/R/run.R b/R/run.R
index 1c5a53b..681c788 100644
--- a/R/run.R
+++ b/R/run.R
@@ -73,10 +73,8 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run
   }
 
   x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) {
-    withr::with_envvar(
-      c(CONBENCH_RUN_NAME = run_name, CONBENCH_RUN_REASON = run_reason),
-      { ress <- run_benchmark(bm = bm, params = params, ...) }
-    )
+    ress <- run_benchmark(bm = bm, params = params, run_name = run_name, run_reason = run_reason, ...)
+
     if (publish) {
       ress$results <- lapply(ress$results, augment_result)
       lapply(ress$results, submit_result)
@@ -103,6 +101,8 @@ run.BenchmarkDataFrame <- function(x, ..., publish = FALSE, run_name = NULL, run
 #' `profvis::profvis(prof_input = file)`. Default is `FALSE`
 #' @param read_only this will only attempt to read benchmark files and will not
 #' run any that it cannot find.
+#' @param run_name Name for the run. If not specified, will use `{run_reason}: {commit hash}`
+#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test"
 #'
 #' @return A `BenchmarkResults` object, containing `results` attribute of a list
 #' of length `nrow(params)`, each of those a `BenchmarkResult` object.
@@ -116,7 +116,9 @@ run_benchmark <- function(bm,
                           n_iter = 1,
                           dry_run = FALSE,
                           profiling = FALSE,
-                          read_only = FALSE) {
+                          read_only = FALSE,
+                          run_name = NULL,
+                          run_reason = NULL) {
   start <- Sys.time()
   stopifnot(is.data.frame(params), nrow(params) > 0)
   message("Running ", nrow(params), " benchmarks with ", n_iter, " iterations:")
@@ -145,6 +147,8 @@ run_benchmark <- function(bm,
     profiling = profiling,
     progress_bar = progress_bar,
     read_only = read_only,
+    run_name = run_name,
+    run_reason = run_reason,
     test_packages = unique(bm$packages_used(params))
   )
 
@@ -174,6 +178,8 @@ run_benchmark <- function(bm,
 #' @inheritParams run_benchmark
 #' @param batch_id a length 1 character vector to identify the batch
 #' @param progress_bar a `progress` object to update progress to (default `NULL`)
+#' @param run_name Name for the run
+#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test"
 #' @param test_packages a character vector of packages that the benchmarks test (default `NULL`)
 #' @param ... parameters passed to `bm$setup()`.
 #'
@@ -188,6 +194,8 @@ run_one <- function(bm,
                     profiling = FALSE,
                     progress_bar = NULL,
                     read_only = FALSE,
+                    run_name = NULL,
+                    run_reason = NULL,
                     test_packages = NULL) {
   all_params <- list(...)
 
@@ -213,7 +221,7 @@ run_one <- function(bm,
   args <- modifyList(
     params,
     list(bm = bm, n_iter = n_iter, batch_id = batch_id, profiling = profiling,
-         global_params = global_params)
+         global_params = global_params, run_name = run_name, run_reason = run_reason)
   )
 
   # transform the arguments into a string representation that can be called in
@@ -256,7 +264,9 @@ run_one <- function(bm,
       name = bm$name,
       metadata = metadata,
       progress_bar = progress_bar,
-      read_only = read_only
+      read_only = read_only,
+      run_name = run_name,
+      run_reason = run_reason
     ),
     keep.null = TRUE
   )
@@ -275,10 +285,13 @@ run_one <- function(bm,
 #' in a fresh R process that `run_one()` provides.
 #' @inheritParams run_one
 #' @param global_params the global parameters that have been set
+#' @param run_name Name for the run
+#' @param run_reason Low-cardinality reason for the run, e.g. "commit" or "test"
 #' @export
 #' @importFrom utils modifyList
 #' @importFrom sessioninfo package_info
-run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, global_params = list()) {
+run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE,
+                   global_params = list(), run_name = NULL, run_reason = NULL) {
   # We *don't* want to use altrep when we are setting up, or we get surprising results
   withr::with_options(
     list(arrow.use_altrep = FALSE),
@@ -318,14 +331,11 @@ run_bm <- function(bm, ..., n_iter = 1, batch_id = NULL, profiling = FALSE, glob
     n_iter = n_iter
   )
 
-  env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON")))
-  env_vars <- lapply(env_vars, function(x) if (nzchar(x)) x else NULL)
-
   out <- BenchmarkResult$new(
-    run_name = env_vars$CONBENCH_RUN_NAME,
-    run_id = env_vars$CONBENCH_RUN_ID,
+    run_name = run_name,
+    run_id = NULL,
     batch_id = batch_id,
-    run_reason = env_vars$CONBENCH_RUN_REASON,
+    run_reason = run_reason,
     # let default populate
     # timestamp = utc_now_iso_format(),
     stats = list(
@@ -363,7 +373,8 @@ run_iteration <- function(bm, ctx, profiling = FALSE) {
   out
 }
 
-global_setup <- function(lib_path = NULL, cpu_count = NULL, mem_alloc = NULL, test_packages = NULL, dry_run = FALSE, read_only = FALSE) {
+global_setup <- function(lib_path = NULL, cpu_count = NULL, mem_alloc = NULL,
+                         test_packages = NULL, dry_run = FALSE, read_only = FALSE) {
   script <- ""
   if (!dry_run & !read_only) {
     lib_path <- ensure_lib(lib_path, test_packages = test_packages)
@@ -461,7 +472,8 @@ github_info <- function() {
 
 #' @importFrom jsonlite fromJSON toJSON
 #' @importFrom withr with_envvar
-run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE) {
+run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_only = FALSE,
+                       run_name = run_name, run_reason = run_reason) {
   # cmd may need to vary by platform; possibly also a param for this fn?
 
   result_dir <- file.path(local_dir(), "results")
@@ -495,7 +507,7 @@ run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_
     progress_bar$message(msg, set_width = FALSE)
   }
 
-  env_vars <- as.list(Sys.getenv(c("CONBENCH_RUN_NAME", "CONBENCH_RUN_ID", "CONBENCH_RUN_REASON")))
+  env_vars <- list()
   if (!is.na.null(dots$mem_alloc)) {
     env_vars <- c(env_vars, ARROW_DEFAULT_MEMORY_POOL = dots$mem_alloc)
   }
@@ -537,9 +549,9 @@ run_script <- function(lines, cmd = find_r(), ..., metadata, progress_bar, read_
     # This means the script errored.
     message(paste(result, collapse = "\n"))
     result <- BenchmarkResult$new(
-      run_name = if (nzchar(env_vars$CONBENCH_RUN_NAME)) env_vars$CONBENCH_RUN_NAME else NULL,
-      run_id = if (nzchar(env_vars$CONBENCH_RUN_ID)) env_vars$CONBENCH_RUN_ID else NULL,
-      run_reason = if (nzchar(env_vars$CONBENCH_RUN_REASON)) env_vars$CONBENCH_RUN_REASON else NULL,
+      run_name = run_name,
+      run_id = NULL,
+      run_reason = run_reason,
       error = list(log = result),
       tags = metadata$tags,
       info = metadata$info,
diff --git a/man/run_benchmark.Rd b/man/run_benchmark.Rd
index 3e603c4..35804ae 100644
--- a/man/run_benchmark.Rd
+++ b/man/run_benchmark.Rd
@@ -11,7 +11,9 @@ run_benchmark(
   n_iter = 1,
   dry_run = FALSE,
   profiling = FALSE,
-  read_only = FALSE
+  read_only = FALSE,
+  run_name = NULL,
+  run_reason = NULL
 )
 }
 \arguments{
@@ -35,6 +37,10 @@ contain a \code{prof_file} field, which you can read in with
 
 \item{read_only}{this will only attempt to read benchmark files and will not
 run any that it cannot find.}
+
+\item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}}
+
+\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"}
 }
 \value{
 A \code{BenchmarkResults} object, containing \code{results} attribute of a list
diff --git a/man/run_bm.Rd b/man/run_bm.Rd
index 4f5f633..f00b0db 100644
--- a/man/run_bm.Rd
+++ b/man/run_bm.Rd
@@ -10,7 +10,9 @@ run_bm(
   n_iter = 1,
   batch_id = NULL,
   profiling = FALSE,
-  global_params = list()
+  global_params = list(),
+  run_name = NULL,
+  run_reason = NULL
 )
 }
 \arguments{
@@ -27,6 +29,10 @@ contain a \code{prof_file} field, which you can read in with
 \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}}
 
 \item{global_params}{the global parameters that have been set}
+
+\item{run_name}{Name for the run}
+
+\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"}
 }
 \description{
 This is the function that gets called in the script that \code{\link[=run_one]{run_one()}} prepares.
diff --git a/man/run_one.Rd b/man/run_one.Rd
index 5448b28..51aca4d 100644
--- a/man/run_one.Rd
+++ b/man/run_one.Rd
@@ -13,6 +13,8 @@ run_one(
   profiling = FALSE,
   progress_bar = NULL,
   read_only = FALSE,
+  run_name = NULL,
+  run_reason = NULL,
   test_packages = NULL
 )
 }
@@ -37,6 +39,10 @@ contain a \code{prof_file} field, which you can read in with
 \item{read_only}{this will only attempt to read benchmark files and will not
 run any that it cannot find.}
 
+\item{run_name}{Name for the run}
+
+\item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"}
+
 \item{test_packages}{a character vector of packages that the benchmarks test (default \code{NULL})}
 }
 \value{