From 2c53c9d136928129a3252d4504932d8ec8776648 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Fri, 2 Dec 2022 17:28:40 +0000 Subject: [PATCH 1/7] Top-level multi-benchmark runner --- DESCRIPTION | 2 + NAMESPACE | 7 +++ R/benchmark-dataframe.R | 92 +++++++++++++++++++++++++++++++++++ R/benchmark.R | 13 +++++ man/BenchmarkDataFrame.Rd | 22 +++++++++ man/default_params.Rd | 22 +++++++++ man/get_package_benchmarks.Rd | 18 +++++++ man/run.Rd | 25 ++++++++++ 8 files changed, 201 insertions(+) create mode 100644 R/benchmark-dataframe.R create mode 100644 man/BenchmarkDataFrame.Rd create mode 100644 man/default_params.Rd create mode 100644 man/get_package_benchmarks.Rd create mode 100644 man/run.Rd diff --git a/DESCRIPTION b/DESCRIPTION index d68b448..a88172b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,6 +29,7 @@ Imports: rlang, R.utils, sessioninfo, + tibble, utils, uuid, waldo, @@ -49,6 +50,7 @@ Suggests: RoxygenNote: 7.2.2 Roxygen: list(markdown = TRUE, load = "source") Collate: + 'benchmark-dataframe.R' 'benchmark.R' 'bm-array-altrep-materialization.R' 'bm-array-to-vector.R' diff --git a/NAMESPACE b/NAMESPACE index e8b832e..56ed0bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,15 +4,20 @@ S3method(as.character,Serializable) S3method(as.data.frame,BenchmarkResult) S3method(as.data.frame,BenchmarkResults) S3method(as.list,Serializable) +S3method(format,BenchmarkDataFrame) +S3method(run,BenchmarkDataFrame) +S3method(run,default) export("%||%") export(BenchEnvironment) export(Benchmark) +export(BenchmarkDataFrame) export(all_sources) export(array_altrep_materialization) export(array_to_vector) export(confirm_mem_alloc) export(dataset_taxi_2013) export(dataset_taxi_parquet) +export(default_params) export(df_to_table) export(ensure_dataset) export(ensure_format) @@ -24,6 +29,7 @@ export(get_csv_writer) export(get_dataset_attr) export(get_input_func) export(get_json_reader) +export(get_package_benchmarks) export(get_params_summary) export(get_query_func) export(get_read_function) @@ -40,6 +46,7 @@ export(read_json) export(read_source) export(remote_dataset) export(row_group_size) +export(run) export(run_benchmark) export(run_bm) export(run_one) diff --git a/R/benchmark-dataframe.R b/R/benchmark-dataframe.R new file mode 100644 index 0000000..6217d28 --- /dev/null +++ b/R/benchmark-dataframe.R @@ -0,0 +1,92 @@ +#' A classed dataframe of benchmarks for running +#' +#' @param benchmarks A list with elements of class `Benchmark` +#' @param parameters Optional. A list of dataframes of parameter combinations to +#' run as generated by [default_params()]. If null, defaults will be generated +#' when [run()] is called. +#' +#' @return A classed dataframe with `name` (benchmark attribute, not object name), +#' `benchmark`, and `params` columns +#' +#' @export +BenchmarkDataFrame <- function(benchmarks, parameters) { + lapply(benchmarks, function(bm) stopifnot( + "All elements of `benchmarks` are not of class `Benchmark`!" = inherits(bm, "Benchmark") + )) + + bm_names <- vapply(benchmarks, function(bm) bm$name, character(1)) + if (missing(parameters)) { + parameters <- rep(list(NULL), length = length(benchmarks)) + } + + structure( + tibble::tibble( + name = bm_names, + benchmark = benchmarks, + parameters = parameters + ), + class = c("BenchmarkDataFrame", "tbl_df", "tbl", "data.frame") + ) +} + + +#' Get a list of benchmarks in a package +#' +#' @param package String of package name in which to find benchmarks +#' +#' @return An instance of [BenchmarkDataFrame] with all the benchmarks contained +#' by a package +#' +#' @export +get_package_benchmarks <- function(package = "arrowbench") { + nms <- getNamespaceExports(package) + objs <- mget(nms, envir = getNamespace(package)) + bms <- Filter(function(x) inherits(x, "Benchmark"), objs) + BenchmarkDataFrame(benchmarks = bms) +} + + +#' Run an object +#' +#' @param x An S3 classed object to run +#' @param ... Additional arguments passed through to methods. For +#' `run.BenchmarkDataFrame`, passed through to [default_params()] (when +#' parameters are not specified) and [run_benchmark()]. +#' +#' @return A modified object containing run results. For `run.BenchmarkDataFrame`, +#' a `results` list column is appended. +#' +#' @export +run <- function(x, ...) { + UseMethod("run") +} + + +#' @export +run.default <- function(x, ...) { + stop("No method found for class `", toString(class(x)), '`') +} + + +#' @rdname run +#' @export +run.BenchmarkDataFrame <- function(x, ...) { + x$params <- lapply(x$parameters, function(params) { + if (is.null(params)) { + params <- default_params(x, ...) + } + params + }) + + x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { + run_benchmark(bm = x, params = params, ...) + }) + + x +} + + +#' @export +format.BenchmarkDataFrame <- function(x, ...) { + c("# ", NextMethod()) +} \ No newline at end of file diff --git a/R/benchmark.R b/R/benchmark.R index 2dc72d2..aedf3a6 100644 --- a/R/benchmark.R +++ b/R/benchmark.R @@ -136,6 +136,19 @@ Benchmark <- function(name, #' @export BenchEnvironment <- function(...) list2env(list(...)) +#' Generate a dataframe of default parameters for a benchmark +#' +#' Generates a dataframe of parameter combinations for a benchmark to try based +#' on the parameter defaults of its `setup` function and supplied parameters. +#' +#' @param bm An object of class `Benchmark` for which to generate parameters +#' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` +#' function. May also contain `cpu_count`, `lib_path`, and `mem_alloc`. +#' +#' @return A dataframe of parameter combinations to try with a column for each +#' parameter and a row for each combination. +#' +#' @export default_params <- function(bm, ...) { # This takes the expansion of the default parameters in the function signature # perhaps restricted by the ... params diff --git a/man/BenchmarkDataFrame.Rd b/man/BenchmarkDataFrame.Rd new file mode 100644 index 0000000..4d1ab7e --- /dev/null +++ b/man/BenchmarkDataFrame.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-dataframe.R +\name{BenchmarkDataFrame} +\alias{BenchmarkDataFrame} +\title{A classed dataframe of benchmarks for running} +\usage{ +BenchmarkDataFrame(benchmarks, parameters) +} +\arguments{ +\item{benchmarks}{A list with elements of class \code{Benchmark}} + +\item{parameters}{Optional. A list of dataframes of parameter combinations to +run as generated by \code{\link[=default_params]{default_params()}}. If null, defaults will be generated +when \code{\link[=run]{run()}} is called.} +} +\value{ +A classed dataframe with \code{name} (benchmark attribute, not object name), +\code{benchmark}, and \code{params} columns +} +\description{ +A classed dataframe of benchmarks for running +} diff --git a/man/default_params.Rd b/man/default_params.Rd new file mode 100644 index 0000000..f375b37 --- /dev/null +++ b/man/default_params.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark.R +\name{default_params} +\alias{default_params} +\title{Generate a dataframe of default parameters for a benchmark} +\usage{ +default_params(bm, ...) +} +\arguments{ +\item{bm}{An object of class \code{Benchmark} for which to generate parameters} + +\item{...}{Named arguments corresponding to the parameters of \code{bm}'s \code{setup} +function. May also contain \code{cpu_count}, \code{lib_path}, and \code{mem_alloc}.} +} +\value{ +A dataframe of parameter combinations to try with a column for each +parameter and a row for each combination. +} +\description{ +Generates a dataframe of parameter combinations for a benchmark to try based +on the parameter defaults of its \code{setup} function and supplied parameters. +} diff --git a/man/get_package_benchmarks.Rd b/man/get_package_benchmarks.Rd new file mode 100644 index 0000000..a150102 --- /dev/null +++ b/man/get_package_benchmarks.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-dataframe.R +\name{get_package_benchmarks} +\alias{get_package_benchmarks} +\title{Get a list of benchmarks in a package} +\usage{ +get_package_benchmarks(package = "arrowbench") +} +\arguments{ +\item{package}{String of package name in which to find benchmarks} +} +\value{ +An instance of \link{BenchmarkDataFrame} with all the benchmarks contained +by a package +} +\description{ +Get a list of benchmarks in a package +} diff --git a/man/run.Rd b/man/run.Rd new file mode 100644 index 0000000..93b19a6 --- /dev/null +++ b/man/run.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-dataframe.R +\name{run} +\alias{run} +\alias{run.BenchmarkDataFrame} +\title{Run an object} +\usage{ +run(x, ...) + +\method{run}{BenchmarkDataFrame}(x, ...) +} +\arguments{ +\item{x}{An S3 classed object to run} + +\item{...}{Additional arguments passed through to methods. For +\code{run.BenchmarkDataFrame}, passed through to \code{\link[=default_params]{default_params()}} (when +parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.} +} +\value{ +A modified object containing run results. For \code{run.BenchmarkDataFrame}, +a \code{results} list column is appended. +} +\description{ +Run an object +} From 1fbfb454ecea8b3c58fe711d5454e94880331a48 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Mon, 12 Dec 2022 18:49:46 +0000 Subject: [PATCH 2/7] BenchmarkDataFrame --- DESCRIPTION | 3 +- NAMESPACE | 4 + R/benchmark-dataframe.R | 47 ++++++-- R/benchmark.R | 26 ++-- man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd | 16 +++ man/default_params.Rd | 8 +- tests/testthat/test-benchmark-dataframe.R | 140 ++++++++++++++++++++++ 7 files changed, 218 insertions(+), 26 deletions(-) create mode 100644 man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd create mode 100644 tests/testthat/test-benchmark-dataframe.R diff --git a/DESCRIPTION b/DESCRIPTION index a88172b..1d7ca5f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,6 @@ Imports: arrow, bench, callr, - dbplyr, dplyr, duckdb (>= 0.6.0), distro, @@ -39,6 +38,8 @@ Suggests: archive, data.table, DBI, + dbplyr, + duckdb, fst, jsonify, lubridate, diff --git a/NAMESPACE b/NAMESPACE index 56ed0bd..5ea8ef9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,9 @@ S3method(as.character,Serializable) S3method(as.data.frame,BenchmarkResult) S3method(as.data.frame,BenchmarkResults) S3method(as.list,Serializable) +S3method(default_params,Benchmark) +S3method(default_params,BenchmarkDataFrame) +S3method(default_params,default) S3method(format,BenchmarkDataFrame) S3method(run,BenchmarkDataFrame) S3method(run,default) @@ -11,6 +14,7 @@ export("%||%") export(BenchEnvironment) export(Benchmark) export(BenchmarkDataFrame) +export(URSA_I9_9960X_R_BENCHMARK_NAMES) export(all_sources) export(array_altrep_materialization) export(array_to_vector) diff --git a/R/benchmark-dataframe.R b/R/benchmark-dataframe.R index 6217d28..a6414a2 100644 --- a/R/benchmark-dataframe.R +++ b/R/benchmark-dataframe.R @@ -1,3 +1,15 @@ +#' A vector of benchmark attribute names run on `ursa-i9-9960x` +#' +#' @export +URSA_I9_9960X_R_BENCHMARK_NAMES <- c( + "dataframe-to-table", # `df_to_table` + "file-read", + "file-write", + "partitioned-dataset-filter", # `dataset_taxi_parquet` + "wide-dataframe", # not actually an R benchmark + "tpch" # `tpc_h` +) + #' A classed dataframe of benchmarks for running #' #' @param benchmarks A list with elements of class `Benchmark` @@ -30,6 +42,12 @@ BenchmarkDataFrame <- function(benchmarks, parameters) { } +#' @export +format.BenchmarkDataFrame <- function(x, ...) { + c("# ", NextMethod()) +} + + #' Get a list of benchmarks in a package #' #' @param package String of package name in which to find benchmarks @@ -46,6 +64,19 @@ get_package_benchmarks <- function(package = "arrowbench") { } +#' @export +default_params.BenchmarkDataFrame <- function(x, ...) { + x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { + if (is.null(params)) { + params <- default_params(bm, ...) + } + params + }) + + x +} + + #' Run an object #' #' @param x An S3 classed object to run @@ -71,22 +102,12 @@ run.default <- function(x, ...) { #' @rdname run #' @export run.BenchmarkDataFrame <- function(x, ...) { - x$params <- lapply(x$parameters, function(params) { - if (is.null(params)) { - params <- default_params(x, ...) - } - params - }) + # if already run (so no elements of `parameters` are NULL), is no-op + x <- default_params(x, ...) x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { - run_benchmark(bm = x, params = params, ...) + run_benchmark(bm = bm, params = params, ...) }) x } - - -#' @export -format.BenchmarkDataFrame <- function(x, ...) { - c("# ", NextMethod()) -} \ No newline at end of file diff --git a/R/benchmark.R b/R/benchmark.R index aedf3a6..c32524b 100644 --- a/R/benchmark.R +++ b/R/benchmark.R @@ -141,18 +141,28 @@ BenchEnvironment <- function(...) list2env(list(...)) #' Generates a dataframe of parameter combinations for a benchmark to try based #' on the parameter defaults of its `setup` function and supplied parameters. #' -#' @param bm An object of class `Benchmark` for which to generate parameters +#' @param x An object for which to generate parameters #' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` #' function. May also contain `cpu_count`, `lib_path`, and `mem_alloc`. #' -#' @return A dataframe of parameter combinations to try with a column for each -#' parameter and a row for each combination. +#' @return For `default_params.Benchmark`, a dataframe of parameter combinations +#' to try with a column for each parameter and a row for each combination. #' #' @export -default_params <- function(bm, ...) { +default_params <- function(x, ...) { + UseMethod("default_params") +} + +#' @export +default_params.default <- function(x, ...) { + stop("No method found for class `", toString(class(x)), '`') +} + +#' @export +default_params.Benchmark <- function(x, ...) { # This takes the expansion of the default parameters in the function signature # perhaps restricted by the ... params - params <- modifyList(get_default_args(bm$setup), list(...)) + params <- modifyList(get_default_args(x$setup), list(...)) if (identical(params$lib_path, "all")) { # Default for lib_path is just "latest", if omitted # "all" means all old versions @@ -171,12 +181,12 @@ default_params <- function(bm, ...) { if (!is.null(params$mem_alloc)) { # a bit of a hack, we can test memory allocators on devel or latest, but # "4.0" <= "devel" and "4.0" <= "latest" are both true. - out[!is_arrow_package(out, "4.0", bm$packages_used), "mem_alloc"] <- NA + out[!is_arrow_package(out, "4.0", x$packages_used), "mem_alloc"] <- NA out <- unique(out) } - if (!is.null(bm$valid_params)) { - out <- bm$valid_params(out) + if (!is.null(x$valid_params)) { + out <- x$valid_params(out) } out } diff --git a/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd b/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd new file mode 100644 index 0000000..40af5ae --- /dev/null +++ b/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmark-dataframe.R +\docType{data} +\name{URSA_I9_9960X_R_BENCHMARK_NAMES} +\alias{URSA_I9_9960X_R_BENCHMARK_NAMES} +\title{A vector of benchmark attribute names run on \verb{ursa-i9-9960x}} +\format{ +An object of class \code{character} of length 6. +} +\usage{ +URSA_I9_9960X_R_BENCHMARK_NAMES +} +\description{ +A vector of benchmark attribute names run on \verb{ursa-i9-9960x} +} +\keyword{datasets} diff --git a/man/default_params.Rd b/man/default_params.Rd index f375b37..77cd1f4 100644 --- a/man/default_params.Rd +++ b/man/default_params.Rd @@ -4,17 +4,17 @@ \alias{default_params} \title{Generate a dataframe of default parameters for a benchmark} \usage{ -default_params(bm, ...) +default_params(x, ...) } \arguments{ -\item{bm}{An object of class \code{Benchmark} for which to generate parameters} +\item{x}{An object for which to generate parameters} \item{...}{Named arguments corresponding to the parameters of \code{bm}'s \code{setup} function. May also contain \code{cpu_count}, \code{lib_path}, and \code{mem_alloc}.} } \value{ -A dataframe of parameter combinations to try with a column for each -parameter and a row for each combination. +For \code{default_params.Benchmark}, a dataframe of parameter combinations +to try with a column for each parameter and a row for each combination. } \description{ Generates a dataframe of parameter combinations for a benchmark to try based diff --git a/tests/testthat/test-benchmark-dataframe.R b/tests/testthat/test-benchmark-dataframe.R new file mode 100644 index 0000000..33004f9 --- /dev/null +++ b/tests/testthat/test-benchmark-dataframe.R @@ -0,0 +1,140 @@ +assert_benchmark_dataframe <- function(bm_df, benchmarks, parameters) { + if (missing(parameters)) { + parameters <- rep(list(NULL), length(benchmarks)) + } + + expect_s3_class(bm_df, c("BenchmarkDataFrame", "tbl", "tbl_df", "data.frame")) + expect_true(all(c("name", "benchmark", "parameters") %in% names(bm_df))) + expect_equal(nrow(bm_df), length(benchmarks)) + expect_equal(bm_df$name, vapply(benchmarks, function(x) x$name, character(1))) + expect_equal(bm_df$benchmark, benchmarks) + expect_equal(bm_df$parameters, parameters) +} + + +test_that("BenchmarkDataFrame can be instantiated", { + for (bm_list in list( + list(placebo), + list(placebo, placebo), + list(a = placebo, b = placebo) + )) { + bm_df <- BenchmarkDataFrame(benchmarks = bm_list) + assert_benchmark_dataframe(bm_df, benchmarks = bm_list) + } + + bm_list <- list(placebo, placebo) + param_list <- list(default_params(placebo), NULL) + bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) + assert_benchmark_dataframe(bm_df, benchmarks = bm_list, parameters = param_list) + + expect_error( + BenchmarkDataFrame(1), + "All elements of `benchmarks` are not of class `Benchmark`!" + ) +}) + + +test_that("format.BenchmarkDataFrame() works", { + bm_df <- BenchmarkDataFrame(benchmarks = list(placebo)) + expect_output(print(bm_df), "# ") +}) + + +test_that("`get_package_benchmarks()` works", { + bm_df <- get_package_benchmarks() + assert_benchmark_dataframe(bm_df = bm_df, benchmarks = bm_df$benchmark) + expect_gt(nrow(bm_df), 0L) + # currently `any()` because `wide-dataframe` is actually a Python benchmark, + # but is still listed in arrow-benchmarks-ci in R. If removed, change to `all()`. + expect_true(any(URSA_I9_9960X_R_BENCHMARK_NAMES %in% bm_df$name)) +}) + + +test_that("default_params.BenchmarkDataFrame() can fill in params col", { + bm_list <- list(placebo, placebo) + bm_df <- BenchmarkDataFrame(bm_list) + assert_benchmark_dataframe(bm_df, bm_list) + + bm_df_augmented <- default_params(bm_df) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo)) + expect_gt(nrow(param_df), 0L) + }) + + # handle keyword args + bm_df_augmented <- default_params(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo, duration = 1)) + expect_gt(nrow(param_df), 0L) + }) + + # handle partially-specified param lists + bm_df <- BenchmarkDataFrame(bm_list, parameters = list(default_params(placebo, duration = 1), NULL)) + bm_df_augmented <- default_params(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo, duration = 1)) + expect_gt(nrow(param_df), 0L) + }) +}) + + +test_that("run() dispatches and run.default() errors", { + expect_error( + run(1), + "No method found for class `numeric`" + ) +}) + + +test_that("run.BenchmarkDataFrame() works", { + bm_list <- list(placebo, placebo) + param_list <- list( + default_params( + placebo, + error = list(NULL, "rlang::abort", "base::stop"), + cpu_count = arrow::cpu_count() + ), + NULL + ) + bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) + + bm_df_res <- run(bm_df) + + assert_benchmark_dataframe( + bm_df_res, + benchmarks = bm_list, + parameters = list(param_list[[1]], default_params(placebo)) + ) + expect_true("results" %in% names(bm_df_res)) + purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { + expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6")) + expect_equal(nrow(parameters), length(results$results)) + if ("error" %in% names(parameters)) { + # param set with some cases that will error + purrr::walk2(parameters$error, results$results, function(err, res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + if (is.null(err)) { + # passing case + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + } else { + # erroring case + expect_false(is.null(res$error)) + } + }) + } else { + # param set with no cases that will error (includes defaults) + purrr::walk(results$results, function(res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + }) + } + }) +}) \ No newline at end of file From d3cd03871357b346652e75ac75aac74fa88466f7 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Mon, 12 Dec 2022 19:25:48 +0000 Subject: [PATCH 3/7] remove unused import --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d7ca5f..18a6cdc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,6 @@ Depends: R (>= 3.5.0) Imports: arrow, bench, - callr, dplyr, duckdb (>= 0.6.0), distro, From 63c1f0672ee6d85b3d09f02beb2658cf32a2eb66 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Mon, 12 Dec 2022 19:38:55 +0000 Subject: [PATCH 4/7] is CI failure fixed by dbplyr in Suggests? --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 18a6cdc..e93f0e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,6 @@ Suggests: data.table, DBI, dbplyr, - duckdb, fst, jsonify, lubridate, From 0cdb6e7ea5443dd958176547c75fcf18dfc55a6f Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Mon, 12 Dec 2022 22:36:54 +0000 Subject: [PATCH 5/7] rearrange files --- DESCRIPTION | 2 + R/benchmark-dataframe.R | 61 ------------- R/benchmark.R | 55 +----------- R/config.R | 11 +++ R/params.R | 68 ++++++++++++++ R/run.R | 35 ++++++++ man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd | 2 +- man/default_params.Rd | 2 +- man/run.Rd | 2 +- tests/testthat/helper.R | 15 ++++ tests/testthat/test-benchmark-dataframe.R | 104 ---------------------- tests/testthat/test-params.R | 32 +++++++ tests/testthat/test-run.R | 56 ++++++++++++ 13 files changed, 223 insertions(+), 222 deletions(-) create mode 100644 R/config.R create mode 100644 R/params.R create mode 100644 tests/testthat/test-params.R diff --git a/DESCRIPTION b/DESCRIPTION index e93f0e1..48d63e6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,6 +66,7 @@ Collate: 'bm-tpc-h.R' 'bm-write-csv.R' 'bm-write-file.R' + 'config.R' 'custom-duckdb.R' 'ensure-format.R' 'ensure-lib.R' @@ -73,6 +74,7 @@ Collate: 'ensure-source.R' 'ensure-tpch-source.R' 'measure.R' + 'params.R' 'util.R' 'result.R' 'run.R' diff --git a/R/benchmark-dataframe.R b/R/benchmark-dataframe.R index a6414a2..b39b174 100644 --- a/R/benchmark-dataframe.R +++ b/R/benchmark-dataframe.R @@ -1,15 +1,3 @@ -#' A vector of benchmark attribute names run on `ursa-i9-9960x` -#' -#' @export -URSA_I9_9960X_R_BENCHMARK_NAMES <- c( - "dataframe-to-table", # `df_to_table` - "file-read", - "file-write", - "partitioned-dataset-filter", # `dataset_taxi_parquet` - "wide-dataframe", # not actually an R benchmark - "tpch" # `tpc_h` -) - #' A classed dataframe of benchmarks for running #' #' @param benchmarks A list with elements of class `Benchmark` @@ -62,52 +50,3 @@ get_package_benchmarks <- function(package = "arrowbench") { bms <- Filter(function(x) inherits(x, "Benchmark"), objs) BenchmarkDataFrame(benchmarks = bms) } - - -#' @export -default_params.BenchmarkDataFrame <- function(x, ...) { - x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { - if (is.null(params)) { - params <- default_params(bm, ...) - } - params - }) - - x -} - - -#' Run an object -#' -#' @param x An S3 classed object to run -#' @param ... Additional arguments passed through to methods. For -#' `run.BenchmarkDataFrame`, passed through to [default_params()] (when -#' parameters are not specified) and [run_benchmark()]. -#' -#' @return A modified object containing run results. For `run.BenchmarkDataFrame`, -#' a `results` list column is appended. -#' -#' @export -run <- function(x, ...) { - UseMethod("run") -} - - -#' @export -run.default <- function(x, ...) { - stop("No method found for class `", toString(class(x)), '`') -} - - -#' @rdname run -#' @export -run.BenchmarkDataFrame <- function(x, ...) { - # if already run (so no elements of `parameters` are NULL), is no-op - x <- default_params(x, ...) - - x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { - run_benchmark(bm = bm, params = params, ...) - }) - - x -} diff --git a/R/benchmark.R b/R/benchmark.R index c32524b..5addee9 100644 --- a/R/benchmark.R +++ b/R/benchmark.R @@ -129,6 +129,7 @@ Benchmark <- function(name, ) } + #' Create a test environment to run benchmarks in #' #' @param ... named list of parameters to set in the environment @@ -136,60 +137,6 @@ Benchmark <- function(name, #' @export BenchEnvironment <- function(...) list2env(list(...)) -#' Generate a dataframe of default parameters for a benchmark -#' -#' Generates a dataframe of parameter combinations for a benchmark to try based -#' on the parameter defaults of its `setup` function and supplied parameters. -#' -#' @param x An object for which to generate parameters -#' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` -#' function. May also contain `cpu_count`, `lib_path`, and `mem_alloc`. -#' -#' @return For `default_params.Benchmark`, a dataframe of parameter combinations -#' to try with a column for each parameter and a row for each combination. -#' -#' @export -default_params <- function(x, ...) { - UseMethod("default_params") -} - -#' @export -default_params.default <- function(x, ...) { - stop("No method found for class `", toString(class(x)), '`') -} - -#' @export -default_params.Benchmark <- function(x, ...) { - # This takes the expansion of the default parameters in the function signature - # perhaps restricted by the ... params - params <- modifyList(get_default_args(x$setup), list(...)) - if (identical(params$lib_path, "all")) { - # Default for lib_path is just "latest", if omitted - # "all" means all old versions - # rev() is so we run newest first. This also means we bootstrap data fixtures - # with newest first, so that's some assurance that older versions can read - # what the newer libs write - params$lib_path <- rev(c(names(arrow_version_to_date), "devel", "latest")) - } - if (is.null(params$cpu_count)) { - params$cpu_count <- c(1L, parallel::detectCores()) - } - params$stringsAsFactors <- FALSE - out <- do.call(expand.grid, params) - - # we don't change memory allocators on non-arrow packages - if (!is.null(params$mem_alloc)) { - # a bit of a hack, we can test memory allocators on devel or latest, but - # "4.0" <= "devel" and "4.0" <= "latest" are both true. - out[!is_arrow_package(out, "4.0", x$packages_used), "mem_alloc"] <- NA - out <- unique(out) - } - - if (!is.null(x$valid_params)) { - out <- x$valid_params(out) - } - out -} #' Extract the parameter summary as a data.frame #' diff --git a/R/config.R b/R/config.R new file mode 100644 index 0000000..650010a --- /dev/null +++ b/R/config.R @@ -0,0 +1,11 @@ +#' A vector of benchmark attribute names run on `ursa-i9-9960x` +#' +#' @export +URSA_I9_9960X_R_BENCHMARK_NAMES <- c( + "dataframe-to-table", # `df_to_table` + "file-read", + "file-write", + "partitioned-dataset-filter", # `dataset_taxi_parquet` + "wide-dataframe", # not actually an R benchmark + "tpch" # `tpc_h` +) diff --git a/R/params.R b/R/params.R new file mode 100644 index 0000000..c72295e --- /dev/null +++ b/R/params.R @@ -0,0 +1,68 @@ + + +#' Generate a dataframe of default parameters for a benchmark +#' +#' Generates a dataframe of parameter combinations for a benchmark to try based +#' on the parameter defaults of its `setup` function and supplied parameters. +#' +#' @param x An object for which to generate parameters +#' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` +#' function. May also contain `cpu_count`, `lib_path`, and `mem_alloc`. +#' +#' @return For `default_params.Benchmark`, a dataframe of parameter combinations +#' to try with a column for each parameter and a row for each combination. +#' +#' @export +default_params <- function(x, ...) { + UseMethod("default_params") +} + +#' @export +default_params.default <- function(x, ...) { + stop("No method found for class `", toString(class(x)), '`') +} + +#' @export +default_params.Benchmark <- function(x, ...) { + # This takes the expansion of the default parameters in the function signature + # perhaps restricted by the ... params + params <- modifyList(get_default_args(x$setup), list(...)) + if (identical(params$lib_path, "all")) { + # Default for lib_path is just "latest", if omitted + # "all" means all old versions + # rev() is so we run newest first. This also means we bootstrap data fixtures + # with newest first, so that's some assurance that older versions can read + # what the newer libs write + params$lib_path <- rev(c(names(arrow_version_to_date), "devel", "latest")) + } + if (is.null(params$cpu_count)) { + params$cpu_count <- c(1L, parallel::detectCores()) + } + params$stringsAsFactors <- FALSE + out <- do.call(expand.grid, params) + + # we don't change memory allocators on non-arrow packages + if (!is.null(params$mem_alloc)) { + # a bit of a hack, we can test memory allocators on devel or latest, but + # "4.0" <= "devel" and "4.0" <= "latest" are both true. + out[!is_arrow_package(out, "4.0", x$packages_used), "mem_alloc"] <- NA + out <- unique(out) + } + + if (!is.null(x$valid_params)) { + out <- x$valid_params(out) + } + out +} + +#' @export +default_params.BenchmarkDataFrame <- function(x, ...) { + x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { + if (is.null(params)) { + params <- default_params(bm, ...) + } + params + }) + + x +} diff --git a/R/run.R b/R/run.R index f7d0ee0..9f0bc8d 100644 --- a/R/run.R +++ b/R/run.R @@ -1,3 +1,38 @@ +#' Run an object +#' +#' @param x An S3 classed object to run +#' @param ... Additional arguments passed through to methods. For +#' `run.BenchmarkDataFrame`, passed through to [default_params()] (when +#' parameters are not specified) and [run_benchmark()]. +#' +#' @return A modified object containing run results. For `run.BenchmarkDataFrame`, +#' a `results` list column is appended. +#' +#' @export +run <- function(x, ...) { + UseMethod("run") +} + + +#' @export +run.default <- function(x, ...) { + stop("No method found for class `", toString(class(x)), '`') +} + + +#' @rdname run +#' @export +run.BenchmarkDataFrame <- function(x, ...) { + # if already run (so no elements of `parameters` are NULL), is no-op + x <- default_params(x, ...) + + x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { + run_benchmark(bm = bm, params = params, ...) + }) + + x +} + #' Run a Benchmark across a range of parameters #' #' @param bm [Benchmark()] object diff --git a/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd b/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd index 40af5ae..232925a 100644 --- a/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd +++ b/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/benchmark-dataframe.R +% Please edit documentation in R/config.R \docType{data} \name{URSA_I9_9960X_R_BENCHMARK_NAMES} \alias{URSA_I9_9960X_R_BENCHMARK_NAMES} diff --git a/man/default_params.Rd b/man/default_params.Rd index 77cd1f4..114d128 100644 --- a/man/default_params.Rd +++ b/man/default_params.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/benchmark.R +% Please edit documentation in R/params.R \name{default_params} \alias{default_params} \title{Generate a dataframe of default parameters for a benchmark} diff --git a/man/run.Rd b/man/run.Rd index 93b19a6..984ccc7 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/benchmark-dataframe.R +% Please edit documentation in R/run.R \name{run} \alias{run} \alias{run.BenchmarkDataFrame} diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 633f26f..4fe373c 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -46,3 +46,18 @@ suppress_deparse_warning <- function(...) { invokeRestart("muffleWarning") }) } + + +assert_benchmark_dataframe <- function(bm_df, benchmarks, parameters) { + if (missing(parameters)) { + parameters <- rep(list(NULL), length(benchmarks)) + } + + expect_s3_class(bm_df, c("BenchmarkDataFrame", "tbl", "tbl_df", "data.frame")) + expect_true(all(c("name", "benchmark", "parameters") %in% names(bm_df))) + expect_equal(nrow(bm_df), length(benchmarks)) + expect_equal(bm_df$name, vapply(benchmarks, function(x) x$name, character(1))) + expect_equal(bm_df$benchmark, benchmarks) + expect_equal(bm_df$parameters, parameters) +} + diff --git a/tests/testthat/test-benchmark-dataframe.R b/tests/testthat/test-benchmark-dataframe.R index 33004f9..d6367e5 100644 --- a/tests/testthat/test-benchmark-dataframe.R +++ b/tests/testthat/test-benchmark-dataframe.R @@ -1,17 +1,3 @@ -assert_benchmark_dataframe <- function(bm_df, benchmarks, parameters) { - if (missing(parameters)) { - parameters <- rep(list(NULL), length(benchmarks)) - } - - expect_s3_class(bm_df, c("BenchmarkDataFrame", "tbl", "tbl_df", "data.frame")) - expect_true(all(c("name", "benchmark", "parameters") %in% names(bm_df))) - expect_equal(nrow(bm_df), length(benchmarks)) - expect_equal(bm_df$name, vapply(benchmarks, function(x) x$name, character(1))) - expect_equal(bm_df$benchmark, benchmarks) - expect_equal(bm_df$parameters, parameters) -} - - test_that("BenchmarkDataFrame can be instantiated", { for (bm_list in list( list(placebo), @@ -48,93 +34,3 @@ test_that("`get_package_benchmarks()` works", { # but is still listed in arrow-benchmarks-ci in R. If removed, change to `all()`. expect_true(any(URSA_I9_9960X_R_BENCHMARK_NAMES %in% bm_df$name)) }) - - -test_that("default_params.BenchmarkDataFrame() can fill in params col", { - bm_list <- list(placebo, placebo) - bm_df <- BenchmarkDataFrame(bm_list) - assert_benchmark_dataframe(bm_df, bm_list) - - bm_df_augmented <- default_params(bm_df) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params)) - lapply(bm_df_augmented$parameters, function(param_df) { - expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo)) - expect_gt(nrow(param_df), 0L) - }) - - # handle keyword args - bm_df_augmented <- default_params(bm_df, duration = 1) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) - lapply(bm_df_augmented$parameters, function(param_df) { - expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo, duration = 1)) - expect_gt(nrow(param_df), 0L) - }) - - # handle partially-specified param lists - bm_df <- BenchmarkDataFrame(bm_list, parameters = list(default_params(placebo, duration = 1), NULL)) - bm_df_augmented <- default_params(bm_df, duration = 1) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) - lapply(bm_df_augmented$parameters, function(param_df) { - expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo, duration = 1)) - expect_gt(nrow(param_df), 0L) - }) -}) - - -test_that("run() dispatches and run.default() errors", { - expect_error( - run(1), - "No method found for class `numeric`" - ) -}) - - -test_that("run.BenchmarkDataFrame() works", { - bm_list <- list(placebo, placebo) - param_list <- list( - default_params( - placebo, - error = list(NULL, "rlang::abort", "base::stop"), - cpu_count = arrow::cpu_count() - ), - NULL - ) - bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) - - bm_df_res <- run(bm_df) - - assert_benchmark_dataframe( - bm_df_res, - benchmarks = bm_list, - parameters = list(param_list[[1]], default_params(placebo)) - ) - expect_true("results" %in% names(bm_df_res)) - purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { - expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6")) - expect_equal(nrow(parameters), length(results$results)) - if ("error" %in% names(parameters)) { - # param set with some cases that will error - purrr::walk2(parameters$error, results$results, function(err, res) { - expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) - if (is.null(err)) { - # passing case - expect_null(res$error) - expect_gt(res$stats$data[[1]], 0) - } else { - # erroring case - expect_false(is.null(res$error)) - } - }) - } else { - # param set with no cases that will error (includes defaults) - purrr::walk(results$results, function(res) { - expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) - expect_null(res$error) - expect_gt(res$stats$data[[1]], 0) - }) - } - }) -}) \ No newline at end of file diff --git a/tests/testthat/test-params.R b/tests/testthat/test-params.R new file mode 100644 index 0000000..1137156 --- /dev/null +++ b/tests/testthat/test-params.R @@ -0,0 +1,32 @@ +test_that("default_params.BenchmarkDataFrame() can fill in params col", { + bm_list <- list(placebo, placebo) + bm_df <- BenchmarkDataFrame(bm_list) + assert_benchmark_dataframe(bm_df, bm_list) + + bm_df_augmented <- default_params(bm_df) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo)) + expect_gt(nrow(param_df), 0L) + }) + + # handle keyword args + bm_df_augmented <- default_params(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo, duration = 1)) + expect_gt(nrow(param_df), 0L) + }) + + # handle partially-specified param lists + bm_df <- BenchmarkDataFrame(bm_list, parameters = list(default_params(placebo, duration = 1), NULL)) + bm_df_augmented <- default_params(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + lapply(bm_df_augmented$parameters, function(param_df) { + expect_s3_class(param_df, "data.frame") + expect_equal(param_df, default_params(placebo, duration = 1)) + expect_gt(nrow(param_df), 0L) + }) +}) diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R index 9c0eee3..3c62c78 100644 --- a/tests/testthat/test-run.R +++ b/tests/testthat/test-run.R @@ -234,4 +234,60 @@ test_that("an rscript is added to the results object", { expect_true("rscript" %in% names(res$optional_benchmark_info)) }) + +test_that("run() dispatches and run.default() errors", { + expect_error( + run(1), + "No method found for class `numeric`" + ) +}) + +test_that("run.BenchmarkDataFrame() works", { + bm_list <- list(placebo, placebo) + param_list <- list( + default_params( + placebo, + error = list(NULL, "rlang::abort", "base::stop"), + cpu_count = arrow::cpu_count() + ), + NULL + ) + bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) + + bm_df_res <- run(bm_df) + + assert_benchmark_dataframe( + bm_df_res, + benchmarks = bm_list, + parameters = list(param_list[[1]], default_params(placebo)) + ) + expect_true("results" %in% names(bm_df_res)) + purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { + expect_s3_class(results, c("BenchmarkResults", "Serializable", "R6")) + expect_equal(nrow(parameters), length(results$results)) + if ("error" %in% names(parameters)) { + # param set with some cases that will error + purrr::walk2(parameters$error, results$results, function(err, res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + if (is.null(err)) { + # passing case + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + } else { + # erroring case + expect_false(is.null(res$error)) + } + }) + } else { + # param set with no cases that will error (includes defaults) + purrr::walk(results$results, function(res) { + expect_s3_class(res, c("BenchmarkResult", "Serializable", "R6")) + expect_null(res$error) + expect_gt(res$stats$data[[1]], 0) + }) + } + }) +}) + + wipe_results() From c219ffc424ecc17fa60a7ebee2a8f82b4c3b44e3 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Mon, 12 Dec 2022 22:57:26 +0000 Subject: [PATCH 6/7] default_params() -> get_default_parameters() --- NAMESPACE | 8 +++---- R/benchmark-dataframe.R | 2 +- R/params.R | 16 ++++++-------- R/run.R | 6 ++--- man/BenchmarkDataFrame.Rd | 2 +- ...lt_params.Rd => get_default_parameters.Rd} | 8 +++---- man/run.Rd | 2 +- man/run_benchmark.Rd | 2 +- tests/testthat/test-benchmark-dataframe.R | 2 +- tests/testthat/test-bm-read-file.R | 4 ++-- tests/testthat/test-bm-row-group-size.R | 2 +- tests/testthat/test-params.R | 22 +++++++++---------- tests/testthat/test-run.R | 4 ++-- 13 files changed, 39 insertions(+), 41 deletions(-) rename man/{default_params.Rd => get_default_parameters.Rd} (79%) diff --git a/NAMESPACE b/NAMESPACE index 5ea8ef9..c6707de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,10 @@ S3method(as.character,Serializable) S3method(as.data.frame,BenchmarkResult) S3method(as.data.frame,BenchmarkResults) S3method(as.list,Serializable) -S3method(default_params,Benchmark) -S3method(default_params,BenchmarkDataFrame) -S3method(default_params,default) S3method(format,BenchmarkDataFrame) +S3method(get_default_parameters,Benchmark) +S3method(get_default_parameters,BenchmarkDataFrame) +S3method(get_default_parameters,default) S3method(run,BenchmarkDataFrame) S3method(run,default) export("%||%") @@ -21,7 +21,6 @@ export(array_to_vector) export(confirm_mem_alloc) export(dataset_taxi_2013) export(dataset_taxi_parquet) -export(default_params) export(df_to_table) export(ensure_dataset) export(ensure_format) @@ -31,6 +30,7 @@ export(generate_tpch) export(get_csv_reader) export(get_csv_writer) export(get_dataset_attr) +export(get_default_parameters) export(get_input_func) export(get_json_reader) export(get_package_benchmarks) diff --git a/R/benchmark-dataframe.R b/R/benchmark-dataframe.R index b39b174..437dafb 100644 --- a/R/benchmark-dataframe.R +++ b/R/benchmark-dataframe.R @@ -2,7 +2,7 @@ #' #' @param benchmarks A list with elements of class `Benchmark` #' @param parameters Optional. A list of dataframes of parameter combinations to -#' run as generated by [default_params()]. If null, defaults will be generated +#' run as generated by [get_default_parameters()]. If null, defaults will be generated #' when [run()] is called. #' #' @return A classed dataframe with `name` (benchmark attribute, not object name), diff --git a/R/params.R b/R/params.R index c72295e..9ddbfc5 100644 --- a/R/params.R +++ b/R/params.R @@ -1,5 +1,3 @@ - - #' Generate a dataframe of default parameters for a benchmark #' #' Generates a dataframe of parameter combinations for a benchmark to try based @@ -9,21 +7,21 @@ #' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` #' function. May also contain `cpu_count`, `lib_path`, and `mem_alloc`. #' -#' @return For `default_params.Benchmark`, a dataframe of parameter combinations +#' @return For `get_default_parameters.Benchmark`, a dataframe of parameter combinations #' to try with a column for each parameter and a row for each combination. #' #' @export -default_params <- function(x, ...) { - UseMethod("default_params") +get_default_parameters <- function(x, ...) { + UseMethod("get_default_parameters") } #' @export -default_params.default <- function(x, ...) { +get_default_parameters.default <- function(x, ...) { stop("No method found for class `", toString(class(x)), '`') } #' @export -default_params.Benchmark <- function(x, ...) { +get_default_parameters.Benchmark <- function(x, ...) { # This takes the expansion of the default parameters in the function signature # perhaps restricted by the ... params params <- modifyList(get_default_args(x$setup), list(...)) @@ -56,10 +54,10 @@ default_params.Benchmark <- function(x, ...) { } #' @export -default_params.BenchmarkDataFrame <- function(x, ...) { +get_default_parameters.BenchmarkDataFrame <- function(x, ...) { x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { if (is.null(params)) { - params <- default_params(bm, ...) + params <- get_default_parameters(bm, ...) } params }) diff --git a/R/run.R b/R/run.R index 9f0bc8d..ed2ed33 100644 --- a/R/run.R +++ b/R/run.R @@ -2,7 +2,7 @@ #' #' @param x An S3 classed object to run #' @param ... Additional arguments passed through to methods. For -#' `run.BenchmarkDataFrame`, passed through to [default_params()] (when +#' `run.BenchmarkDataFrame`, passed through to [get_default_parameters()] (when #' parameters are not specified) and [run_benchmark()]. #' #' @return A modified object containing run results. For `run.BenchmarkDataFrame`, @@ -24,7 +24,7 @@ run.default <- function(x, ...) { #' @export run.BenchmarkDataFrame <- function(x, ...) { # if already run (so no elements of `parameters` are NULL), is no-op - x <- default_params(x, ...) + x <- get_default_parameters(x, ...) x$results <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { run_benchmark(bm = bm, params = params, ...) @@ -58,7 +58,7 @@ run.BenchmarkDataFrame <- function(x, ...) { #' @importFrom progress progress_bar run_benchmark <- function(bm, ..., - params = default_params(bm, ...), + params = get_default_parameters(bm, ...), n_iter = 1, dry_run = FALSE, profiling = FALSE, diff --git a/man/BenchmarkDataFrame.Rd b/man/BenchmarkDataFrame.Rd index 4d1ab7e..42ae339 100644 --- a/man/BenchmarkDataFrame.Rd +++ b/man/BenchmarkDataFrame.Rd @@ -10,7 +10,7 @@ BenchmarkDataFrame(benchmarks, parameters) \item{benchmarks}{A list with elements of class \code{Benchmark}} \item{parameters}{Optional. A list of dataframes of parameter combinations to -run as generated by \code{\link[=default_params]{default_params()}}. If null, defaults will be generated +run as generated by \code{\link[=get_default_parameters]{get_default_parameters()}}. If null, defaults will be generated when \code{\link[=run]{run()}} is called.} } \value{ diff --git a/man/default_params.Rd b/man/get_default_parameters.Rd similarity index 79% rename from man/default_params.Rd rename to man/get_default_parameters.Rd index 114d128..e8fedfe 100644 --- a/man/default_params.Rd +++ b/man/get_default_parameters.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/params.R -\name{default_params} -\alias{default_params} +\name{get_default_parameters} +\alias{get_default_parameters} \title{Generate a dataframe of default parameters for a benchmark} \usage{ -default_params(x, ...) +get_default_parameters(x, ...) } \arguments{ \item{x}{An object for which to generate parameters} @@ -13,7 +13,7 @@ default_params(x, ...) function. May also contain \code{cpu_count}, \code{lib_path}, and \code{mem_alloc}.} } \value{ -For \code{default_params.Benchmark}, a dataframe of parameter combinations +For \code{get_default_parameters.Benchmark}, a dataframe of parameter combinations to try with a column for each parameter and a row for each combination. } \description{ diff --git a/man/run.Rd b/man/run.Rd index 984ccc7..5f64ee3 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -13,7 +13,7 @@ run(x, ...) \item{x}{An S3 classed object to run} \item{...}{Additional arguments passed through to methods. For -\code{run.BenchmarkDataFrame}, passed through to \code{\link[=default_params]{default_params()}} (when +\code{run.BenchmarkDataFrame}, passed through to \code{\link[=get_default_parameters]{get_default_parameters()}} (when parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.} } \value{ diff --git a/man/run_benchmark.Rd b/man/run_benchmark.Rd index 6dd758b..3e603c4 100644 --- a/man/run_benchmark.Rd +++ b/man/run_benchmark.Rd @@ -7,7 +7,7 @@ run_benchmark( bm, ..., - params = default_params(bm, ...), + params = get_default_parameters(bm, ...), n_iter = 1, dry_run = FALSE, profiling = FALSE, diff --git a/tests/testthat/test-benchmark-dataframe.R b/tests/testthat/test-benchmark-dataframe.R index d6367e5..5f64795 100644 --- a/tests/testthat/test-benchmark-dataframe.R +++ b/tests/testthat/test-benchmark-dataframe.R @@ -9,7 +9,7 @@ test_that("BenchmarkDataFrame can be instantiated", { } bm_list <- list(placebo, placebo) - param_list <- list(default_params(placebo), NULL) + param_list <- list(get_default_parameters(placebo), NULL) bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) assert_benchmark_dataframe(bm_df, benchmarks = bm_list, parameters = param_list) diff --git a/tests/testthat/test-bm-read-file.R b/tests/testthat/test-bm-read-file.R index 624d3f3..59703d7 100644 --- a/tests/testthat/test-bm-read-file.R +++ b/tests/testthat/test-bm-read-file.R @@ -3,9 +3,9 @@ test_that("read_file validation", { read_file_no_validate <- read_file read_file_no_validate$valid_params <- NULL - params_no_validate <- default_params(read_file_no_validate) + params_no_validate <- get_default_parameters(read_file_no_validate) - params <- default_params(read_file) + params <- get_default_parameters(read_file) expect_lt(nrow(params), nrow(params_no_validate)) diff --git a/tests/testthat/test-bm-row-group-size.R b/tests/testthat/test-bm-row-group-size.R index 7bfb7a2..de3f18a 100644 --- a/tests/testthat/test-bm-row-group-size.R +++ b/tests/testthat/test-bm-row-group-size.R @@ -1,6 +1,6 @@ test_that("row_group_size benchmark runs", { - params <- default_params(row_group_size, chunk_size = list(NULL, 10000L, 100000L, 1000000L)) + params <- get_default_parameters(row_group_size, chunk_size = list(NULL, 10000L, 100000L, 1000000L)) expect_benchmark_run( run_benchmark( diff --git a/tests/testthat/test-params.R b/tests/testthat/test-params.R index 1137156..9e13eba 100644 --- a/tests/testthat/test-params.R +++ b/tests/testthat/test-params.R @@ -1,32 +1,32 @@ -test_that("default_params.BenchmarkDataFrame() can fill in params col", { +test_that("get_default_parameters.BenchmarkDataFrame() can fill in params col", { bm_list <- list(placebo, placebo) bm_df <- BenchmarkDataFrame(bm_list) assert_benchmark_dataframe(bm_df, bm_list) - bm_df_augmented <- default_params(bm_df) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params)) + bm_df_augmented <- get_default_parameters(bm_df) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters)) lapply(bm_df_augmented$parameters, function(param_df) { expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo)) + expect_equal(param_df, get_default_parameters(placebo)) expect_gt(nrow(param_df), 0L) }) # handle keyword args - bm_df_augmented <- default_params(bm_df, duration = 1) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + bm_df_augmented <- get_default_parameters(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) lapply(bm_df_augmented$parameters, function(param_df) { expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo, duration = 1)) + expect_equal(param_df, get_default_parameters(placebo, duration = 1)) expect_gt(nrow(param_df), 0L) }) # handle partially-specified param lists - bm_df <- BenchmarkDataFrame(bm_list, parameters = list(default_params(placebo, duration = 1), NULL)) - bm_df_augmented <- default_params(bm_df, duration = 1) - assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, default_params, duration = 1)) + bm_df <- BenchmarkDataFrame(bm_list, parameters = list(get_default_parameters(placebo, duration = 1), NULL)) + bm_df_augmented <- get_default_parameters(bm_df, duration = 1) + assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) lapply(bm_df_augmented$parameters, function(param_df) { expect_s3_class(param_df, "data.frame") - expect_equal(param_df, default_params(placebo, duration = 1)) + expect_equal(param_df, get_default_parameters(placebo, duration = 1)) expect_gt(nrow(param_df), 0L) }) }) diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R index 3c62c78..e8abcd4 100644 --- a/tests/testthat/test-run.R +++ b/tests/testthat/test-run.R @@ -245,7 +245,7 @@ test_that("run() dispatches and run.default() errors", { test_that("run.BenchmarkDataFrame() works", { bm_list <- list(placebo, placebo) param_list <- list( - default_params( + get_default_parameters( placebo, error = list(NULL, "rlang::abort", "base::stop"), cpu_count = arrow::cpu_count() @@ -259,7 +259,7 @@ test_that("run.BenchmarkDataFrame() works", { assert_benchmark_dataframe( bm_df_res, benchmarks = bm_list, - parameters = list(param_list[[1]], default_params(placebo)) + parameters = list(param_list[[1]], get_default_parameters(placebo)) ) expect_true("results" %in% names(bm_df_res)) purrr::walk2(bm_df_res$parameters, bm_df_res$results, function(parameters, results) { From fe7ba3cdb0a4be4f248431a4a4c4ee0ee6f58f70 Mon Sep 17 00:00:00 2001 From: Edward Visel <1693477+alistaire47@users.noreply.github.com> Date: Wed, 4 Jan 2023 23:54:45 +0000 Subject: [PATCH 7/7] rm vector of benchmark names; fill in README --- DESCRIPTION | 1 - NAMESPACE | 1 - R/config.R | 11 ------- R/params.R | 3 ++ README.md | 38 +++++++++++++++++++++++ man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd | 16 ---------- man/get_default_parameters.Rd | 9 ++++++ tests/testthat/test-benchmark-dataframe.R | 10 ++++++ 8 files changed, 60 insertions(+), 29 deletions(-) delete mode 100644 R/config.R delete mode 100644 man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 48d63e6..57f3816 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,7 +66,6 @@ Collate: 'bm-tpc-h.R' 'bm-write-csv.R' 'bm-write-file.R' - 'config.R' 'custom-duckdb.R' 'ensure-format.R' 'ensure-lib.R' diff --git a/NAMESPACE b/NAMESPACE index c6707de..0eb5832 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export("%||%") export(BenchEnvironment) export(Benchmark) export(BenchmarkDataFrame) -export(URSA_I9_9960X_R_BENCHMARK_NAMES) export(all_sources) export(array_altrep_materialization) export(array_to_vector) diff --git a/R/config.R b/R/config.R deleted file mode 100644 index 650010a..0000000 --- a/R/config.R +++ /dev/null @@ -1,11 +0,0 @@ -#' A vector of benchmark attribute names run on `ursa-i9-9960x` -#' -#' @export -URSA_I9_9960X_R_BENCHMARK_NAMES <- c( - "dataframe-to-table", # `df_to_table` - "file-read", - "file-write", - "partitioned-dataset-filter", # `dataset_taxi_parquet` - "wide-dataframe", # not actually an R benchmark - "tpch" # `tpc_h` -) diff --git a/R/params.R b/R/params.R index 9ddbfc5..9ee7d40 100644 --- a/R/params.R +++ b/R/params.R @@ -15,11 +15,13 @@ get_default_parameters <- function(x, ...) { UseMethod("get_default_parameters") } +#' @rdname get_default_parameters #' @export get_default_parameters.default <- function(x, ...) { stop("No method found for class `", toString(class(x)), '`') } +#' @rdname get_default_parameters #' @export get_default_parameters.Benchmark <- function(x, ...) { # This takes the expansion of the default parameters in the function signature @@ -53,6 +55,7 @@ get_default_parameters.Benchmark <- function(x, ...) { out } +#' @rdname get_default_parameters #' @export get_default_parameters.BenchmarkDataFrame <- function(x, ...) { x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { diff --git a/README.md b/README.md index cf8d64d..4321dd8 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,44 @@ run_benchmark( ) ``` +### Running a set of benchmarks together + +The `get_package_benchmarks()` function gets all the benchmarks defined in a +package—by default this one—and returns them in a classed dataframe with columns +for the benchmark name attribute, the benchmark itself (in a list column), and +a list column of dataframes of parameters with which to run the benchmark (NULL +by default, meaning use `get_default_parameters(benchmark)` for each: + +``` r +> get_package_benchmarks() +# +# A tibble: 14 × 3 + name benchmark parameters + * + 1 array_to_vector + 2 remote_dataset + 3 row_group_size + 4 file-read + 5 dataframe-to-table + 6 write_csv + 7 array_altrep_materialization + 8 partitioned-dataset-filter + 9 read_csv +10 read_json +11 tpch +12 dataset_taxi_2013 +13 file-write +14 table_to_df +``` + +If certain benchmarks are to be run on certain machines, the dataframe can be +subset with normal dataframe operations. If parameters other than defaults +should be used, the `parameters` column can be filled in manually. When ready, +the dataframe can be passed to `run()`, which will run each benchmark on each of +its sets of parameters and append a `results` column to the returned dataset +that contains result objects that can be transformed to JSON appropriate for +sending to a Conbench server. + ### Enabling benchmarks to be run on conbench [Conbench](https://conbench.ursa.dev/) is a service that runs benchmarks continuously on a repo. We have a conbench diff --git a/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd b/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd deleted file mode 100644 index 232925a..0000000 --- a/man/URSA_I9_9960X_R_BENCHMARK_NAMES.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/config.R -\docType{data} -\name{URSA_I9_9960X_R_BENCHMARK_NAMES} -\alias{URSA_I9_9960X_R_BENCHMARK_NAMES} -\title{A vector of benchmark attribute names run on \verb{ursa-i9-9960x}} -\format{ -An object of class \code{character} of length 6. -} -\usage{ -URSA_I9_9960X_R_BENCHMARK_NAMES -} -\description{ -A vector of benchmark attribute names run on \verb{ursa-i9-9960x} -} -\keyword{datasets} diff --git a/man/get_default_parameters.Rd b/man/get_default_parameters.Rd index e8fedfe..d8c378f 100644 --- a/man/get_default_parameters.Rd +++ b/man/get_default_parameters.Rd @@ -2,9 +2,18 @@ % Please edit documentation in R/params.R \name{get_default_parameters} \alias{get_default_parameters} +\alias{get_default_parameters.default} +\alias{get_default_parameters.Benchmark} +\alias{get_default_parameters.BenchmarkDataFrame} \title{Generate a dataframe of default parameters for a benchmark} \usage{ get_default_parameters(x, ...) + +\method{get_default_parameters}{default}(x, ...) + +\method{get_default_parameters}{Benchmark}(x, ...) + +\method{get_default_parameters}{BenchmarkDataFrame}(x, ...) } \arguments{ \item{x}{An object for which to generate parameters} diff --git a/tests/testthat/test-benchmark-dataframe.R b/tests/testthat/test-benchmark-dataframe.R index 5f64795..494e7fb 100644 --- a/tests/testthat/test-benchmark-dataframe.R +++ b/tests/testthat/test-benchmark-dataframe.R @@ -26,6 +26,16 @@ test_that("format.BenchmarkDataFrame() works", { }) +# A vector of benchmark attribute names run on `ursa-i9-9960x` +URSA_I9_9960X_R_BENCHMARK_NAMES <- c( + "dataframe-to-table", # `df_to_table` + "file-read", + "file-write", + "partitioned-dataset-filter", # `dataset_taxi_parquet` + "wide-dataframe", # not actually an R benchmark + "tpch" # `tpc_h` +) + test_that("`get_package_benchmarks()` works", { bm_df <- get_package_benchmarks() assert_benchmark_dataframe(bm_df = bm_df, benchmarks = bm_df$benchmark)