Skip to content

Commit

Permalink
Add --warmup-run option (unisa-hpc#14)
Browse files Browse the repository at this point in the history
Add an option to perform an initial "warmup" run whose timings are excluded
from the evaluation, so that one-time overheads such as JIT compilation do not skew the results.

---------

Signed-off-by: Victor Perez <[email protected]>
  • Loading branch information
victor-eds authored Nov 17, 2023
1 parent ac3c42e commit fe9f58f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
5 changes: 5 additions & 0 deletions bin/run-suite
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def create_log_range(begin, end):
--no-verification - disable verification entirely
--no-ndrange-kernels - do not run kernels based on ndrange parallel for
--hierarchical-kernels - run kernels using hierarchical parallelism
--warmup-run - run benchmarks once before evaluation to discard possible "warmup" times, e.g., JIT compilation
'''
output_file = "./sycl-bench.csv"
discard = io.StringIO()
Expand All @@ -138,6 +139,7 @@ parser.add_argument("-t", "--timeout", metavar="SEC",
default=-1,
help="Time out (second)", type=int)
parser.add_argument("--mlir-only", action='store_true', help="Run MLIR tests only")
parser.add_argument("--warmup-run", action='store_true', help="Perform warmup run")

parse_args = parser.parse_args()
if parse_args.timeout <= 0:
Expand Down Expand Up @@ -357,6 +359,9 @@ if __name__ == '__main__':
args.append('--size='+str(size))
args.append('--local='+str(localsize))

if parse_args.warmup_run:
args.append('--warmup-run')

retcode, elapsed_time = invoke_benchmark(benchmark_executable, args)
if retcode == 0:
max_runtime = max(max_runtime, elapsed_time)
Expand Down
9 changes: 8 additions & 1 deletion include/command_line.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct BenchmarkArgs
// can be used to query additional benchmark specific information from the command line
CommandLine cli;
std::shared_ptr<ResultConsumer> result_consumer;
bool warmup_run;
};

class CUDASelector : public cl::sycl::device_selector {
Expand Down Expand Up @@ -193,6 +194,11 @@ class BenchmarkCommandLine
std::size_t num_runs = cli_parser.getOrDefault<std::size_t>("--num-runs", 5);

std::string device_type = cli_parser.getOrDefault<std::string>("--device", "default");
bool warmup_run = cli_parser.isFlagSet("--warmup-run");
if (warmup_run) {
// Run one extra iteration so that dropping the warmup run is transparent to the user
++num_runs;
}
cl::sycl::queue q = getQueue(device_type);

bool verification_enabled = true;
Expand All @@ -216,7 +222,8 @@ class BenchmarkCommandLine
verification_begin,
verification_range},
cli_parser,
result_consumer};
result_consumer,
warmup_run};
}

private:
Expand Down
6 changes: 5 additions & 1 deletion include/time_metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ class TimeMetricsProcessor {
for(const auto& name : allTimings) {
if(unavailableTimings.count(name) == 0) {
std::vector<double> resultsSeconds;
std::transform(timingResults.at(name).begin(), timingResults.at(name).end(), std::back_inserter(resultsSeconds),
auto times_begin = timingResults.at(name).begin();
if (args.warmup_run) {
++times_begin;
}
std::transform(times_begin, timingResults.at(name).end(), std::back_inserter(resultsSeconds),
[](auto r) { return r.count() / 1.0e9; });
std::sort(resultsSeconds.begin(), resultsSeconds.end());

Expand Down

0 comments on commit fe9f58f

Please sign in to comment.