-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add perfetto trace analysis benchmark (#969)
Summary: Add a benchmark for trace analysis tasks on backends like Perfetto. Step 1: install the benchmark (this will download and decompress a sample trace from Amazon S3) ``` $ python -m benchmarks.perfetto.install Checking out https://ossci-datasets.s3.amazonaws.com/torchbench/traces/torchbench_traces.tar.gz to /Users/xzhao9/git/kineto/benchmarks/trace_analysis/.data/torchbench_traces.tar.gz decompressing input tarball: /Users/xzhao9/git/kineto/benchmarks/trace_analysis/.data/torchbench_traces.tar.gz...OK Requirement already satisfied: perfetto in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from -r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 1)) (0.7.0) Requirement already satisfied: tabulate in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from -r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 2)) (0.9.0) Requirement already satisfied: protobuf in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from perfetto->-r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 1)) (4.25.3) ``` Step 2: run the benchmark ``` $ python -m benchmarks.perfetto.run input-task perfetto-latency ---------------------------------------------- ------------------ torchbench_resnet50_3080ti-load 8.53069 torchbench_resnet50_3080ti-search_gemm_kernels 0.067583 torchbench_resnet50_3080ti-select_kernels 0.000549563 torchbench_resnet50_3080ti-group_kernels 0.0145147 ``` Right now, only the latency metric is available. We could add other metrics like memory footprint later. Pull Request resolved: #969 Differential Revision: D60466932 Pulled By: xuzhao9
- Loading branch information
1 parent
188c5f5
commit 18efd40
Showing
12 changed files
with
440 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# ignore common items | ||
.idea | ||
.vscode | ||
.data | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import os | ||
|
||
# Directory that contains this package's __init__ module.
BENCHMARK_ROOT = os.path.dirname(__file__)
# Hidden ".data" folder next to the package sources.
BENCHMARK_DATA_DIR = os.path.join(os.path.dirname(__file__), ".data")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
from .clp import CLPTraceAnalysis | ||
from .common import DEFAULT_METRICS # noqa: F401 | ||
from .perfetto import PerfettoTraceAnalysis | ||
|
||
# Registry: backend name -> class implementing that backend.
AVAILABLE_BACKENDS = dict(
    perfetto=PerfettoTraceAnalysis,
    clp=CLPTraceAnalysis,
)

# Names of the tasks known to the benchmark.
AVAILABLE_TASKS = [
    "load",
    "search_gemm_kernels",
    "select_kernels",
    "group_kernels",
]

# Stamp every registered class with the key it is registered under.
for name, analysis in AVAILABLE_BACKENDS.items():
    analysis.name = name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
|
||
from .common import TraceAnalysis | ||
|
||
|
||
class CLPTraceAnalysis(TraceAnalysis):
    """Trace analysis backend for CLP.

    Defines no loading or task methods of its own, so everything is inherited
    from ``TraceAnalysis``.
    """

    def __init__(self, args: argparse.Namespace):
        # Nothing backend-specific to configure: hand the arguments to the base.
        super().__init__(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
import os | ||
|
||
import time | ||
from dataclasses import dataclass, field, fields | ||
|
||
from typing import Callable, Dict | ||
|
||
import numpy | ||
|
||
from .. import BENCHMARK_DATA_DIR | ||
|
||
|
||
def _get_input_path(input_name):
    """Return the path of the JSON trace called ``input_name``.

    The file is looked up as ``<input_name>.json`` inside the
    ``torchbench_traces`` folder of the benchmark data directory.
    """
    traces_dir = os.path.join(BENCHMARK_DATA_DIR, "torchbench_traces")
    return os.path.join(traces_dir, f"{input_name}.json")
|
||
|
||
@dataclass
class TraceAnalysisMetrics:
    """Measurements gathered while running trace analysis tasks.

    Each field is a dictionary from a name to a float, and every instance
    starts with its own empty dictionaries.
    """

    # Time taken by each trace analysis task
    latency: Dict[str, float] = field(default_factory=dict)
    # Peak CPU memory used by each trace analysis task
    peak_mem: Dict[str, float] = field(default_factory=dict)
    # Any additional metrics
    extra_metrics: Dict[str, float] = field(default_factory=dict)
|
||
|
||
# Metrics used when none are requested explicitly.
DEFAULT_METRICS = ["latency"]
# Every field of TraceAnalysisMetrics except the "extra_metrics" bucket.
BUILTIN_METRICS = {
    metric.name
    for metric in fields(TraceAnalysisMetrics)
    if metric.name != "extra_metrics"
}
|
||
|
||
class TraceAnalysis:
    """Base class for trace analysis backends.

    Subclasses implement ``load`` plus one zero-argument method per task. The
    underscore-prefixed drivers below time those methods and record the
    results in ``self.output.latency``.
    """

    output: "TraceAnalysisMetrics"

    def __init__(self, args: argparse.Namespace):
        """Store the iteration settings taken from the parsed arguments.

        Args:
            args: Namespace providing ``warmup`` and ``iter``.
        """
        self.output = TraceAnalysisMetrics()
        self.warmup = args.warmup
        self.iter = args.iter

    def _load(self, input: str):
        """Load the named input trace and record how long loading took.

        Args:
            input: Name of the input trace (without the ``.json`` suffix).
        """
        input_path = _get_input_path(input)
        # Start a fresh metrics object for every input. Reusing one object
        # would make results already handed out for earlier inputs change
        # when a later input is measured.
        self.output = TraceAnalysisMetrics()
        t_begin = time.perf_counter()
        self.load(input_path)
        t_end = time.perf_counter()
        self.output.latency["load"] = t_end - t_begin

    def _run(self, task: str):
        """Time ``task`` and record its median latency.

        The task is called ``self.warmup`` times untimed and then
        ``self.iter`` times timed.

        Args:
            task: Name of the task method to run.

        Raises:
            NotImplementedError: If the backend has no such task.
        """
        run_lambda = self.run(task)
        # warmup
        for _ in range(self.warmup):
            run_lambda()
        latencies = []
        # TODO: does perfetto cache the query result?
        for _ in range(self.iter):
            t_begin = time.perf_counter()
            run_lambda()
            latencies.append(time.perf_counter() - t_begin)
        # record p50 latency only; with no timed iterations there is nothing
        # to take a median of, so record NaN without going through numpy
        # (which would emit a warning for an empty list).
        if latencies:
            self.output.latency[task] = float(numpy.median(latencies))
        else:
            self.output.latency[task] = float("nan")

    def load(self, input_file_path: str):
        """Load the trace at ``input_file_path``; backends must override this.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Trace loading is not implemented yet.")

    def run(self, task: str) -> Callable:
        """Return the zero-argument callable implementing ``task``.

        Args:
            task: Name of an attribute on this backend.

        Raises:
            NotImplementedError: If the attribute is missing or not callable
                (for example when the name collides with a data attribute).
        """
        task_lambda = getattr(self, task, None)
        if not callable(task_lambda):
            raise NotImplementedError(f"Task {task} is not implemented yet.")
        return task_lambda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
|
||
from typing import List | ||
|
||
from perfetto.trace_processor import TraceProcessor | ||
|
||
from .common import TraceAnalysis | ||
|
||
|
||
class PerfettoTraceAnalysis(TraceAnalysis):
    """Trace analysis backend that runs SQL queries through Perfetto.

    ``load`` opens a trace with Perfetto's ``TraceProcessor``; each task
    method runs one query against it and returns the rows as strings.
    """

    name = "perfetto"

    def __init__(self, args: argparse.Namespace):
        super().__init__(args)
        # No trace is open until load() is called.
        self.tp = None

    def load(self, input_file_path: str):
        """Open ``input_file_path`` in a new ``TraceProcessor``.

        A processor left over from a previous call is closed first so that
        loading several inputs in a row does not accumulate open processors.
        NOTE(review): callers that time this method will include the close of
        the previous processor in the measurement.
        """
        previous = getattr(self, "tp", None)
        if previous is not None:
            self.tp = None
            previous.close()
        self.tp = TraceProcessor(input_file_path)

    def _query_rows(self, query: str) -> List[str]:
        """Run ``query`` on the loaded trace and return each row as a string."""
        return [str(row) for row in self.tp.query(query)]

    def search_gemm_kernels(self) -> List[str]:
        """Return the distinct slice names containing ``sm90_xmma_gemm_``."""
        # NOTE(review): the pattern is specific to one kernel naming scheme;
        # confirm it matches kernels present in the traces being benchmarked.
        query = "SELECT DISTINCT(name) FROM slice WHERE name like '%sm90_xmma_gemm_%' ORDER BY ts"
        return self._query_rows(query)

    def select_kernels(self) -> List[str]:
        """Return the first 30 ``kernel`` slices ordered by timestamp."""
        query = "SELECT ts, dur, name FROM slice WHERE category == 'kernel' ORDER BY ts limit 30"
        return self._query_rows(query)

    def group_kernels(self) -> List[str]:
        """Return per-name duration totals, averages and counts for kernels."""
        query = "SELECT name, sum(dur), avg(dur), count(*) as occ FROM slice WHERE category == 'kernel' GROUP BY name ORDER BY occ DESC"
        return self._query_rows(query)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import argparse | ||
|
||
from .backends import AVAILABLE_BACKENDS, DEFAULT_METRICS | ||
from .table import TraceAnalysisBenchmarkResult | ||
|
||
|
||
class TraceAnalysisBenchmark:
    """Runs the selected tasks on every input with every selected backend."""

    def __init__(self, args: argparse.Namespace):
        """Instantiate the requested backends and the result container.

        Args:
            args: Parsed arguments providing ``inputs``, ``tasks``,
                ``backends`` and ``metrics`` (and whatever the backend
                constructors read).
        """
        self.inputs = args.inputs
        self.tasks = args.tasks
        # A list keeps the backends in registry order; a set of instances
        # would iterate in an order that can differ from run to run.
        self.backends = [
            backend_cls(args)
            for backend_name, backend_cls in AVAILABLE_BACKENDS.items()
            if backend_name in args.backends
        ]

        self.metrics = args.metrics if args.metrics else DEFAULT_METRICS

        assert self.inputs, "Inputs to benchmark cannot be empty."
        assert self.tasks, "Tasks to benchmark cannot be empty."
        assert self.backends, "Backends to benchmark cannot be empty."

        self.result = TraceAnalysisBenchmarkResult(
            inputs=self.inputs,
            tasks=self.tasks,
            metrics=self.metrics,
        )

    def run(self):
        """Load each input on each backend, run the tasks, and store results.

        Results are stored in ``self.result.data`` under the key
        ``(input name, backend name)``.
        """
        import copy

        for backend in self.backends:
            for trace_name in self.inputs:
                backend._load(trace_name)
                for task in self.tasks:
                    # Loading was already measured by _load() above.
                    if task == "load":
                        continue
                    backend._run(task)
                # Store a snapshot: the backend may keep mutating the same
                # output object while processing the next input, which would
                # otherwise overwrite the numbers recorded for this one.
                self.result.data[(trace_name, backend.name)] = copy.deepcopy(
                    backend.output
                )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import os | ||
|
||
import subprocess | ||
import sys | ||
|
||
from . import BENCHMARK_ROOT, s3_utils | ||
|
||
|
||
# Tarballs fetched by download_traces_from_s3().
TRACES = ["torchbench_traces.tar.gz"]
|
||
|
||
def download_traces_from_s3():
    """Fetch and decompress every tarball listed in ``TRACES``."""
    for tarball_name in TRACES:
        s3_utils.checkout_s3_data(tarball_name, decompress=True)
|
||
|
||
def install_deps(requirements_txt="requirements.txt"):
    """Install the benchmark's Python dependencies with pip.

    Args:
        requirements_txt: Requirements file name, resolved against
            ``BENCHMARK_ROOT``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    requirements_path = os.path.join(BENCHMARK_ROOT, requirements_txt)
    # Use the running interpreter's pip so packages land in its environment.
    pip_command = [sys.executable, "-m", "pip", "install", "-r", requirements_path]
    subprocess.check_call(pip_command)
|
||
|
||
if __name__ == "__main__":
    # Dependencies go first: the download step imports a package they provide.
    install_deps()
    # Then fetch and unpack the sample traces.
    download_traces_from_s3()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
perfetto | ||
tabulate | ||
numpy | ||
requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
""" | ||
Kineto trace analysis benchmark. | ||
""" | ||
|
||
import argparse | ||
import sys | ||
|
||
from .backends import AVAILABLE_TASKS, DEFAULT_METRICS | ||
|
||
from .framework import TraceAnalysisBenchmark | ||
|
||
|
||
def _get_parser():
    """Build the command-line parser for the trace analysis benchmark."""
    # (flag, default, help) for the options that accept one or more values.
    multi_value_options = (
        ("--inputs", ["torchbench_resnet50_3080ti"], "Name of the inputs."),
        ("--tasks", AVAILABLE_TASKS, "Name of the tasks."),
        ("--backends", ["perfetto"], "Name of the backends."),
        ("--metrics", DEFAULT_METRICS, "Metrics to collect."),
    )
    # (flag, default, help) for the integer-valued options.
    integer_options = (
        ("--warmup", 10, "Number of warmup iterations."),
        ("--iter", 20, "Run iterations."),
    )

    cli = argparse.ArgumentParser()
    for flag, default_value, help_text in multi_value_options:
        cli.add_argument(flag, nargs="+", default=default_value, help=help_text)
    cli.add_argument("--csv", action="store_true", help="Output the result as csv")
    for flag, default_value, help_text in integer_options:
        cli.add_argument(flag, default=default_value, type=int, help=help_text)
    return cli
|
||
|
||
if __name__ == "__main__":
    # Parse the command line, run the benchmark, then report the result.
    parser = _get_parser()
    args = parser.parse_args()
    benchmark = TraceAnalysisBenchmark(args)
    benchmark.run()
    result = benchmark.result

    if not args.csv:
        print(result)
    else:
        # NOTE(review): this prints whatever write_csv_to_file() returns after
        # it has been handed sys.stdout - confirm that return value is meant
        # to be printed.
        print(result.write_csv_to_file(sys.stdout))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Copyright (c) Meta Platforms, Inc. and affiliates. | ||
# All rights reserved. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
import os | ||
from pathlib import Path | ||
|
||
from . import BENCHMARK_DATA_DIR | ||
|
||
|
||
def decompress_s3_data(s3_tarball_path: Path):
    """Extract a ``.tar.gz`` archive into the benchmark data directory.

    The archive is unpacked into ``<BENCHMARK_DATA_DIR>/<archive name without
    .tar.gz>``, which is created if needed.

    Args:
        s3_tarball_path: Path of the downloaded ``.tar.gz`` file.

    Raises:
        AssertionError: If the path does not end in ``.tar.gz``.
    """
    assert s3_tarball_path.name.endswith(
        ".tar.gz"
    ), f"Expected .tar.gz file path but got {s3_tarball_path}."
    import tarfile

    # Hide decompressed file in .data directory so that they won't be checked in
    decompress_dir = os.path.join(
        BENCHMARK_DATA_DIR, s3_tarball_path.name.removesuffix(".tar.gz")
    )

    os.makedirs(decompress_dir, exist_ok=True)
    print(f"Decompressing input tarball: {s3_tarball_path}...", end="", flush=True)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(s3_tarball_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Refuse members that would escape decompress_dir or carry
            # special file types, on interpreters that support filters.
            tar.extractall(path=decompress_dir, filter="data")
        else:
            tar.extractall(path=decompress_dir)
    print("OK")
|
||
|
||
def checkout_s3_data(name: str, decompress: bool = True):
    """Download ``name`` from the torchbench S3 bucket into the data directory.

    Args:
        name: File name under the bucket's ``traces/`` prefix; also used as
            the local file name inside ``BENCHMARK_DATA_DIR``.
        decompress: When true, unpack the downloaded archive afterwards.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import requests

    S3_URL_BASE = "https://ossci-datasets.s3.amazonaws.com/torchbench"
    download_dir = Path(BENCHMARK_DATA_DIR)
    download_dir.mkdir(parents=True, exist_ok=True)

    full_path = download_dir.joinpath(name)
    s3_url = f"{S3_URL_BASE}/traces/{name}"
    print(f"Checking out {s3_url} to {full_path}")
    # Stream the body to disk in chunks instead of holding the whole archive
    # in memory, and give up if the server stops responding.
    with requests.get(
        s3_url, allow_redirects=True, stream=True, timeout=60
    ) as response:
        # Fail here rather than saving an error page as if it were the tarball.
        response.raise_for_status()
        with open(full_path, "wb") as output:
            for chunk in response.iter_content(chunk_size=1 << 20):
                output.write(chunk)
    if decompress:
        decompress_s3_data(full_path)
Oops, something went wrong.