Skip to content

Commit

Permalink
Add perfetto trace analysis benchmark (#969)
Browse files Browse the repository at this point in the history
Summary:
Add a benchmark for trace analysis tasks on backends like Perfetto.

Step 1: install the benchmark (this will download and decompress the sample trace archive from Amazon S3)

```
$ python -m benchmarks.perfetto.install

Checking out https://ossci-datasets.s3.amazonaws.com/torchbench/traces/torchbench_traces.tar.gz to /Users/xzhao9/git/kineto/benchmarks/trace_analysis/.data/torchbench_traces.tar.gz
decompressing input tarball: /Users/xzhao9/git/kineto/benchmarks/trace_analysis/.data/torchbench_traces.tar.gz...OK
Requirement already satisfied: perfetto in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from -r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 1)) (0.7.0)
Requirement already satisfied: tabulate in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from -r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 2)) (0.9.0)
Requirement already satisfied: protobuf in /Users/xzhao9/miniconda3/envs/test-numpy/lib/python3.11/site-packages (from perfetto->-r /Users/xzhao9/git/kineto/benchmarks/trace_analysis/requirements.txt (line 1)) (4.25.3)
```

Step 2: run the benchmark

```
$ python -m benchmarks.perfetto.run

                                    input-task    perfetto-latency
----------------------------------------------  ------------------
               torchbench_resnet50_3080ti-load         8.53069
torchbench_resnet50_3080ti-search_gemm_kernels         0.067583
     torchbench_resnet50_3080ti-select_kernels         0.000549563
      torchbench_resnet50_3080ti-group_kernels         0.0145147
```

Right now, only the latency metric is available. We could add other metrics, such as memory footprint, later.

Pull Request resolved: #969

Differential Revision: D60466932

Pulled By: xuzhao9
  • Loading branch information
xuzhao9 authored and facebook-github-bot committed Jul 30, 2024
1 parent 188c5f5 commit 18efd40
Show file tree
Hide file tree
Showing 12 changed files with 440 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# ignore common items
.idea
.vscode
.data
__pycache__
10 changes: 10 additions & 0 deletions benchmarks/perfetto/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os

# Directory that contains this package's __init__.py.
BENCHMARK_ROOT = os.path.dirname(__file__)
# ".data" directory under the package root; ".data" is listed in .gitignore.
BENCHMARK_DATA_DIR = os.path.join(BENCHMARK_ROOT, ".data")
25 changes: 25 additions & 0 deletions benchmarks/perfetto/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from .clp import CLPTraceAnalysis
from .common import DEFAULT_METRICS # noqa: F401
from .perfetto import PerfettoTraceAnalysis

# Registry mapping a backend's name to the class that implements it.
AVAILABLE_BACKENDS = {
    "perfetto": PerfettoTraceAnalysis,
    "clp": CLPTraceAnalysis,
}

# Names of the tasks known to the benchmark.
AVAILABLE_TASKS = [
    "load",
    "search_gemm_kernels",
    "select_kernels",
    "group_kernels",
]

# Stamp every registered class with its registry key so that instances can
# report which backend they belong to through ``.name``.
for name, analysis in AVAILABLE_BACKENDS.items():
    analysis.name = name
15 changes: 15 additions & 0 deletions benchmarks/perfetto/backends/clp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse

from .common import TraceAnalysis


class CLPTraceAnalysis(TraceAnalysis):
    """Trace analysis backend for CLP.

    Only construction is defined here; ``load`` and every task are inherited
    unchanged from ``TraceAnalysis``.
    """

    def __init__(self, args: argparse.Namespace):
        """Forward the parsed command-line arguments to ``TraceAnalysis``."""
        super().__init__(args)
76 changes: 76 additions & 0 deletions benchmarks/perfetto/backends/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import os

import time
from dataclasses import dataclass, field, fields

from typing import Callable, Dict

import numpy

from .. import BENCHMARK_DATA_DIR


def _get_input_path(input_name):
    """Return the path of the JSON trace file that belongs to ``input_name``.

    The result is ``<BENCHMARK_DATA_DIR>/torchbench_traces/<input_name>.json``.
    The path is only built, not checked for existence.
    """
    traces_dir = os.path.join(BENCHMARK_DATA_DIR, "torchbench_traces")
    return os.path.join(traces_dir, f"{input_name}.json")


@dataclass
class TraceAnalysisMetrics:
    """Measurements collected by one backend; every field is a name -> float map.

    Each field defaults to its own empty dict, so instances never share state.
    """

    # Time spent on each trace analysis task, keyed by task name.
    latency: Dict[str, float] = field(default_factory=dict)
    # Peak CPU memory used by each trace analysis task.
    peak_mem: Dict[str, float] = field(default_factory=dict)
    # Any additional metrics that are not built in.
    extra_metrics: Dict[str, float] = field(default_factory=dict)


# Metrics reported when none are requested explicitly.
DEFAULT_METRICS = ["latency"]
# Every TraceAnalysisMetrics field name except the free-form "extra_metrics".
BUILTIN_METRICS = {
    metric.name
    for metric in fields(TraceAnalysisMetrics)
    if metric.name != "extra_metrics"
}


class TraceAnalysis:
    """Base class for trace analysis backends.

    It provides the timing harness (``_load`` and ``_run``) and the task
    lookup (``run``). A backend overrides ``load`` and defines one
    zero-argument method per task, named after the task.
    """

    output: TraceAnalysisMetrics

    def __init__(self, args: argparse.Namespace):
        """Create the backend.

        Args:
            args: Parsed options; ``args.warmup`` and ``args.iter`` are read.
        """
        self.output = TraceAnalysisMetrics()
        self.warmup = args.warmup
        self.iter = args.iter

    def _load(self, input: str):
        """Load the trace named ``input`` and record how long loading took.

        The latency is stored in ``self.output.latency["load"]``.
        """
        input_path = _get_input_path(input)
        # Start every input with a fresh metrics object. Callers keep a
        # reference to ``self.output`` per input, so reusing one object would
        # make all of those references show the numbers of the last input.
        self.output = TraceAnalysisMetrics()
        t_begin = time.perf_counter()
        self.load(input_path)
        t_end = time.perf_counter()
        self.output.latency["load"] = t_end - t_begin

    def _run(self, task: str):
        """Time ``task`` and record its median latency.

        The task runs ``self.warmup`` times untimed, then ``self.iter`` times
        timed. The result is stored in ``self.output.latency[task]``.

        Raises:
            NotImplementedError: If the backend does not implement ``task``.
        """
        run_task = self.run(task)
        # warmup
        for _ in range(self.warmup):
            run_task()
        latencies = []
        # TODO: does perfetto cache the query result?
        for _ in range(self.iter):
            t_begin = time.perf_counter()
            run_task()
            t_end = time.perf_counter()
            latencies.append(t_end - t_begin)
        # Record p50 latency only, as a plain float rather than numpy.float64.
        self.output.latency[task] = float(numpy.median(latencies))

    def load(self, input_file_path: str):
        """Load the trace at ``input_file_path``; backends must override this.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Trace loading is not implemented yet.")

    def run(self, task: str) -> Callable:
        """Return the zero-argument callable that performs ``task``.

        The task is looked up as an attribute of this object named ``task``.

        Raises:
            NotImplementedError: If there is no such attribute or it is not
                callable (for example a data attribute such as ``warmup``).
        """
        task_callable = getattr(self, task, None)
        if not callable(task_callable):
            raise NotImplementedError(f"Task {task} is not implemented yet.")
        # The bound method already takes no arguments, so return it directly
        # and keep an extra call layer out of the timed region.
        return task_callable
40 changes: 40 additions & 0 deletions benchmarks/perfetto/backends/perfetto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse

from typing import List

from perfetto.trace_processor import TraceProcessor

from .common import TraceAnalysis


class PerfettoTraceAnalysis(TraceAnalysis):
    """Trace analysis backend that runs SQL queries through Perfetto.

    ``load`` opens the trace with ``TraceProcessor``. Each task method runs
    one query against the ``slice`` table and returns the rows as strings.
    """

    name = "perfetto"

    def __init__(self, args: argparse.Namespace):
        """Create the backend; no trace is loaded until ``load`` is called."""
        super().__init__(args)
        self.tp = None

    def load(self, input_file_path: str):
        """Open ``input_file_path`` in a new ``TraceProcessor``.

        A processor left over from an earlier ``load`` is closed first, so
        loading several inputs in a row does not accumulate open processors.
        """
        previous = getattr(self, "tp", None)
        if previous is not None:
            previous.close()
        self.tp = TraceProcessor(input_file_path)

    def search_gemm_kernels(self) -> List[str]:
        """Return the distinct slice names that contain ``sm90_xmma_gemm_``."""
        query = "SELECT DISTINCT(name) FROM slice WHERE name like '%sm90_xmma_gemm_%' ORDER BY ts"
        return [str(row) for row in self.tp.query(query)]

    def select_kernels(self) -> List[str]:
        """Return ts, dur and name of the first 30 kernel slices ordered by ts."""
        query = "SELECT ts, dur, name FROM slice WHERE category == 'kernel' ORDER BY ts limit 30"
        return [str(row) for row in self.tp.query(query)]

    def group_kernels(self) -> List[str]:
        """Return per-name total, average and count of kernel slice durations.

        Rows are ordered by occurrence count, highest first.
        """
        query = "SELECT name, sum(dur), avg(dur), count(*) as occ FROM slice WHERE category == 'kernel' GROUP BY name ORDER BY occ DESC"
        return [str(row) for row in self.tp.query(query)]
42 changes: 42 additions & 0 deletions benchmarks/perfetto/framework.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse

from .backends import AVAILABLE_BACKENDS, DEFAULT_METRICS
from .table import TraceAnalysisBenchmarkResult


class TraceAnalysisBenchmark:
    """Run the selected tasks on the selected inputs for each selected backend.

    Results are collected in ``self.result.data``, keyed by
    ``(input name, backend name)``.
    """

    def __init__(self, args: argparse.Namespace):
        """Set up the benchmark from parsed command-line options.

        Args:
            args: Parsed options. ``inputs``, ``tasks``, ``backends`` and
                ``metrics`` are read here; the whole namespace is also passed
                to each backend constructor.

        Raises:
            AssertionError: If no inputs, tasks or known backends are selected.
        """
        self.inputs = args.inputs
        self.tasks = args.tasks
        # Instantiate only the registered backends that were requested.
        self.backends = {
            backend_cls(args)
            for backend_name, backend_cls in AVAILABLE_BACKENDS.items()
            if backend_name in args.backends
        }

        self.metrics = args.metrics if args.metrics else DEFAULT_METRICS

        assert self.inputs, "Inputs to benchmark cannot be empty."
        assert self.tasks, "Tasks to benchmark cannot be empty."
        assert self.backends, "Backends to benchmark cannot be empty."

        self.result = TraceAnalysisBenchmarkResult(
            inputs=self.inputs,
            tasks=self.tasks,
            metrics=self.metrics,
        )

    def run(self):
        """Load every input on every backend, run the tasks, store the metrics.

        The trace is always loaded, so "load" is skipped in the task loop.
        """
        import copy

        for backend in self.backends:
            for input_name in self.inputs:
                backend._load(input_name)
                for task in self.tasks:
                    if task == "load":
                        continue
                    backend._run(task)
                # Store a snapshot rather than the live object: the backend
                # keeps writing to its ``output`` for later inputs, which
                # would otherwise change the entries already recorded here.
                result_key = (input_name, backend.name)
                self.result.data[result_key] = copy.deepcopy(backend.output)
41 changes: 41 additions & 0 deletions benchmarks/perfetto/install.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os

import subprocess
import sys

from . import BENCHMARK_ROOT, s3_utils


# Names of the trace archives fetched by download_traces_from_s3().
TRACES = ["torchbench_traces.tar.gz"]


def download_traces_from_s3():
    """Download and decompress every archive listed in ``TRACES``.

    Each name is handed to ``s3_utils.checkout_s3_data`` with
    ``decompress=True``.
    """
    for trace_name in TRACES:
        s3_utils.checkout_s3_data(trace_name, decompress=True)


def install_deps(requirements_txt="requirements.txt"):
    """Install the packages listed in a requirements file with pip.

    pip is run as a module of the current interpreter (``sys.executable``).

    Args:
        requirements_txt: File name, resolved relative to ``BENCHMARK_ROOT``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    requirements_path = os.path.join(BENCHMARK_ROOT, requirements_txt)
    pip_command = [sys.executable, "-m", "pip", "install", "-r", requirements_path]
    subprocess.check_call(pip_command)


if __name__ == "__main__":
    # Install dependencies first: the download step imports ``requests``,
    # which is one of the packages listed in requirements.txt.
    install_deps()
    download_traces_from_s3()
4 changes: 4 additions & 0 deletions benchmarks/perfetto/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
perfetto
tabulate
numpy
requests
56 changes: 56 additions & 0 deletions benchmarks/perfetto/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Kineto trace analysis benchmark.
"""

import argparse
import sys

from .backends import AVAILABLE_TASKS, DEFAULT_METRICS

from .framework import TraceAnalysisBenchmark


def _get_parser():
    """Build the command-line parser for the trace analysis benchmark.

    Returns:
        An ``argparse.ArgumentParser`` with the multi-value options
        ``--inputs``, ``--tasks``, ``--backends`` and ``--metrics``, the flag
        ``--csv``, and the integer options ``--warmup`` and ``--iter``.
    """
    # (flag, default, help) for the options that accept one or more values.
    multi_value_options = (
        ("--inputs", ["torchbench_resnet50_3080ti"], "Name of the inputs."),
        ("--tasks", AVAILABLE_TASKS, "Name of the tasks."),
        ("--backends", ["perfetto"], "Name of the backends."),
        ("--metrics", DEFAULT_METRICS, "Metrics to collect."),
    )

    cli = argparse.ArgumentParser()
    for flag, default_value, help_text in multi_value_options:
        cli.add_argument(flag, nargs="+", default=default_value, help=help_text)

    cli.add_argument("--csv", action="store_true", help="Output the result as csv")
    cli.add_argument(
        "--warmup", default=10, type=int, help="Number of warmup iterations."
    )
    cli.add_argument("--iter", default=20, type=int, help="Run iterations.")
    return cli


if __name__ == "__main__":
    # Parse the options, run the benchmark, then print the collected result.
    parser = _get_parser()
    args = parser.parse_args()

    benchmark = TraceAnalysisBenchmark(args)
    benchmark.run()
    result = benchmark.result

    if not args.csv:
        print(result)
    else:
        # NOTE(review): this prints the return value of write_csv_to_file in
        # addition to whatever that call writes to stdout - confirm this is
        # intended.
        print(result.write_csv_to_file(sys.stdout))
45 changes: 45 additions & 0 deletions benchmarks/perfetto/s3_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
from pathlib import Path

from . import BENCHMARK_DATA_DIR


def decompress_s3_data(s3_tarball_path: Path):
    """Extract a downloaded ``.tar.gz`` archive into the benchmark data directory.

    The archive is unpacked into
    ``<BENCHMARK_DATA_DIR>/<archive file name without ".tar.gz">``, which is
    created if it does not exist yet.

    Args:
        s3_tarball_path: Path of the ``.tar.gz`` file to extract.

    Raises:
        AssertionError: If the path does not end in ``.tar.gz``.
    """
    assert str(s3_tarball_path.absolute()).endswith(
        ".tar.gz"
    ), f"Expected .tar.gz file path but got {s3_tarball_path}."
    import tarfile

    # Hide decompressed file in .data directory so that they won't be checked in
    decompress_dir = os.path.join(
        BENCHMARK_DATA_DIR, s3_tarball_path.name.removesuffix(".tar.gz")
    )

    os.makedirs(decompress_dir, exist_ok=True)
    print(f"Decompressing input tarball: {s3_tarball_path}...", end="", flush=True)
    # The context manager closes the archive even when extraction fails.
    with tarfile.open(s3_tarball_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network, so refuse members that would
            # escape decompress_dir (absolute paths, "..", unsafe links).
            tar.extractall(path=decompress_dir, filter="data")
        else:
            # Interpreter without extraction-filter support.
            tar.extractall(path=decompress_dir)
    print("OK")


def checkout_s3_data(name: str, decompress: bool = True):
    """Download ``name`` from the torchbench S3 bucket into ``BENCHMARK_DATA_DIR``.

    The file is fetched from ``<S3_URL_BASE>/traces/<name>`` and written to
    ``<BENCHMARK_DATA_DIR>/<name>``; the directory is created if needed.

    Args:
        name: File name of the object under the ``traces/`` prefix.
        decompress: When true, pass the downloaded file to
            ``decompress_s3_data`` afterwards.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here rather than at module level, so importing this module
    # does not require ``requests`` to be installed yet.
    import requests

    S3_URL_BASE = "https://ossci-datasets.s3.amazonaws.com/torchbench"
    download_dir = Path(BENCHMARK_DATA_DIR)
    download_dir.mkdir(parents=True, exist_ok=True)

    full_path = download_dir.joinpath(name)
    s3_url = f"{S3_URL_BASE}/traces/{name}"
    print(f"Checking out {s3_url} to {full_path}")
    # Stream the body so a large archive is never held fully in memory, and
    # bound the connect/read waits so a stalled connection cannot hang forever.
    with requests.get(
        s3_url, allow_redirects=True, stream=True, timeout=(10, 60)
    ) as response:
        # Fail here instead of saving an error page as if it were the tarball.
        response.raise_for_status()
        with open(str(full_path.absolute()), "wb") as output:
            for chunk in response.iter_content(chunk_size=1 << 20):
                output.write(chunk)
    if decompress:
        decompress_s3_data(full_path)
Loading

0 comments on commit 18efd40

Please sign in to comment.