Skip to content

Commit

Permalink
fix 1-indexing for replica_id in MetricsStore
Browse files (browse the repository at this point in the history)
  • Loading branch information
sicario001 committed Aug 28, 2024
1 parent 8afeaf0 commit 46d9ec8
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions vidur/metrics/metrics_store.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,7 +48,6 @@ def wrapper(self, *args, **kwargs):


class MetricsStore:

def __init__(self, simulation_config: SimulationConfig) -> None:
self._simulation_config = simulation_config
self._config = self._simulation_config.metrics_config
Expand Down Expand Up @@ -176,9 +175,9 @@ def __init__(self, simulation_config: SimulationConfig) -> None:
)

self._cpu_operation_metrics: Dict[CpuOperationMetrics, CDFSketch] = {}
self._cpu_operation_metrics_per_batch: Dict[CpuOperationMetrics, DataSeries] = (
{}
)
self._cpu_operation_metrics_per_batch: Dict[
CpuOperationMetrics, DataSeries
] = {}
for metric_name in CpuOperationMetrics:
self._cpu_operation_metrics[metric_name] = CDFSketch(
metric_name.value,
Expand Down Expand Up @@ -646,7 +645,7 @@ def on_batch_end(
self._on_request_end(time, request)

if self._config.store_utilization_metrics:
self._replica_memory_usage[replica_id - 1].put(time, memory_usage_percent)
self._replica_memory_usage[replica_id].put(time, memory_usage_percent)

for request in batch.requests:
self._update_per_token_execution_times(time, request, batch)
Expand Down Expand Up @@ -685,7 +684,7 @@ def on_replica_schedule(
if not self._config.store_utilization_metrics:
return

self._replica_memory_usage[replica_id - 1].put(time, memory_usage_percent)
self._replica_memory_usage[replica_id].put(time, memory_usage_percent)

@if_write_metrics
def on_replica_stage_schedule(
Expand All @@ -699,9 +698,9 @@ def on_replica_stage_schedule(
if not self._config.store_utilization_metrics:
return

self._replica_busy_time[replica_id - 1][stage_id - 1].put(time, 100)
self._replica_busy_time[replica_id][stage_id - 1].put(time, 100)
mfu = self._mfu_calculator.get_mfu(batch_stage)
self._replica_mfu[replica_id - 1][stage_id - 1].put(time, mfu)
self._replica_mfu[replica_id][stage_id - 1].put(time, mfu)

if not self._config.store_operation_metrics:
return
Expand Down Expand Up @@ -817,5 +816,5 @@ def on_batch_stage_end(
) -> None:
if not self._config.store_utilization_metrics:
return
self._replica_busy_time[replica_id - 1][stage_id - 1].put(time, 0)
self._replica_mfu[replica_id - 1][stage_id - 1].put(time, 0)
self._replica_busy_time[replica_id][stage_id - 1].put(time, 0)
self._replica_mfu[replica_id][stage_id - 1].put(time, 0)

0 comments on commit 46d9ec8

Please sign in to comment.