Skip to content

Commit

Permalink
Use larger threshold 0.4 * 3
Browse files Browse the repository at this point in the history
  • Loading branch information
rayluo committed Aug 2, 2023
1 parent a8ec0a9 commit c79f565
Showing 1 changed file with 18 additions and 14 deletions.
32 changes: 18 additions & 14 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
import random
import timeit
import unittest
try:
from unittest.mock import patch
except:
from mock import patch
from unittest.mock import patch

import msal
from tests.http_client import MinimalResponse
Expand All @@ -16,7 +13,15 @@
logging.basicConfig(level=logging.INFO)


def time_and_log(stmt, setup="pass", number=100, repeat=30, globals=None):
def time_and_log(stmt, setup="pass", number=5, repeat=30, globals=None):
"""Return the seconds needed to run the statement ``number`` times.
The granularity is in microseconds, so you can adjust ``number`` so that
you have at least 2 or 3 significant digits, i.e. hundreds of microseconds.
Increase the ``repeat`` to repeat the timing process multiple times,
so that you will have a more stable time value.
"""
assert repeat > 10, "We need more samples to get a stable minimum"
t = min(timeit.repeat( # Use min() based on suggestion from official docs
# https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
Expand Down Expand Up @@ -88,27 +93,26 @@ def run(self, app=None, scope=None):
))


reference = time_and_log(
"fib(20)", globals={"fib": fib},
) / .204 # It took this much second on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
# Here we run several different calculations to observe their variation in the logs.
# Only the last one will be used as our reference number.
time_and_log(
"d.get('miss')", setup="d={k: 'v' for k in range(100)}",
number=1000, repeat=150*1000,
) / .000042 # It took this much second on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
time_and_log(
"fib(20)", globals={"fib": fib}, number=100, repeat=30,
) / .204 # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
time_and_log(
"fib(20)", globals={"fib": fib},
number=1, repeat=3000,
) / .002037 # It took this much second on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
"fib(14)", globals={"fib": fib},
number=1, repeat=3000,
) / .000114 # It took this much second on Ray's baseline laptop. Normalize it to a ratio.

def fail_on_regression(actual, baseline):
threshold = 0.4 # We observed variance approaching 40% on Github Actions
threshold = 3 * 0.4 # 40% variance is observed on Github. Here we add more buffer.
ratio = actual / baseline # Ratio varies on different machines. So does reference.
variance = (ratio - reference) / reference # Variance shall be comparable across machines
logger.info(
Expand Down

0 comments on commit c79f565

Please sign in to comment.