Use larger threshold 0.4 * 3

AzureAD · Aug 2, 2023 · c79f565 · c79f565
1 parent a8ec0a9
commit c79f565
Showing 1 changed file with 18 additions and 14 deletions.
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
@@ -3,10 +3,7 @@
 import random
 import timeit
 import unittest
-try:
-    from unittest.mock import patch
-except:
-    from mock import patch
+from unittest.mock import patch
 
 import msal
 from tests.http_client import MinimalResponse
@@ -16,7 +13,15 @@
 logging.basicConfig(level=logging.INFO)
 
 
-def time_and_log(stmt, setup="pass", number=100, repeat=30, globals=None):
+def time_and_log(stmt, setup="pass", number=5, repeat=30, globals=None):
+    """Return the seconds needed to run the statement ``number`` times.
+
+    The granularity is in microseconds. So, you can adjust the ``number`` so that
+    you have at least 2 or 3 effective digits, i.e. hundreds of microseconds.
+
+    Increase the ``repeat`` to repeat the timing process multiple times,
+    so that you will have a more stable time value.
+    """
     assert repeat > 10, "We need more samples to get a stable minimum"
     t = min(timeit.repeat(  # Use min() based on suggestion from official docs
         # https://docs.python.org/3/library/timeit.html#timeit.Timer.repeat
@@ -88,27 +93,26 @@ def run(self, app=None, scope=None):
             ))
 
 
-reference = time_and_log(
-    "fib(20)", globals={"fib": fib},
-    ) / .204  # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
-
-reference = time_and_log(
+# Here we run several different calculations to observe their variation in the logs.
+# Only the last one will be used as our reference number.
+time_and_log(
     "d.get('miss')", setup="d={k: 'v' for k in range(100)}",
     number=1000, repeat=150*1000,
     ) / .000042  # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
-
-reference = time_and_log(
+time_and_log(
+    "fib(20)", globals={"fib": fib}, number=100, repeat=30,
+    ) / .204  # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
+time_and_log(
     "fib(20)", globals={"fib": fib},
     number=1, repeat=3000,
     ) / .002037  # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
-
 reference = time_and_log(
     "fib(14)", globals={"fib": fib},
     number=1, repeat=3000,
     ) / .000114  # It took this much second on Ray's baseline laptop. Normalize it to a ratio.
 
 def fail_on_regression(actual, baseline):
-    threshold = 0.4  # We observed variance approaching 40% on Github Actions
+    threshold = 3 * 0.4  # 40% variance is observed on Github. Here we add more buffer.
     ratio = actual / baseline  # Ratio varies on different machines. So does reference.
     variance = (ratio - reference) / reference  # Variance shall be comparable across machines
     logger.info(