Skip to content

Commit

Permalink
Go with fewer loops in the hope of more stable timings
Browse files Browse the repository at this point in the history
  • Loading branch information
rayluo committed Aug 1, 2023
1 parent 8e965ca commit 3a90e81
Showing 1 changed file with 19 additions and 15 deletions.
34 changes: 19 additions & 15 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,24 @@ def run(self, app=None, scope=None):
))


reference = time_and_log(
"fib(20)", globals={"fib": fib},
) / .204 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
"d.get('miss')", setup="d={k: 'v' for k in range(100)}",
number=1000*1000, repeat=150,
) / .0475 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.
number=1000, repeat=150*1000,
) / .000042 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
"fib(20)", globals={"fib": fib},
) / .204 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.
number=1, repeat=3000,
) / .002037 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.

reference = time_and_log(
"fib(5)", globals={"fib": fib},
number=1000*100, repeat=100,
) / .141 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.

"fib(14)", globals={"fib": fib},
number=1, repeat=3000,
) / .000114 # It took this many seconds on Ray's baseline laptop. Normalize it to a ratio.

def fail_on_regression(actual, baseline, threshold=0.05):
ratio = actual / baseline # Ratio varies on different machines. So does reference.
Expand All @@ -118,41 +122,41 @@ def fail_on_regression(actual, baseline, threshold=0.05):
# To recalibrate, run "pytest --log-cli-level INFO this_file.py" to see times.
def test_cca_1_tenant_with_1_token_per_tenant_and_cache_hit(benchmark):
tester = CcaTester(tokens_per_tenant=1, cache_hit=True)
fail_on_regression(time_and_log(tester.run, number=5000), 0.183)
fail_on_regression(time_and_log(tester.run, number=5, repeat=3000), 0.000179)
benchmark(tester.run)

def test_cca_1_tenant_with_10_tokens_per_tenant_and_cache_hit(benchmark):
tester = CcaTester(tokens_per_tenant=10, cache_hit=True)
fail_on_regression(time_and_log(tester.run, number=5000), 0.248)
fail_on_regression(time_and_log(tester.run, number=5, repeat=3000), 0.000237)
benchmark(tester.run)

def test_cca_1_tenant_with_1000_tokens_per_tenant_and_cache_hit(benchmark):
tester = CcaTester(tokens_per_tenant=1000, cache_hit=True)
fail_on_regression(time_and_log(tester.run), 0.134)
fail_on_regression(time_and_log(tester.run, number=1, repeat=3000), 0.001280)
benchmark(tester.run)

def test_cca_many_tenants_with_10_tokens_per_tenant_and_cache_hit(benchmark):
tester = CcaTester(number_of_tenants=100, tokens_per_tenant=10, cache_hit=True)
fail_on_regression(time_and_log(tester.run, number=5000), 0.255)
fail_on_regression(time_and_log(tester.run, number=5, repeat=3000), 0.000242)
benchmark(tester.run)

def test_cca_1_tenant_with_1_token_per_tenant_and_cache_miss(benchmark):
tester = CcaTester(tokens_per_tenant=1, cache_hit=False)
fail_on_regression(time_and_log(tester.run, number=1000), 0.130)
fail_on_regression(time_and_log(tester.run, number=1, repeat=30000), 0.000124)
benchmark(tester.run)

def test_cca_1_tenant_with_10_tokens_per_tenant_and_cache_miss(benchmark):
tester = CcaTester(tokens_per_tenant=10, cache_hit=False)
fail_on_regression(time_and_log(tester.run, number=1000), 0.143)
fail_on_regression(time_and_log(tester.run, number=1, repeat=30000), 0.000137)
benchmark(tester.run)

def test_cca_1_tenant_with_1000_tokens_per_tenant_and_cache_miss(benchmark):
tester = CcaTester(tokens_per_tenant=1000, cache_hit=False)
fail_on_regression(time_and_log(tester.run), 0.150)
fail_on_regression(time_and_log(tester.run, number=1, repeat=3000), 0.001365)
benchmark(tester.run)

def test_cca_many_tenants_with_10_tokens_per_tenant_and_cache_miss(benchmark):
tester = CcaTester(number_of_tenants=100, tokens_per_tenant=10, cache_hit=False)
fail_on_regression(time_and_log(tester.run, number=1000), 0.146)
fail_on_regression(time_and_log(tester.run, number=1, repeat=3000), 0.000139)
benchmark(tester.run)

0 comments on commit 3a90e81

Please sign in to comment.