
Dummy inference engine #325 #331

Open
wants to merge 13 commits into main
32 changes: 32 additions & 0 deletions exo/inference/DummyInferenceEngine.py
@@ -0,0 +1,32 @@
import random
import asyncio
import numpy as np

class DummyInferenceEngine:
Contributor:

This needs to implement the InferenceEngine interface

  def __init__(self, output_type="static", output_value=None, output_shape=(1,), latency_mean=0.1, latency_stddev=0.1):
    self.output_type = output_type
    self.output_value = output_value
    self.output_shape = output_shape
    self.latency_mean = latency_mean
    self.latency_stddev = latency_stddev

    # Validation for static output type
    if self.output_type == "static" and self.output_value is None:
      raise ValueError("output_value must be provided when output_type is 'static'.")

  async def run_inference(self):
    # Simulate latency
    latency = max(0, random.normalvariate(self.latency_mean, self.latency_stddev))  # Non-negative latency
    await asyncio.sleep(latency)

    # Generate output based on the specified output type
    if self.output_type == "static":
      return self.output_value  # Return the static output
    elif self.output_type == "random":
      self.output_value = np.random.randn(*self.output_shape).tolist()  # Generate random output and store it
      return self.output_value

  async def get_latency(self):
    # Simulate and return the latency
    return max(0, random.normalvariate(self.latency_mean, self.latency_stddev))
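
Note on the review comment above: as written, the class stands alone rather than subclassing the project's InferenceEngine base class. The sketch below is one rough way the dummy engine could conform; the infer_prompt/infer_tensor names, parameters, and return tuple are assumptions modeled on the other engines, so the abstract methods actually defined in exo/inference/inference_engine.py remain the source of truth.

# Sketch only: the method signatures below are assumed, not taken from this diff.
import asyncio
import random
import numpy as np

from exo.inference.inference_engine import InferenceEngine  # assumed base-class location
from exo.inference.shard import Shard  # assumed import path for Shard

class DummyInferenceEngine(InferenceEngine):
  def __init__(self, output_shape=(1,), latency_mean=0.1, latency_stddev=0.1):
    self.output_shape = output_shape
    self.latency_mean = latency_mean
    self.latency_stddev = latency_stddev

  async def _simulate_latency(self):
    # Non-negative simulated latency, as in run_inference above.
    await asyncio.sleep(max(0, random.normalvariate(self.latency_mean, self.latency_stddev)))

  async def infer_prompt(self, request_id: str, shard: Shard, prompt: str, image_str=None, inference_state=None):
    await self._simulate_latency()
    # Assumed return shape: (output tensor, new inference state, is_finished flag).
    return np.random.randn(*self.output_shape), "", True

  async def infer_tensor(self, request_id: str, shard: Shard, input_data: np.ndarray, inference_state=None):
    await self._simulate_latency()
    return np.random.randn(*self.output_shape), "", True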

3 changes: 3 additions & 0 deletions exo/inference/inference_engine.py
@@ -27,5 +27,8 @@ def get_inference_engine(inference_engine_name: str, shard_downloader: 'ShardDownloader'):
    tinygrad.helpers.DEBUG.value = int(os.getenv("TINYGRAD_DEBUG", default="0"))

    return TinygradDynamicShardInferenceEngine(shard_downloader)
  elif inference_engine_name == "dummy":
    from exo.inference.DummyInferenceEngine import DummyInferenceEngine
    return DummyInferenceEngine(output_type="static", output_value=[1, 2, 3], latency_mean=0.1, latency_stddev=0.1)
  else:
    raise ValueError(f"Inference engine {inference_engine_name} not supported")
60 changes: 60 additions & 0 deletions exo/inference/test_dummy_inference_engine.py
@@ -0,0 +1,60 @@
import asyncio
import pytest
from exo.inference.DummyInferenceEngine import DummyInferenceEngine


@pytest.mark.asyncio
async def test_dummy_engine():
  dummy_engine = DummyInferenceEngine(output_type="random", output_shape=(2, 2), latency_mean=0.5, latency_stddev=0.1)

  # Simulate inference
  output = await dummy_engine.run_inference()
  latency = await dummy_engine.get_latency()

  assert isinstance(output, list), "Output should be a list."
  assert isinstance(latency, float), "Latency should be a float."


@pytest.mark.asyncio
async def test_dummy_inference_engine_static():
  # Test with static output
  dummy_engine = DummyInferenceEngine(output_type="static", output_value=[1, 2, 3], latency_mean=0.2, latency_stddev=0.1)
  await dummy_engine.run_inference()  # Simulate inference, check for errors
  assert dummy_engine.output_value == [1, 2, 3], "The static output should match the provided value."


@pytest.mark.asyncio
async def test_dummy_inference_engine_random():
  # Test with random output
  dummy_engine = DummyInferenceEngine(output_type="random", output_shape=(128, 128), latency_mean=0.1, latency_stddev=0.1)
  await dummy_engine.run_inference()  # Simulate inference, check for errors
  output = dummy_engine.output_value

  # Check that the output is a list and has the correct shape
  assert isinstance(output, list), "Output should be a list."
  assert len(output) == 128, "Output should have the specified outer shape."

  # Check each sub-list for the correct length
  for sublist in output:
    assert isinstance(sublist, list), "Each output item should be a list."
    assert len(sublist) == 128, "Each output sub-list should have the specified inner shape."

  # Optionally check the type of each element
  for sublist in output:
    for element in sublist:
      assert isinstance(element, (float, int)), "Each element should be a float or int."


@pytest.mark.asyncio
async def test_dummy_inference_engine_latency():
  for i in range(10):
    # Test that latency is within expected range
    dummy_engine = DummyInferenceEngine(output_type="static", output_value=[1], latency_mean=0.1, latency_stddev=0.0)
    start_time = asyncio.get_event_loop().time()
    await dummy_engine.run_inference()
    elapsed_time = asyncio.get_event_loop().time() - start_time
    # Allow a small tolerance (e.g., 0.1 seconds) for system-level deviations
    tolerance = 0.1
    assert abs(elapsed_time - 0.1) <= tolerance, f"Expected latency to be around 0.1s, but got {elapsed_time}s."
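
These tests depend on pytest-asyncio (configured in pyproject.toml below) and can be run on their own with, for example, pytest exo/inference/test_dummy_inference_engine.py.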
2 changes: 1 addition & 1 deletion exo/main.py
@@ -41,7 +41,7 @@
parser.add_argument("--chatgpt-api-port", type=int, default=8000, help="ChatGPT API port")
parser.add_argument("--chatgpt-api-response-timeout", type=int, default=90, help="ChatGPT API response timeout in seconds")
parser.add_argument("--max-generate-tokens", type=int, default=10000, help="Max tokens to generate in each request")
parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use")
parser.add_argument("--inference-engine", type=str, default=None, help="Inference engine to use e.g. 'mlx', 'tinygrad', 'dummy')")
Contributor:

This doesn't actually resolve the inference engine to dummy; you need to change the code for that too. Please think through your code changes, as right now this doesn't fit together at all. I'd like you to run this end-to-end with the DummyInferenceEngine before you submit it.

parser.add_argument("--disable-tui", action=argparse.BooleanOptionalAction, help="Disable TUI")
parser.add_argument("--run-model", type=str, help="Specify a model to run directly")
parser.add_argument("--prompt", type=str, help="Prompt for the model when using --run-model", default="Who are you?")
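
As the review comment notes, adding 'dummy' to the help text does not by itself route the flag to the engine factory. One hedged way to wire it through, assuming main.py constructs its engine via get_inference_engine (the helper name and fallback default below are illustrative, not taken from the file):

# Illustrative sketch only: the helper name and the fallback default are assumptions.
from exo.inference.inference_engine import get_inference_engine

def build_inference_engine(args, shard_downloader):
  inference_engine_name = args.inference_engine or "tinygrad"  # assumed fallback when the flag is omitted
  return get_inference_engine(inference_engine_name, shard_downloader)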
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -4,4 +4,7 @@ max-line-length = 200

[tool.autopep8]
max_line_length = 200
indent_size = 2
indent_size = 2

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
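
The asyncio_default_fixture_loop_scope = "function" setting is a pytest-asyncio option; with recent pytest-asyncio releases it pins async fixtures to a per-test event loop and silences the deprecation warning emitted when the option is left unset.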