Add custom API base argument #184

Merged · 2 commits · Sep 19, 2023
zeno_build/models/lm_config.py (2 additions, 0 deletions)

@@ -18,6 +18,7 @@ class LMConfig:
             Hugging Face transformers.
         tokenizer_cls: The Python class corresponding to the tokenizer, mostly
             for Hugging Face transformers.
+        api_base: Custom API base URL for the model provider.
         name_replacements: A dictionary mapping from the names of the roles
             (e.g., "system", "assistant", "user") to the names of the
             roles in the model.

@@ -29,6 +30,7 @@ class LMConfig:
     model: str
     model_cls: type | None = None
     tokenizer_cls: type | None = None
+    api_base: str | None = None
     name_replacements: dict[str, str] = dataclasses.field(
         default_factory=lambda: dict(
             {
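To illustrate the new field, here is a minimal sketch of a config that routes requests through a self-hosted, OpenAI-compatible endpoint. The provider value and the URL are illustrative assumptions, not part of this diff:

from zeno_build.models.lm_config import LMConfig

# Sketch: point this model at a self-hosted, OpenAI-compatible server
# instead of the provider's default endpoint.
config = LMConfig(
    provider="openai",  # assumed existing field on LMConfig
    model="gpt-3.5-turbo",
    api_base="http://localhost:8000/v1",  # hypothetical endpoint URL
)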
zeno_build/models/providers/litellm_utils.py (4 additions, 0 deletions)

@@ -19,6 +19,7 @@ async def _throttled_litellm_completion_acreate(
     top_p: float,
     n: int,
     limiter: aiolimiter.AsyncLimiter,
+    api_base: str | None = None,
 ) -> dict[str, Any]:
     try:
         from litellm import acompletion, exceptions

@@ -44,6 +45,7 @@
             max_tokens=max_tokens,
             top_p=top_p,
             n=n,
+            api_base=api_base,
         )
     except tuple(ERROR_ERRORS_TO_MESSAGES.keys()) as e:
         if isinstance(
@@ -90,6 +92,7 @@ async def generate_from_litellm_completion(
         top_p: Top p to use.
         context_length: Length of context to use.
         requests_per_minute: Number of requests per minute to allow.
+        api_base: Custom API base URL to send requests to.

     Returns:
         List of generated responses.

@@ -106,6 +109,7 @@
             top_p=top_p,
             n=n,
             limiter=limiter,
+            api_base=model_config.api_base,
         )
         for full_context in full_contexts
     ]
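Downstream, the argument lands on litellm's acompletion call, where api_base overrides the default provider endpoint on a per-request basis. A minimal synchronous sketch of the same effect, with placeholder model name and URL:

import litellm

# Hypothetical values: any OpenAI-compatible server exposes the same interface.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
    api_base="http://localhost:8000/v1",  # overrides the default endpoint
)
print(response.choices[0].message.content)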