Modality #116

Open · wants to merge 16 commits into base: development
Changes from 15 commits
5 changes: 4 additions & 1 deletion aixplain/cli_groups.py
@@ -21,7 +21,7 @@
CLI Runner
"""
import click
from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model
from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status

@click.group('cli')
def cli():
@@ -52,7 +52,10 @@ def onboard():
list.add_command(list_host_machines)
list.add_command(list_functions)
get.add_command(asset_repo_login)
get.add_command(get_huggingface_model_status)
onboard.add_command(onboard_model)
onboard.add_command(deploy_huggingface_model)


def run_cli():
cli()
38 changes: 38 additions & 0 deletions aixplain/factories/cli/model_factory_cli.py
@@ -135,3 +135,41 @@ def onboard_model(model_id: Text, image_tag: Text, image_hash: Text,
ret_val = ModelFactory.onboard_model(model_id, image_tag, image_hash, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@click.command("hf-model")
@click.option("--name", help="User-defined name for Hugging Face model.")
@click.option("--hf-repo-id", help="Repository ID from Hugging Face in {supplier}/{model name} form.")
@click.option("--hf-token", help="Hugging Face token used to authenticate to this model.")
@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.")
def deploy_huggingface_model(name: Text, hf_repo_id: Text,
hf_token: Optional[Text] = None,
api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory.

Args:
name (Text): User-defined name for Hugging Face model.
hf_repo_id (Text): Hugging Face repository ID in {supplier}/{model name} form.
hf_token (Text, optional): Hugging Face token used to authenticate to this model. Defaults to None.
api_key (Text, optional): Team API key. Defaults to None.

Returns:
None
"""
ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, hf_token, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)

@click.command("hf-model-status")
@click.option("--model-id", help="Model ID from DEPLOY_HUGGINGFACE_MODEL.")
@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.")
def get_huggingface_model_status(model_id: Text, api_key: Optional[Text] = None) -> None:
"""CLI wrapper function for the GET_HUGGINGFACE_MODEL_STATUS function in ModelFactory.

Args:
model_id (Text): Model ID obtained from DEPLOY_HUGGINGFACE_MODEL.
api_key (Text, optional): Team API key. Defaults to None.

Returns:
None
"""
ret_val = ModelFactory.get_huggingface_model_status(model_id, api_key)
ret_val_yaml = yaml.dump(ret_val)
click.echo(ret_val_yaml)
6 changes: 0 additions & 6 deletions aixplain/factories/finetune_factory/__init__.py
@@ -28,7 +28,6 @@
from aixplain.modules.finetune import Finetune
from aixplain.modules.finetune.cost import FinetuneCost
from aixplain.modules.finetune.hyperparameters import Hyperparameters
from aixplain.modules.finetune.peft import Peft
from aixplain.modules.dataset import Dataset
from aixplain.modules.model import Model
from aixplain.utils import config
@@ -66,7 +65,6 @@ def create(
model: Model,
prompt_template: Optional[Text] = None,
hyperparameters: Optional[Hyperparameters] = None,
peft: Optional[Peft] = None,
train_percentage: Optional[float] = 100,
dev_percentage: Optional[float] = 0,
) -> Finetune:
@@ -78,7 +76,6 @@ def create(
model (Model): Model to be fine-tuned.
prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <<COLUMN_NAME>>. Defaults to None.
hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None.
peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None.
train_percentage (float, optional): Percentage of training samples. Defaults to 100.
dev_percentage (float, optional): Percentage of development samples. Defaults to 0.
Returns:
@@ -106,8 +103,6 @@ def create(
parameters["prompt"] = prompt_template
if hyperparameters is not None:
parameters["hyperparameters"] = hyperparameters.to_dict()
if peft is not None:
parameters["peft"] = peft.to_dict()
payload["parameters"] = parameters
logging.info(f"Start service for POST Create FineTune - {url} - {headers} - {json.dumps(payload)}")
r = _request_with_retry("post", url, headers=headers, json=payload)
@@ -123,7 +118,6 @@
dev_percentage=dev_percentage,
prompt_template=prompt_template,
hyperparameters=hyperparameters,
peft=peft,
)
except Exception:
error_message = f"Create FineTune: Error with payload {json.dumps(payload)}"
104 changes: 86 additions & 18 deletions aixplain/factories/model_factory.py
@@ -304,10 +304,10 @@ def list_functions(cls, verbose: Optional[bool] = False, api_key: Optional[Text]
def create_asset_repo(
cls,
name: Text,
hosting_machine: Text,
version: Text,
description: Text,
function: Text,
input_modality: Text,
output_modality: Text,
source_language: Text,
api_key: Optional[Text] = None,
) -> Dict:
@@ -316,12 +316,10 @@ def create_asset_repo(

Args:
name (Text): Model name
hosting_machine (Text): Hosting machine ID obtained via list_host_machines
always_on (bool): Whether the model should always be on
version (Text): Model version
description (Text): Model description
function (Text): Model function name obtained via LIST_HOST_MACHINES
is_async (bool): Whether the model is asynchronous or not (False in first release)
function (Text): Model function name obtained via LIST_FUNCTIONS
input_modality (Text): Modality of the inputs to this model.
output_modality (Text): Modality of the outputs to this model.
source_language (Text): 2-character 639-1 code or 3-character 639-3 language code.
api_key (Text, optional): Team API key. Defaults to None.

@@ -336,23 +334,28 @@
function_id = function_dict["id"]
if function_id is None:
raise Exception("Invalid function name")
create_url = urljoin(config.BACKEND_URL, f"sdk/models/register")
create_url = urljoin(config.BACKEND_URL, f"sdk/models/onboard")
logging.debug(f"URL: {create_url}")
if api_key:
headers = {"x-api-key": f"{api_key}", "Content-Type": "application/json"}
else:
headers = {"x-api-key": f"{config.TEAM_API_KEY}", "Content-Type": "application/json"}
always_on = False
is_async = False # Hard-coded to False for first release
payload = {
"name": name,
"hostingMachine": hosting_machine,
"alwaysOn": always_on,
"version": version,
"description": description,
"function": function_id,
"isAsync": is_async,
"sourceLanguage": source_language,
"model": {
"name": name,
"description": description,
"connectionType": [
"asynchronous"
],
"function": function_id,
"modalities": [
f"{input_modality}-{output_modality}"
],
"documentationUrl": "aiXplain",
"sourceLanguage": source_language
},
"source": "aixplain-ecr",
"onboardingParams": {}
}
payload = json.dumps(payload)
logging.debug(f"Body: {str(payload)}")
@@ -404,3 +407,68 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke
message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed."
logging.info(message)
return response

@classmethod
def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict:
"""Onboards and deploys a Hugging Face large language model.

Args:
name (Text): The user's name for the model.
hf_repo_id (Text): The Hugging Face repository ID for this model ({author}/{model name}).
hf_token (Text, optional): Hugging Face access token. Defaults to "".
api_key (Text, optional): Team API key. Defaults to None.
Returns:
Dict: Backend response
"""
supplier, model_name = hf_repo_id.split("/")
deploy_url = urljoin(config.BACKEND_URL, f"sdk/model-onboarding/onboard")
if api_key:
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
else:
headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
body = {
"model": {
"name": name,
"description": "A user-deployed Hugging Face model",
"connectionType": ["synchronous"],
"function": "text-generation",
"documentationUrl": "aiXplain",
"sourceLanguage": "en",
},
"source": "huggingface",
"onboardingParams": {
"hf_model_name": model_name,
"hf_supplier": supplier,
"hf_token": hf_token
}
}
response = _request_with_retry("post", deploy_url, headers=headers, json=body)
logging.debug(response.text)
response_dicts = json.loads(response.text)
return response_dicts

@classmethod
def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None):
"""Gets the on-boarding status of a Hugging Face model with ID MODEL_ID.

Args:
model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL
api_key (Text, optional): Team API key. Defaults to None.
Returns:
Dict: Backend response
"""
status_url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}")
if api_key:
headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"}
else:
headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"}
response = _request_with_retry("get", status_url, headers=headers)
logging.debug(response.text)
response_dicts = json.loads(response.text)
ret_dict = {
"status": response_dicts["status"],
"name": response_dicts["name"],
"id": response_dicts["id"],
"pricing": response_dicts["pricing"]
}
return ret_dict
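
For orientation, here is a minimal sketch of calling these two new `ModelFactory` methods from Python. The repository ID and token are placeholders, and the assumption that the backend response carries an `id` field is illustrative, not a confirmed API detail:

```python
from aixplain.factories.model_factory import ModelFactory

# Deploy a Hugging Face model (repo ID and token below are placeholders).
response = ModelFactory.deploy_huggingface_model(
    name="my-llm",
    hf_repo_id="meta-llama/Llama-2-7b-hf",
    hf_token="hf_...",  # omit or pass "" for public repositories
)
model_id = response["id"]  # assumes the backend response includes the new model's ID

# Check on-boarding status; the return dict holds status, name, id, and pricing.
status = ModelFactory.get_huggingface_model_status(model_id)
print(status["status"])
```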
7 changes: 0 additions & 7 deletions aixplain/modules/finetune/__init__.py
@@ -26,7 +26,6 @@
from urllib.parse import urljoin
from aixplain.modules.finetune.cost import FinetuneCost
from aixplain.modules.finetune.hyperparameters import Hyperparameters
from aixplain.modules.finetune.peft import Peft
from aixplain.factories.model_factory import ModelFactory
from aixplain.modules.asset import Asset
from aixplain.modules.dataset import Dataset
@@ -52,7 +51,6 @@ class Finetune(Asset):
dev_percentage (float): Percentage of development samples.
prompt_template (Text): Fine-tuning prompt_template.
hyperparameters (Hyperparameters): Hyperparameters for fine-tuning.
peft (Peft): PEFT (Parameter-Efficient Fine-Tuning) configuration.
additional_info (dict): Additional information to be saved with the FineTune.
backend_url (str): URL of the backend.
api_key (str): The TEAM API key used for authentication.
@@ -72,7 +70,6 @@ def __init__(
dev_percentage: Optional[float] = 0,
prompt_template: Optional[Text] = None,
hyperparameters: Optional[Hyperparameters] = None,
peft: Optional[Peft] = None,
**additional_info,
) -> None:
"""Create a FineTune with the necessary information.
Expand All @@ -90,7 +87,6 @@ def __init__(
dev_percentage (float, optional): Percentage of development samples. Defaults to 0.
prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <<COLUMN_NAME>>. Defaults to None.
hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None.
peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None.
**additional_info: Additional information to be saved with the FineTune.
"""
super().__init__(id, name, description, supplier, version)
Expand All @@ -101,7 +97,6 @@ def __init__(
self.dev_percentage = dev_percentage
self.prompt_template = prompt_template
self.hyperparameters = hyperparameters
self.peft = peft
self.additional_info = additional_info
self.backend_url = config.BACKEND_URL
self.api_key = config.TEAM_API_KEY
@@ -134,8 +129,6 @@ def start(self) -> Model:
parameters["prompt"] = self.prompt_template
if self.hyperparameters is not None:
parameters["hyperparameters"] = self.hyperparameters.to_dict()
if self.peft is not None:
parameters["peft"] = self.peft.to_dict()
payload["parameters"] = parameters
logging.info(f"Start service for POST Start FineTune - {url} - {headers} - {json.dumps(payload)}")
r = _request_with_retry("post", url, headers=headers, json=payload)
50 changes: 41 additions & 9 deletions aixplain/modules/finetune/hyperparameters.py
@@ -1,8 +1,10 @@
from dataclasses import dataclass
from dataclasses_json import dataclass_json
from enum import Enum
from typing import Text


class SchedulerType:
class SchedulerType(Text, Enum):
LINEAR = "linear"
COSINE = "cosine"
COSINE_WITH_RESTARTS = "cosine_with_restarts"
@@ -13,19 +15,49 @@ class SchedulerType:
REDUCE_ON_PLATEAU = "reduce_lr_on_plateau"


EPOCHS_MAX_VALUE = 4
MAX_SEQ_LENGTH_MAX_VALUE = 4096
GENERATION_MAX_LENGTH_MAX_VALUE = 225


@dataclass_json
@dataclass
class Hyperparameters(object):
epochs: int = 4
train_batch_size: int = 4
eval_batch_size: int = 4
learning_rate: float = 2e-5
epochs: int = 1
learning_rate: float = 1e-5
generation_max_length: int = 225
tokenizer_batch_size: int = 256
gradient_checkpointing: bool = False
gradient_accumulation_steps: int = 1
max_seq_length: int = 4096
warmup_ratio: float = 0.0
warmup_steps: int = 0
early_stopping_patience: int = 1
lr_scheduler_type: SchedulerType = SchedulerType.LINEAR

def __post_init__(self):
if not isinstance(self.epochs, int):
raise TypeError("epochs should be of type int")

if not isinstance(self.learning_rate, float):
raise TypeError("learning_rate should be of type float")

if not isinstance(self.generation_max_length, int):
raise TypeError("generation_max_length should be of type int")

if not isinstance(self.max_seq_length, int):
raise TypeError("max_seq_length should be of type int")

if not isinstance(self.warmup_ratio, float):
raise TypeError("warmup_ratio should be of type float")

if not isinstance(self.warmup_steps, int):
raise TypeError("warmup_steps should be of type int")

if not isinstance(self.lr_scheduler_type, SchedulerType):
raise TypeError("lr_scheduler_type should be of type SchedulerType")

if self.epochs > EPOCHS_MAX_VALUE:
raise ValueError(f"epochs must be at most {EPOCHS_MAX_VALUE}")

if self.max_seq_length > MAX_SEQ_LENGTH_MAX_VALUE:
raise ValueError(f"max_seq_length must be at most {MAX_SEQ_LENGTH_MAX_VALUE}")

if self.generation_max_length > GENERATION_MAX_LENGTH_MAX_VALUE:
raise ValueError(f"generation_max_length must be at most {GENERATION_MAX_LENGTH_MAX_VALUE}")
10 changes: 0 additions & 10 deletions aixplain/modules/finetune/peft.py

This file was deleted.

11 changes: 11 additions & 0 deletions docs/user/user_doc.md
@@ -57,6 +57,17 @@ poll_url = start_response["url"]
## Poll to see current job status
poll_response = model.poll(poll_url)
```
### Deploying Hugging Face Large Language Models
You can deploy your very own Hugging Face large language models on our platform using the aiXplain SDK:
```console
$ aixplain onboard hf-model --name <what you'd like to name your model> --hf-repo-id <Hugging Face repository ID ({supplier}/{name})> --hf-token <Hugging Face token> [--api-key <TEAM_API_KEY>]
```
This command will return your model's ID. The on-boarding process will take 5 to 15 minutes, during which you can check the on-boarding status by running the following:
```console
$ aixplain get hf-model-status --model-id <model ID> [--api-key <TEAM_API_KEY>]
```

Once the on-boarding process has completed, you can use this newly deployed large language model just like any other model on our platform. Note that our platform currently supports only language models up to 7 billion parameters in size (~30 GB); attempts to deploy larger models will result in an error message.
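For example, a minimal sketch of running the deployed model from Python, assuming the usual `ModelFactory.get` and `Model.run` interface (the model ID is a placeholder):
```python
from aixplain.factories.model_factory import ModelFactory

model = ModelFactory.get("<model ID>")  # ID returned by the hf-model command
result = model.run("Write a one-sentence summary of the aiXplain SDK.")
print(result)
```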

### Uploading Models
In addition to exploring and running models, the aiXplain SDK allows you to upload your own models to the aiXplain platform. This requires a working model image in line with the template specified [here](https://github.com/aixplain/model-interfaces/blob/main/docs/user/model_setup.md); [these](https://github.com/aixplain/model-interfaces/tree/main) are the interfaces you will be working with. You will also need an aiXplain account and a TEAM_API_KEY, which should be set either as an environment variable or passed into each of the following functions.