Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HF Token Fixes #47

Merged
merged 3 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llm/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,12 @@ def run_script(params: argparse.Namespace) -> bool:
check_if_path_exists(gen_model.output, "output", is_dir=True)
gen_model.set_model_files_and_mar(params)

if gen_model.is_custom and not gen_model.mar_utils.model_path:
print(
"## Error: For HuggingFace models and Custom models model path should be set"
)
sys.exit(1)

if gen_model.download_model:
gen_model = run_download(gen_model)

Expand Down
84 changes: 59 additions & 25 deletions llm/kubeflow_inference_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing import List, Dict
import tqdm
import utils.tsutils as ts
import utils.hf_utils as hf
from utils.system_utils import check_if_path_exists, get_all_files_in_directory
from kubernetes import client, config
from kserve import (
Expand Down Expand Up @@ -50,38 +49,74 @@ def get_inputs_from_folder(input_path: str) -> List:
)


def check_if_valid_version(model_info: Dict, mount_path: str) -> str:
def check_model_paths(
    model_info: Dict, mount_path: str, is_custom: bool = False
) -> None:
    """
    Verify that the Model Archive (MAR) file and the config.properties
    file are present under the expected model directory layout.

    Args:
        model_info (dict): A dictionary containing the following:
            model_name (str): The name of the model.
            repo_version (str): The commit ID of HuggingFace repo of the model.
        mount_path (str): The local file server mount path where the model
                          files are expected.
        is_custom (bool): Flag signifying whether the model is a custom
                          model or not.
    """
    path_parts = [mount_path, model_info["model_name"]]
    # HuggingFace models are stored one level deeper, under the repo version.
    if not is_custom:
        path_parts.append(model_info["repo_version"])
    model_path = os.path.join(*path_parts)

    mar_file = os.path.join(
        model_path, "model-store", f"{model_info['model_name']}.mar"
    )
    check_if_path_exists(mar_file, "Model Archive file", is_dir=False)

    config_file = os.path.join(model_path, "config", "config.properties")
    check_if_path_exists(config_file, "Config file", is_dir=False)


def set_repo_version(
    model_info: Dict, mount_path: str, config_repo_version: str
) -> None:
    """
    Check if the model files for a specific commit ID exist in the given
    directory and set repo_version to its complete form.

    Falls back to config_repo_version when no repo_version was supplied,
    then expands a short commit-ID prefix to the full directory name found
    on disk. Mutates model_info["repo_version"] in place.

    Args:
        model_info (dict): A dictionary containing the following:
            model_name (str): The name of the model.
            repo_version (str): The commit ID of HuggingFace repo of the model.
        mount_path (str): The local file server mount path where the model
                          files are expected.
        config_repo_version (str): The commit ID of the HuggingFace repo
                                   in model_config.json.
    Raises:
        sys.exit(1): If the model files do not exist, the
                     function will terminate the program with an exit code of 1.
    """
    # NOTE(review): assumes <mount_path>/<model_name> exists; the caller is
    # expected to validate the model directory beforehand -- confirm.
    model_path = os.path.join(mount_path, model_info["model_name"])

    if not model_info["repo_version"]:
        model_info["repo_version"] = config_repo_version

    # Compare the directory names with the given repo_version; a short
    # commit-ID prefix is expanded to the full directory name.
    for full_repo_version in os.listdir(model_path):
        if full_repo_version.startswith(model_info["repo_version"]) and os.path.isdir(
            os.path.join(model_path, full_repo_version)
        ):
            model_info["repo_version"] = full_repo_version
            break

    if not os.path.exists(os.path.join(model_path, model_info["repo_version"])):
        print(
            f"## ERROR: The {model_info['model_name']} model files for given commit ID "
            "are not downloaded"
        )
        sys.exit(1)


def create_pv(
Expand Down Expand Up @@ -363,7 +398,6 @@ def execute(params: argparse.Namespace) -> None:
model_info = {}
model_info["model_name"] = params.model_name
model_info["repo_version"] = params.repo_version
model_info["hf_token"] = params.hf_token

input_path = params.data
mount_path = params.mount_path
Expand All @@ -381,11 +415,17 @@ def execute(params: argparse.Namespace) -> None:

model_params = ts.get_model_params(model_info["model_name"])

check_if_path_exists(
os.path.join(mount_path, model_info["model_name"]),
"model directory",
is_dir=True,
)

if not model_params["is_custom"]:
if not model_info["repo_version"]:
model_info["repo_version"] = model_params["repo_version"]
model_info["repo_id"] = model_params["repo_id"]
model_info["repo_version"] = check_if_valid_version(model_info, mount_path)
set_repo_version(model_info, mount_path, model_params["repo_version"])

check_model_paths(model_info, mount_path, model_params["is_custom"])

if quantize_bits and int(quantize_bits) not in [4, 8]:
print(
Expand Down Expand Up @@ -445,12 +485,6 @@ def execute(params: argparse.Namespace) -> None:
parser.add_argument(
"--mount_path", type=str, help="local path to the nfs mount location"
)
parser.add_argument(
"--hf_token",
type=str,
default=None,
help="HuggingFace Hub token to download LLAMA(2) models",
)
parser.add_argument(
"--quantize_bits",
type=str,
Expand Down
10 changes: 2 additions & 8 deletions llm/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ MODEL_TIMEOUT_IN_SEC="1500"

function helpFunction()
{
echo "Usage: $0 -n <MODEL_NAME> -g <NUM_OF_GPUS> -f <NFS_ADDRESS_WITH_SHARE_PATH> -m <NFS_LOCAL_MOUNT_LOCATION> -e <KUBE_DEPLOYMENT_NAME> [OPTIONAL -d <INPUT_DATA_ABSOLUTE_PATH> -v <REPO_COMMIT_ID> -t <Your_HuggingFace_Hub_Token> -q <QUANTIZE_BITS>]"
echo "Usage: $0 -n <MODEL_NAME> -g <NUM_OF_GPUS> -f <NFS_ADDRESS_WITH_SHARE_PATH> -m <NFS_LOCAL_MOUNT_LOCATION> -e <KUBE_DEPLOYMENT_NAME> [OPTIONAL -d <INPUT_DATA_ABSOLUTE_PATH> -v <REPO_COMMIT_ID> -q <QUANTIZE_BITS>]"
echo -e "\t-f NFS server address with share path information"
echo -e "\t-m Absolute path to the NFS local mount location"
echo -e "\t-e Name of the deployment metadata"
Expand All @@ -17,7 +17,6 @@ function helpFunction()
echo -e "\t-d Absolute path to the inputs folder that contains data to be predicted."
echo -e "\t-g Number of gpus to be used to execute. Set 0 to use cpu"
echo -e "\t-v Commit id of the HuggingFace Repo."
echo -e "\t-t Your HuggingFace token (Required only for LLAMA2 model)."
echo -e "\t-q BitsAndBytes Quantization Precision (4 or 8)"
exit 1 # Exit script after printing help
}
Expand Down Expand Up @@ -64,10 +63,6 @@ function inference_exec_kubernetes()
exec_cmd+=" --repo_version $repo_version"
fi

if [ ! -z $hf_token ] ; then
exec_cmd+=" --hf_token $hf_token"
fi

if [ ! -z $quantize_bits ] ; then
exec_cmd+=" --quantize_bits $quantize_bits"
fi
Expand All @@ -77,7 +72,7 @@ function inference_exec_kubernetes()
}

# Entry Point
while getopts ":n:v:m:t:d:g:f:e:q:" opt;
while getopts ":n:v:m:d:g:f:e:q:" opt;
do
case "$opt" in
n ) model_name="$OPTARG" ;;
Expand All @@ -87,7 +82,6 @@ do
e ) deploy_name="$OPTARG" ;;
v ) repo_version="$OPTARG" ;;
m ) mount_path="$OPTARG" ;;
t ) hf_token="$OPTARG" ;;
q ) quantize_bits="$OPTARG" ;;
? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
esac
Expand Down
2 changes: 1 addition & 1 deletion llm/utils/generate_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class with values set based on the arguments.

self.repo_info.repo_id = params.repo_id
self.repo_info.repo_version = params.repo_version
self.repo_info.hf_token = params.hf_token
self.repo_info.hf_token = params.hf_token or os.environ.get("HF_TOKEN")

self.debug = params.debug

Expand Down
3 changes: 2 additions & 1 deletion llm/utils/hf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ def hf_token_check(repo_id: str, token: str) -> None:
print(
(
"HuggingFace Hub token is required for llama download. "
"Please specify it using --hf_token=<your token>. Refer "
"Please specify it using --hf_token=<your token> argument "
"or set it as an environment variable 'HF_TOKEN'. Refer "
"https://huggingface.co/docs/hub/security-tokens"
)
)
Expand Down