From c11c4028596ec3bb4eac521f6146e1baf51ed2d1 Mon Sep 17 00:00:00 2001 From: xainaz Date: Thu, 10 Oct 2024 14:16:22 +0300 Subject: [PATCH] Improve error log for: Benchmark, Corpus, Dataset, Model, Pipeline --- aixplain/factories/benchmark_factory.py | 49 ++++-- aixplain/factories/corpus_factory.py | 139 +++++++++------- aixplain/factories/dataset_factory.py | 154 ++++++++++-------- aixplain/factories/model_factory.py | 33 ++-- .../factories/pipeline_factory/__init__.py | 64 +++++--- 5 files changed, 261 insertions(+), 178 deletions(-) diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 57d4a833..ea983075 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -22,10 +22,8 @@ """ import logging -from typing import Dict, List, Optional, Text +from typing import Dict, List, Text import json -import pandas as pd -from pathlib import Path from aixplain.enums.supplier import Supplier from aixplain.modules import Dataset, Metric, Model from aixplain.modules.benchmark_job import BenchmarkJob @@ -34,9 +32,8 @@ from aixplain.factories.dataset_factory import DatasetFactory from aixplain.factories.model_factory import ModelFactory from aixplain.utils import config -from aixplain.utils.file_utils import _request_with_retry, save_file +from aixplain.utils.file_utils import _request_with_retry from urllib.parse import urljoin -from warnings import warn class BenchmarkFactory: @@ -117,7 +114,14 @@ def get(cls, benchmark_id: str) -> Benchmark: logging.info(f"Start service for GET Benchmark - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - benchmark = cls._create_benchmark_from_response(resp) + if 200 <= r.status_code < 300: + benchmark = cls._create_benchmark_from_response(resp) + logging.info(f"Benchmark {benchmark_id} retrieved successfully.") + return benchmark + else: + error_message = f"Benchmark GET Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) except Exception as e: status_code = 400 if resp is not None and "statusCode" in resp: @@ -125,10 +129,9 @@ def get(cls, benchmark_id: str) -> Benchmark: message = resp["message"] message = f"Benchmark Creation: Status {status_code} - {message}" else: - message = f"Benchmark Creation: Unspecified Error" + message = "Benchmark Creation: Unspecified Error" logging.error(f"Benchmark Creation Failed: {e}") raise Exception(f"Status {status_code}: {message}") - return benchmark @classmethod def get_job(cls, job_id: Text) -> BenchmarkJob: @@ -189,7 +192,7 @@ def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], """ payload = {} try: - url = urljoin(cls.backend_url, f"sdk/benchmarks") + url = urljoin(cls.backend_url, "sdk/benchmarks") headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} payload = { "name": name, @@ -204,8 +207,13 @@ def create(cls, name: str, dataset_list: List[Dataset], model_list: List[Model], payload = json.dumps(clean_payload) r = _request_with_retry("post", url, headers=headers, data=payload) resp = r.json() - logging.info(f"Creating Benchmark Job: Status for {name}: {resp}") - return cls.get(resp["id"]) + if 200 <= r.status_code < 300: + logging.info(f"Benchmark {name} created successfully: {resp}") + return cls.get(resp["id"]) + else: + error_message = f"Benchmark Creation Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) except Exception as e: error_message = f"Creating Benchmark Job: Error in Creating Benchmark with payload {payload} : {e}" logging.error(error_message, exc_info=True) @@ -223,7 +231,7 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: List[str]: List of supported normalization options """ try: - url = urljoin(cls.backend_url, f"sdk/benchmarks/normalization-options") + url = urljoin(cls.backend_url, "sdk/benchmarks/normalization-options") if cls.aixplain_key != "": headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} else: @@ -231,9 +239,17 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: payload = json.dumps({"metricId": metric.id, "modelIds": [model.id]}) r = _request_with_retry("post", url, headers=headers, data=payload) resp = r.json() - logging.info(f"Listing Normalization Options: Status of listing options: {resp}") - normalization_options = [item["value"] for item in resp] - return normalization_options + + if 200 <= r.status_code < 300: + logging.info(f"Listing Normalization Options: Status of listing options: {resp}") + normalization_options = [item["value"] for item in resp] + return normalization_options + else: + error_message = ( + f"Error listing normalization options: Status {r.status_code} - {resp.get('message', 'No message')}" + ) + logging.error(error_message) + return [] except Exception as e: error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}" logging.error(error_message, exc_info=True) @@ -255,7 +271,8 @@ def __get_model_name(model_id): if model.version is not None: name = f"{name}({model.version})" return name + benchmarkJob = cls.get_job(job_id) scores_df = benchmarkJob.get_scores() scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x)) - return scores_df \ No newline at end of file + return scores_df diff --git a/aixplain/factories/corpus_factory.py b/aixplain/factories/corpus_factory.py index 1f81ac4d..59333ffd 100644 --- a/aixplain/factories/corpus_factory.py +++ b/aixplain/factories/corpus_factory.py @@ -21,7 +21,6 @@ Corpus Factory Class """ -import aixplain.utils.config as config import aixplain.processes.data_onboarding.onboard_functions as onboard_functions import json import logging @@ -86,12 +85,12 @@ def __from_response(cls, response: Dict) -> Corpus: try: license = License(response["license"]["typeId"]) - except: + except Exception: license = None try: length = int(response["segmentsCount"]) - except: + except Exception: length = None corpus = Corpus( @@ -116,17 +115,26 @@ def get(cls, corpus_id: Text) -> Corpus: Returns: Corpus: Created 'Corpus' object """ - url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start service for GET Corpus - {url} - {headers}") - r = _request_with_retry("get", url, headers=headers) - resp = r.json() - if "statusCode" in resp and resp["statusCode"] == 404: - raise Exception(f"Corpus GET Error: Dataset {corpus_id} not found.") - return cls.__from_response(resp) + try: + url = urljoin(cls.backend_url, f"sdk/corpora/{corpus_id}/overview") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for GET Corpus - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + if 200 <= r.status_code < 300: + logging.info(f"Corpus {corpus_id} retrieved successfully.") + return cls.__from_response(resp) + else: + error_message = f"Corpus GET Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) + except Exception as e: + error_message = f"Error retrieving Corpus {corpus_id}: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) @classmethod def create_asset_from_id(cls, corpus_id: Text) -> Corpus: @@ -162,52 +170,63 @@ def list( Returns: Dict: list of corpora in agreement with the filters, page number, page total and total elements """ - url = urljoin(cls.backend_url, "sdk/corpora/paginate") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - - assert 0 < page_size <= 100, f"Corpus List Error: Page size must be greater than 0 and not exceed 100." - payload = {"pageSize": page_size, "pageNumber": page_number, "sort": [{"field": "createdAt", "dir": -1}]} - - if query is not None: - payload["q"] = str(query) - - if function is not None: - payload["function"] = function.value - - if license is not None: - payload["license"] = license.value - - if data_type is not None: - payload["dataType"] = data_type.value + try: + url = urljoin(cls.backend_url, "sdk/corpora/paginate") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + assert 0 < page_size <= 100, "Corpus List Error: Page size must be greater than 0 and not exceed 100." + payload = {"pageSize": page_size, "pageNumber": page_number, "sort": [{"field": "createdAt", "dir": -1}]} + + if query is not None: + payload["q"] = str(query) + + if function is not None: + payload["function"] = function.value + + if license is not None: + payload["license"] = license.value + + if data_type is not None: + payload["dataType"] = data_type.value + + if language is not None: + if isinstance(language, Language): + language = [language] + payload["language"] = [lng.value["language"] for lng in language] + + logging.info(f"Start service for POST List Corpus - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + if 200 <= r.status_code < 300: + corpora, page_total, total = [], 0, 0 + if "results" in resp: + results = resp["results"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Corpus - Page Total: {page_total} / Total: {total}") + for corpus in results: + corpus_ = cls.__from_response(corpus) + # add languages + languages = [] + for lng in corpus["languages"]: + if "dialect" not in lng: + lng["dialect"] = "" + languages.append(Language(lng)) + corpus_.kwargs["languages"] = languages + corpora.append(corpus_) + return {"results": corpora, "page_total": page_total, "page_number": page_number, "total": total} + else: + error_message = f"Corpus List Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) - if language is not None: - if isinstance(language, Language): - language = [language] - payload["language"] = [lng.value["language"] for lng in language] - - logging.info(f"Start service for POST List Corpus - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() - corpora, page_total, total = [], 0, 0 - if "results" in resp: - results = resp["results"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Corpus - Page Total: {page_total} / Total: {total}") - for corpus in results: - corpus_ = cls.__from_response(corpus) - # add languages - languages = [] - for lng in corpus["languages"]: - if "dialect" not in lng: - lng["dialect"] = "" - languages.append(Language(lng)) - corpus_.kwargs["languages"] = languages - corpora.append(corpus_) - return {"results": corpora, "page_total": page_total, "page_number": page_number, "total": total} + except Exception as e: + error_message = f"Error listing corpora: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) @classmethod def get_assets_from_page( @@ -245,7 +264,7 @@ def create( functions: List[Function] = [], privacy: Privacy = Privacy.PRIVATE, error_handler: ErrorHandler = ErrorHandler.SKIP, - api_key: Optional[Text] = None + api_key: Optional[Text] = None, ) -> Dict: """Asynchronous call to Upload a corpus to the user's dashboard. diff --git a/aixplain/factories/dataset_factory.py b/aixplain/factories/dataset_factory.py index 5e69d572..4b486cf0 100644 --- a/aixplain/factories/dataset_factory.py +++ b/aixplain/factories/dataset_factory.py @@ -21,7 +21,6 @@ Dataset Factory Class """ -import aixplain.utils.config as config import aixplain.processes.data_onboarding.onboard_functions as onboard_functions import json import os @@ -49,7 +48,6 @@ from typing import Any, Dict, List, Optional, Text, Union from urllib.parse import urljoin from uuid import uuid4 -from warnings import warn class DatasetFactory(AssetFactory): @@ -122,7 +120,7 @@ def __from_response(cls, response: Dict) -> Dataset: target_data_list = [data[data_id] for data_id in out["dataIds"]] data_name = target_data_list[0].name target_data[data_name] = target_data_list - except: + except Exception: pass # process function @@ -164,17 +162,26 @@ def get(cls, dataset_id: Text) -> Dataset: Returns: Dataset: Created 'Dataset' object """ - url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - logging.info(f"Start service for GET Dataset - {url} - {headers}") - r = _request_with_retry("get", url, headers=headers) - resp = r.json() - if "statusCode" in resp and resp["statusCode"] == 404: - raise Exception(f"Dataset GET Error: Dataset {dataset_id} not found.") - return cls.__from_response(resp) + try: + url = urljoin(cls.backend_url, f"sdk/datasets/{dataset_id}/overview") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + logging.info(f"Start service for GET Dataset - {url} - {headers}") + r = _request_with_retry("get", url, headers=headers) + resp = r.json() + if 200 <= r.status_code < 300: + logging.info(f"Dataset {dataset_id} retrieved successfully.") + return cls.__from_response(resp) + else: + error_message = f"Dataset GET Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) + except Exception as e: + error_message = f"Error retrieving Dataset {dataset_id}: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) @classmethod def list( @@ -205,59 +212,70 @@ def list( Returns: Dict: list of datasets in agreement with the filters, page number, page total and total elements """ - url = urljoin(cls.backend_url, "sdk/datasets/paginate") - if cls.aixplain_key != "": - headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} - else: - headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} - - assert 0 < page_size <= 100, f"Dataset List Error: Page size must be greater than 0 and not exceed 100." - payload = { - "pageSize": page_size, - "pageNumber": page_number, - "sort": [{"field": "createdAt", "dir": -1}], - "input": {}, - "output": {}, - } - - if query is not None: - payload["q"] = str(query) - - if function is not None: - payload["function"] = function.value - - if license is not None: - payload["license"] = license.value - - if data_type is not None: - payload["dataType"] = data_type.value - - if is_referenceless is not None: - payload["isReferenceless"] = is_referenceless - - if source_languages is not None: - if isinstance(source_languages, Language): - source_languages = [source_languages] - payload["input"]["languages"] = [lng.value["language"] for lng in source_languages] - - if target_languages is not None: - if isinstance(target_languages, Language): - target_languages = [target_languages] - payload["output"]["languages"] = [lng.value["language"] for lng in target_languages] - - logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() - - datasets, page_total, total = [], 0, 0 - if "results" in resp: - results = resp["results"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Dataset - Page Total: {page_total} / Total: {total}") - for dataset in results: - datasets.append(cls.__from_response(dataset)) - return {"results": datasets, "page_total": page_total, "page_number": page_number, "total": total} + try: + url = urljoin(cls.backend_url, "sdk/datasets/paginate") + if cls.aixplain_key != "": + headers = {"x-aixplain-key": f"{cls.aixplain_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + + assert 0 < page_size <= 100, "Dataset List Error: Page size must be greater than 0 and not exceed 100." + payload = { + "pageSize": page_size, + "pageNumber": page_number, + "sort": [{"field": "createdAt", "dir": -1}], + "input": {}, + "output": {}, + } + + if query is not None: + payload["q"] = str(query) + + if function is not None: + payload["function"] = function.value + + if license is not None: + payload["license"] = license.value + + if data_type is not None: + payload["dataType"] = data_type.value + + if is_referenceless is not None: + payload["isReferenceless"] = is_referenceless + + if source_languages is not None: + if isinstance(source_languages, Language): + source_languages = [source_languages] + payload["input"]["languages"] = [lng.value["language"] for lng in source_languages] + + if target_languages is not None: + if isinstance(target_languages, Language): + target_languages = [target_languages] + payload["output"]["languages"] = [lng.value["language"] for lng in target_languages] + + logging.info(f"Start service for POST List Dataset - {url} - {headers} - {json.dumps(payload)}") + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + + if 200 <= r.status_code < 300: + datasets, page_total, total = [], 0, 0 + if "results" in resp: + results = resp["results"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Dataset - Page Total: {page_total} / Total: {total}") + for dataset in results: + datasets.append(cls.__from_response(dataset)) + return {"results": datasets, "page_total": page_total, "page_number": page_number, "total": total} + else: + error_message = f"Dataset List Error: Status {r.status_code} - {resp.get('message', 'No message')}" + logging.error(error_message) + raise Exception(error_message) + + except Exception as e: + error_message = f"Error listing datasets: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) @classmethod def create( @@ -282,7 +300,7 @@ def create( error_handler: ErrorHandler = ErrorHandler.SKIP, s3_link: Optional[Text] = None, aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}, - api_key: Optional[Text] = None + api_key: Optional[Text] = None, ) -> Dict: """Dataset Onboard diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index d82bdd63..b7b7ee42 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -113,13 +113,19 @@ def get(cls, model_id: Text, api_key: Optional[Text] = None) -> Model: logging.info(f"Start service for GET Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - # set api key - resp["api_key"] = config.TEAM_API_KEY - if api_key is not None: - resp["api_key"] = api_key - model = cls._create_model_from_response(resp) - logging.info(f"Model Creation: Model {model_id} instantiated.") - return model + if 200 <= r.status_code < 300: + resp["api_key"] = config.TEAM_API_KEY + if api_key is not None: + resp["api_key"] = api_key + model = cls._create_model_from_response(resp) + logging.info(f"Model Creation: Model {model_id} instantiated.") + return model + else: + error_message = ( + f"Model GET Error: Failed to retrieve model {model_id}. Status Code: {r.status_code}. Error: {resp}" + ) + logging.error(error_message) + raise Exception(error_message) except Exception: if resp is not None and "statusCode" in resp: status_code = resp["statusCode"] @@ -198,10 +204,15 @@ def _get_assets_from_page( logging.info(f"Start service for POST Models Paginate - {url} - {headers} - {json.dumps(filter_params)}") r = _request_with_retry("post", url, headers=headers, json=filter_params) resp = r.json() - logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") - all_models = resp["items"] - model_list = [cls._create_model_from_response(model_info_json) for model_info_json in all_models] - return model_list, resp["total"] + if 200 <= r.status_code < 300: + logging.info(f"Listing Models: Status of getting Models on Page {page_number}: {r.status_code}") + all_models = resp["items"] + model_list = [cls._create_model_from_response(model_info_json) for model_info_json in all_models] + return model_list, resp["total"] + else: + error_message = f"Listing Models Error: Failed to retrieve models. Status Code: {r.status_code}. Error: {resp}" + logging.error(error_message) + raise Exception(error_message) except Exception as e: error_message = f"Listing Models: Error in getting Models on Page {page_number}: {e}" logging.error(error_message, exc_info=True) diff --git a/aixplain/factories/pipeline_factory/__init__.py b/aixplain/factories/pipeline_factory/__init__.py index cb4336fe..ba8ccad9 100644 --- a/aixplain/factories/pipeline_factory/__init__.py +++ b/aixplain/factories/pipeline_factory/__init__.py @@ -78,12 +78,19 @@ def get(cls, pipeline_id: Text, api_key: Optional[Text] = None) -> Pipeline: logging.info(f"Start service for GET Pipeline - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - # set api key - resp["api_key"] = config.TEAM_API_KEY - if api_key is not None: - resp["api_key"] = api_key - pipeline = build_from_response(resp, load_architecture=True) - return pipeline + + if 200 <= r.status_code < 300: + resp["api_key"] = config.TEAM_API_KEY + if api_key is not None: + resp["api_key"] = api_key + pipeline = build_from_response(resp, load_architecture=True) + logging.info(f"Pipeline {pipeline_id} retrieved successfully.") + return pipeline + + else: + error_message = f"Pipeline GET Error: Failed to retrieve pipeline {pipeline_id}. Status Code: {r.status_code}. Error: {resp}" + logging.error(error_message) + raise Exception(error_message) except Exception as e: logging.exception(e) status_code = 400 @@ -220,23 +227,34 @@ def list( payload["inputDataTypes"] = [data_type.value for data_type in output_data_types] logging.info(f"Start service for POST List Pipeline - {url} - {headers} - {json.dumps(payload)}") - r = _request_with_retry("post", url, headers=headers, json=payload) - resp = r.json() - - pipelines, page_total, total = [], 0, 0 - if "items" in resp: - results = resp["items"] - page_total = resp["pageTotal"] - total = resp["total"] - logging.info(f"Response for POST List Pipeline - Page Total: {page_total} / Total: {total}") - for pipeline in results: - pipelines.append(build_from_response(pipeline)) - return { - "results": pipelines, - "page_total": page_total, - "page_number": page_number, - "total": total, - } + try: + r = _request_with_retry("post", url, headers=headers, json=payload) + resp = r.json() + if 200 <= r.status_code < 300: + pipelines, page_total, total = [], 0, 0 + if "items" in resp: + results = resp["items"] + page_total = resp["pageTotal"] + total = resp["total"] + logging.info(f"Response for POST List Pipeline - Page Total: {page_total} / Total: {total}") + for pipeline in results: + pipelines.append(build_from_response(pipeline)) + return { + "results": pipelines, + "page_total": page_total, + "page_number": page_number, + "total": total, + } + else: + error_message = ( + f"Pipeline List Error: Failed to retrieve pipelines. Status Code: {r.status_code}. Error: {resp}" + ) + logging.error(error_message) + raise Exception(error_message) + except Exception as e: + error_message = f"Pipeline List Error: {str(e)}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) @classmethod def init(cls, name: Text, api_key: Optional[Text] = None) -> Pipeline: