diff --git a/examples/advanced/time_series_forecasting/exogenous.py b/examples/advanced/time_series_forecasting/exogenous.py index b956e6fba6..fa22be9fc8 100644 --- a/examples/advanced/time_series_forecasting/exogenous.py +++ b/examples/advanced/time_series_forecasting/exogenous.py @@ -76,8 +76,10 @@ def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, vis task_params=task.task_params, timeout=10, initial_assumption=pipeline, + metric=['mae'], available_operations=['lagged', 'ridge', 'exog_ts', 'arima', 'knnreg', 'rfr', 'svr'], max_pipeline_fit_time=2, + with_tuning=False, n_jobs=-1) fedot.fit(train_dataset) diff --git a/examples/simple/time_series_forecasting/api_forecasting.py b/examples/simple/time_series_forecasting/api_forecasting.py index a026e72dde..eb1ca75de8 100644 --- a/examples/simple/time_series_forecasting/api_forecasting.py +++ b/examples/simple/time_series_forecasting/api_forecasting.py @@ -7,7 +7,7 @@ from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.repository.dataset_types import DataTypesEnum -from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum +from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams logging.raiseExceptions = False @@ -33,7 +33,7 @@ def get_ts_data(dataset='australia', horizon: int = 30, validation_blocks=None): def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout: float = None, - visualization=False, with_tuning=True, validation_blocks=2): + visualization=False, with_tuning=False, validation_blocks=2): train_data, test_data = get_ts_data(dataset, horizon, validation_blocks) # init model for the time series forecasting model = Fedot(problem='ts_forecasting', @@ -41,7 +41,7 @@ def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout: TsForecastingParams(forecast_length=horizon)).task_params, timeout=timeout, n_jobs=-1, - metric='mase', + metric=['mase', 'mae', 'mape', 'rmse'], with_tuning=with_tuning, cv_folds=2, preset='fast_train') diff --git a/fedot/api/api_utils/api_params_repository.py b/fedot/api/api_utils/api_params_repository.py index ac2c3a7a64..d71e300685 100644 --- a/fedot/api/api_utils/api_params_repository.py +++ b/fedot/api/api_utils/api_params_repository.py @@ -1,12 +1,10 @@ import datetime -from functools import partial from typing import Sequence from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum -from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation, \ - add_resample_mutation +from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, add_resample_mutation from fedot.core.constants import AUTO_PRESET_NAME from fedot.core.repository.tasks import TaskTypesEnum from fedot.core.utils import default_fedot_data_dir @@ -130,8 +128,12 @@ def _get_default_mutations(task_type: TaskTypesEnum, params) -> Sequence[Mutatio MutationTypesEnum.single_edge] # TODO remove workaround after boosting mutation fix + # Boosting mutation does not work due to problem with __eq__ with it copy. + # ``partial`` refactor to ``def`` does not work + # Also boosting mutation does not work by it own. if task_type == TaskTypesEnum.ts_forecasting: - mutations.append(partial(boosting_mutation, params=params)) + # mutations.append(partial(boosting_mutation, params=params)) + pass else: mutations.append(add_resample_mutation) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 709f9dfd91..fc071d5a99 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -4,7 +4,7 @@ import os from copy import copy, deepcopy from dataclasses import dataclass, field -from typing import List, Optional, Tuple, Union, Iterable, Any +from typing import Any, Iterable, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -472,6 +472,13 @@ class InputData(Data): """Data class for input data for the nodes """ + def __post_init__(self): + if self.numerical_idx is None: + if self.features is not None and isinstance(self.features, np.ndarray) and self.features.ndim > 1: + self.numerical_idx = list(range(self.features.shape[1])) + else: + self.numerical_idx = [0] + @property def num_classes(self) -> Optional[int]: """Returns number of classes that are present in the target. @@ -600,7 +607,7 @@ def get_not_encoded_data(self): if self.numerical_idx: num_features = self.features[:, self.numerical_idx] - if self.features_names: + if self.features_names is not None and np.size(self.features_names): num_features_names = self.features_names[self.numerical_idx] else: num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)]) @@ -609,7 +616,7 @@ def get_not_encoded_data(self): if self.categorical_idx: cat_features = self.categorical_features - if self.features_names: + if self.features_names is not None and np.size(self.features_names): cat_features_names = self.features_names[self.categorical_idx] else: cat_features_names = np.array([f'cat_feature_{i}' for i in range(1, cat_features.shape[1] + 1)]) @@ -618,8 +625,8 @@ def get_not_encoded_data(self): new_features = np.hstack((num_features, cat_features)) new_features_names = np.hstack((num_features_names, cat_features_names)) new_features_idx = np.array(range(new_features.shape[1])) - new_num_idx = new_features_idx[:new_features.shape[1]] - new_cat_idx = new_features_idx[cat_features.shape[1]:] + new_num_idx = new_features_idx[:num_features.shape[1]] + new_cat_idx = new_features_idx[-cat_features.shape[1]:] elif cat_features is not None: new_features = cat_features @@ -630,6 +637,8 @@ def get_not_encoded_data(self): new_features = num_features new_features_names = num_features_names new_num_idx = np.array(range(new_features.shape[1])) + else: + raise ValueError('There is no features') return InputData(idx=self.idx, features=new_features, features_names=new_features_names, numerical_idx=new_num_idx, categorical_idx=new_cat_idx, diff --git a/fedot/core/operations/evaluation/boostings.py b/fedot/core/operations/evaluation/boostings.py index 396b36525d..d74a63a27b 100644 --- a/fedot/core/operations/evaluation/boostings.py +++ b/fedot/core/operations/evaluation/boostings.py @@ -5,13 +5,13 @@ from fedot.core.operations.evaluation.operation_implementations.models.boostings_implementations import \ FedotCatBoostClassificationImplementation, FedotCatBoostRegressionImplementation from fedot.core.operations.operation_parameters import OperationParameters +from fedot.core.repository.tasks import TaskTypesEnum from fedot.utilities.random import ImplementationRandomStateHandler class BoostingStrategy(EvaluationStrategy): __operations_by_types = { 'catboost': FedotCatBoostClassificationImplementation, - 'catboostreg': FedotCatBoostRegressionImplementation } @@ -43,12 +43,16 @@ def __init__(self, operation_type: str, params: Optional[OperationParameters] = super().__init__(operation_type, params) def predict(self, trained_operation, predict_data: InputData) -> OutputData: - if self.output_mode in ['default', 'labels']: + n_classes = len(trained_operation.classes_) + if self.output_mode in ['labels']: prediction = trained_operation.predict(predict_data) - - elif self.output_mode in ['probs', 'full_probs'] and predict_data.task == 'classification': + elif (self.output_mode in ['probs', 'full_probs', 'default'] and + predict_data.task.task_type is TaskTypesEnum.classification): prediction = trained_operation.predict_proba(predict_data) - + if n_classes < 2: + raise ValueError('Data set contain only 1 target class. Please reformat your data.') + elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1: + prediction = prediction[:, 1] else: raise ValueError(f'Output mode {self.output_mode} is not supported') diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py index 6182db000c..6910bf0a30 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/ts_transformations.py @@ -345,7 +345,7 @@ class LaggedTransformationImplementation(LaggedImplementation): def __init__(self, params: Optional[OperationParameters]): super().__init__(params) - self.window_size_minimum = 2 + self.window_size_minimum = 1 class TsSmoothingImplementation(DataOperationImplementation): diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index 96a7a4bc39..f092fe671b 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -88,7 +88,7 @@ def fit(self, input_data: InputData): self.ids_to_process = ids_to_process self.bool_ids = bool_ids if len(ids_to_process) > 0: - features_to_process = np.array(features[:, ids_to_process]) + features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features self.operation.fit(features_to_process) return self.operation @@ -119,8 +119,7 @@ def _make_new_table(self, features): :param features: tabular data for processing :return transformed_features: transformed features table """ - - features_to_process = np.array(features[:, self.ids_to_process]) + features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy() transformed_part = self.operation.transform(features_to_process) # If there are no binary features in the dataset @@ -161,7 +160,7 @@ def _reasonability_check(features): # For every column in table make check for column_id in range(0, columns_amount): - column = features[:, column_id] if columns_amount > 1 else features + column = features[:, column_id] if columns_amount > 1 else features.copy() if len(np.unique(column)) > 2: non_bool_ids.append(column_id) else: diff --git a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py index 7e4032b9c7..1b2d7d1602 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py @@ -1,8 +1,9 @@ import os from typing import Optional +import numpy as np import pandas as pd -from catboost import Pool, CatBoostClassifier, CatBoostRegressor +from catboost import CatBoostClassifier, CatBoostRegressor, Pool from matplotlib import pyplot as plt from fedot.core.data.data import InputData @@ -64,7 +65,7 @@ def convert_to_pool(data: Optional[InputData]): data=data.features, label=data.target, cat_features=data.categorical_idx, - feature_names=data.features_names.tolist() + feature_names=data.features_names.tolist() if data.features_names is not None else None ) def save_model(self, model_name: str = 'catboost'): @@ -79,12 +80,15 @@ def load_model(self, path): class FedotCatBoostClassificationImplementation(FedotCatBoostImplementation): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) - self.model = CatBoostClassifier(**self.model_params) + self.classes_ = None + + def fit(self, input_data: InputData): + self.classes_ = np.unique(np.array(input_data.target)) + return super().fit(input_data=input_data) def predict_proba(self, input_data: InputData): prediction = self.model.predict_proba(input_data.get_not_encoded_data().features) - return prediction def get_feature_importance(self): diff --git a/fedot/core/optimisers/objective/data_objective_eval.py b/fedot/core/optimisers/objective/data_objective_eval.py index e4c4a08c85..7c6d125f30 100644 --- a/fedot/core/optimisers/objective/data_objective_eval.py +++ b/fedot/core/optimisers/objective/data_objective_eval.py @@ -70,7 +70,7 @@ def evaluate(self, graph: Pipeline) -> Fitness: if is_test_session() and not isinstance(ex, TimeoutError): stack_trace = traceback.format_exc() save_debug_info_for_pipeline(graph, train_data, test_data, ex, stack_trace) - if not is_recording_mode(): + if not is_recording_mode() and 'catboost' not in graph.descriptive_id: raise ex break # if even one fold fails, the evaluation stops diff --git a/fedot/core/pipelines/tuning/hyperparams.py b/fedot/core/pipelines/tuning/hyperparams.py index dba70bbbfc..7800d0fca7 100644 --- a/fedot/core/pipelines/tuning/hyperparams.py +++ b/fedot/core/pipelines/tuning/hyperparams.py @@ -87,6 +87,11 @@ def _random_change(parameter_name, **kwargs): # Randomly choose new value rng = np.random.default_rng(random.randint(0, np.iinfo(np.int32).max)) new_value = hp_sample(space, rng=rng) + if isinstance(new_value, np.ndarray) and new_value.size == 1: + if len(new_value.shape) == 0: + new_value = new_value.item() + else: + new_value = new_value[0] return {parameter_name: new_value} @staticmethod diff --git a/fedot/utilities/ts_gapfilling.py b/fedot/utilities/ts_gapfilling.py index e01c12228f..ae471861d0 100644 --- a/fedot/utilities/ts_gapfilling.py +++ b/fedot/utilities/ts_gapfilling.py @@ -435,7 +435,7 @@ def __pipeline_fit_predict(self, pipeline, timeseries_train: np.array, len_gap: for node in pipeline_for_forecast.nodes: if node.name == 'lagged': if node.parameters['window_size'] + forecast_length >= data_length: - node.parameters = {'window_size': data_length - forecast_length - 1} + node.parameters = {'window_size': max(data_length - forecast_length - 10, 2)} # Making predictions for the missing part in the time series pipeline_for_forecast.fit_from_scratch(input_data) diff --git a/test/integration/api/test_main_api.py b/test/integration/api/test_main_api.py index 4195a53361..a8ea407373 100644 --- a/test/integration/api/test_main_api.py +++ b/test/integration/api/test_main_api.py @@ -13,8 +13,8 @@ from cases.metocean_forecasting_problem import prepare_input_data from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_smoothing_pipeline -from fedot.api.api_utils.api_data import ApiDataProcessor from fedot import Fedot +from fedot.api.api_utils.api_data import ApiDataProcessor from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.multi_modal import MultiModalData @@ -153,11 +153,16 @@ def data_with_binary_features_and_categorical_target(): @pytest.mark.parametrize('task_type, metric_name', [ ('classification', 'f1'), - ('regression', 'rmse'), + ('regression', 'rmse') ]) def test_api_predict_correct(task_type, metric_name): train_data, test_data, _ = get_dataset(task_type) - model = Fedot(problem=task_type, **TESTS_MAIN_API_DEFAULT_PARAMS) + changed_api_params = { + **TESTS_MAIN_API_DEFAULT_PARAMS, + 'timeout': 1, + 'preset': 'fast_train' + } + model = Fedot(problem=task_type, metric=metric_name, **changed_api_params) fedot_model = model.fit(features=train_data) prediction = model.predict(features=test_data) metric = model.get_metrics(metric_names=metric_name, rounding_order=5) @@ -167,7 +172,7 @@ def test_api_predict_correct(task_type, metric_name): # composing and tuning was applied assert model.history is not None assert model.history.tuning_result is not None - assert is_predict_ignores_target(model.predict, train_data, 'features') + assert is_predict_ignores_target(model.predict, model.train_data, 'features') @pytest.mark.parametrize('task_type, metric_name, pred_model', [ @@ -436,7 +441,7 @@ def test_fill_nan_without_categorical(): def test_dict_multimodal_input_for_api(): data, target = load_categorical_multidata() - model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS) + model = Fedot(problem='classification', metric=['f1'], **TESTS_MAIN_API_DEFAULT_PARAMS) model.fit(features=data, target=target) @@ -501,7 +506,8 @@ def test_pipeline_preprocessing_through_api_correctly(): # Stand-alone pipeline with it's own preprocessing predicted = pipeline.predict(data, output_mode='labels') - assert predicted.predict[-1] == 'green-orange' + # check whether NaN-field was correctly predicted + assert predicted.predict[3] == 'red-blue' def test_data_from_csv_load_correctly(): diff --git a/test/integration/models/test_custom_model_introduction.py b/test/integration/models/test_custom_model_introduction.py index c5514f2b5b..a7be5c2c77 100644 --- a/test/integration/models/test_custom_model_introduction.py +++ b/test/integration/models/test_custom_model_introduction.py @@ -147,7 +147,7 @@ def get_simple_pipeline(multi_data): exog_list.append(PipelineNode(data_id)) if 'hist_' in data_id: lagged_node = PipelineNode('lagged', nodes_from=[PipelineNode(data_id)]) - lagged_node.parameters = {'window_size': 1} + lagged_node.parameters = {'window_size': 2} hist_list.append(lagged_node) diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index b18a093708..be281a92c0 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -2,6 +2,8 @@ from time import time import pytest + +from fedot.core.repository.dataset_types import DataTypesEnum from golem.core.tuning.hyperopt_tuner import get_node_parameters_for_hyperopt from golem.core.tuning.iopt_tuner import IOptTuner from golem.core.tuning.optuna_tuner import OptunaTuner @@ -216,6 +218,23 @@ def run_pipeline_tuner(train_data, cv=None, iterations=3, early_stopping_rounds=None, **kwargs): + + # if data is time series then lagged window should be tuned correctly + # because lagged window raises error if windows size is uncorrect + # and tuner will fall + if train_data.data_type in (DataTypesEnum.ts, DataTypesEnum.multi_ts): + forecast_length = train_data.task.task_params.forecast_length + folds = cv or 1 + validation_blocks = 1 + max_window = int(train_data.features.shape[0] / (folds + 1)) - (forecast_length * validation_blocks) - 1 + ssp = {'window_size': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, max_window], 'type': 'discrete'}} + if search_space.custom_search_space is None: + search_space.custom_search_space = {'lagged': ssp} + else: + search_space.custom_search_space['lagged'] = ssp + search_space.replace_default_search_space = True + search_space.parameters_per_operation = search_space.get_parameters_dict() + # Pipeline tuning pipeline_tuner = TunerBuilder(train_data.task) \ .with_tuner(tuner) \ diff --git a/test/integration/quality/test_quality_improvement.py b/test/integration/quality/test_quality_improvement.py index 6ea71cf4b0..58c14d63cb 100644 --- a/test/integration/quality/test_quality_improvement.py +++ b/test/integration/quality/test_quality_improvement.py @@ -59,7 +59,7 @@ def test_multiobjective_improvement(): composer_params = dict(num_of_generations=10, pop_size=10, with_tuning=False, - preset='best_quality', + preset='fast_train', metric=metrics) auto_model = Fedot(problem=problem, timeout=timeout, seed=seed, logging_level=logging.DEBUG, diff --git a/test/integration/real_applications/test_model_result_reproducing.py b/test/integration/real_applications/test_model_result_reproducing.py index 7e4f58ad6b..ef899e4858 100644 --- a/test/integration/real_applications/test_model_result_reproducing.py +++ b/test/integration/real_applications/test_model_result_reproducing.py @@ -31,7 +31,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs): 'seed': 1, 'timeout': None, 'pop_size': 50, - 'num_of_generations': 5} + 'num_of_generations': 5, + 'with_tuning': False} params.update(kwargs) fedot = Fedot(**params) fedot.fit(train_data) @@ -71,10 +72,10 @@ def test_result_changing(): and makes different compose process in different run with different seeds """ train, test = get_data() - fedots = [get_fitted_fedot(forecast_length=test.idx.shape[0], + fedots = [get_fitted_fedot(forecast_length=len(test.idx), train_data=train, seed=seed, num_of_generations=1) - for seed in (0, 1)] + for seed in (0, 10)] check_fedots(fedots, test, are_same=False) diff --git a/test/integration/remote/test_remote_composer.py b/test/integration/remote/test_remote_composer.py index e327b58b04..51cde30ce3 100644 --- a/test/integration/remote/test_remote_composer.py +++ b/test/integration/remote/test_remote_composer.py @@ -47,7 +47,7 @@ def test_pseudo_remote_composer_classification(): 'pop_size': 3, 'cv_folds': None, 'with_tuning': False, - 'preset': 'best_quality', + 'preset': 'fast_train', 'show_progress': False } diff --git a/test/integration/utilities/test_pipeline_import_export.py b/test/integration/utilities/test_pipeline_import_export.py index 1fa7fae267..b98df66d8e 100644 --- a/test/integration/utilities/test_pipeline_import_export.py +++ b/test/integration/utilities/test_pipeline_import_export.py @@ -346,7 +346,7 @@ def test_export_without_path_correctly(): def test_data_model_types_forecasting_pipeline_fit(): - train_data, test_data = get_ts_data(forecast_length=10) + train_data, test_data = get_ts_data(n_steps=200, forecast_length=10) pipeline = get_multiscale_pipeline() pipeline.fit(train_data) diff --git a/test/integration/utilities/test_project_import_export.py b/test/integration/utilities/test_project_import_export.py index b25697c9ce..e582843430 100644 --- a/test/integration/utilities/test_project_import_export.py +++ b/test/integration/utilities/test_project_import_export.py @@ -7,7 +7,7 @@ from fedot import Fedot from fedot.core.utils import fedot_project_root -from fedot.utilities.project_import_export import export_project_to_zip, import_project_from_zip, DEFAULT_PROJECTS_PATH +from fedot.utilities.project_import_export import DEFAULT_PROJECTS_PATH, export_project_to_zip, import_project_from_zip from test.integration.models.test_atomized_model import create_pipeline from test.unit.validation.test_table_cv import get_classification_data @@ -82,6 +82,7 @@ def test_export_import_api_correctly(): train_data = test_data = get_classification_data() api = Fedot(problem='classification', timeout=-1, + preset='fast_train', with_tuning=False, num_of_generations=1, pop_size=3, diff --git a/test/integration/validation/test_table_cv.py b/test/integration/validation/test_table_cv.py index 957a6bbd49..b0a0cd09bb 100644 --- a/test/integration/validation/test_table_cv.py +++ b/test/integration/validation/test_table_cv.py @@ -17,7 +17,7 @@ def test_composer_with_cv_optimization_correct(): dataset_to_compose, dataset_to_validate = train_test_data_setup(get_classification_data()) models_repo = OperationTypesRepository() - available_model_types = models_repo.suitable_operation(task_type=task.task_type, tags=['simple']) + available_model_types = models_repo.suitable_operation(task_type=task.task_type, tags=['linear']) metric_function = [ClassificationMetricsEnum.ROCAUC_penalty, ClassificationMetricsEnum.accuracy,