Integration tests fix v2 #1183

Merged: 29 commits, Nov 21, 2023
Commits (29)
6ecff45
test_dict_multimodal_input_for_api fix
IIaKyJIuH Oct 6, 2023
183bcc6
fix pipeline_preprocessing_through_api_correctly
IIaKyJIuH Oct 6, 2023
f1b50ca
change api_predict_correct train_data
IIaKyJIuH Oct 9, 2023
58773a9
add metric to the notorious tests
IIaKyJIuH Oct 12, 2023
0e7d1d7
1 trial
IIaKyJIuH Oct 17, 2023
a3694f5
+ TODO
IIaKyJIuH Oct 19, 2023
957fe1b
fix problems with lagged window
kasyanovse Oct 19, 2023
3e10623
pep8
kasyanovse Oct 19, 2023
c401ed0
pep8
kasyanovse Oct 19, 2023
b1ddb6a
switch off boosting mutation due to some problem with it
kasyanovse Oct 25, 2023
750cfe6
move some fixes for lagged window
kasyanovse Oct 25, 2023
e88ddcc
2 trial
IIaKyJIuH Oct 26, 2023
9bc74b9
3 trial (partial)
IIaKyJIuH Oct 27, 2023
973f74d
3 trial (partial 2)
IIaKyJIuH Oct 31, 2023
fe2391f
Fix problems with hyperopt and data.numerical_idx
kasyanovse Nov 2, 2023
8bf1270
Fix errors with uncorrect cat features
kasyanovse Nov 2, 2023
3c44cd1
Fix some problems with lagged window
kasyanovse Nov 2, 2023
fd5f983
pep8
kasyanovse Nov 3, 2023
1ca4238
small fix
kasyanovse Nov 3, 2023
f40b75e
Fix last error
kasyanovse Nov 3, 2023
06f4aa2
Fix error with boosting strategy
kasyanovse Nov 7, 2023
c48c7df
fix error with scaling
kasyanovse Nov 7, 2023
50f3b92
fix misprint
kasyanovse Nov 7, 2023
02cd949
fix boosting classification
kasyanovse Nov 7, 2023
0c55757
fix of fix
kasyanovse Nov 7, 2023
4d4f94f
add comment
kasyanovse Nov 10, 2023
0589138
fix catboost parameter error
kasyanovse Nov 20, 2023
ac4c926
Fix some preprocessing operations
kasyanovse Nov 21, 2023
41f5c2f
Revert "Fix some preprocessing operations"
kasyanovse Nov 21, 2023
2 changes: 2 additions & 0 deletions examples/advanced/time_series_forecasting/exogenous.py
@@ -76,8 +76,10 @@ def run_exogenous_experiment(path_to_file, len_forecast=250, with_exog=True, vis
task_params=task.task_params,
timeout=10,
initial_assumption=pipeline,
metric=['mae'],
available_operations=['lagged', 'ridge', 'exog_ts', 'arima', 'knnreg', 'rfr', 'svr'],
max_pipeline_fit_time=2,
with_tuning=False,
n_jobs=-1)
fedot.fit(train_dataset)

6 changes: 3 additions & 3 deletions examples/simple/time_series_forecasting/api_forecasting.py
@@ -7,7 +7,7 @@
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

logging.raiseExceptions = False

@@ -33,15 +33,15 @@ def get_ts_data(dataset='australia', horizon: int = 30, validation_blocks=None):


def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout: float = None,
visualization=False, with_tuning=True, validation_blocks=2):
visualization=False, with_tuning=False, validation_blocks=2):
Collaborator:

If I understand correctly, this example used to demonstrate both composing and tuning. Why disable tuning?

Collaborator:

Tuning often runs into incompatible CatBoost parameters, or an invalid window in the lagged transformation, and crashes with an error. Tuning was disabled in some places to reduce the number of tests failing with this problem.

Collaborator:

Could you say more about the incompatible CatBoost parameters?
Do I understand correctly that CatBoost can appear there after the lagged transformation?
Why not, in that case, just define a custom hyperparameter search space, or remove CatBoost from the list of available models altogether?

Collaborator:

> Could you say more about the incompatible CatBoost parameters?

For example, min_data_in_leaf can only be used with grow_policy in ['Lossguide', 'Depthwise']. In every other case, using this parameter raises an error.

> Do I understand correctly that CatBoost can appear there after the lagged transformation?

Yes. In this test the problem is only with the lagged transformation; CatBoost is not used in the composition here.

> Why not just define a custom hyperparameter search space, or remove CatBoost from the list of available models altogether?

1. If we define a custom hyperparameter search space:
   - it has to cover all CatBoost parameters and account for every valid combination;
   - it has to be defined everywhere CatBoost could, in theory, be applied;
   - the parameter mutation function would (most likely) have to be rewritten.
2. If excluding CatBoost from testing is acceptable, I am for it, but then it will not be tested at all. In general, CatBoost badly needs unit tests; catching all its errors and typos through integration tests is very hard.

Collaborator:

@kasyanovse I have run into a similar CatBoost parameter before and added a check to the FEDOT implementation so that it fixes such cases. Maybe we should add one more rule of that kind: if min_data_in_leaf is set but the required grow_policy is not used, fix these parameters.
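A minimal sketch of such a rule, assuming the parameters arrive as a plain dict (the helper name `fix_catboost_params` is illustrative, not FEDOT's actual API; the constraint itself is CatBoost's documented one):

```python
def fix_catboost_params(params: dict) -> dict:
    """Drop min_data_in_leaf when the growing policy cannot use it.

    CatBoost accepts min_data_in_leaf only with grow_policy in
    ('Lossguide', 'Depthwise'); with the default 'SymmetricTree'
    it raises an error instead of ignoring the parameter.
    """
    params = dict(params)  # avoid mutating the caller's dict
    incompatible = params.get('grow_policy', 'SymmetricTree') not in ('Lossguide', 'Depthwise')
    if 'min_data_in_leaf' in params and incompatible:
        params.pop('min_data_in_leaf')
    return params


assert 'min_data_in_leaf' not in fix_catboost_params({'min_data_in_leaf': 5})
```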

Collaborator:

> 2. If excluding CatBoost from testing is acceptable, I am for it, but then it will not be tested at all. In general, CatBoost badly needs unit tests; catching all its errors and typos through integration tests is very hard.

As far as I know, CatBoost was added to the models fairly recently. I am definitely in favor of a separate PR with unit tests for it, and I am definitely in favor of integration tests that cover CatBoost, but those can be done separately. So I would go with removing CatBoost from this test. Was this the only test where something failed because of it?

Also summoning @nicl-nno. What do you think? Which option looks better to you?

Collaborator:

> Actually, there is also the option of simply removing parameter mutation in the composer for tests.

I am strongly against that. Parameter mutation in the composer is one of the basic operations; there is a risk of breaking a great deal.

Collaborator @nicl-nno (Nov 17, 2023):

If I understand the problem correctly, wouldn't an option like this help work around it for now:

[image: inline code suggestion]

Or rather, graph.descriptive_id?

Collaborator:

> So I would go with removing CatBoost from this test.

It seems that along with CatBoost we would have to remove all the best_quality models as well.

> Was this the only test where something failed because of it?

No. Locally, 1 to 3 tests fail for me with the same error: _catboost.CatBoostError: max_leaves option works only with lossguide tree growing. On the server, no more than one fails.

PS: I re-ran the tests yesterday and a new error appeared. I will try to reproduce it locally.

> Parameter mutation in the composer is one of the basic operations; there is a risk of breaking a great deal.

Only as a temporary measure. A one-line fix would avoid changing several tests. Alternatively, we could stop composer and tuner failures from raising in tests and open an issue to re-enable them once the problems are solved.

Collaborator:

> If I understand the problem correctly, wouldn't an option like this help work around it for now:

It should work. I will test it while hunting the new error.
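For context, a pipeline's `descriptive_id` is a string encoding of the graph structure and operation names, so a substring test is enough to detect whether an operation participates in the graph; this is the guard that lands in `fedot/core/optimisers/objective/data_objective_eval.py` below. A minimal sketch:

```python
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

# A two-node pipeline: scaling -> catboost
pipeline = Pipeline(PipelineNode('catboost', nodes_from=[PipelineNode('scaling')]))

# The substring check reveals whether catboost appears anywhere in the graph
assert 'catboost' in pipeline.descriptive_id
```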

train_data, test_data = get_ts_data(dataset, horizon, validation_blocks)
# init model for the time series forecasting
model = Fedot(problem='ts_forecasting',
task_params=Task(TaskTypesEnum.ts_forecasting,
TsForecastingParams(forecast_length=horizon)).task_params,
timeout=timeout,
n_jobs=-1,
metric='mase',
metric=['mase', 'mae', 'mape', 'rmse'],
with_tuning=with_tuning,
cv_folds=2, preset='fast_train')

10 changes: 6 additions & 4 deletions fedot/api/api_utils/api_params_repository.py
@@ -1,12 +1,10 @@
import datetime
from functools import partial
from typing import Sequence

from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum

from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation, \
add_resample_mutation
from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, add_resample_mutation
from fedot.core.constants import AUTO_PRESET_NAME
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.core.utils import default_fedot_data_dir
@@ -130,8 +128,12 @@ def _get_default_mutations(task_type: TaskTypesEnum, params) -> Sequence[Mutatio
MutationTypesEnum.single_edge]

# TODO remove workaround after boosting mutation fix
# Boosting mutation does not work due to a problem with __eq__ on its copy.
# Refactoring ``partial`` into ``def`` does not help.
# Boosting mutation also fails on its own.
if task_type == TaskTypesEnum.ts_forecasting:
mutations.append(partial(boosting_mutation, params=params))
# mutations.append(partial(boosting_mutation, params=params))
pass
else:
mutations.append(add_resample_mutation)

19 changes: 14 additions & 5 deletions fedot/core/data/data.py
@@ -4,7 +4,7 @@
import os
from copy import copy, deepcopy
from dataclasses import dataclass, field
from typing import List, Optional, Tuple, Union, Iterable, Any
from typing import Any, Iterable, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
@@ -472,6 +472,13 @@ class InputData(Data):
"""Data class for input data for the nodes
"""

def __post_init__(self):
if self.numerical_idx is None:
if self.features is not None and isinstance(self.features, np.ndarray) and self.features.ndim > 1:
self.numerical_idx = list(range(self.features.shape[1]))
else:
self.numerical_idx = [0]

@property
def num_classes(self) -> Optional[int]:
"""Returns number of classes that are present in the target.
@@ -600,7 +607,7 @@ def get_not_encoded_data(self):
if self.numerical_idx:
num_features = self.features[:, self.numerical_idx]

if self.features_names:
if self.features_names is not None and np.size(self.features_names):
num_features_names = self.features_names[self.numerical_idx]
else:
num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)])
@@ -609,7 +616,7 @@ def get_not_encoded_data(self):
if self.categorical_idx:
cat_features = self.categorical_features

if self.features_names:
if self.features_names is not None and np.size(self.features_names):
cat_features_names = self.features_names[self.categorical_idx]
else:
cat_features_names = np.array([f'cat_feature_{i}' for i in range(1, cat_features.shape[1] + 1)])
@@ -618,8 +625,8 @@ def get_not_encoded_data(self):
new_features = np.hstack((num_features, cat_features))
new_features_names = np.hstack((num_features_names, cat_features_names))
new_features_idx = np.array(range(new_features.shape[1]))
new_num_idx = new_features_idx[:new_features.shape[1]]
new_cat_idx = new_features_idx[cat_features.shape[1]:]
new_num_idx = new_features_idx[:num_features.shape[1]]
new_cat_idx = new_features_idx[-cat_features.shape[1]:]

elif cat_features is not None:
new_features = cat_features
@@ -630,6 +637,8 @@ def get_not_encoded_data(self):
new_features = num_features
new_features_names = num_features_names
new_num_idx = np.array(range(new_features.shape[1]))
else:
raise ValueError('There is no features')

return InputData(idx=self.idx, features=new_features, features_names=new_features_names,
numerical_idx=new_num_idx, categorical_idx=new_cat_idx,
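A small illustration of the new `__post_init__` default above (a sketch; the constructor call follows the usual `InputData` signature):

```python
import numpy as np

from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

# With no explicit numerical_idx, a 2-D feature table now defaults to
# treating every column as numerical: list(range(n_columns))
data = InputData(idx=np.arange(3),
                 features=np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
                 target=np.array([0, 1, 0]),
                 task=Task(TaskTypesEnum.classification),
                 data_type=DataTypesEnum.table)
assert data.numerical_idx == [0, 1]
```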
14 changes: 9 additions & 5 deletions fedot/core/operations/evaluation/boostings.py
@@ -5,13 +5,13 @@
from fedot.core.operations.evaluation.operation_implementations.models.boostings_implementations import \
FedotCatBoostClassificationImplementation, FedotCatBoostRegressionImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.utilities.random import ImplementationRandomStateHandler


class BoostingStrategy(EvaluationStrategy):
__operations_by_types = {
'catboost': FedotCatBoostClassificationImplementation,

'catboostreg': FedotCatBoostRegressionImplementation
}

@@ -43,12 +43,16 @@ def __init__(self, operation_type: str, params: Optional[OperationParameters] =
super().__init__(operation_type, params)

def predict(self, trained_operation, predict_data: InputData) -> OutputData:
if self.output_mode in ['default', 'labels']:
n_classes = len(trained_operation.classes_)
if self.output_mode in ['labels']:
prediction = trained_operation.predict(predict_data)

elif self.output_mode in ['probs', 'full_probs'] and predict_data.task == 'classification':
elif (self.output_mode in ['probs', 'full_probs', 'default'] and
predict_data.task.task_type is TaskTypesEnum.classification):
prediction = trained_operation.predict_proba(predict_data)

if n_classes < 2:
raise ValueError('Data set contain only 1 target class. Please reformat your data.')
elif n_classes == 2 and self.output_mode != 'full_probs' and len(prediction.shape) > 1:
prediction = prediction[:, 1]
else:
raise ValueError(f'Output mode {self.output_mode} is not supported')

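The binary branch above reduces the `(n_samples, 2)` probability matrix returned by `predict_proba` to the positive-class column; standalone, with illustrative values:

```python
import numpy as np

# predict_proba yields one column per class; for two classes, the
# 'probs'/'default' modes keep only the positive-class probabilities,
# while 'full_probs' preserves the whole matrix.
probs = np.array([[0.9, 0.1],
                  [0.2, 0.8]])
positive_class_probs = probs[:, 1]
assert positive_class_probs.tolist() == [0.1, 0.8]
```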
@@ -345,7 +345,7 @@ class LaggedTransformationImplementation(LaggedImplementation):

def __init__(self, params: Optional[OperationParameters]):
super().__init__(params)
self.window_size_minimum = 2
self.window_size_minimum = 1


class TsSmoothingImplementation(DataOperationImplementation):
@@ -88,7 +88,7 @@ def fit(self, input_data: InputData):
self.ids_to_process = ids_to_process
self.bool_ids = bool_ids
if len(ids_to_process) > 0:
features_to_process = np.array(features[:, ids_to_process])
features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features
self.operation.fit(features_to_process)
return self.operation

@@ -119,8 +119,7 @@ def _make_new_table(self, features):
:param features: tabular data for processing
:return transformed_features: transformed features table
"""

features_to_process = np.array(features[:, self.ids_to_process])
features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy()
transformed_part = self.operation.transform(features_to_process)

# If there are no binary features in the dataset
@@ -161,7 +160,7 @@ def _reasonability_check(features):

# For every column in table make check
for column_id in range(0, columns_amount):
column = features[:, column_id] if columns_amount > 1 else features
column = features[:, column_id] if columns_amount > 1 else features.copy()
if len(np.unique(column)) > 2:
non_bool_ids.append(column_id)
else:
@@ -1,8 +1,9 @@
import os
from typing import Optional

import numpy as np
import pandas as pd
from catboost import Pool, CatBoostClassifier, CatBoostRegressor
from catboost import CatBoostClassifier, CatBoostRegressor, Pool
from matplotlib import pyplot as plt

from fedot.core.data.data import InputData
@@ -64,7 +65,7 @@ def convert_to_pool(data: Optional[InputData]):
data=data.features,
label=data.target,
cat_features=data.categorical_idx,
feature_names=data.features_names.tolist()
feature_names=data.features_names.tolist() if data.features_names is not None else None
)

def save_model(self, model_name: str = 'catboost'):
@@ -79,12 +80,15 @@ def load_model(self, path):
class FedotCatBoostClassificationImplementation(FedotCatBoostImplementation):
def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)

self.model = CatBoostClassifier(**self.model_params)
self.classes_ = None

def fit(self, input_data: InputData):
self.classes_ = np.unique(np.array(input_data.target))
return super().fit(input_data=input_data)

def predict_proba(self, input_data: InputData):
prediction = self.model.predict_proba(input_data.get_not_encoded_data().features)

return prediction

def get_feature_importance(self):
2 changes: 1 addition & 1 deletion fedot/core/optimisers/objective/data_objective_eval.py
@@ -70,7 +70,7 @@ def evaluate(self, graph: Pipeline) -> Fitness:
if is_test_session() and not isinstance(ex, TimeoutError):
stack_trace = traceback.format_exc()
save_debug_info_for_pipeline(graph, train_data, test_data, ex, stack_trace)
if not is_recording_mode():
if not is_recording_mode() and 'catboost' not in graph.descriptive_id:
raise ex
break # if even one fold fails, the evaluation stops

5 changes: 5 additions & 0 deletions fedot/core/pipelines/tuning/hyperparams.py
@@ -87,6 +87,11 @@ def _random_change(parameter_name, **kwargs):
# Randomly choose new value
rng = np.random.default_rng(random.randint(0, np.iinfo(np.int32).max))
new_value = hp_sample(space, rng=rng)
if isinstance(new_value, np.ndarray) and new_value.size == 1:
if len(new_value.shape) == 0:
new_value = new_value.item()
else:
new_value = new_value[0]
return {parameter_name: new_value}

@staticmethod
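The added unwrap covers both shapes a hyperopt sample can take, a 0-d array and a length-1 array; in isolation:

```python
import numpy as np

# hp_sample may return a 0-d array or a 1-element array instead of a scalar;
# both are reduced to a plain value before being stored as a parameter.
for sampled in (np.array(3), np.array([3])):
    if isinstance(sampled, np.ndarray) and sampled.size == 1:
        sampled = sampled.item() if sampled.ndim == 0 else sampled[0]
    assert sampled == 3
```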
2 changes: 1 addition & 1 deletion fedot/utilities/ts_gapfilling.py
@@ -435,7 +435,7 @@ def __pipeline_fit_predict(self, pipeline, timeseries_train: np.array, len_gap:
for node in pipeline_for_forecast.nodes:
if node.name == 'lagged':
if node.parameters['window_size'] + forecast_length >= data_length:
node.parameters = {'window_size': data_length - forecast_length - 1}
node.parameters = {'window_size': max(data_length - forecast_length - 10, 2)}

# Making predictions for the missing part in the time series
pipeline_for_forecast.fit_from_scratch(input_data)
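A worked instance of the new window clamp (numbers are illustrative):

```python
# A 50-point series with a 45-step gap to fill: the old fallback,
# data_length - forecast_length - 1 = 4, leaves no safety margin,
# while the new one keeps a 10-point margin and never drops below 2.
data_length, forecast_length = 50, 45
window_size = max(data_length - forecast_length - 10, 2)
assert window_size == 2
```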
18 changes: 12 additions & 6 deletions test/integration/api/test_main_api.py
@@ -13,8 +13,8 @@

from cases.metocean_forecasting_problem import prepare_input_data
from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_smoothing_pipeline
from fedot.api.api_utils.api_data import ApiDataProcessor
from fedot import Fedot
from fedot.api.api_utils.api_data import ApiDataProcessor
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
@@ -153,11 +153,16 @@ def data_with_binary_features_and_categorical_target():

@pytest.mark.parametrize('task_type, metric_name', [
('classification', 'f1'),
('regression', 'rmse'),
('regression', 'rmse')
])
def test_api_predict_correct(task_type, metric_name):
train_data, test_data, _ = get_dataset(task_type)
model = Fedot(problem=task_type, **TESTS_MAIN_API_DEFAULT_PARAMS)
changed_api_params = {
**TESTS_MAIN_API_DEFAULT_PARAMS,
'timeout': 1,
'preset': 'fast_train'
}
model = Fedot(problem=task_type, metric=metric_name, **changed_api_params)
fedot_model = model.fit(features=train_data)
prediction = model.predict(features=test_data)
metric = model.get_metrics(metric_names=metric_name, rounding_order=5)
@@ -167,7 +172,7 @@ def test_api_predict_correct(task_type, metric_name):
# composing and tuning was applied
assert model.history is not None
assert model.history.tuning_result is not None
assert is_predict_ignores_target(model.predict, train_data, 'features')
assert is_predict_ignores_target(model.predict, model.train_data, 'features')


@pytest.mark.parametrize('task_type, metric_name, pred_model', [
Expand Down Expand Up @@ -436,7 +441,7 @@ def test_fill_nan_without_categorical():
def test_dict_multimodal_input_for_api():
data, target = load_categorical_multidata()

model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS)
model = Fedot(problem='classification', metric=['f1'], **TESTS_MAIN_API_DEFAULT_PARAMS)

model.fit(features=data, target=target)

@@ -501,7 +506,7 @@ def test_pipeline_preprocessing_through_api_correctly():
# Stand-alone pipeline with it's own preprocessing
predicted = pipeline.predict(data, output_mode='labels')

assert predicted.predict[-1] == 'green-orange'
# check whether NaN-field was correctly predicted
assert predicted.predict[3] == 'red-blue'


def test_data_from_csv_load_correctly():
2 changes: 1 addition & 1 deletion test/integration/models/test_custom_model_introduction.py
@@ -147,7 +147,7 @@ def get_simple_pipeline(multi_data):
exog_list.append(PipelineNode(data_id))
if 'hist_' in data_id:
lagged_node = PipelineNode('lagged', nodes_from=[PipelineNode(data_id)])
lagged_node.parameters = {'window_size': 1}
lagged_node.parameters = {'window_size': 2}

hist_list.append(lagged_node)

19 changes: 19 additions & 0 deletions test/integration/pipelines/tuning/test_pipeline_tuning.py
@@ -2,6 +2,8 @@
from time import time

import pytest

from fedot.core.repository.dataset_types import DataTypesEnum
from golem.core.tuning.hyperopt_tuner import get_node_parameters_for_hyperopt
from golem.core.tuning.iopt_tuner import IOptTuner
from golem.core.tuning.optuna_tuner import OptunaTuner
@@ -216,6 +218,23 @@ def run_pipeline_tuner(train_data,
cv=None,
iterations=3,
early_stopping_rounds=None, **kwargs):

# If the data is a time series, the lagged window must be tuned within valid
# bounds: the lagged transform raises an error for an incorrect window size,
# which would crash the tuner.
if train_data.data_type in (DataTypesEnum.ts, DataTypesEnum.multi_ts):
forecast_length = train_data.task.task_params.forecast_length
folds = cv or 1
validation_blocks = 1
max_window = int(train_data.features.shape[0] / (folds + 1)) - (forecast_length * validation_blocks) - 1
ssp = {'window_size': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, max_window], 'type': 'discrete'}}
if search_space.custom_search_space is None:
search_space.custom_search_space = {'lagged': ssp}
else:
search_space.custom_search_space['lagged'] = ssp
search_space.replace_default_search_space = True
search_space.parameters_per_operation = search_space.get_parameters_dict()

# Pipeline tuning
pipeline_tuner = TunerBuilder(train_data.task) \
.with_tuner(tuner) \
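A worked instance of the `max_window` bound above (assuming a 120-point series, `cv=None` so `folds = 1`, a 10-step horizon, and one validation block):

```python
# int(n / (folds + 1)) reserves data for the train/validation split;
# subtracting forecast_length * validation_blocks and 1 keeps the lagged
# window from overrunning the shortest fold.
n_points, forecast_length, folds, validation_blocks = 120, 10, 1, 1
max_window = int(n_points / (folds + 1)) - (forecast_length * validation_blocks) - 1
assert max_window == 49
```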
2 changes: 1 addition & 1 deletion test/integration/quality/test_quality_improvement.py
@@ -59,7 +59,7 @@ def test_multiobjective_improvement():
composer_params = dict(num_of_generations=10,
pop_size=10,
with_tuning=False,
preset='best_quality',
preset='fast_train',
metric=metrics)

auto_model = Fedot(problem=problem, timeout=timeout, seed=seed, logging_level=logging.DEBUG,
@@ -31,7 +31,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
'seed': 1,
'timeout': None,
'pop_size': 50,
'num_of_generations': 5}
'num_of_generations': 5,
'with_tuning': False}
params.update(kwargs)
fedot = Fedot(**params)
fedot.fit(train_data)
@@ -71,10 +72,10 @@ def test_result_changing():
and makes different compose process in different run with different seeds """
train, test = get_data()

fedots = [get_fitted_fedot(forecast_length=test.idx.shape[0],
fedots = [get_fitted_fedot(forecast_length=len(test.idx),
train_data=train,
seed=seed,
num_of_generations=1)
for seed in (0, 1)]
for seed in (0, 10)]

check_fedots(fedots, test, are_same=False)