diff --git a/fedot/core/constants.py b/fedot/core/constants.py
index c827df30d4..7919725ba1 100644
--- a/fedot/core/constants.py
+++ b/fedot/core/constants.py
@@ -1,22 +1,44 @@
 from fedot.core.repository.tasks import TaskTypesEnum

 MINIMAL_SECONDS_FOR_TUNING = 15
+"""Minimal seconds for tuning."""
+
 DEFAULT_TUNING_ITERATIONS_NUMBER = 100000
+"""Default number of tuning iterations."""
+
 DEFAULT_API_TIMEOUT_MINUTES = 5.0
+"""Default API timeout in minutes."""
+
 DEFAULT_FORECAST_LENGTH = 30
+"""Default forecast length."""
+
 COMPOSING_TUNING_PROPORTION = 0.6
+"""Proportion of data used for composing tuning."""

 BEST_QUALITY_PRESET_NAME = 'best_quality'
+"""Name of the preset for best quality."""
+
 FAST_TRAIN_PRESET_NAME = 'fast_train'
+"""Name of the preset for fast training."""
+
 AUTO_PRESET_NAME = 'auto'
+"""Name of the preset for auto tuning."""

 MINIMAL_PIPELINE_NUMBER_FOR_EVALUATION = 100
+"""Minimal number of pipelines for evaluation."""
+
 MIN_NUMBER_OF_GENERATIONS = 3
+"""Minimum number of generations."""

 FRACTION_OF_UNIQUE_VALUES = 0.95
+"""Fraction of unique values."""

 default_data_split_ratio_by_task = {
     TaskTypesEnum.classification: 0.8,
     TaskTypesEnum.regression: 0.8,
     TaskTypesEnum.ts_forecasting: 0.5
 }
+"""Default data split ratio by task."""
+
+PCA_MIN_THRESHOLD_TS = 7
+"""Minimum threshold for PCA in TS forecasting."""
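The new `PCA_MIN_THRESHOLD_TS` constant backs the time-series correction added to `ComponentAnalysisImplementation.check_and_correct_params` in the next file: when a fractional `n_components` would leave PCA with fewer than 7 components, the fraction is raised to `PCA_MIN_THRESHOLD_TS / number_of_features`. A quick numeric sketch of that arithmetic (the feature count and ratio below are illustrative values, not from the patch):

```python
PCA_MIN_THRESHOLD_TS = 7  # as introduced in fedot/core/constants.py

# Illustrative case: a 20-column time-series table where PCA is asked to keep
# a 0.2 fraction of components. 0.2 * 20 = 4 falls below the threshold, so the
# ratio is raised to 7 / 20 = 0.35.
number_of_features = 20
n_components = 0.2
if n_components * number_of_features < PCA_MIN_THRESHOLD_TS:
    n_components = PCA_MIN_THRESHOLD_TS / number_of_features
print(n_components)  # 0.35
```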
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
index d1be4d3d1b..a2f882bf41 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
@@ -7,20 +7,23 @@
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures, StandardScaler

+from fedot.core.constants import PCA_MIN_THRESHOLD_TS
 from fedot.core.data.data import InputData, OutputData, data_type_is_table
 from fedot.core.data.data_preprocessing import convert_into_column, data_has_categorical_features, \
     divide_data_categorical_numerical, find_categorical_columns, replace_inf_with_nans
 from fedot.core.operations.evaluation.operation_implementations. \
     implementation_interfaces import DataOperationImplementation, EncodedInvariantImplementation
 from fedot.core.operations.operation_parameters import OperationParameters
+from fedot.core.repository.dataset_types import DataTypesEnum
 from fedot.preprocessing.data_types import TYPE_TO_ID


 class ComponentAnalysisImplementation(DataOperationImplementation):
-    """ Class for applying PCA and kernel PCA models from sklearn
+    """
+    Class for applying PCA and kernel PCA models from sklearn

     Args:
-        params: OpearationParameters with the arguments
+        params: OperationParameters with the arguments
     """

     def __init__(self, params: Optional[OperationParameters]):
@@ -29,8 +32,9 @@ def __init__(self, params: Optional[OperationParameters]):
         self.number_of_features = None
         self.number_of_samples = None

-    def fit(self, input_data: InputData):
-        """The method trains the PCA model
+    def fit(self, input_data: InputData) -> PCA:
+        """
+        The method trains the PCA model

         Args:
             input_data: data with features, target and ids for PCA training
@@ -42,13 +46,14 @@ def fit(self, input_data: InputData):
         self.number_of_samples, self.number_of_features = np.array(input_data.features).shape

         if self.number_of_features > 1:
-            self.check_and_correct_params()
+            self.check_and_correct_params(is_ts_data=input_data.data_type is DataTypesEnum.ts)
             self.pca.fit(input_data.features)

         return self.pca

     def transform(self, input_data: InputData) -> OutputData:
-        """Method for transformation tabular data using PCA
+        """
+        Method for transforming tabular data using PCA

         Args:
             input_data: data with features, target and ids for PCA applying
@@ -63,13 +68,13 @@ def transform(self, input_data: InputData) -> OutputData:
             transformed_features = input_data.features

         # Update features
-        output_data = self._convert_to_output(input_data,
-                                              transformed_features)
+        output_data = self._convert_to_output(input_data, transformed_features)
         self.update_column_types(output_data)
         return output_data

-    def check_and_correct_params(self):
-        """Method check if number of features in data enough for ``n_components``
+    def check_and_correct_params(self, is_ts_data: bool = False):
+        """
+        Method checks whether the number of features in the data is enough for the ``n_components``
         parameter in PCA or not. And if not enough - fixes it
         """
         n_components = self.params.get('n_components')
@@ -80,12 +85,15 @@ def check_and_correct_params(self):
         # Check that n_samples correctly map with n_features
         if self.number_of_samples < self.number_of_features:
             self.params.update(n_components=0.5)
+
+        if is_ts_data and (n_components * self.number_of_features) < PCA_MIN_THRESHOLD_TS:
+            self.params.update(n_components=PCA_MIN_THRESHOLD_TS / self.number_of_features)

         self.pca.set_params(**self.params.to_dict())

     @staticmethod
     def update_column_types(output_data: OutputData) -> OutputData:
-        """Update column types after applying PCA operations
+        """
+        Update column types after applying PCA operations
         """

         _, n_cols = output_data.predict.shape
@@ -94,7 +102,8 @@ def update_column_types(output_data: OutputData) -> OutputData:


 class PCAImplementation(ComponentAnalysisImplementation):
-    """Class for applying PCA from sklearn
+    """
+    Class for applying PCA from sklearn

     Args:
         params: OperationParameters with the hyperparameters
@@ -111,7 +120,8 @@ def __init__(self, params: Optional[OperationParameters] = None):


 class KernelPCAImplementation(ComponentAnalysisImplementation):
-    """ Class for applying kernel PCA from sklearn
+    """
+    Class for applying kernel PCA from sklearn

     Args:
         params: OperationParameters with the hyperparameters
@@ -123,7 +133,8 @@ def __init__(self, params: Optional[OperationParameters]):


 class FastICAImplementation(ComponentAnalysisImplementation):
-    """ Class for applying FastICA from sklearn
+    """
+    Class for applying FastICA from sklearn

     Args:
         params: OperationParameters with the hyperparameters
@@ -135,7 +146,8 @@ def __init__(self, params: Optional[OperationParameters]):


 class PolyFeaturesImplementation(EncodedInvariantImplementation):
-    """ Class for application of :obj:`PolynomialFeatures` operation on data,
+    """
+    Class for application of :obj:`PolynomialFeatures` operation on data,
     where only not encoded features (were not converted from categorical using
     ``OneHot encoding``) are used

@@ -158,7 +170,9 @@ def __init__(self, params: Optional[OperationParameters]):
         self.columns_to_take = None

     def fit(self, input_data: InputData):
-        """ Method for fit Poly features operation """
+        """
+        Method for fitting the Poly features operation
+        """
         # Check the number of columns in source dataset
         n_rows, n_cols = input_data.features.shape
         if n_cols > self.th_columns:
@@ -170,7 +184,8 @@ def fit(self, input_data: InputData):
         return super().fit(input_data)

     def transform(self, input_data: InputData) -> OutputData:
-        """Firstly perform filtration of columns
+        """
+        First performs filtration of columns
         """
         clipped_input_data = input_data
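For readers skimming the diff, here is a self-contained sketch of the corrected parameter flow: the two rules visible in the `check_and_correct_params` hunk (the wide-matrix fallback to 0.5 and the new time-series floor), followed by the `set_params` hand-off. A plain dict stands in for FEDOT's `OperationParameters`, the helper name is ours, and checks elided from the hunk are omitted:

```python
from sklearn.decomposition import PCA

PCA_MIN_THRESHOLD_TS = 7


def corrected_pca(params: dict, n_samples: int, n_features: int, is_ts_data: bool) -> PCA:
    """Toy stand-in: `params` is a plain dict, not FEDOT's OperationParameters."""
    n_components = params.get('n_components')
    # Visible in the hunk above: fall back to a fractional value for wide matrices
    if n_samples < n_features:
        params['n_components'] = 0.5
    # New in this patch: raise the fraction for time series below the component floor
    if is_ts_data and (n_components * n_features) < PCA_MIN_THRESHOLD_TS:
        params['n_components'] = PCA_MIN_THRESHOLD_TS / n_features
    # Mirrors self.pca.set_params(**self.params.to_dict()); set_params returns self
    return PCA().set_params(**params)


pca = corrected_pca({'n_components': 0.2}, n_samples=500, n_features=20, is_ts_data=True)
print(pca.n_components)  # 0.35
```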
diff --git a/test/integration/real_applications/test_examples.py b/test/integration/real_applications/test_examples.py
index 8b5796c787..681e6fae9b 100644
--- a/test/integration/real_applications/test_examples.py
+++ b/test/integration/real_applications/test_examples.py
@@ -1,7 +1,6 @@
 from datetime import timedelta

 import numpy as np
-import pytest
 from sklearn.metrics import mean_squared_error

 from examples.advanced.multimodal_text_num_example import run_multi_modal_example
@@ -84,7 +83,6 @@ def test_api_classification_example():
     assert prediction is not None


-@pytest.mark.skip(reason="topo features fail")  # TODO resolve
 def test_api_ts_forecasting_example():
     forecast = run_ts_forecasting_example(dataset='salaries', timeout=2, with_tuning=False)
     assert forecast is not None
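With the skip marker removed, `test_api_ts_forecasting_example` is collected again as part of the integration suite. To check just this test locally, a programmatic run via pytest's standard node-id selection could look like the following (the node id is derived from the diff header; adjust the path to your checkout):

```python
import pytest

# Run only the re-enabled test; pytest.main returns a standard exit code
# (0 when the test passes).
exit_code = pytest.main([
    "test/integration/real_applications/test_examples.py::test_api_ts_forecasting_example",
])
print(f"pytest exit code: {exit_code}")
```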