From c3f9f9c698e43b7d21e351e08dbd4fae3ba47b9e Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 17 Jan 2024 19:49:40 +0300 Subject: [PATCH 01/20] add fast topo --- .../evaluation/common_preprocessing.py | 5 +- .../topological/fast_topological_extractor.py | 72 +++++++++++++++++++ .../data/data_operation_repository.json | 14 ++++ .../data/default_operation_params.json | 4 ++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py diff --git a/fedot/core/operations/evaluation/common_preprocessing.py b/fedot/core/operations/evaluation/common_preprocessing.py index 3a6424efb0..57213ba007 100644 --- a/fedot/core/operations/evaluation/common_preprocessing.py +++ b/fedot/core/operations/evaluation/common_preprocessing.py @@ -8,6 +8,8 @@ from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \ ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \ PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation +from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.fast_topological_extractor import \ + FastTopologicalFeaturesImplementation from fedot.core.operations.evaluation.operation_implementations.data_operations.topological. \ topological_extractor import TopologicalFeaturesImplementation from fedot.core.operations.operation_parameters import OperationParameters @@ -47,7 +49,8 @@ class FedotPreprocessingStrategy(EvaluationStrategy): 'one_hot_encoding': OneHotEncodingImplementation, 'label_encoding': LabelEncodingImplementation, 'fast_ica': FastICAImplementation, - 'topological_features': TopologicalFeaturesImplementation + 'topological_features': TopologicalFeaturesImplementation, + 'fast_topological_features': FastTopologicalFeaturesImplementation, } def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py new file mode 100644 index 0000000000..c40b178d3e --- /dev/null +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -0,0 +1,72 @@ +import math +import sys +from itertools import chain +from multiprocessing import cpu_count +from typing import Optional + +import numpy as np +from gph import ripser_parallel as ripser + +from fedot.core.data.data import InputData, OutputData +from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.point_cloud import \ + TopologicalTransformation +from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.topological import \ + HolesNumberFeature, MaxHoleLifeTimeFeature, RelevantHolesNumber, AverageHoleLifetimeFeature, \ + SumHoleLifetimeFeature, PersistenceEntropyFeature, SimultaneousAliveHolesFeature, \ + AveragePersistenceLandscapeFeature, BettiNumbersSumFeature, RadiusAtMaxBNFeature, PersistenceDiagramsExtractor, \ + TopologicalFeaturesExtractor +from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \ + DataOperationImplementation +from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.window_size_selector import WindowSizeSelector, WindowSizeSelectorMethodsEnum +from golem.utilities.utilities import determine_n_jobs + + +class FastTopologicalFeaturesImplementation(DataOperationImplementation): + def __init__(self, params: Optional[OperationParameters] = None): + super().__init__(params) + self.window_size = params.get('window_size') + self.points_count = params.get('points_count') + self.max_homology_dimension = 1 + self.shapes = None + + def fit(self, input_data: InputData): + if self.points_count == 0: + self.points_count = int(input_data.features.shape[1] * 0.33) + + self.shapes = None + + return self + + def transform(self, input_data: InputData) -> OutputData: + topological_features = [self._extract_features(self._slice_by_window(data, self.points_count)) + for data in input_data.features] + + if self.shapes is None: + self.shapes = [max(x[dim].shape[0] for x in topological_features) + for dim in range(self.max_homology_dimension + 1)] + + features = list() + for dim in range(self.max_homology_dimension + 1): + _features = np.zeros((len(topological_features), self.shapes[dim])) + for topo_features_num, topo_features in enumerate(topological_features): + if len(topo_features[dim]) > 0: + x = topo_features[dim][:, 1] - topo_features[dim][:, 0] + _features[topo_features_num, :len(x)] = x + features.append(_features) + features = np.concatenate(features, axis=1) + features[np.isinf(features) | (features < 0)] = 0 + + return features + + def _extract_features(self, x): + x_processed = ripser(x, + maxdim=self.max_homology_dimension, + coeff=2, + metric='euclidean', + n_threads=1, + collapse_edges=False)["dgms"] + return x_processed + + def _slice_by_window(self, data, window): + return [data[i:window + i] for i in range(data.shape[0] - window + 1)] \ No newline at end of file diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index 6fe73f19b2..e171b36b96 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -256,6 +256,20 @@ "presets": [ "ts" ], + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", + "tags": [ + "non_applicable_for_ts", + "feature_space_transformation" + ] + }, + "fast_topological_features": { + "meta": "custom_ts_preprocessing", + "presets": [ + "ts" + ], + "input_type": "[DataTypesEnum.table]", + "output_type": "[DataTypesEnum.table]", "tags": [ "non_applicable_for_ts", "feature_space_transformation" diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 431ccf2d05..7700797f12 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -160,5 +160,9 @@ }, "topological_features": { "n_jobs": -1 + }, + "fast_topological_features": { + "window_size": 0, + "points_count": 0 } } \ No newline at end of file From 2388af252d9c7744bdb330213760a904861a7e32 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Fri, 19 Jan 2024 18:12:32 +0300 Subject: [PATCH 02/20] fix fast topo --- .../topological/fast_topological_extractor.py | 37 +++++++------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index c40b178d3e..0e4598be82 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -5,6 +5,7 @@ from typing import Optional import numpy as np +from scipy.stats import entropy from gph import ripser_parallel as ripser from fedot.core.data.data import InputData, OutputData @@ -25,39 +26,21 @@ class FastTopologicalFeaturesImplementation(DataOperationImplementation): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) - self.window_size = params.get('window_size') self.points_count = params.get('points_count') self.max_homology_dimension = 1 - self.shapes = None + self.feature_funs = (lambda x: np.quantile(x, (0.1, 0.25, 0.5, 0.75, 0.9)), ) + self.shape = None def fit(self, input_data: InputData): if self.points_count == 0: self.points_count = int(input_data.features.shape[1] * 0.33) - - self.shapes = None - + self.shape = sum(map(len, [fun(np.zeros((10, ))) for fun in self.feature_funs])) return self def transform(self, input_data: InputData) -> OutputData: topological_features = [self._extract_features(self._slice_by_window(data, self.points_count)) for data in input_data.features] - - if self.shapes is None: - self.shapes = [max(x[dim].shape[0] for x in topological_features) - for dim in range(self.max_homology_dimension + 1)] - - features = list() - for dim in range(self.max_homology_dimension + 1): - _features = np.zeros((len(topological_features), self.shapes[dim])) - for topo_features_num, topo_features in enumerate(topological_features): - if len(topo_features[dim]) > 0: - x = topo_features[dim][:, 1] - topo_features[dim][:, 0] - _features[topo_features_num, :len(x)] = x - features.append(_features) - features = np.concatenate(features, axis=1) - features[np.isinf(features) | (features < 0)] = 0 - - return features + return np.array(topological_features) def _extract_features(self, x): x_processed = ripser(x, @@ -66,7 +49,15 @@ def _extract_features(self, x): metric='euclidean', n_threads=1, collapse_edges=False)["dgms"] - return x_processed + result = list() + for xp in x_processed: + if xp.shape[0] > 0: + xp = xp[:, 1] - xp[:, 0] + for fun in self.feature_funs: + result.append(fun(xp)) + else: + result.append(np.zeros(self.shape)) + return np.concatenate(result) def _slice_by_window(self, data, window): return [data[i:window + i] for i in range(data.shape[0] - window + 1)] \ No newline at end of file From 5dd70ac8aeaf70ee4666d1d8d2aba2f43b95f1fc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 19 Jan 2024 15:46:48 +0000 Subject: [PATCH 03/20] Automated autopep8 fixes --- .../data_operations/topological/fast_topological_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index 0e4598be82..6807bd999b 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -60,4 +60,4 @@ def _extract_features(self, x): return np.concatenate(result) def _slice_by_window(self, data, window): - return [data[i:window + i] for i in range(data.shape[0] - window + 1)] \ No newline at end of file + return [data[i:window + i] for i in range(data.shape[0] - window + 1)] From e39eecc67bc0217c236f3b062e510d1690295da8 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Fri, 19 Jan 2024 18:55:32 +0300 Subject: [PATCH 04/20] pep8 --- .../operations/evaluation/common_preprocessing.py | 3 ++- .../topological/fast_topological_extractor.py | 14 -------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/fedot/core/operations/evaluation/common_preprocessing.py b/fedot/core/operations/evaluation/common_preprocessing.py index 57213ba007..5f9ba45906 100644 --- a/fedot/core/operations/evaluation/common_preprocessing.py +++ b/fedot/core/operations/evaluation/common_preprocessing.py @@ -8,7 +8,8 @@ from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \ ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \ PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation -from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.fast_topological_extractor import \ +from fedot.core.operations.evaluation.operation_implementations.\ + data_operations.topological.fast_topological_extractor import \ FastTopologicalFeaturesImplementation from fedot.core.operations.evaluation.operation_implementations.data_operations.topological. \ topological_extractor import TopologicalFeaturesImplementation diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index 6807bd999b..569ddfc328 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -1,26 +1,12 @@ -import math -import sys -from itertools import chain -from multiprocessing import cpu_count from typing import Optional import numpy as np -from scipy.stats import entropy from gph import ripser_parallel as ripser from fedot.core.data.data import InputData, OutputData -from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.point_cloud import \ - TopologicalTransformation -from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.topological import \ - HolesNumberFeature, MaxHoleLifeTimeFeature, RelevantHolesNumber, AverageHoleLifetimeFeature, \ - SumHoleLifetimeFeature, PersistenceEntropyFeature, SimultaneousAliveHolesFeature, \ - AveragePersistenceLandscapeFeature, BettiNumbersSumFeature, RadiusAtMaxBNFeature, PersistenceDiagramsExtractor, \ - TopologicalFeaturesExtractor from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \ DataOperationImplementation from fedot.core.operations.operation_parameters import OperationParameters -from fedot.utilities.window_size_selector import WindowSizeSelector, WindowSizeSelectorMethodsEnum -from golem.utilities.utilities import determine_n_jobs class FastTopologicalFeaturesImplementation(DataOperationImplementation): From 619849efada01e6d2acbc8f667bb7b32c8f3a826 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Fri, 19 Jan 2024 18:58:11 +0300 Subject: [PATCH 05/20] add to initial assumption --- fedot/api/api_utils/assumptions/task_assumptions.py | 2 ++ fedot/core/repository/data/data_operation_repository.json | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fedot/api/api_utils/assumptions/task_assumptions.py b/fedot/api/api_utils/assumptions/task_assumptions.py index 8afa713cd1..11f8050772 100644 --- a/fedot/api/api_utils/assumptions/task_assumptions.py +++ b/fedot/api/api_utils/assumptions/task_assumptions.py @@ -64,6 +64,8 @@ def builders(self): .add_branch('polyfit', 'lagged') .grow_branches(None, 'ridge') .join_branches('ridge'), + 'topo_ica_ridge': PipelineBuilder().add_sequence('lagged', 'fast_topological_features', + 'fast_ica', 'ridge'), 'smoothing_ar': PipelineBuilder() .add_sequence('smoothing', 'ar'), diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index e171b36b96..4623338044 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -266,7 +266,8 @@ "fast_topological_features": { "meta": "custom_ts_preprocessing", "presets": [ - "ts" + "ts", + "fast_train" ], "input_type": "[DataTypesEnum.table]", "output_type": "[DataTypesEnum.table]", From 18815b83cc186ec859e8c552451057492ef60e40 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Mon, 22 Jan 2024 19:26:12 +0300 Subject: [PATCH 06/20] make code more clear in `fit` method --- .../topological/fast_topological_extractor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index 569ddfc328..074f00ea8a 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -20,7 +20,11 @@ def __init__(self, params: Optional[OperationParameters] = None): def fit(self, input_data: InputData): if self.points_count == 0: self.points_count = int(input_data.features.shape[1] * 0.33) - self.shape = sum(map(len, [fun(np.zeros((10, ))) for fun in self.feature_funs])) + + # define shape of features after transforming on the one data sample + sample = input_data.features[0, :].ravel() + features = np.concatenate([fun(sample) for fun in self.feature_funs]) + self.shape = features.shape[0] return self def transform(self, input_data: InputData) -> OutputData: From 328b4b30adaf7b14f91e65d368cfc8fa2cafe9c1 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Mon, 22 Jan 2024 19:26:29 +0300 Subject: [PATCH 07/20] add `fast_topoligical_features` to docs --- .../introduction/fedot_features/automation_features.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst index 14e20ac644..0e2a43f1cf 100644 --- a/docs/source/introduction/fedot_features/automation_features.rst +++ b/docs/source/introduction/fedot_features/automation_features.rst @@ -70,6 +70,7 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a `label_encoding`,Label Encoder, Feature encoding `resample`,Imbalanced binary class transformation in classification, Data transformation `topological_features`,Calculation of topological features, only for time series,Data transformation + `fast_topological_features`,Fast calculation of part of topological features, only for time series,Data transformation .. csv-table:: Feature transformation operations implementations @@ -105,7 +106,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a `one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`, `label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree` `resample`,`FEDOT model using sklearn.utils.resample`, - `topological_features`,FEDOT model,`ts` + `topological_features`,FEDOT model,`ts`, + `fast_topological_features`,FEDOT model,`ts` Models used From 3972d10106bbc4f3fdf65b9e35e3c7ee5ba98e0b Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 24 Jan 2024 20:50:18 +0300 Subject: [PATCH 08/20] fix aligment for pipeline builders in `TSForecastingAssumptions` --- fedot/api/api_utils/assumptions/task_assumptions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fedot/api/api_utils/assumptions/task_assumptions.py b/fedot/api/api_utils/assumptions/task_assumptions.py index 11f8050772..089d4cde01 100644 --- a/fedot/api/api_utils/assumptions/task_assumptions.py +++ b/fedot/api/api_utils/assumptions/task_assumptions.py @@ -52,7 +52,8 @@ class TSForecastingAssumptions(TaskAssumptions): def builders(self): return { 'lagged_ridge': - PipelineBuilder().add_sequence('lagged', 'ridge'), + PipelineBuilder() + .add_sequence('lagged', 'ridge'), 'topological': PipelineBuilder() .add_node('lagged') @@ -64,8 +65,9 @@ def builders(self): .add_branch('polyfit', 'lagged') .grow_branches(None, 'ridge') .join_branches('ridge'), - 'topo_ica_ridge': PipelineBuilder().add_sequence('lagged', 'fast_topological_features', - 'fast_ica', 'ridge'), + 'topo_ica_ridge': + PipelineBuilder() + .add_sequence('lagged', 'fast_topological_features', 'fast_ica', 'ridge'), 'smoothing_ar': PipelineBuilder() .add_sequence('smoothing', 'ar'), From 7f742a4ecc6a24dbcdf37585b7dd91b495609fce Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 24 Jan 2024 20:53:24 +0300 Subject: [PATCH 09/20] add topo to `FedotBuilder` docs --- fedot/api/builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fedot/api/builder.py b/fedot/api/builder.py index fe2c2472c1..90fd81ceed 100644 --- a/fedot/api/builder.py +++ b/fedot/api/builder.py @@ -329,6 +329,8 @@ def setup_pipeline_structure( - ``diff_filter`` -> Derivative Filter Transformation - ``cut`` -> Cut Transformation - ``exog_ts`` -> Exogeneus Transformation + - ``topological_features`` -> Topological features + - ``fast_topological_features`` -> Fast implementation of topological features max_depth: max depth of a pipeline. Defaults to ``6``. From e2b02497adac08867be0af01db8a0ba924de58f9 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 24 Jan 2024 20:54:42 +0300 Subject: [PATCH 10/20] fix table in docs --- .../introduction/fedot_features/automation_features.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst index 0e2a43f1cf..88489b0028 100644 --- a/docs/source/introduction/fedot_features/automation_features.rst +++ b/docs/source/introduction/fedot_features/automation_features.rst @@ -69,8 +69,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a `one_hot_encoding`,One-Hot Encoder, Feature encoding `label_encoding`,Label Encoder, Feature encoding `resample`,Imbalanced binary class transformation in classification, Data transformation - `topological_features`,Calculation of topological features, only for time series,Data transformation - `fast_topological_features`,Fast calculation of part of topological features, only for time series,Data transformation + `topological_features`,Calculation of topological features,Time series transformation + `fast_topological_features`,Fast calculation of part of topological features,Time series transformation .. csv-table:: Feature transformation operations implementations From a74967823cc380a66baf89a019e1741b09d09b77 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 24 Jan 2024 21:02:08 +0300 Subject: [PATCH 11/20] add params to fast_topo --- .../topological/fast_topological_extractor.py | 24 ++++++++++--------- .../data/default_operation_params.json | 6 +++-- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index 074f00ea8a..b5245a6484 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -2,6 +2,7 @@ import numpy as np from gph import ripser_parallel as ripser +from joblib import Parallel, delayed from fedot.core.data.data import InputData, OutputData from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \ @@ -13,9 +14,11 @@ class FastTopologicalFeaturesImplementation(DataOperationImplementation): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.points_count = params.get('points_count') - self.max_homology_dimension = 1 + self.max_homology_dimension = params.get('max_homology_dimension') + self.metric = params.get('metric') + self.n_jobs = params.get('n_jobs') self.feature_funs = (lambda x: np.quantile(x, (0.1, 0.25, 0.5, 0.75, 0.9)), ) - self.shape = None + self._shape = None def fit(self, input_data: InputData): if self.points_count == 0: @@ -24,19 +27,21 @@ def fit(self, input_data: InputData): # define shape of features after transforming on the one data sample sample = input_data.features[0, :].ravel() features = np.concatenate([fun(sample) for fun in self.feature_funs]) - self.shape = features.shape[0] + self._shape = features.shape[0] return self def transform(self, input_data: InputData) -> OutputData: - topological_features = [self._extract_features(self._slice_by_window(data, self.points_count)) - for data in input_data.features] + with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel: + topological_features = parallel(delayed(self._extract_features)(data) + for data in input_data.features) return np.array(topological_features) def _extract_features(self, x): - x_processed = ripser(x, + x_sliced = [x[i:self.points_count + i] for i in range(x.shape[0] - self.points_count + 1)] + x_processed = ripser(x_sliced, maxdim=self.max_homology_dimension, coeff=2, - metric='euclidean', + metric=self.metric, n_threads=1, collapse_edges=False)["dgms"] result = list() @@ -46,8 +51,5 @@ def _extract_features(self, x): for fun in self.feature_funs: result.append(fun(xp)) else: - result.append(np.zeros(self.shape)) + result.append(np.zeros(self._shape)) return np.concatenate(result) - - def _slice_by_window(self, data, window): - return [data[i:window + i] for i in range(data.shape[0] - window + 1)] diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 7700797f12..6a66f003a5 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -162,7 +162,9 @@ "n_jobs": -1 }, "fast_topological_features": { - "window_size": 0, - "points_count": 0 + "n_jobs": 1, + "points_count": 0, + "max_homology_dimension": 1, + "metric": "euclidean" } } \ No newline at end of file From b2c5f3e319f8429023bc8481e6dc39fc0486b1ed Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Wed, 24 Jan 2024 21:09:39 +0300 Subject: [PATCH 12/20] change params and add it to tuner search space --- .../topological/fast_topological_extractor.py | 11 +++++++---- fedot/core/pipelines/tuning/search_space.py | 16 ++++++++++++++++ .../data/default_operation_params.json | 2 +- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index b5245a6484..c79b95ebb7 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -13,16 +13,19 @@ class FastTopologicalFeaturesImplementation(DataOperationImplementation): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) - self.points_count = params.get('points_count') + self.window_size_as_share = params.get('window_size_as_share') self.max_homology_dimension = params.get('max_homology_dimension') self.metric = params.get('metric') self.n_jobs = params.get('n_jobs') self.feature_funs = (lambda x: np.quantile(x, (0.1, 0.25, 0.5, 0.75, 0.9)), ) self._shape = None + self._window_size = None def fit(self, input_data: InputData): - if self.points_count == 0: - self.points_count = int(input_data.features.shape[1] * 0.33) + if self._window_size is None: + self._window_size = int(input_data.features.shape[1] * self.window_size_as_share) + self._window_size = max(self._window_size, 2) + self._window_size = min(self._window_size, input_data.features.shape[1] - 2) # define shape of features after transforming on the one data sample sample = input_data.features[0, :].ravel() @@ -37,7 +40,7 @@ def transform(self, input_data: InputData) -> OutputData: return np.array(topological_features) def _extract_features(self, x): - x_sliced = [x[i:self.points_count + i] for i in range(x.shape[0] - self.points_count + 1)] + x_sliced = [x[i:self._window_size + i] for i in range(x.shape[0] - self._window_size + 1)] x_processed = ripser(x_sliced, maxdim=self.max_homology_dimension, coeff=2, diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index 08b67ab60f..b44a02b977 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -768,6 +768,22 @@ def get_parameters_dict(self): 'sampling-scope': [0.9, 0.99], 'type': 'continuous'} }, + 'fast_topological_features': { + 'window_size_as_share': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous' + }, + 'max_homology_dimension': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete' + }, + 'metric': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['euclidean', 'manhattan', 'cosine']], + 'type': 'categorical'} + } } if self.custom_search_space is not None: diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 6a66f003a5..3c673212c2 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -163,7 +163,7 @@ }, "fast_topological_features": { "n_jobs": 1, - "points_count": 0, + "window_size_as_share": 0.33, "max_homology_dimension": 1, "metric": "euclidean" } From 29f101322ac7cde8089d2285f89c8ccc80dcc7c7 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 10:05:31 +0300 Subject: [PATCH 13/20] fix integration tests --- .../topological/fast_topological_extractor.py | 33 ++++++++----------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index c79b95ebb7..ceb9a1a98e 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -17,27 +17,23 @@ def __init__(self, params: Optional[OperationParameters] = None): self.max_homology_dimension = params.get('max_homology_dimension') self.metric = params.get('metric') self.n_jobs = params.get('n_jobs') - self.feature_funs = (lambda x: np.quantile(x, (0.1, 0.25, 0.5, 0.75, 0.9)), ) - self._shape = None + self.quantiles = (0.1, 0.25, 0.5, 0.75, 0.9) + self._shape = len(self.quantiles) self._window_size = None def fit(self, input_data: InputData): - if self._window_size is None: - self._window_size = int(input_data.features.shape[1] * self.window_size_as_share) - self._window_size = max(self._window_size, 2) - self._window_size = min(self._window_size, input_data.features.shape[1] - 2) - - # define shape of features after transforming on the one data sample - sample = input_data.features[0, :].ravel() - features = np.concatenate([fun(sample) for fun in self.feature_funs]) - self._shape = features.shape[0] + self._window_size = int(input_data.features.shape[1] * self.window_size_as_share) + self._window_size = max(self._window_size, 2) + self._window_size = min(self._window_size, input_data.features.shape[1] - 2) return self def transform(self, input_data: InputData) -> OutputData: with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel: topological_features = parallel(delayed(self._extract_features)(data) for data in input_data.features) - return np.array(topological_features) + result = np.array(topological_features) + np.nan_to_num(result, copy=False, nan=0, posinf=0, neginf=0) + return result def _extract_features(self, x): x_sliced = [x[i:self._window_size + i] for i in range(x.shape[0] - self._window_size + 1)] @@ -47,12 +43,9 @@ def _extract_features(self, x): metric=self.metric, n_threads=1, collapse_edges=False)["dgms"] - result = list() - for xp in x_processed: + result = np.zeros(self._shape * (self.max_homology_dimension + 1)) + for i, xp in enumerate(x_processed): if xp.shape[0] > 0: - xp = xp[:, 1] - xp[:, 0] - for fun in self.feature_funs: - result.append(fun(xp)) - else: - result.append(np.zeros(self._shape)) - return np.concatenate(result) + result[i * self._shape:(i + 1) * self._shape] = np.quantile(xp[:, 1] - xp[:, 0], self.quantiles, + overwrite_input=True, method='hazen') + return result From 002551c8290634815b219acf6c0dc90f27cc0d07 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 10:10:52 +0300 Subject: [PATCH 14/20] add new param stride --- .../data_operations/topological/fast_topological_extractor.py | 3 ++- fedot/core/repository/data/default_operation_params.json | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index ceb9a1a98e..f26bf483c9 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -16,6 +16,7 @@ def __init__(self, params: Optional[OperationParameters] = None): self.window_size_as_share = params.get('window_size_as_share') self.max_homology_dimension = params.get('max_homology_dimension') self.metric = params.get('metric') + self.stride = params.get('stride') self.n_jobs = params.get('n_jobs') self.quantiles = (0.1, 0.25, 0.5, 0.75, 0.9) self._shape = len(self.quantiles) @@ -29,7 +30,7 @@ def fit(self, input_data: InputData): def transform(self, input_data: InputData) -> OutputData: with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel: - topological_features = parallel(delayed(self._extract_features)(data) + topological_features = parallel(delayed(self._extract_features)(data[::self.stride]) for data in input_data.features) result = np.array(topological_features) np.nan_to_num(result, copy=False, nan=0, posinf=0, neginf=0) diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 3c673212c2..9867b20bf6 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -165,6 +165,7 @@ "n_jobs": 1, "window_size_as_share": 0.33, "max_homology_dimension": 1, - "metric": "euclidean" + "metric": "euclidean", + "stride": 1 } } \ No newline at end of file From 0c9ddfc833e9891d3fb7261a6a0d00b4743a8b81 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 11:00:39 +0300 Subject: [PATCH 15/20] fix param --- fedot/core/repository/data/default_operation_params.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json index 9867b20bf6..a6abf66cb0 100644 --- a/fedot/core/repository/data/default_operation_params.json +++ b/fedot/core/repository/data/default_operation_params.json @@ -163,7 +163,7 @@ }, "fast_topological_features": { "n_jobs": 1, - "window_size_as_share": 0.33, + "window_size_as_share": 0.66, "max_homology_dimension": 1, "metric": "euclidean", "stride": 1 From 7b755eae476221c694cbf312053cb35e669eb325 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 20:00:43 +0300 Subject: [PATCH 16/20] dirty speedup --- .../topological/fast_topological_extractor.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py index f26bf483c9..75ad31ab39 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py @@ -1,3 +1,4 @@ +from itertools import chain from typing import Optional import numpy as np @@ -29,15 +30,21 @@ def fit(self, input_data: InputData): return self def transform(self, input_data: InputData) -> OutputData: + features = input_data.features with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel: - topological_features = parallel(delayed(self._extract_features)(data[::self.stride]) - for data in input_data.features) - result = np.array(topological_features) + topological_features = parallel(delayed(self._extract_features) + (np.mean(features[i:i+2, ::self.stride], axis=0)) + for i in range(0, features.shape[0], 2)) + if len(topological_features) * 2 < features.shape[0]: + topological_features.append(topological_features[-1]) + result = np.array(list(chain(*zip(topological_features, topological_features)))) + if result.shape[0] > features.shape[0]: + result = result[:-1, :] np.nan_to_num(result, copy=False, nan=0, posinf=0, neginf=0) return result def _extract_features(self, x): - x_sliced = [x[i:self._window_size + i] for i in range(x.shape[0] - self._window_size + 1)] + x_sliced = np.array([x[i:self._window_size + i] for i in range(x.shape[0] - self._window_size + 1)]) x_processed = ripser(x_sliced, maxdim=self.max_homology_dimension, coeff=2, From 1b4db862e3ee4a3c8ea61e3b26016c13e4370249 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 20:32:02 +0300 Subject: [PATCH 17/20] delete ica from initial assumption due to instability --- fedot/api/api_utils/assumptions/task_assumptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fedot/api/api_utils/assumptions/task_assumptions.py b/fedot/api/api_utils/assumptions/task_assumptions.py index 089d4cde01..c650b41f94 100644 --- a/fedot/api/api_utils/assumptions/task_assumptions.py +++ b/fedot/api/api_utils/assumptions/task_assumptions.py @@ -65,9 +65,9 @@ def builders(self): .add_branch('polyfit', 'lagged') .grow_branches(None, 'ridge') .join_branches('ridge'), - 'topo_ica_ridge': + 'topo_ridge': PipelineBuilder() - .add_sequence('lagged', 'fast_topological_features', 'fast_ica', 'ridge'), + .add_sequence('lagged', 'fast_topological_features', 'ridge'), 'smoothing_ar': PipelineBuilder() .add_sequence('smoothing', 'ar'), From ee2ecd0716c542b81d98717c3ea974005d063024 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 20:42:13 +0300 Subject: [PATCH 18/20] fix documentation --- docs/source/introduction/fedot_features/automation_features.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst index 88489b0028..96e224ab52 100644 --- a/docs/source/introduction/fedot_features/automation_features.rst +++ b/docs/source/introduction/fedot_features/automation_features.rst @@ -107,7 +107,7 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a `label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree` `resample`,`FEDOT model using sklearn.utils.resample`, `topological_features`,FEDOT model,`ts`, - `fast_topological_features`,FEDOT model,`ts` + `fast_topological_features`,FEDOT model,`ts` `fast_train` Models used From 355a134fc33f9f00942eda9a6456e94f45df0b01 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 21:02:18 +0300 Subject: [PATCH 19/20] fix test --- test/integration/models/test_model.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 6216c2ab95..09873db356 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -54,16 +54,16 @@ def get_data_for_testing(task_type, data_type, length=100, features_count=1, return None if task_type is TaskTypesEnum.ts_forecasting: - task = Task(task_type, TsForecastingParams(max(length // 10, 2))) + forecast_length = max(length // 10, 2) + task = Task(task_type, TsForecastingParams(forecast_length)) if data_type is DataTypesEnum.ts: features = np.zeros(length) + value else: features = np.zeros((length, features_count)) + value if data_type is DataTypesEnum.table: - target = np.zeros(length) + value + target = np.zeros((length, forecast_length)) + value else: target = features - else: task = Task(task_type) data_type = DataTypesEnum.table @@ -156,11 +156,15 @@ def fit_time_for_operation(operation: OperationMetaInfo, return perf_counter() - start_time for task_type in operation.task_type: - for data_type in operation.input_types: + input_types = operation.input_types + if task_type is TaskTypesEnum.ts_forecasting: + if operation.input_types == [DataTypesEnum.table]: + input_types = [DataTypesEnum.ts] + for data_type in input_types: perfomance_values = [] for length in data_lengths: data = get_data_for_testing(task_type, data_type, - length=length, features_count=2, + length=length, features_count=10, random=True) if data is not None: min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times)) From 8f895c3d347513719b697b7ed3331323fe724078 Mon Sep 17 00:00:00 2001 From: kasyanovse Date: Sat, 27 Jan 2024 21:03:29 +0300 Subject: [PATCH 20/20] delete fast_topo from assumption and delete fast_train tag --- .../source/introduction/fedot_features/automation_features.rst | 2 +- fedot/api/api_utils/assumptions/task_assumptions.py | 3 --- fedot/core/repository/data/data_operation_repository.json | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst index 96e224ab52..88489b0028 100644 --- a/docs/source/introduction/fedot_features/automation_features.rst +++ b/docs/source/introduction/fedot_features/automation_features.rst @@ -107,7 +107,7 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a `label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree` `resample`,`FEDOT model using sklearn.utils.resample`, `topological_features`,FEDOT model,`ts`, - `fast_topological_features`,FEDOT model,`ts` `fast_train` + `fast_topological_features`,FEDOT model,`ts` Models used diff --git a/fedot/api/api_utils/assumptions/task_assumptions.py b/fedot/api/api_utils/assumptions/task_assumptions.py index c650b41f94..8090436639 100644 --- a/fedot/api/api_utils/assumptions/task_assumptions.py +++ b/fedot/api/api_utils/assumptions/task_assumptions.py @@ -65,9 +65,6 @@ def builders(self): .add_branch('polyfit', 'lagged') .grow_branches(None, 'ridge') .join_branches('ridge'), - 'topo_ridge': - PipelineBuilder() - .add_sequence('lagged', 'fast_topological_features', 'ridge'), 'smoothing_ar': PipelineBuilder() .add_sequence('smoothing', 'ar'), diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json index 4623338044..e171b36b96 100644 --- a/fedot/core/repository/data/data_operation_repository.json +++ b/fedot/core/repository/data/data_operation_repository.json @@ -266,8 +266,7 @@ "fast_topological_features": { "meta": "custom_ts_preprocessing", "presets": [ - "ts", - "fast_train" + "ts" ], "input_type": "[DataTypesEnum.table]", "output_type": "[DataTypesEnum.table]",