aimclub · kasyanovse · Jan 27, 2024 · Jan 17, 2024 · Jan 19, 2024 · Jan 19, 2024
diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst
@@ -69,7 +69,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
    `one_hot_encoding`,One-Hot Encoder, Feature encoding
    `label_encoding`,Label Encoder, Feature encoding
    `resample`,Imbalanced binary class transformation in classification, Data transformation
-   `topological_features`,Calculation of topological features, only for time series,Data transformation
+   `topological_features`,Calculation of topological features,Time series transformation
+   `fast_topological_features`,Fast calculation of a subset of topological features,Time series transformation
 
 
 .. csv-table:: Feature transformation operations implementations
@@ -105,7 +106,8 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
    `one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`,
    `label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree`
    `resample`,`FEDOT model using sklearn.utils.resample`,
-   `topological_features`,FEDOT model,`ts`
+   `topological_features`,FEDOT model,`ts`
+   `fast_topological_features`,FEDOT model,`ts`
 
 
 Models used

diff --git a/fedot/api/api_utils/assumptions/task_assumptions.py b/fedot/api/api_utils/assumptions/task_assumptions.py
@@ -52,7 +52,8 @@ class TSForecastingAssumptions(TaskAssumptions):
     def builders(self):
         return {
             'lagged_ridge':
-                PipelineBuilder().add_sequence('lagged', 'ridge'),
+                PipelineBuilder()
+            .add_sequence('lagged', 'ridge'),
             'topological':
                 PipelineBuilder()
             .add_node('lagged')

diff --git a/fedot/api/builder.py b/fedot/api/builder.py
@@ -329,6 +329,8 @@ def setup_pipeline_structure(
                     - ``diff_filter`` -> Derivative Filter Transformation
                     - ``cut`` -> Cut Transformation
                     - ``exog_ts`` -> Exogeneus Transformation
+                    - ``topological_features`` -> Topological features
+                    - ``fast_topological_features`` -> Fast implementation of topological features
 
             max_depth: max depth of a pipeline. Defaults to ``6``.
 

diff --git a/fedot/core/operations/evaluation/common_preprocessing.py b/fedot/core/operations/evaluation/common_preprocessing.py
@@ -8,6 +8,9 @@
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
     ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \
     PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation
+from fedot.core.operations.evaluation.operation_implementations.\
+    data_operations.topological.fast_topological_extractor import \
+    FastTopologicalFeaturesImplementation
 from fedot.core.operations.evaluation.operation_implementations.data_operations.topological. \
     topological_extractor import TopologicalFeaturesImplementation
 from fedot.core.operations.operation_parameters import OperationParameters
@@ -47,7 +50,8 @@ class FedotPreprocessingStrategy(EvaluationStrategy):
         'one_hot_encoding': OneHotEncodingImplementation,
         'label_encoding': LabelEncodingImplementation,
         'fast_ica': FastICAImplementation,
-        'topological_features': TopologicalFeaturesImplementation
+        'topological_features': TopologicalFeaturesImplementation,
+        'fast_topological_features': FastTopologicalFeaturesImplementation,
     }
 
     def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):

diff --git a/...ation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/...ation/operation_implementations/data_operations/topological/fast_topological_extractor.py
@@ -0,0 +1,59 @@
+from itertools import chain
+from typing import Optional
+
+import numpy as np
+from gph import ripser_parallel as ripser
+from joblib import Parallel, delayed
+
+from fedot.core.data.data import InputData, OutputData
+from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
+    DataOperationImplementation
+from fedot.core.operations.operation_parameters import OperationParameters
+
+
+class FastTopologicalFeaturesImplementation(DataOperationImplementation):
+    """Data operation that turns each row of a feature table into quantile summaries of ripser diagrams.
+
+    Every processed row is cut into overlapping windows, the windows are passed to
+    ``ripser`` as a point cloud, and each returned diagram is summarised by a fixed set
+    of quantiles of the difference between its second and first columns.
+    To halve the amount of work, rows are processed in pairs: two neighbouring rows are
+    averaged, features are computed once, and the result is reused for both rows.
+    """
+
+    def __init__(self, params: Optional[OperationParameters] = None):
+        """Read the operation hyperparameters.
+
+        Args:
+            params: container queried for ``window_size_as_share``, ``max_homology_dimension``,
+                ``metric``, ``stride`` and ``n_jobs``.
+        """
+        super().__init__(params)
+        # NOTE(review): ``params`` defaults to None but is dereferenced directly below,
+        # so constructing the operation without parameters raises AttributeError.
+        self.window_size_as_share = params.get('window_size_as_share')
+        self.max_homology_dimension = params.get('max_homology_dimension')
+        self.metric = params.get('metric')
+        self.stride = params.get('stride')
+        self.n_jobs = params.get('n_jobs')
+        # Quantile levels used to summarise every diagram
+        self.quantiles = (0.1, 0.25, 0.5, 0.75, 0.9)
+        # Number of values produced per diagram
+        self._shape = len(self.quantiles)
+        # Absolute window length, set in ``fit``
+        self._window_size = None
+
+    def fit(self, input_data: InputData):
+        """Derive the absolute window length from the number of feature columns.
+
+        The length is ``window_size_as_share`` of the column count, raised to at least 2
+        and then capped at ``column count - 2``.
+
+        Args:
+            input_data: data whose ``features`` is a two-dimensional array.
+
+        Returns:
+            the fitted operation itself.
+        """
+        self._window_size = int(input_data.features.shape[1] * self.window_size_as_share)
+        self._window_size = max(self._window_size, 2)
+        # NOTE(review): the upper cap is applied last, so for fewer than 4 columns
+        # the window length ends up below 2 - confirm such inputs cannot occur.
+        self._window_size = min(self._window_size, input_data.features.shape[1] - 2)
+        return self
+
+    def transform(self, input_data: InputData) -> OutputData:
+        """Compute topological features for every row of ``input_data.features``.
+
+        Args:
+            input_data: data whose ``features`` is a two-dimensional array.
+
+        Returns:
+            array with one row per input row and
+            ``len(quantiles) * (max_homology_dimension + 1)`` columns; NaN and infinite
+            values are replaced with 0.
+            NOTE(review): a plain numpy array is returned although the annotation says
+            ``OutputData`` - presumably the caller wraps it; confirm.
+        """
+        features = input_data.features
+        with Parallel(n_jobs=self.n_jobs, prefer='processes') as parallel:
+            # Rows are taken in pairs (a single row remains when the count is odd),
+            # every ``stride``-th column is kept and the pair is averaged into one series
+            topological_features = parallel(delayed(self._extract_features)
+                                            (np.mean(features[i:i+2, ::self.stride], axis=0))
+                                            for i in range(0, features.shape[0], 2))
+        # NOTE(review): range(0, n, 2) yields ceil(n / 2) items, so twice the length is
+        # never below n and this padding branch looks unreachable - confirm.
+        if len(topological_features) * 2 < features.shape[0]:
+            topological_features.append(topological_features[-1])
+        # Repeat every feature vector twice so both rows of a pair receive the same features
+        result = np.array(list(chain(*zip(topological_features, topological_features))))
+        # With an odd number of rows the duplication produces one extra row - drop it
+        if result.shape[0] > features.shape[0]:
+            result = result[:-1, :]
+        # In-place replacement of NaN and infinities
+        np.nan_to_num(result, copy=False, nan=0, posinf=0, neginf=0)
+        return result
+
+    def _extract_features(self, x):
+        """Summarise the ripser diagrams of a single one-dimensional series.
+
+        Args:
+            x: one-dimensional array (an averaged, strided row produced in ``transform``).
+
+        Returns:
+            one-dimensional array of length ``len(quantiles) * (max_homology_dimension + 1)``;
+            the slice belonging to an empty diagram stays zero.
+        """
+        # Sliding windows of length ``_window_size`` with step 1; each window is one point.
+        # NOTE(review): ``_window_size`` is derived in ``fit`` from the unstrided column count,
+        # so with stride > 1 it may exceed len(x) and no windows are produced - confirm.
+        x_sliced = np.array([x[i:self._window_size + i] for i in range(x.shape[0] - self._window_size + 1)])
+        x_processed = ripser(x_sliced,
+                             maxdim=self.max_homology_dimension,
+                             coeff=2,
+                             metric=self.metric,
+                             n_threads=1,
+                             collapse_edges=False)["dgms"]
+        result = np.zeros(self._shape * (self.max_homology_dimension + 1))
+        for i, xp in enumerate(x_processed):
+            if xp.shape[0] > 0:
+                # Quantiles of (second column - first column) of the i-th diagram,
+                # presumably death minus birth, i.e. lifetimes - verify against ripser docs
+                result[i * self._shape:(i + 1) * self._shape] = np.quantile(xp[:, 1] - xp[:, 0], self.quantiles,
+                                                                            overwrite_input=True, method='hazen')
+        return result
diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py
@@ -768,6 +768,22 @@ def get_parameters_dict(self):
                     'sampling-scope': [0.9, 0.99],
                     'type': 'continuous'}
             },
+            'fast_topological_features': {
+                'window_size_as_share': {
+                    'hyperopt-dist': hp.uniform,
+                    'sampling-scope': [0.1, 0.9],
+                    'type': 'continuous'
+                },
+                'max_homology_dimension': {
+                    'hyperopt-dist': hp.uniformint,
+                    'sampling-scope': [1, 5],
+                    'type': 'discrete'
+                },
+                'metric': {
+                    'hyperopt-dist': hp.choice,
+                    'sampling-scope': [['euclidean', 'manhattan', 'cosine']],
+                    'type': 'categorical'}
+            }
         }
 
         if self.custom_search_space is not None:

diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json
@@ -256,6 +256,20 @@
       "presets": [
         "ts"
       ],
+      "input_type": "[DataTypesEnum.table]",
+      "output_type": "[DataTypesEnum.table]",
+      "tags": [
+        "non_applicable_for_ts",
+        "feature_space_transformation"
+      ]
+    },
+    "fast_topological_features": {
+      "meta": "custom_ts_preprocessing",
+      "presets": [
+        "ts"
+      ],
+      "input_type": "[DataTypesEnum.table]",
+      "output_type": "[DataTypesEnum.table]",
       "tags": [
         "non_applicable_for_ts",
         "feature_space_transformation"

diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json
@@ -160,5 +160,12 @@
   },
   "topological_features": {
     "n_jobs": -1
+  },
+  "fast_topological_features": {
+    "n_jobs": 1,
+    "window_size_as_share": 0.66,
+    "max_homology_dimension": 1,
+    "metric": "euclidean",
+    "stride": 1
   }
 }
diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py
@@ -54,16 +54,16 @@ def get_data_for_testing(task_type, data_type, length=100, features_count=1,
         return None
 
     if task_type is TaskTypesEnum.ts_forecasting:
-        task = Task(task_type, TsForecastingParams(max(length // 10, 2)))
+        forecast_length = max(length // 10, 2)
+        task = Task(task_type, TsForecastingParams(forecast_length))
         if data_type is DataTypesEnum.ts:
             features = np.zeros(length) + value
         else:
             features = np.zeros((length, features_count)) + value
         if data_type is DataTypesEnum.table:
-            target = np.zeros(length) + value
+            target = np.zeros((length, forecast_length)) + value
         else:
             target = features
-
     else:
         task = Task(task_type)
         data_type = DataTypesEnum.table
@@ -156,11 +156,15 @@ def fit_time_for_operation(operation: OperationMetaInfo,
         return perf_counter() - start_time
 
     for task_type in operation.task_type:
-        for data_type in operation.input_types:
+        input_types = operation.input_types
+        if task_type is TaskTypesEnum.ts_forecasting:
+            if operation.input_types == [DataTypesEnum.table]:
+                input_types = [DataTypesEnum.ts]
+        for data_type in input_types:
             perfomance_values = []
             for length in data_lengths:
                 data = get_data_for_testing(task_type, data_type,
-                                            length=length, features_count=2,
+                                            length=length, features_count=10,
                                             random=True)
                 if data is not None:
                     min_evaluated_time = min(fit_time_for_operation(operation, data) for _ in range(times))