From 6d60801b09c929979eebd9057d1c1428de4b706a Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 7 Aug 2024 18:47:20 +0300
Subject: [PATCH 01/69] Adding logs & the ability to specify categorical data

---
 fedot/core/data/data.py              |  5 ++-
 fedot/preprocessing/data_types.py    | 52 +++++++++++++++++++++-------
 fedot/preprocessing/preprocessing.py | 12 +++++--
 3 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index 0101650ee2..f4ab7491a1 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -105,6 +105,7 @@ def from_numpy_time_series(cls,
     def from_dataframe(cls,
                        features_df: Union[pd.DataFrame, pd.Series],
                        target_df: Union[pd.DataFrame, pd.Series],
+                       categorical_idx: np.ndarray = None,
                        task: Union[Task, str] = 'classification',
                        data_type: DataTypesEnum = DataTypesEnum.table) -> InputData:
         """Import data from pandas DataFrame.
@@ -131,9 +132,11 @@ def from_dataframe(cls,
         features_names = features_df.columns.to_numpy()
         df = pd.concat([features_df, target_df], axis=1)
         features, target = process_target_and_features(df, target_columns)
+        categorical_features = features_df.loc[:, categorical_idx].to_numpy()
 
         return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type,
-                         features_names=features_names)
+                         features_names=features_names, categorical_features=categorical_features,
+                         categorical_idx=categorical_idx)
 
     @classmethod
     def from_csv(cls,
diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index a81700b964..18a15b08bc 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -85,10 +85,10 @@ def convert_data_for_fit(self, data: InputData):
         # And in target(s)
         data.target = self.target_types_converting(target=data.target, task=data.task)
-        data.supplementary_data.col_type_ids = self.prepare_column_types_info(predictors=data.features,
-                                                                              target=data.target,
-                                                                              task=data.task)
-
+        column_types_info = self.prepare_column_types_info(predictors=data.features, target=data.target, task=data.task)
+        data.supplementary_data.col_type_ids = column_types_info
+        col_types_info_message = prepare_log_message_with_cols_types(column_types_info, data.features_names)
+        self.log.message(f'The information about types of each feature are {col_types_info_message}')
         self._into_numeric_features_transformation_for_fit(data)
         # Launch conversion float and integer features into categorical
         self._into_categorical_features_transformation_for_fit(data)
@@ -155,7 +155,7 @@ def target_types_converting(self, target: np.ndarray, task: Task) -> np.ndarray:
     def prepare_column_types_info(self, predictors: np.ndarray,
                                   target: np.ndarray = None, task: Task = None) -> dict:
-        """ Prepare information about columns in a form of dictionary
+        """ Prepare information about columns in a form of dictionary.
 
         Dictionary has two keys: 'target' and 'features'
         """
         if self.features_columns_info.empty:
@@ -181,7 +181,7 @@ def _retain_columns_info_without_types_conflicts(self, data: InputData):
         Such columns have no conflicts with types converting.
""" if self.string_columns_transformation_failed: - self.log.warning(f'Columns with indices {self.string_columns_transformation_failed} were ' + self.log.message(f'Columns with indices {self.string_columns_transformation_failed} were ' f'removed during mixed types column converting due to conflicts.') data.features = self.remove_incorrect_features(data.features, self.string_columns_transformation_failed) @@ -287,13 +287,26 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData): # reduce dataframe to include only categorical features num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)] - cat_col_ids = num_df.columns - # Convert into string - data.features[:, cat_col_ids] = num_df.apply(convert_num_column_into_string_array).to_numpy() - # Columns need to be transformed into categorical (string) ones - self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str)) - # Update information about column types (in-place) - feature_type_ids[cat_col_ids] = TYPE_TO_ID[str] + + if data.categorical_idx is not None: + cat_col_ids = data.categorical_idx + else: + cat_col_ids = num_df.columns + + if np.size(cat_col_ids) > 0: + cat_features_names = data.features_names[cat_col_ids] + else: + cat_features_names = [] + + self.log.message(f'Preprocessing define next cols {cat_features_names} as categorical') + + if np.size(cat_col_ids) > 0: + # Convert into string + data.features[:, cat_col_ids] = num_df.apply(convert_num_column_into_string_array).to_numpy() + # Columns need to be transformed into categorical (string) ones + self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str)) + # Update information about column types (in-place) + feature_type_ids[cat_col_ids] = TYPE_TO_ID[str] def _into_categorical_features_transformation_for_predict(self, data: InputData): """ Apply conversion into categorical string column for every signed column """ @@ -499,3 +512,16 @@ def _process_predict_column_values_one_by_one(value, current_type: type): except ValueError: pass return new_value + + +def prepare_log_message_with_cols_types(col_types_info, features_names): + message = '\n' + for type_name, type_id in TYPE_TO_ID.items(): + count_types = np.count_nonzero(col_types_info['features'] == type_id) + features_idx = np.where(col_types_info['features'] == type_id)[0] + names_or_indexes = features_names[features_idx] if features_names is not None else features_idx + message = message + f'TYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \ + + message = message + f'Target: TYPE {_convertable_types[col_types_info["target"][0]]}' + + return message diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index a59f901d1b..634d35e299 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -270,10 +270,13 @@ def _find_features_lacking_nans(self, data: InputData, source_name: str): features = data.features axes_except_cols = (0,) + tuple(range(2, features.ndim)) are_allowed = np.mean(pd.isna(features), axis=axes_except_cols) < ALLOWED_NAN_PERCENT + self.log.message( + f'The number of features with an acceptable nan\'s percent value was taken ' + f'{len(are_allowed)} / {data.features.shape[1]}' + ) self.ids_relevant_features[source_name] = np.flatnonzero(are_allowed) - @staticmethod - def _drop_rows_with_nan_in_target(data: InputData) -> InputData: + def _drop_rows_with_nan_in_target(self, data: InputData) -> InputData: """ Drops rows with nans 
From 057c4d24ad58466c1d5a0d9b00cdc028f800653b Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Fri, 9 Aug 2024 16:06:24 +0300
Subject: [PATCH 02/69] Fixes categorical features

---
 fedot/core/data/data.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index f4ab7491a1..dcaefc2d73 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -132,7 +132,10 @@ def from_dataframe(cls,
         features_names = features_df.columns.to_numpy()
         df = pd.concat([features_df, target_df], axis=1)
         features, target = process_target_and_features(df, target_columns)
-        categorical_features = features_df.loc[:, categorical_idx].to_numpy()
+
+        categorical_features = None
+        if categorical_idx is not None:
+            categorical_features = features_df.loc[:, categorical_idx].to_numpy()
 
         return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type,
                          features_names=features_names, categorical_features=categorical_features,

From 4b4536af23a77d0bf4034ad5a166cb40af6d6d63 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Fri, 9 Aug 2024 18:38:31 +0300
Subject: [PATCH 03/69] Changing getsizeof to nbytes

---
 fedot/api/api_utils/api_data.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py
index 69c9f2a97b..1ba04a7f59 100644
--- a/fedot/api/api_utils/api_data.py
+++ b/fedot/api/api_utils/api_data.py
@@ -1,4 +1,3 @@
-import sys
 from datetime import datetime
 from typing import Dict, Union
 from typing import Optional
@@ -133,7 +132,7 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod
     def fit_transform(self, train_data: InputData) -> InputData:
         start_time = datetime.now()
         self.log.message('Preprocessing data')
-        memory_usage = convert_memory_size(sys.getsizeof(train_data.features))
+        memory_usage = convert_memory_size(train_data.features.nbytes)
         features_shape = train_data.features.shape
         target_shape = train_data.target.shape
         self.log.message(
@@ -144,7 +143,7 @@ def fit_transform(self, train_data: InputData) -> InputData:
         train_data = self.preprocessor.convert_indexes_for_fit(pipeline=Pipeline(), data=train_data)
 
         train_data.supplementary_data.is_auto_preprocessed = True
-        memory_usage = convert_memory_size(sys.getsizeof(train_data.features))
+        memory_usage = convert_memory_size(train_data.features.nbytes)
         features_shape = train_data.features.shape
         target_shape = train_data.target.shape
         self.log.message(
@@ -156,7 +155,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData:
         start_time = datetime.now()
         self.log.message('Preprocessing data')
-        memory_usage = convert_memory_size(sys.getsizeof(test_data))
+        memory_usage = convert_memory_size(test_data.features.nbytes)
         features_shape = test_data.features.shape
         target_shape = test_data.target.shape
         self.log.message(
@@ -168,7 +167,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData:
         test_data = self.preprocessor.update_indices_for_time_series(test_data)
 
         test_data.supplementary_data.is_auto_preprocessed = True
-        memory_usage = convert_memory_size(sys.getsizeof(test_data))
+        memory_usage = convert_memory_size(test_data.features.nbytes)
         features_shape = test_data.features.shape
         target_shape = test_data.target.shape
         self.log.message(
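Context for PATCH 03: `sys.getsizeof` reports the Python object, not necessarily the data it refers to — on the old code path `sys.getsizeof(test_data)` measured only the tiny `InputData` wrapper, and for array views the underlying buffer is not counted either, while `.nbytes` always reports the logical buffer size. A quick standalone check:

import sys
import numpy as np

arr = np.zeros((10_000, 100))
print(arr.nbytes)          # 8_000_000 bytes: the real buffer size
print(sys.getsizeof(arr))  # similar here, because this array owns its buffer

view = arr[:, :50]
print(view.nbytes)          # 4_000_000: logical size of the slice
print(sys.getsizeof(view))  # ~100 bytes: a view's buffer is not counted
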
From ae6eb42667384039c721010a36e30bdcdda50d89 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Fri, 9 Aug 2024 18:45:45 +0300
Subject: [PATCH 04/69] Delete _clean_extra_spaces

---
 fedot/preprocessing/preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 634d35e299..f4d40e27da 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -227,7 +227,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             # TODO andreygetmanov to new class text preprocessing?
             replace_nans_with_empty_strings(data)
         elif data_type_is_table(data):
-            data = self._clean_extra_spaces(data)
+            # data = self._clean_extra_spaces(data)
             # Process binary categorical features
             if is_fit_stage:
                 data = self.binary_categorical_processors[source_name].fit_transform(data)

From f0df60ceb30a923f4e9bbfe2e8b6adee7b074b83 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Tue, 13 Aug 2024 17:59:51 +0300
Subject: [PATCH 05/69] Adding more logs, adding OptimisedFeature storage,
 refactoring fitting BinaryCategoricalPreprocessor, fix bugs, adding reduce
 memory size, delete clean_extra_spaces

---
 fedot/api/api_utils/api_data.py      |  27 ++++++-
 fedot/core/data/data.py              |  58 +++++++++++++-
 fedot/preprocessing/categorical.py   |  56 +++++++------
 fedot/preprocessing/data_types.py    |   5 +-
 fedot/preprocessing/preprocessing.py | 115 +++++++++++++++++++--------
 5 files changed, 197 insertions(+), 64 deletions(-)

diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py
index 1ba04a7f59..9d6d1a33bd 100644
--- a/fedot/api/api_utils/api_data.py
+++ b/fedot/api/api_utils/api_data.py
@@ -6,7 +6,7 @@ from golem.core.log import default_log
 
 from fedot.api.api_utils.data_definition import data_strategy_selector, FeaturesType, TargetType
-from fedot.core.data.data import InputData, OutputData, data_type_is_table
+from fedot.core.data.data import InputData, OutputData, data_type_is_table, OptimisedFeature
 from fedot.core.data.data_preprocessing import convert_into_column
 from fedot.core.data.multi_modal import MultiModalData
 from fedot.core.pipelines.pipeline import Pipeline
@@ -138,12 +138,26 @@ def fit_transform(self, train_data: InputData) -> InputData:
         self.log.message(
             f'Train Data (Original) Memory Usage: {memory_usage} Data Shapes: {features_shape, target_shape}')
 
+        self.log.message('- Obligatory preprocessing started')
         train_data = self.preprocessor.obligatory_prepare_for_fit(data=train_data)
+
+        self.log.message('- Optional preprocessing started')
         train_data = self.preprocessor.optional_prepare_for_fit(pipeline=Pipeline(), data=train_data)
+
+        self.log.message('- Converting indexes for fitting started')
         train_data = self.preprocessor.convert_indexes_for_fit(pipeline=Pipeline(), data=train_data)
 
+        self.log.message('- Reducing memory started')
+        train_data = self.preprocessor.reduce_memory_size(data=train_data)
+
         train_data.supplementary_data.is_auto_preprocessed = True
-        memory_usage = convert_memory_size(train_data.features.nbytes)
+        if isinstance(train_data.features, OptimisedFeature):
+            memory_usage = convert_memory_size(train_data.features.memory_usage)
+
+        else:
+            memory_usage = convert_memory_size(train_data.features.nbytes)
+
         features_shape = train_data.features.shape
         target_shape = train_data.target.shape
         self.log.message(
@@ -167,7 +181,14 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData:
         test_data = self.preprocessor.update_indices_for_time_series(test_data)
 
         test_data.supplementary_data.is_auto_preprocessed = True
-        memory_usage = convert_memory_size(test_data.features.nbytes)
+        test_data = self.preprocessor.reduce_memory_size(data=test_data)
+
+        if isinstance(test_data.features, OptimisedFeature):
+            memory_usage = convert_memory_size(test_data.features.memory_usage)
+
+        else:
+            memory_usage = convert_memory_size(test_data.features.nbytes)
+
         features_shape = test_data.features.shape
         target_shape = test_data.target.shape
         self.log.message(
diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index dcaefc2d73..ed3433beca 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -42,7 +42,7 @@ class Data:
     idx: np.ndarray
     task: Task
     data_type: DataTypesEnum
-    features: np.ndarray
+    features: Optional[np.ndarray, OptimisedFeature]
     categorical_features: Optional[np.ndarray] = None
     categorical_idx: Optional[np.ndarray] = None
     numerical_idx: Optional[np.ndarray] = None
@@ -671,6 +671,62 @@ class OutputData(Data):
     target: Optional[np.ndarray] = None
     encoded_idx: Optional[np.ndarray] = None
 
+
+@dataclass
+class OptimisedFeature:
+    _columns: list = field(default_factory=list, init=False)
+    _shape: tuple = field(default=(0, 0), init=False)
+    _memory_usage: int = 0
+    ndim: int = 2
+
+    def add_column(self, data: np.ndarray):
+        if not isinstance(data, np.ndarray):
+            raise ValueError("Data should be a NumPy array.")
+
+        if self._shape == (0, 0):
+            self._shape = (data.shape[0], 1)
+        else:
+            if data.shape[0] != self._shape[0]:
+                raise ValueError("All columns must have the same number of rows.")
+
+            self._shape = (self._shape[0], self._shape[1] + 1)
+
+        self._columns.append(data)
+        self._memory_usage += data.nbytes
+
+    def __getitem__(self, key):
+        if isinstance(key, tuple):
+            row_idx, col_idx = key
+            if isinstance(col_idx, int):
+                return self._columns[col_idx][row_idx]
+            else:
+                selected_columns = [self._columns[i] for i in col_idx]
+                return np.column_stack(selected_columns)[row_idx]
+        else:
+            result = np.column_stack(self._columns)[key]
+            return result if result.ndim > 1 else result.ravel()
+
+    def __setitem__(self, key, value):
+        if isinstance(key, tuple):
+            row_idx, col_idx = key
+            if isinstance(col_idx, int):
+                self._columns[col_idx][row_idx] = value
+            else:
+                for i, col in zip(col_idx, value):
+                    self._columns[i][row_idx] = col
+        else:
+            raise NotImplementedError("Setting values by index without specifying a column is not supported.")
+
+    def __len__(self):
+        return self._shape[0] if self._columns else 0
+
+    @property
+    def shape(self):
+        return self._shape
+
+    @property
+    def memory_usage(self):
+        return self._memory_usage
+
 
 def _resize_image(file_path: str, target_size: Tuple[int, int]):
     """Function resizes and rewrites the input image
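The idea behind the columnar `OptimisedFeature` storage above is that a single 2-D `np.ndarray` forces one common dtype on every column, so a single string column promotes the whole table to a wide dtype; keeping each column as its own array lets every column keep its narrowest dtype. A rough standalone comparison of the two layouts (exact sizes vary, the point is the ratio):

import numpy as np

n = 100_000
age = np.random.randint(0, 90, n).astype(np.int8)    # 1 byte per value
price = np.random.rand(n).astype(np.float32)         # 4 bytes per value
city = np.random.choice(['msk', 'spb'], n)           # fixed-width unicode

# A single 2-D array must pick one common dtype for all three columns,
# so every cell is promoted to a wide fixed-width unicode type
matrix = np.column_stack([age, price, city])
print(matrix.dtype, matrix.nbytes)

# Column-wise storage keeps each column at its own narrow dtype
print(sum(col.nbytes for col in (age, price, city)))
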
diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py
index 5cde088d7a..509bb811c0 100644
--- a/fedot/preprocessing/categorical.py
+++ b/fedot/preprocessing/categorical.py
@@ -24,31 +24,39 @@ def fit(self, input_data: InputData):
         Find indices of columns which are contains categorical values.
         Binary features and at the same time has str objects.
         If there are such features - convert it into int
         """
-        feature_type_ids = input_data.supplementary_data.col_type_ids['features']
-        categorical_ids, _ = find_categorical_columns(input_data.features,
-                                                      feature_type_ids)
-
-        binary_ids_to_convert = []
-        for column_id, column in zip(categorical_ids, input_data.features[:, categorical_ids].T):
-            pd_column = pd.Series(column, name=column_id, copy=True)
-            is_nan = pd_column.isna()
-            column_nuniques = pd_column.nunique(dropna=False)
-            if is_nan.sum():
-                # This categorical column has nans
-                pd_column[is_nan] = FEDOT_STR_NAN
-
-            if column_nuniques <= 3:
-                # There is column with binary categories and gaps
-                self.binary_features_with_nans.append(column_id)
+        if np.size(input_data.categorical_idx) != 0:
+            categorical_columns = input_data.features[:, input_data.categorical_idx].T
+            nan_matrix = np.isnan(categorical_columns.astype(float, copy=False))
+            nuniques = np.array([len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix)])
+
+            binary_ids_to_convert = []
+
+            for i, (column_id, column_nuniques, is_nan) in enumerate(
+                zip(input_data.categorical_idx, nuniques, nan_matrix)
+            ):
+                if is_nan.any():
+                    # This categorical column has nans
+                    categorical_columns[i, is_nan] = FEDOT_STR_NAN
+                    column_nuniques = len(set(categorical_columns[i]))
+
+                if column_nuniques <= 3:
+                    # There is column with binary categories and gaps
+                    self.binary_features_with_nans.append(column_id)
+                    binary_ids_to_convert.append(column_id)
+                    self._train_encoder(pd.Series(categorical_columns[i], name=column_id))
+
+                elif column_nuniques <= 2:
+                    # Column contains binary string feature
                 binary_ids_to_convert.append(column_id)
-                self._train_encoder(pd_column)
-            elif column_nuniques <= 2:
-                # Column contains binary string feature
-                binary_ids_to_convert.append(column_id)
-                # Train encoder for current column
-                self._train_encoder(pd_column)
-
-        self.binary_ids_to_convert = binary_ids_to_convert
+                    # Train encoder for current column
+                    self._train_encoder(pd.Series(categorical_columns[i], name=column_id))
+
+            # Remove binary columns from categorical_idx
+            input_data.categorical_idx = [idx for idx in input_data.categorical_idx if idx not in binary_ids_to_convert]
+            self.binary_ids_to_convert = binary_ids_to_convert
+
+            # TODO: Add log.message with binary ids
+
         return self
 
     def transform(self, input_data: InputData) -> InputData:
diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index 18a15b08bc..aa5038c201 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -88,7 +88,7 @@ def convert_data_for_fit(self, data: InputData):
         column_types_info = self.prepare_column_types_info(predictors=data.features, target=data.target, task=data.task)
         data.supplementary_data.col_type_ids = column_types_info
         col_types_info_message = prepare_log_message_with_cols_types(column_types_info, data.features_names)
-        self.log.message(f'The information about types of each feature are {col_types_info_message}')
+        self.log.message(f'--- The information about types of each feature are {col_types_info_message}')
         self._into_numeric_features_transformation_for_fit(data)
         # Launch conversion float and integer features into categorical
         self._into_categorical_features_transformation_for_fit(data)
@@ -292,13 +292,14 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData):
             cat_col_ids = data.categorical_idx
         else:
             cat_col_ids = num_df.columns
+            data.categorical_idx = cat_col_ids
 
         if np.size(cat_col_ids) > 0:
             cat_features_names = data.features_names[cat_col_ids]
         else:
             cat_features_names = []
 
-        self.log.message(f'Preprocessing define next cols {cat_features_names} as categorical')
+        self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical')
 
         if np.size(cat_col_ids) > 0:
             # Convert into string
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index f4d40e27da..a0271086ad 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -7,7 +7,7 @@ from golem.core.paths import copy_doc
 from sklearn.preprocessing import LabelEncoder
 
-from fedot.core.data.data import InputData, np_datetime_to_numeric
+from fedot.core.data.data import InputData, np_datetime_to_numeric, OptimisedFeature
 from fedot.core.data.data import OutputData, data_type_is_table, data_type_is_text, data_type_is_ts
 from fedot.core.data.data_preprocessing import (
     data_has_categorical_features,
@@ -29,7 +29,7 @@ from fedot.preprocessing.base_preprocessing import BasePreprocessor
 from fedot.preprocessing.categorical import BinaryCategoricalPreprocessor
 from fedot.preprocessing.data_type_check import exclude_image, exclude_multi_ts, exclude_ts
-from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector
+from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, _convertable_types
 from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME, PipelineStructureExplorer
 
 # The allowed percent of empty samples in features.
@@ -192,6 +192,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             return data
 
         # Convert datetime data to numerical
+        self.log.message('-- Converting datetime data to numerical')
         data.features = np_datetime_to_numeric(data.features)
         if data.target is not None:
             data.target = np_datetime_to_numeric(data.target)
@@ -200,36 +201,49 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
         data.idx = np.asarray(data.idx)
 
         # Fix tables / time series sizes
+        self.log.message('-- Fixing table / time series shapes')
         data = self._correct_shapes(data)
         replace_inf_with_nans(data)
 
         # Find incorrect features which must be removed
         if is_fit_stage:
+            self.log.message('-- Finding incorrect features')
             self._find_features_lacking_nans(data, source_name)
+
+        self.log.message('-- Removing incorrect features')
         self._take_only_correct_features(data, source_name)
 
         if is_fit_stage:
+            self.log.message('-- Dropping rows with nan\'s in target')
             data = self._drop_rows_with_nan_in_target(data)
 
             # Column types processing - launch after correct features selection
+            self.log.message('-- Features types processing')
             self.types_correctors[source_name].convert_data_for_fit(data)
+
             if self.types_correctors[source_name].target_converting_has_errors:
+                self.log.message('-- Dropping rows with nan\'s in target')
                 data = self._drop_rows_with_nan_in_target(data)
+
             # Train Label Encoder for categorical target if necessary and apply it
+            self.log.message('-- Applying the Label Encoder to Target due to the presence of categories')
             if source_name not in self.target_encoders:
                 self._train_target_encoder(data, source_name)
+
             data.target = self._apply_target_encoding(data, source_name)
+
         else:
+            self.log.message('-- Converting data for predict')
             self.types_correctors[source_name].convert_data_for_predict(data)
 
         # TODO andreygetmanov target encoding must be obligatory for all data types
         if data_type_is_text(data):
             # TODO andreygetmanov to new class text preprocessing?
             replace_nans_with_empty_strings(data)
+
         elif data_type_is_table(data):
-            # data = self._clean_extra_spaces(data)
-            # Process binary categorical features
             if is_fit_stage:
+                self.log.message('-- Searching binary categorical features to encode them')
                 data = self.binary_categorical_processors[source_name].fit_transform(data)
             else:
                 data = self.binary_categorical_processors[source_name].transform(data)
@@ -252,10 +266,13 @@ def _prepare_optional(self, pipeline, data: InputData, source_name: str):
             (data_has_missing_values, 'imputation', self._apply_imputation_unidata),
             (data_has_categorical_features, 'encoding', self._apply_categorical_encoding)
         ]:
+            self.log.message(f'-- Deciding to apply {tag_to_check} for data')
             if has_problems(data):
+                self.log.message(f'-- Finding {tag_to_check} is required and trying to apply')
                 # Data contains missing values
                 has_tag = PipelineStructureExplorer.check_structure_by_tag(
                     pipeline, tag_to_check=tag_to_check, source_name=source_name)
+
                 if not has_tag:
                     data = action_if_no_tag(data, source_name)
 
@@ -271,7 +288,7 @@ def _find_features_lacking_nans(self, data: InputData, source_name: str):
         axes_except_cols = (0,) + tuple(range(2, features.ndim))
         are_allowed = np.mean(pd.isna(features), axis=axes_except_cols) < ALLOWED_NAN_PERCENT
         self.log.message(
-            f'The number of features with an acceptable nan\'s percent value was taken '
+            f'--- The number of features with an acceptable nan\'s percent value was taken '
             f'{len(are_allowed)} / {data.features.shape[1]}'
         )
         self.ids_relevant_features[source_name] = np.flatnonzero(are_allowed)
@@ -303,39 +320,12 @@ def _drop_rows_with_nan_in_target(self, data: InputData) -> InputData:
         data.idx = np.array(data.idx)[non_nan_row_ids]
 
         self.log.message(
-            f'The number of rows with an nan\'s in target is '
+            f'--- The number of rows with an nan\'s in target is '
             f'{sum(number_nans_per_rows)} / {data.features.shape[0]}'
         )
 
         return data
 
-    @staticmethod
-    def _clean_extra_spaces(data: InputData) -> InputData:
-        """
-        Removes extra spaces from data.
-        Transforms cells in columns from ' x ' to 'x'
-
-        Args:
-            data: to be stripped
-
-        Returns:
-            cleaned ``data``
-        """
-
-        def strip_all_strs(item: Union[object, str]):
-            try:
-                return item.strip()
-            except AttributeError:
-                # not a str object
-                return item
-
-        features_df = pd.DataFrame(data.features)
-        mixed_or_str = features_df.select_dtypes(object)
-        features_df[mixed_or_str.columns] = mixed_or_str.applymap(strip_all_strs)
-
-        data.features = features_df.to_numpy()
-        return data
-
     @copy_doc(BasePreprocessor.label_encoding_for_fit)
     def label_encoding_for_fit(self, data: InputData, source_name: str = DEFAULT_SOURCE_NAME):
         if data_has_categorical_features(data):
@@ -369,20 +359,26 @@ def _apply_imputation_unidata(self, data: InputData, source_name: str) -> InputD
         Returns:
             imputed ``data``
         """
+        self.log.message('--- Initialising imputer')
         imputer = self.features_imputers.get(source_name)
+
         if not imputer:
             imputer = ImputationImplementation()
+            self.log.message('--- Fitting and transforming imputer for missings')
             output_data = imputer.fit_transform(data)
             self.features_imputers[source_name] = imputer
+
         else:
+            self.log.message('--- Transforming imputer for missings')
            output_data = imputer.transform(data)
+
         data.features = output_data.predict
         return data
 
     def _apply_categorical_encoding(self, data: InputData, source_name: str) -> InputData:
        """ Transforms the data inplace. Uses the same transformations as for the training data if trained already.
-        Otherwise fits appropriate encoder and converts data's categorical features with it.
+        Otherwise, fits appropriate encoder and converts data's categorical features with it.
 
         Args:
             data: data to be transformed
 
         Returns:
             encoded ``data``
         """
+        self.log.message('--- Initialising categorical encoder')
         encoder = self.features_encoders.get(source_name)
+
         if encoder is None:
             encoder = LabelEncodingImplementation() if self.use_label_encoder else OneHotEncodingImplementation()
             encoder.fit(data)
             self.features_encoders[source_name] = encoder
+
+        self.log.message(f'--- {encoder.__class__.__name__} was choose')
+        self.log.message(f'--- Fitting and transforming data')
         output_data = encoder.transform_for_fit(data)
         output_data.predict = output_data.predict.astype(float)
         data.features = output_data.predict
@@ -550,3 +551,49 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD
             last_id = len(input_data.idx)
             input_data.idx = np.arange(last_id, last_id + input_data.task.task_params.forecast_length)
         return test_data
+
+    def reduce_memory_size(self, data: InputData) -> InputData:
+        def reduce_mem_usage_np(arr, initial_types):
+            reduced_columns = OptimisedFeature()
+
+
+            for i in range(arr.shape[1]):
+                col = arr[:, i]
+                init_type = _convertable_types[initial_types[i]]
+                col = col.astype(init_type)
+                col_type = col.dtype.name
+
+                if col_type not in ['object']:
+                    c_min = col.max()
+                    c_max = col.max()
+
+                    if np.issubdtype(col.dtype, np.integer):
+                        if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                            reduced_columns.add_column(col.astype(np.int8))
+                        elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                            reduced_columns.add_column(col.astype(np.int16))
+                        elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                            reduced_columns.add_column(col.astype(np.int32))
+                        elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
+                            reduced_columns.add_column(col.astype(np.int64))
+
+                    elif np.issubdtype(col.dtype, np.floating):
+                        if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
+                            reduced_columns.add_column(col.astype(np.float16))
+                        elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                            reduced_columns.add_column(col.astype(np.float32))
+                        else:
+                            reduced_columns.add_column(col.astype(np.float64))
+                else:
+                    reduced_columns.add_column(col)
+
+            return reduced_columns
+
+        if isinstance(data, InputData):
+            self.log.message('-- Reduce memory in features')
+            data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features'])
+
+            self.log.message('-- Reduce memory in target')
+            data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
+
+        return data
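A standalone sketch of the downcasting rule that `reduce_mem_usage_np` above applies, with one caveat: a safe cast has to compare the column minimum against the candidate type's lower bound and the maximum against its upper bound, whereas the hunk above assigns `col.max()` to both `c_min` and `c_max`. The sketch keeps both bounds:

import numpy as np

def downcast_numeric(col: np.ndarray) -> np.ndarray:
    # Pick the narrowest dtype whose range covers the column's values
    c_min, c_max = col.min(), col.max()
    if np.issubdtype(col.dtype, np.integer):
        for candidate in (np.int8, np.int16, np.int32, np.int64):
            info = np.iinfo(candidate)
            if info.min < c_min and c_max < info.max:
                return col.astype(candidate)
    elif np.issubdtype(col.dtype, np.floating):
        for candidate in (np.float16, np.float32):
            info = np.finfo(candidate)
            if info.min < c_min and c_max < info.max:
                return col.astype(candidate)  # note: float16/float32 trade precision for memory
        return col.astype(np.float64)
    return col

print(downcast_numeric(np.arange(1000, dtype=np.int64)).dtype)  # int16: 999 exceeds the int8 range
print(downcast_numeric(np.array([0.5, 1e30])).dtype)            # float32: 1e30 exceeds float16
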
From e4c13f54a0689235646fb0be1d027258d456b24c Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 17:14:00 +0300
Subject: [PATCH 06/69] @Lopa10ko requested changes

---
 fedot/api/api_utils/api_data.py      |  7 +------
 fedot/core/data/data.py              | 10 +++++-----
 fedot/preprocessing/data_types.py    |  4 ++--
 fedot/preprocessing/preprocessing.py |  6 +++---
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py
index 9d6d1a33bd..3ac1c9242b 100644
--- a/fedot/api/api_utils/api_data.py
+++ b/fedot/api/api_utils/api_data.py
@@ -183,12 +183,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData:
 
         test_data = self.preprocessor.reduce_memory_size(data=test_data)
 
-        if isinstance(test_data.features, OptimisedFeature):
-            memory_usage = convert_memory_size(test_data.features.memory_usage)
-
-        else:
-            memory_usage = convert_memory_size(test_data.features.nbytes)
-
+        memory_usage = convert_memory_size(test_data.features.nbytes)
         features_shape = test_data.features.shape
         target_shape = test_data.target.shape
         self.log.message(
diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index ed3433beca..2650d2cedd 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -42,7 +42,7 @@ class Data:
     idx: np.ndarray
     task: Task
     data_type: DataTypesEnum
-    features: Optional[np.ndarray, OptimisedFeature]
+    features: Union[np.ndarray, OptimisedFeature]
     categorical_features: Optional[np.ndarray] = None
     categorical_idx: Optional[np.ndarray] = None
     numerical_idx: Optional[np.ndarray] = None
@@ -675,7 +675,7 @@ class OptimisedFeature:
     _columns: list = field(default_factory=list, init=False)
     _shape: tuple = field(default=(0, 0), init=False)
-    _memory_usage: int = 0
+    nbytes: int = 0
     ndim: int = 2
 
     def add_column(self, data: np.ndarray):
@@ -691,7 +691,7 @@ def add_column(self, data: np.ndarray):
             self._shape = (self._shape[0], self._shape[1] + 1)
 
         self._columns.append(data)
-        self._memory_usage += data.nbytes
+        self.nbytes += data.nbytes
 
     def __getitem__(self, key):
@@ -724,8 +724,8 @@ def shape(self):
         return self._shape
 
     @property
-    def memory_usage(self):
-        return self._memory_usage
+    def nbytes(self):
+        return self.nbytes
diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index aa5038c201..f8436c4932 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -521,8 +521,8 @@ def prepare_log_message_with_cols_types(col_types_info, features_names):
         count_types = np.count_nonzero(col_types_info['features'] == type_id)
         features_idx = np.where(col_types_info['features'] == type_id)[0]
         names_or_indexes = features_names[features_idx] if features_names is not None else features_idx
-        message = message + f'TYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \
+        message += f'TYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \
 
-    message = message + f'Target: TYPE {_convertable_types[col_types_info["target"][0]]}'
+    message += f'Target: TYPE {_convertable_types[col_types_info["target"][0]]}'
 
     return message
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index a0271086ad..6128090bc4 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -214,7 +214,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
         self._take_only_correct_features(data, source_name)
 
         if is_fit_stage:
-            self.log.message('-- Dropping rows with nan\'s in target')
+            self.log.message('-- Dropping rows with NaN-values in target')
             data = self._drop_rows_with_nan_in_target(data)
 
             # Column types processing - launch after correct features selection
@@ -222,7 +222,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             self.types_correctors[source_name].convert_data_for_fit(data)
 
             if self.types_correctors[source_name].target_converting_has_errors:
-                self.log.message('-- Dropping rows with nan\'s in target')
+                self.log.message('-- Dropping rows with NaN-values in target')
                 data = self._drop_rows_with_nan_in_target(data)
 
             # Train Label Encoder for categorical target if necessary and apply it
@@ -395,7 +395,7 @@ def _apply_categorical_encoding(self, data: InputData, source_name: str) -> Inpu
             encoder.fit(data)
             self.features_encoders[source_name] = encoder
 
-        self.log.message(f'--- {encoder.__class__.__name__} was choose')
+        self.log.message(f'--- {encoder.__class__.__name__} was chosen')
         self.log.message(f'--- Fitting and transforming data')
         output_data = encoder.transform_for_fit(data)

From c0f7ff322eca01c8664c5db0c2e565024e417be8 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 18:00:54 +0300
Subject: [PATCH 07/69] Fix bug with nbytes

---
 fedot/core/data/data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index 2650d2cedd..3077f42e92 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -675,7 +675,7 @@ class OptimisedFeature:
     _columns: list = field(default_factory=list, init=False)
     _shape: tuple = field(default=(0, 0), init=False)
-    nbytes: int = 0
+    _nbytes: int = 0
     ndim: int = 2
 
     def add_column(self, data: np.ndarray):
@@ -691,7 +691,7 @@ def add_column(self, data: np.ndarray):
             self._shape = (self._shape[0], self._shape[1] + 1)
 
         self._columns.append(data)
-        self.nbytes += data.nbytes
+        self._nbytes += data.nbytes
 
     def __getitem__(self, key):
@@ -725,7 +725,7 @@ def shape(self):
 
     @property
     def nbytes(self):
-        return self.nbytes
+        return self._nbytes

From 6d7bf975c422e13bbc27c182419bc7b85041d2a8 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 18:01:52 +0300
Subject: [PATCH 08/69] Fix bug with cat_features_names if there aren't exists
 features_names

---
 fedot/preprocessing/data_types.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index f8436c4932..7475a51c56 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -295,11 +295,13 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData):
         if np.size(cat_col_ids) > 0:
-            cat_features_names = data.features_names[cat_col_ids]
+            if data.features_names is not None:
+                cat_features_names = data.features_names[cat_col_ids]
+                self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical')
+            else:
+                self.log.message(f'--- Preprocessing define next cols {cat_col_ids} as categorical')
         else:
-            cat_features_names = []
-
-        self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical')
+            self.log.message(f'--- Preprocessing was unable to define the categorical columns')
 
         if np.size(cat_col_ids) > 0:
             # Convert into string

From 705529a851e402c8ca532afe92e004eab9a48805 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 18:02:19 +0300
Subject: [PATCH 09/69] Adding reduce_memory_size to pipeline._preprocess

---
 fedot/core/pipelines/pipeline.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fedot/core/pipelines/pipeline.py b/fedot/core/pipelines/pipeline.py
index 66c62b09e4..76b7c4a7ac 100644
--- a/fedot/core/pipelines/pipeline.py
+++ b/fedot/core/pipelines/pipeline.py
@@ -137,18 +137,17 @@ def _preprocess(self, input_data: Union[InputData, MultiModalData], *, is_fit_st
         if is_fit_stage:
             copied_input_data = self.preprocessor.obligatory_prepare_for_fit(copied_input_data)
             # Make additional preprocessing if it is needed
-            copied_input_data = self.preprocessor.optional_prepare_for_fit(pipeline=self,
-                                                                           data=copied_input_data)
-            copied_input_data = self.preprocessor.convert_indexes_for_fit(pipeline=self,
-                                                                          data=copied_input_data)
+            copied_input_data = self.preprocessor.optional_prepare_for_fit(pipeline=self, data=copied_input_data)
+            copied_input_data = self.preprocessor.convert_indexes_for_fit(pipeline=self, data=copied_input_data)
+            copied_input_data = self.preprocessor.reduce_memory_size(data=copied_input_data)
         else:
             copied_input_data = self.preprocessor.obligatory_prepare_for_predict(copied_input_data)
             # Make additional preprocessing if it is needed
-            copied_input_data = self.preprocessor.optional_prepare_for_predict(pipeline=self,
-                                                                               data=copied_input_data)
-            copied_input_data = self.preprocessor.convert_indexes_for_predict(pipeline=self,
-                                                                              data=copied_input_data)
+            copied_input_data = self.preprocessor.optional_prepare_for_predict(pipeline=self, data=copied_input_data)
+            copied_input_data = self.preprocessor.convert_indexes_for_predict(pipeline=self, data=copied_input_data)
             copied_input_data = self.preprocessor.update_indices_for_time_series(copied_input_data)
+            copied_input_data = self.preprocessor.reduce_memory_size(data=copied_input_data)
+
         return copied_input_data
 
     def _postprocess(self, copied_input_data: Optional[InputData], result: OutputData,

From 4c7d281eefed6092ea6e6057a05929b9d7ff65fe Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 19:20:13 +0300
Subject: [PATCH 10/69] Return to Pandas for nan_matrix

---
 fedot/preprocessing/categorical.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py
index 509bb811c0..c879a25744 100644
--- a/fedot/preprocessing/categorical.py
+++ b/fedot/preprocessing/categorical.py
@@ -26,8 +26,10 @@ def fit(self, input_data: InputData):
         """
         if np.size(input_data.categorical_idx) != 0:
             categorical_columns = input_data.features[:, input_data.categorical_idx].T
-            nan_matrix = np.isnan(categorical_columns.astype(float, copy=False))
-            nuniques = np.array([len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix)])
+            nan_matrix = pd.DataFrame(categorical_columns.T, columns=input_data.categorical_idx).isna().values
+            nuniques = np.array([
+                len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix.T)
+            ])
 
             binary_ids_to_convert = []
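The reason for the switch back to pandas in PATCH 10 is that `np.isnan` only works on float data: categorical columns arrive as `object` (or string) arrays, where casting to float raises for any non-numeric value, while `pd.isna` handles `None`/`np.nan` element-wise on arbitrary objects. A minimal reproduction:

import numpy as np
import pandas as pd

col = np.array(['msk', None, 'spb', float('nan')], dtype=object)

print(pd.isna(col))                # [False  True False  True]

try:
    np.isnan(col.astype(float))    # the approach this commit replaces
except ValueError as err:
    print(err)                     # could not convert string to float: 'msk'
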
From 75901ae96fece246c669ca78e714900a5eb7577b Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 19:20:47 +0300
Subject: [PATCH 11/69] Change logic of
 _into_categorical_features_transformation_for_fit

---
 fedot/preprocessing/data_types.py | 52 +++++++++++++++----------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index 7475a51c56..ce9442d1b3 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -279,37 +279,37 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData):
         Perform automated categorical features determination.
         If feature column contains int or float values with few unique values (less than 13)
         """
-        feature_type_ids = data.supplementary_data.col_type_ids['features']
-        is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]])
-        numeric_type_ids = np.flatnonzero(is_numeric_type)
-        num_df = pd.DataFrame(data.features[:, numeric_type_ids], columns=numeric_type_ids)
-        nuniques = num_df.nunique(dropna=True)
-
-        # reduce dataframe to include only categorical features
-        num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)]
+        if data.categorical_idx is None:
+            feature_type_ids = data.supplementary_data.col_type_ids['features']
+            is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]])
+            numeric_type_ids = np.flatnonzero(is_numeric_type)
+            num_df = pd.DataFrame(data.features[:, numeric_type_ids], columns=numeric_type_ids)
+            nuniques = num_df.nunique(dropna=True)
 
-        if data.categorical_idx is not None:
-            cat_col_ids = data.categorical_idx
-        else:
-            cat_col_ids = num_df.columns
-            data.categorical_idx = cat_col_ids
-
-        if np.size(cat_col_ids) > 0:
-            if data.features_names is not None:
-                cat_features_names = data.features_names[cat_col_ids]
-                self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical')
-            else:
-                self.log.message(f'--- Preprocessing define next cols {cat_col_ids} as categorical')
-        else:
-            self.log.message(f'--- Preprocessing was unable to define the categorical columns')
+            # reduce dataframe to include only categorical features
+            num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)]
+            cat_col_from_heuristic_rule_ids = num_df.columns
 
-        if np.size(cat_col_ids) > 0:
             # Convert into string
-            data.features[:, cat_col_ids] = num_df.apply(convert_num_column_into_string_array).to_numpy()
+            data.features[:, cat_col_from_heuristic_rule_ids] = num_df.apply(
+                convert_num_column_into_string_array).to_numpy()
             # Columns need to be transformed into categorical (string) ones
-            self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str))
+            self.numerical_into_str.extend(cat_col_from_heuristic_rule_ids.difference(self.numerical_into_str))
             # Update information about column types (in-place)
-            feature_type_ids[cat_col_ids] = TYPE_TO_ID[str]
+            feature_type_ids[cat_col_from_heuristic_rule_ids] = TYPE_TO_ID[str]
+
+        is_cat_type = np.isin(feature_type_ids, [TYPE_TO_ID[str]])
+        all_cat_col_ids = np.flatnonzero(is_cat_type)
+        data.categorical_idx = all_cat_col_ids
+
+        if np.size(all_cat_col_ids) > 0:
+            if data.features_names is not None:
+                cat_features_names = data.features_names[all_cat_col_ids]
+                self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical')
+            else:
+                self.log.message(f'--- Preprocessing define next cols {all_cat_col_ids} as categorical')
+        else:
+            self.log.message(f'--- Preprocessing was unable to define the categorical columns')
 
     def _into_categorical_features_transformation_for_predict(self, data: InputData):
         """ Apply conversion into categorical string column for every signed column """

From 426dbd9a09b1cd75b2acb1e48d200f0a0ba8e7a6 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 19:56:39 +0300
Subject: [PATCH 12/69] Adding convert to np.array

---
 fedot/core/data/data_preprocessing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fedot/core/data/data_preprocessing.py b/fedot/core/data/data_preprocessing.py
index c8f9fd383a..40afac8d87 100644
--- a/fedot/core/data/data_preprocessing.py
+++ b/fedot/core/data/data_preprocessing.py
@@ -101,8 +101,8 @@ def data_has_categorical_features(data: InputData) -> bool:
     feature_type_ids = data.supplementary_data.col_type_ids['features']
     cat_ids, non_cat_ids = find_categorical_columns(data.features, feature_type_ids)
 
-    data.numerical_idx = non_cat_ids
-    data.categorical_idx = cat_ids
+    data.numerical_idx = np.array(non_cat_ids)
+    data.categorical_idx = np.array(cat_ids)
 
     if len(cat_ids) > 0:
         data.categorical_features = data.subset_features(cat_ids).features

From 9ab9f997d754921f3f3b4796c053b81d7d0dffbb Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 14 Aug 2024 19:57:31 +0300
Subject: [PATCH 13/69] Update ImputationImplementation

---
 .../sklearn_transformations.py | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
index 7a70442914..3485586fa4 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
@@ -281,10 +281,13 @@ def fit(self, input_data: InputData):
         replace_inf_with_nans(input_data)
 
         if data_type_is_table(input_data):
+            categorical_idx = input_data.categorical_idx.tolist()
+            numerical_idx = np.setdiff1d(
+                np.concatenate((input_data.numerical_idx, input_data.encoded_idx)),
+                categorical_idx
+            ).tolist()
             # Tabular data contains categorical features
-            categorical_ids, non_categorical_ids = find_categorical_columns(input_data.features)
-            numerical, categorical = divide_data_categorical_numerical(input_data, categorical_ids,
-                                                                       non_categorical_ids)
+            numerical, categorical = divide_data_categorical_numerical(input_data, categorical_idx, numerical_idx)
 
             if categorical is not None and categorical.features.size > 0:
                 categorical.features = convert_into_column(categorical.features)
@@ -312,12 +315,16 @@ def transform(self, input_data: InputData) -> OutputData:
         replace_inf_with_nans(input_data)
 
-        if data_type_is_table(input_data) and data_has_categorical_features(input_data):
-            feature_type_ids = input_data.supplementary_data.col_type_ids['features']
-            self.categorical_ids, self.non_categorical_ids = find_categorical_columns(input_data.features,
-                                                                                      feature_type_ids)
-            numerical, categorical = divide_data_categorical_numerical(input_data, self.categorical_ids,
-                                                                       self.non_categorical_ids)
+        if data_type_is_table(input_data) and input_data.categorical_idx is not None:
+            self.categorical_ids = input_data.categorical_idx.tolist()
+            self.non_categorical_ids = np.setdiff1d(
+                np.concatenate((input_data.numerical_idx, input_data.encoded_idx)),
+                self.categorical_ids
+            ).tolist()
+
+            numerical, categorical = divide_data_categorical_numerical(
+                input_data, self.categorical_ids, self.non_categorical_ids
+            )
 
             if categorical is not None:
                 categorical_features = convert_into_column(categorical.features)

From b679660089a1339f2eb9d32b8cd6aa2b0758d061 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 13:50:03 +0300
Subject: [PATCH 14/69] Fix bug in BinaryCategorical

---
 fedot/preprocessing/categorical.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py
index c879a25744..2a2226d524 100644
--- a/fedot/preprocessing/categorical.py
+++ b/fedot/preprocessing/categorical.py
@@ -26,9 +26,9 @@ def fit(self, input_data: InputData):
         """
         if np.size(input_data.categorical_idx) != 0:
             categorical_columns = input_data.features[:, input_data.categorical_idx].T
-            nan_matrix = pd.DataFrame(categorical_columns.T, columns=input_data.categorical_idx).isna().values
+            nan_matrix = pd.DataFrame(categorical_columns.T, columns=input_data.categorical_idx).isna().values.T
             nuniques = np.array([
-                len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix.T)
+                len(np.unique(col[~is_nan])) for col, is_nan in zip(categorical_columns, nan_matrix)
             ])
@@ -38,7 +38,7 @@ def fit(self, input_data: InputData):
             ):
                 if is_nan.any():
                     # This categorical column has nans
-                    categorical_columns[i, is_nan] = FEDOT_STR_NAN
+                    categorical_columns[i, np.where(is_nan)[0]] = FEDOT_STR_NAN
                     column_nuniques = len(set(categorical_columns[i]))

From 119bca83e9011c0cdbd02bb896850fa7ddff27b4 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 16:14:35 +0300
Subject: [PATCH 15/69] Fix bug with test_data_from_csv_load_correctly

---
 fedot/preprocessing/data_types.py    | 7 +++++++
 fedot/preprocessing/preprocessing.py | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py
index ce9442d1b3..1628ead891 100644
--- a/fedot/preprocessing/data_types.py
+++ b/fedot/preprocessing/data_types.py
@@ -298,10 +298,16 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData):
             # Update information about column types (in-place)
             feature_type_ids[cat_col_from_heuristic_rule_ids] = TYPE_TO_ID[str]
 
+        # Update cat cols idx in data
         is_cat_type = np.isin(feature_type_ids, [TYPE_TO_ID[str]])
         all_cat_col_ids = np.flatnonzero(is_cat_type)
         data.categorical_idx = all_cat_col_ids
 
+        # Update num cols idx in data
+        is_numeric_type = np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]])
+        all_numeric_type_ids = np.flatnonzero(is_numeric_type)
+        data.numerical_idx = all_numeric_type_ids
+
         if np.size(all_cat_col_ids) > 0:
             if data.features_names is not None:
@@ -359,6 +365,7 @@ def _into_numeric_features_transformation_for_fit(self, data: InputData):
             (self.acceptable_failed_rate_bottom <= failed_ratio) &
             (failed_ratio < self.acceptable_failed_rate_top))
         self.string_columns_transformation_failed.update(dict.fromkeys(is_of_mistakes[is_of_mistakes].index))
+        data.numerical_idx = is_numeric_ids
 
     def _into_numeric_features_transformation_for_predict(self, data: InputData):
         """ Apply conversion into float string column for every signed column """
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 6128090bc4..9fee4db89f 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -236,6 +236,10 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             self.types_correctors[source_name].convert_data_for_predict(data)
 
+        feature_type_ids = data.supplementary_data.col_type_ids['features']
+        data.numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]))
+        data.categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]]))
+
         # TODO andreygetmanov target encoding must be obligatory for all data types
         if data_type_is_text(data):
             # TODO andreygetmanov to new class text preprocessing?

From 7a3946a95ed633e02e67d0cf0111652eabf93ce0 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 16:34:33 +0300
Subject: [PATCH 16/69] Fix bug with
 test_api_fit_predict_with_pseudo_large_dataset_with_label_correct

---
 .../data_operations/sklearn_transformations.py | 10 ++--------
 fedot/preprocessing/categorical.py             |  1 +
 fedot/preprocessing/preprocessing.py           | 13 ++++++++++---
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
index 3485586fa4..9b6b3d2c8c 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py
@@ -282,10 +282,7 @@ def fit(self, input_data: InputData):
 
         if data_type_is_table(input_data):
             categorical_idx = input_data.categorical_idx.tolist()
-            numerical_idx = np.setdiff1d(
-                np.concatenate((input_data.numerical_idx, input_data.encoded_idx)),
-                categorical_idx
-            ).tolist()
+            numerical_idx = np.setdiff1d(input_data.numerical_idx, categorical_idx).tolist()
             # Tabular data contains categorical features
             numerical, categorical = divide_data_categorical_numerical(input_data, categorical_idx, numerical_idx)
@@ -317,10 +314,7 @@ def transform(self, input_data: InputData) -> OutputData:
             self.categorical_ids = input_data.categorical_idx.tolist()
-            self.non_categorical_ids = np.setdiff1d(
-                np.concatenate((input_data.numerical_idx, input_data.encoded_idx)),
-                self.categorical_ids
-            ).tolist()
+            self.non_categorical_ids = np.setdiff1d(input_data.numerical_idx, self.categorical_ids).tolist()
 
             numerical, categorical = divide_data_categorical_numerical(
                 input_data, self.categorical_ids, self.non_categorical_ids
             )
diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py
index 2a2226d524..07c70de0c9 100644
--- a/fedot/preprocessing/categorical.py
+++ b/fedot/preprocessing/categorical.py
@@ -55,6 +55,7 @@ def fit(self, input_data: InputData):
             # Remove binary columns from categorical_idx
             input_data.categorical_idx = [idx for idx in input_data.categorical_idx if idx not in binary_ids_to_convert]
+            input_data.categorical_idx = np.array(input_data.categorical_idx)
             self.binary_ids_to_convert = binary_ids_to_convert
 
             # TODO: Add log.message with binary ids
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 9fee4db89f..0752e17b3e 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -237,8 +237,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
         feature_type_ids = data.supplementary_data.col_type_ids['features']
-        data.numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]))
-        data.categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]]))
+        data.numerical_idx, data.categorical_idx = self._update_num_and_cats_ids(feature_type_ids)
 
         # TODO andreygetmanov target encoding must be obligatory for all data types
         if data_type_is_text(data):
@@ -252,6 +251,9 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             if is_fit_stage:
                 data = self.binary_categorical_processors[source_name].fit_transform(data)
             else:
                 data = self.binary_categorical_processors[source_name].transform(data)
 
+        feature_type_ids = data.supplementary_data.col_type_ids['features']
+        data.numerical_idx, data.categorical_idx = self._update_num_and_cats_ids(feature_type_ids)
+
         return data
 
     def _prepare_optional(self, pipeline, data: InputData, source_name: str):
@@ -562,7 +564,6 @@ def reduce_memory_size(self, data: InputData) -> InputData:
         def reduce_mem_usage_np(arr, initial_types):
             reduced_columns = OptimisedFeature()
 
-
             for i in range(arr.shape[1]):
                 col = arr[:, i]
                 init_type = _convertable_types[initial_types[i]]
@@ -601,3 +602,9 @@ def reduce_memory_size(self, data: InputData) -> InputData:
             data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
 
         return data
+
+    def _update_num_and_cats_ids(self, feature_type_ids):
+        numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]]))
+        categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]]))
+
+        return numerical_idx, categorical_idx

From 3134fc64da0dbea2d59a672cc53bdc19ae9bb80f Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 16:42:42 +0300
Subject: [PATCH 17/69] Fix bug with
 test_pipeline_preprocessing_through_api_correctly

---
 fedot/preprocessing/preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 0752e17b3e..144d865eef 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -568,7 +568,7 @@ def reduce_mem_usage_np(arr, initial_types):
                 col = col.astype(init_type)
                 col_type = col.dtype.name
 
-                if col_type not in ['object']:
+                if col_type not in ['object', 'str384']:
                     c_min = col.max()
                     c_max = col.max()
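A note on the `'str384'` guard in PATCH 17: NumPy names fixed-width unicode dtypes by their total bit width (4 bytes per character), so the name changes with the string length, and a check against one literal name only catches that particular width. For example:

import numpy as np

print(np.array(['abc']).dtype)       # <U3
print(np.array(['abc']).dtype.name)  # 'str96': 3 chars * 32 bits each
print(np.dtype('U12').name)          # 'str384': 12 chars * 32 bits
print(np.dtype('U12').kind)          # 'U': a width-independent test
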
else: data = self.binary_categorical_processors[source_name].transform(data) + feature_type_ids = data.supplementary_data.col_type_ids['features'] + data.numerical_idx, data.categorical_idx = self._update_num_and_cats_ids(feature_type_ids) + return data def _prepare_optional(self, pipeline, data: InputData, source_name: str): @@ -560,7 +562,6 @@ def reduce_memory_size(self, data: InputData) -> InputData: def reduce_mem_usage_np(arr, initial_types): reduced_columns = OptimisedFeature() - for i in range(arr.shape[1]): col = arr[:, i] init_type = _convertable_types[initial_types[i]] @@ -601,3 +602,9 @@ def reduce_mem_usage_np(arr, initial_types): data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target']) return data + + def _update_num_and_cats_ids(self, feature_type_ids): + numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]])) + categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]])) + + return numerical_idx, categorical_idx From 3134fc64da0dbea2d59a672cc53bdc19ae9bb80f Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Thu, 15 Aug 2024 16:42:42 +0300 Subject: [PATCH 17/69] Fix bug with test_pipeline_preprocessing_through_api_correctly --- fedot/preprocessing/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 0752e17b3e..144d865eef 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -568,7 +568,7 @@ def reduce_mem_usage_np(arr, initial_types): col = col.astype(init_type) col_type = col.dtype.name - if col_type not in ['object']: + if col_type not in ['object', 'str384']: c_min = col.max() c_max = col.max() From e5db54dcf809bf70e64f6e6c2197aea749a8e898 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Thu, 15 Aug 2024 16:59:13 +0300 Subject: [PATCH 18/69] Fix bug with test_default_forecast (add new TODO for ts_forecasting) --- fedot/core/data/data.py | 4 ++-- fedot/preprocessing/preprocessing.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 3077f42e92..c8f39e9392 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -613,7 +613,7 @@ def get_not_encoded_data(self): num_features_names, cat_features_names = None, None # Checking numerical data exists - if self.numerical_idx: + if self.numerical_idx.any(): num_features = self.features[:, self.numerical_idx] if self.features_names is not None and np.size(self.features_names): @@ -622,7 +622,7 @@ def get_not_encoded_data(self): num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)]) # Checking categorical data exists - if self.categorical_idx: + if self.categorical_idx.any(): cat_features = self.categorical_features if self.features_names is not None and np.size(self.features_names): diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 144d865eef..64e9720c3d 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -595,11 +595,16 @@ def reduce_mem_usage_np(arr, initial_types): return reduced_columns if isinstance(data, InputData): - self.log.message('-- Reduce memory in features') - data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features']) + if data.task.task_type == TaskTypesEnum.ts_forecasting: + # TODO: TS data has 
col_type_ids['features'] = None.
+            #  It is required to add support for this to reduce memory for TS data as well
+            pass
+        else:
+            self.log.message('-- Reduce memory in features')
+            data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features'])
 
-        self.log.message('-- Reduce memory in target')
-        data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
+            self.log.message('-- Reduce memory in target')
+            data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
 
         return data

From ebab7f2736fa9dc60ec1befdec2c51118220af3e Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 17:15:38 +0300
Subject: [PATCH 19/69] Fix bug with test_cv_multiple_metrics_evaluated_correct by adding copy method to OptimisedFeature

---
 fedot/core/data/data.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index c8f39e9392..eb09a08ea8 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -719,6 +719,9 @@ def __setitem__(self, key, value):
     def __len__(self):
         return self._shape[0] if self._columns else 0
 
+    def copy(self):
+        return self._columns.copy()
+
     @property
     def shape(self):
         return self._shape

From c12377921b957f1238d09f79e48733fe1e48dcc4 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Thu, 15 Aug 2024 17:19:04 +0300
Subject: [PATCH 20/69] Fix bug with test_regression_pipeline_with_data_operation_fit_predict_correct by adding check for target

---
 fedot/preprocessing/preprocessing.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 64e9720c3d..874c308280 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -603,8 +603,9 @@ def reduce_mem_usage_np(arr, initial_types):
             self.log.message('-- Reduce memory in features')
             data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features'])
 
-            self.log.message('-- Reduce memory in target')
-            data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
+            if data.target is not None:
+                self.log.message('-- Reduce memory in target')
+                data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target'])
 
         return data

From 2e168dc346570b8bc8867c022137c71af2e9145c Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Fri, 16 Aug 2024 14:10:04 +0300
Subject: [PATCH 21/69] Fix bug in test_default_train_test_simple with nbytes

---
 fedot/api/api_utils/api_data.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py
index 3ac1c9242b..c9db8f641e 100644
--- a/fedot/api/api_utils/api_data.py
+++ b/fedot/api/api_utils/api_data.py
@@ -152,11 +152,7 @@ def fit_transform(self, train_data: InputData) -> InputData:
 
         train_data.supplementary_data.is_auto_preprocessed = True
 
-        if isinstance(train_data.features, OptimisedFeature):
-            memory_usage = convert_memory_size(train_data.features.memory_usage)
-
-        else:
-            memory_usage = convert_memory_size(train_data.features.nbytes)
+        memory_usage = convert_memory_size(train_data.features.nbytes)
 
         features_shape = train_data.features.shape
         target_shape = train_data.target.shape

From f6d539a850a437a939321e71a00c8da3d014777a Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Fri, 16 Aug 2024 14:17:26 +0300
Subject: [PATCH 22/69] Fix bugs with str* types in features

---
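A note on the memory-reduction logic the last few patches keep touching: conceptually, reduce_mem_usage_np is per-column dtype downcasting. A minimal self-contained sketch of that idea, for reference only (the helper below is illustrative and not the project's actual implementation):

    import numpy as np

    def downcast_numeric(col: np.ndarray) -> np.ndarray:
        # Skip non-numeric (object/str) columns, as the patched code does
        if col.dtype.kind not in 'iuf':
            return col
        c_min, c_max = col.min(), col.max()
        candidates = (np.int8, np.int16, np.int32, np.int64) if col.dtype.kind in 'iu' \
            else (np.float32, np.float64)
        for dtype in candidates:
            info = np.iinfo(dtype) if np.issubdtype(dtype, np.integer) else np.finfo(dtype)
            if info.min <= c_min and c_max <= info.max:
                return col.astype(dtype)
        return col

    features = np.array([[0, 100], [1, 30000]], dtype=np.int64)
    reduced = [downcast_numeric(features[:, i]) for i in range(features.shape[1])]
    print([c.dtype for c in reduced])  # [dtype('int8'), dtype('int16')]

Keeping each column as its own array is what lets every column carry a different dtype; a single 2-D ndarray would force a common dtype and undo most of the savings.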
fedot/preprocessing/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 874c308280..bc0661495c 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -568,7 +568,7 @@ def reduce_mem_usage_np(arr, initial_types): col = col.astype(init_type) col_type = col.dtype.name - if col_type not in ['object', 'str384']: + if col_type not in ['object', 'str32', 'str96', 'str128', 'str160', 'str384']: c_min = col.max() c_max = col.max() From 9290d8266f76032737937773b33ee2d83946f6f3 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 16 Aug 2024 14:20:58 +0300 Subject: [PATCH 23/69] Fix bug with test_inf_and_nan_absence_after_imputation_implementation_fit_transform by adding cat and num idx in get_dataset func --- .../test_data_operations_implementations.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/test/unit/data_operations/test_data_operations_implementations.py b/test/unit/data_operations/test_data_operations_implementations.py index b5832b1bc1..3f49b1b21e 100644 --- a/test/unit/data_operations/test_data_operations_implementations.py +++ b/test/unit/data_operations/test_data_operations_implementations.py @@ -130,15 +130,21 @@ def get_multivariate_time_series(mutli_ts=False): def get_nan_inf_data(): supp_data = SupplementaryData(col_type_ids={'features': np.array([TYPE_TO_ID[float]] * 4)}) - train_input = InputData(idx=[0, 1, 2, 3], - features=np.array([[1, 2, 3, 4], - [2, np.nan, 4, 5], - [3, 4, 5, np.inf], - [-np.inf, 5, 6, 7]]), - target=np.array([1, 2, 3, 4]), - task=Task(TaskTypesEnum.regression), - data_type=DataTypesEnum.table, - supplementary_data=supp_data) + train_input = InputData( + idx=np.array([0, 1, 2, 3]), + features=np.array([ + [1, 2, 3, 4], + [2, np.nan, 4, 5], + [3, 4, 5, np.inf], + [-np.inf, 5, 6, 7] + ]), + target=np.array([1, 2, 3, 4]), + numerical_idx=np.array([0, 1, 2, 4]), + categorical_idx=np.array([]), + task=Task(TaskTypesEnum.regression), + data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return train_input From 2f5946636b21ca925dacfad3e89111a153173407 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 16 Aug 2024 14:35:21 +0300 Subject: [PATCH 24/69] Fix bug with test_pipeline_objective_evaluate_with_different_metrics by switching Xgboost to Catboost, due to "Experimental support for categorical data is not implemented for current tree method yet." 
for XgBoost and checking feat ids with size --- fedot/core/data/data.py | 4 ++-- test/unit/optimizer/test_pipeline_objective_eval.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index eb09a08ea8..e1031cb4e0 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -613,7 +613,7 @@ def get_not_encoded_data(self): num_features_names, cat_features_names = None, None # Checking numerical data exists - if self.numerical_idx.any(): + if self.numerical_idx.size != 0: num_features = self.features[:, self.numerical_idx] if self.features_names is not None and np.size(self.features_names): @@ -622,7 +622,7 @@ def get_not_encoded_data(self): num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)]) # Checking categorical data exists - if self.categorical_idx.any(): + if self.categorical_idx.size != 0: cat_features = self.categorical_features if self.features_names is not None and np.size(self.features_names): diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index 145a28d3db..1a82f86548 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -35,7 +35,7 @@ def pipeline_second_test(): def pipeline_third_test(): - pipeline = PipelineBuilder().add_node('xgboost').build() + pipeline = PipelineBuilder().add_node('catboost').build() return pipeline From 1be317f32a5a3af0ca185f840c7af232bdada3b1 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 16 Aug 2024 14:56:13 +0300 Subject: [PATCH 25/69] Fix bug with test_order_by_data_flow_len_correct --- test/unit/pipelines/test_decompose_pipelines.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/pipelines/test_decompose_pipelines.py b/test/unit/pipelines/test_decompose_pipelines.py index a3fdc50a30..fb86ca9646 100644 --- a/test/unit/pipelines/test_decompose_pipelines.py +++ b/test/unit/pipelines/test_decompose_pipelines.py @@ -145,14 +145,14 @@ def test_order_by_data_flow_len_correct(): counters can allow for decompose implementation to determine how the nodes in the graph are located """ - input_data = get_iris_data() - input_data = DataPreprocessor().obligatory_prepare_for_fit(input_data) - data_operations = ['scaling', 'normalization', 'pca', 'poly_features'] model_operations = ['lda', 'knn', 'logit'] list_with_operations = list(product(data_operations, model_operations)) for data_operation, model_operation in list_with_operations: + input_data = get_iris_data() + input_data = DataPreprocessor().obligatory_prepare_for_fit(input_data) + # Generate pipeline with different operations in the nodes with decomposition pipeline = generate_pipeline_with_decomposition(data_operation, model_operation) From 16285df716dd371b717da8ba48f65de5397683d2 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 16 Aug 2024 15:44:28 +0300 Subject: [PATCH 26/69] Fix bug with test_pipeline_with_imputer (finally) --- fedot/core/data/data.py | 5 +++++ fedot/core/data/merge/data_merger.py | 5 +++++ .../data_operations/sklearn_transformations.py | 10 +++++----- .../implementation_interfaces.py | 5 +++++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index e1031cb4e0..83056e37a1 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -545,7 +545,12 @@ def subset_features(self, feature_ids: list) -> 
Optional[InputData]: subsample_input = InputData(features=subsample_features, data_type=self.data_type, target=self.target, task=self.task, + categorical_features=self.categorical_features, idx=self.idx, + numerical_idx=self.numerical_idx, + categorical_idx=self.categorical_idx, + encoded_idx=self.encoded_idx, + features_names=self.features_names, supplementary_data=self.supplementary_data) return subsample_input diff --git a/fedot/core/data/merge/data_merger.py b/fedot/core/data/merge/data_merger.py index b7f8ac1a5f..a1dc312f0b 100644 --- a/fedot/core/data/merge/data_merger.py +++ b/fedot/core/data/merge/data_merger.py @@ -78,6 +78,11 @@ def merge(self) -> 'InputData': return InputData(idx=common_idx, features=merged_features, target=filtered_main_target, task=self.main_output.task, data_type=self.data_type, + numerical_idx=self.main_output.numerical_idx, + categorical_idx=self.main_output.categorical_idx, + encoded_idx=self.main_output.encoded_idx, + categorical_features=self.main_output.categorical_features, + features_names=self.main_output.features_names, supplementary_data=updated_metadata) def merge_targets(self) -> np.array: diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py index 9b6b3d2c8c..4090b6002b 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py @@ -281,10 +281,10 @@ def fit(self, input_data: InputData): replace_inf_with_nans(input_data) if data_type_is_table(input_data): - categorical_idx = input_data.categorical_idx.tolist() - numerical_idx = np.setdiff1d(input_data.numerical_idx, categorical_idx).tolist() + encoded_idx = input_data.encoded_idx.tolist() + numerical_idx = np.setdiff1d(input_data.numerical_idx, encoded_idx).tolist() # Tabular data contains categorical features - numerical, categorical = divide_data_categorical_numerical(input_data, categorical_idx, numerical_idx) + numerical, categorical = divide_data_categorical_numerical(input_data, encoded_idx, numerical_idx) if categorical is not None and categorical.features.size > 0: categorical.features = convert_into_column(categorical.features) @@ -312,8 +312,8 @@ def transform(self, input_data: InputData) -> OutputData: replace_inf_with_nans(input_data) - if data_type_is_table(input_data) and input_data.categorical_idx is not None: - self.categorical_ids = input_data.categorical_idx.tolist() + if data_type_is_table(input_data) and input_data.encoded_idx is not None: + self.categorical_ids = input_data.encoded_idx.tolist() self.non_categorical_ids = np.setdiff1d(input_data.numerical_idx, self.categorical_ids).tolist() numerical, categorical = divide_data_categorical_numerical( diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index ed952ecf81..c4f60d1bbb 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -236,6 +236,11 @@ def _convert_to_output_function(input_data: InputData, transformed_features: np. 
task=input_data.task, target=input_data.target, data_type=data_type, + numerical_idx=input_data.numerical_idx, + categorical_idx=input_data.categorical_idx, + encoded_idx=input_data.encoded_idx, + categorical_features=input_data.categorical_features, + features_names=input_data.features_names, supplementary_data=input_data.supplementary_data) return converted From 36f994c73d095fb71add51975d265bd82d6fa480 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 16 Aug 2024 16:22:23 +0300 Subject: [PATCH 27/69] Fix bug with test_correct_api_dataset_with_text_preprocessing by update col_type regex rule for str* --- fedot/preprocessing/preprocessing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index bc0661495c..1444e0287f 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -1,3 +1,4 @@ +import re from copy import copy from typing import Optional, Union @@ -568,7 +569,7 @@ def reduce_mem_usage_np(arr, initial_types): col = col.astype(init_type) col_type = col.dtype.name - if col_type not in ['object', 'str32', 'str96', 'str128', 'str160', 'str384']: + if col_type not in ['object'] and not bool(re.match(r'str\d*$', col_type)): c_min = col.max() c_max = col.max() From 0b8c41c5b3db9206e3e6cbdfe429c85821db7d3a Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:14:21 +0300 Subject: [PATCH 28/69] Update for OneHotImplementation --- .../data_operations/categorical_encoders.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py index dce9296c12..03b1ac70bb 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.preprocessing import LabelEncoder, OneHotEncoder -from fedot.core.data.data import InputData, OutputData +from fedot.core.data.data import InputData, OutputData, OptimisedFeature from fedot.core.data.data_preprocessing import find_categorical_columns from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ( DataOperationImplementation @@ -35,10 +35,10 @@ def fit(self, input_data: InputData): """ features = input_data.features feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - self.categorical_ids, self.non_categorical_ids = find_categorical_columns(features, feature_type_ids) + self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx # If there are categorical features - process it - if self.categorical_ids: + if self.categorical_ids.size > 0: updated_cat_features = features[:, self.categorical_ids].astype(str) self.encoder.fit(updated_cat_features) @@ -55,7 +55,7 @@ def transform(self, input_data: InputData) -> OutputData: copied_data = deepcopy(input_data) transformed_features = copied_data.features - if self.categorical_ids: + if self.categorical_ids.size > 0: # If categorical features exist transformed_features = self._apply_one_hot_encoding(transformed_features) @@ -67,7 +67,7 @@ def transform(self, input_data: InputData) -> OutputData: def _update_column_types(self, 
output_data: OutputData): """ Update column types after encoding. Categorical columns becomes integer with extension """ - if self.categorical_ids: + if self.categorical_ids.size > 0: # There are categorical features in the table feature_type_ids = output_data.supplementary_data.col_type_ids['features'] numerical_columns = feature_type_ids[feature_type_ids != TYPE_TO_ID[str]] @@ -108,9 +108,7 @@ def __init__(self, params: Optional[OperationParameters] = None): self.non_categorical_ids: List[int] = [] def fit(self, input_data: InputData): - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] - self.categorical_ids, self.non_categorical_ids = find_categorical_columns(input_data.features, - feature_type_ids) + self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx # For every existing categorical feature - perform encoding self._fit_label_encoders(input_data.features) @@ -161,7 +159,12 @@ def _apply_label_encoder(self, data: np.ndarray): # Store np.nan values transformed_column = transformed_column.astype(object) transformed_column[nan_idxs] = np.nan - data[:, column_id] = transformed_column + + if isinstance(data, np.ndarray): + data[:, column_id] = transformed_column + + elif isinstance(data, OptimisedFeature): + data._columns[column_id] = transformed_column def get_params(self) -> OperationParameters: """ Due to LabelEncoder has no parameters - return empty set """ From c3a80696179a72dfe861927706ede6548dd44662 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:15:02 +0300 Subject: [PATCH 29/69] Update for subset_features and post_init --- fedot/core/data/data.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 83056e37a1..d8799ec555 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -481,9 +481,12 @@ class InputData(Data): def __post_init__(self): if self.numerical_idx is None: if self.features is not None and isinstance(self.features, np.ndarray) and self.features.ndim > 1: - self.numerical_idx = list(range(self.features.shape[1])) + if self.categorical_idx is None: + self.numerical_idx = np.arange(0, self.features.shape[1]) + else: + self.numerical_idx = np.setdiff1d(np.arange(0, self.features.shape[1]), self.categorical_idx) else: - self.numerical_idx = [0] + self.numerical_idx = np.array([0]) @property def num_classes(self) -> Optional[int]: @@ -534,24 +537,26 @@ def subset_indices(self, selected_idx: List): target=self.target[row_nums], task=self.task, data_type=self.data_type) - def subset_features(self, feature_ids: list) -> Optional[InputData]: + def subset_features(self, feature_ids: np.array) -> Optional[InputData]: """ Return new :obj:`InputData` with subset of features based on non-empty ``features_ids`` list or `None` otherwise """ - if not feature_ids: + if feature_ids.size == 0: return None subsample_features = self.features[:, feature_ids] - subsample_input = InputData(features=subsample_features, - data_type=self.data_type, - target=self.target, task=self.task, - categorical_features=self.categorical_features, - idx=self.idx, - numerical_idx=self.numerical_idx, - categorical_idx=self.categorical_idx, - encoded_idx=self.encoded_idx, - features_names=self.features_names, - supplementary_data=self.supplementary_data) + subsample_input = InputData( + features=subsample_features, + data_type=self.data_type, + target=self.target, task=self.task, + idx=self.idx, + 
categorical_idx=np.setdiff1d(self.categorical_idx, feature_ids), + numerical_idx=np.setdiff1d(self.numerical_idx, feature_ids), + encoded_idx=np.setdiff1d(self.encoded_idx, feature_ids), + categorical_features=self.categorical_features, + features_names=self.features_names, + supplementary_data=self.supplementary_data + ) return subsample_input From 1d5ecfe6f1fe6111a1b16dd145cde8e880300746 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:15:38 +0300 Subject: [PATCH 30/69] Update data_has_categorical_features --- fedot/core/data/data_preprocessing.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fedot/core/data/data_preprocessing.py b/fedot/core/data/data_preprocessing.py index 40afac8d87..4323020da3 100644 --- a/fedot/core/data/data_preprocessing.py +++ b/fedot/core/data/data_preprocessing.py @@ -33,8 +33,8 @@ def convert_into_column(array: np.ndarray) -> np.ndarray: return array -def divide_data_categorical_numerical(input_data: InputData, categorical_ids: list, - non_categorical_ids: list) -> Tuple[Optional[InputData], Optional[InputData]]: +def divide_data_categorical_numerical(input_data: InputData, categorical_ids: np.ndarray, + non_categorical_ids: np.ndarray) -> Tuple[Optional[InputData], Optional[InputData]]: """ Split tabular InputData into two parts: with numerical and categorical features using list with ids of categorical and numerical features. @@ -98,16 +98,12 @@ def data_has_categorical_features(data: InputData) -> bool: if data.data_type is not DataTypesEnum.table: return False - feature_type_ids = data.supplementary_data.col_type_ids['features'] - cat_ids, non_cat_ids = find_categorical_columns(data.features, feature_type_ids) - - data.numerical_idx = np.array(non_cat_ids) - data.categorical_idx = np.array(cat_ids) + cat_ids, non_cat_ids = data.categorical_idx, data.numerical_idx if len(cat_ids) > 0: - data.categorical_features = data.subset_features(cat_ids).features + data.categorical_features = data.features[:, cat_ids] - return bool(cat_ids) + return bool(cat_ids.tolist()) def data_has_text_features(data: InputData) -> bool: From eb14784f540fe90b1f4a8c8f1efbc5d51bf57791 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:15:57 +0300 Subject: [PATCH 31/69] Adding bool to numerical --- fedot/preprocessing/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 1444e0287f..d98c32da1a 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -611,7 +611,7 @@ def reduce_mem_usage_np(arr, initial_types): return data def _update_num_and_cats_ids(self, feature_type_ids): - numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float]])) + numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float], TYPE_TO_ID[bool]])) categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]])) return numerical_idx, categorical_idx From af00955c247fc4c9cdcd33b43774cbe52e279292 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:16:14 +0300 Subject: [PATCH 32/69] Update for ImputationImplementation --- .../sklearn_transformations.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py 
b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py index 4090b6002b..3e127d09c6 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py @@ -264,7 +264,7 @@ def __init__(self, params: Optional[OperationParameters] = None): default_params_categorical = {'strategy': 'most_frequent'} self.params_cat = {**self.params.to_dict(), **default_params_categorical} self.params_num = self.params.to_dict() - self.categorical_ids = None + self.categorical_or_encoded_ids = None self.non_categorical_ids = None self.ids_binary_integer_features = {} @@ -281,10 +281,20 @@ def fit(self, input_data: InputData): replace_inf_with_nans(input_data) if data_type_is_table(input_data): - encoded_idx = input_data.encoded_idx.tolist() - numerical_idx = np.setdiff1d(input_data.numerical_idx, encoded_idx).tolist() + self.non_categorical_ids = input_data.numerical_idx + + # The data may have arrived here before categorical data encoding was called. + if input_data.categorical_idx is not None and input_data.encoded_idx is None: + self.categorical_or_encoded_ids = input_data.categorical_idx + + # Otherwise, it may have arrived here after categorical data encoding + elif input_data.encoded_idx is not None: + self.categorical_or_encoded_ids = input_data.encoded_idx + # Tabular data contains categorical features - numerical, categorical = divide_data_categorical_numerical(input_data, encoded_idx, numerical_idx) + numerical, categorical = divide_data_categorical_numerical( + input_data, self.categorical_or_encoded_ids, self.non_categorical_ids + ) if categorical is not None and categorical.features.size > 0: categorical.features = convert_into_column(categorical.features) @@ -312,12 +322,11 @@ def transform(self, input_data: InputData) -> OutputData: replace_inf_with_nans(input_data) - if data_type_is_table(input_data) and input_data.encoded_idx is not None: - self.categorical_ids = input_data.encoded_idx.tolist() - self.non_categorical_ids = np.setdiff1d(input_data.numerical_idx, self.categorical_ids).tolist() + categorical_features, numerical_features = None, None + if data_type_is_table(input_data): numerical, categorical = divide_data_categorical_numerical( - input_data, self.categorical_ids, self.non_categorical_ids + input_data, self.categorical_or_encoded_ids, self.non_categorical_ids ) if categorical is not None: @@ -332,13 +341,14 @@ def transform(self, input_data: InputData) -> OutputData: numerical_features = self.imputer_num.transform(numerical_features) numerical_features = self._correct_binary_ids_features(numerical_features) - if categorical is not None and numerical is not None: + if categorical_features is not None and numerical_features is not None: # Stack both categorical and numerical features transformed_features = self._categorical_numerical_union(categorical_features, numerical_features) - elif categorical is not None and numerical is None: + elif categorical_features is not None and numerical_features is None: # Dataset contain only categorical features transformed_features = categorical_features + elif categorical is None and numerical is not None: # Dataset contain only numerical features transformed_features = numerical_features @@ -368,7 +378,7 @@ def _categorical_numerical_union(self, categorical_features: np.array, numerical """Merge numerical and categorical features in right order 
(as it was in source table) """ - categorical_df = pd.DataFrame(categorical_features, columns=self.categorical_ids) + categorical_df = pd.DataFrame(categorical_features, columns=self.categorical_or_encoded_ids) numerical_df = pd.DataFrame(numerical_features, columns=self.non_categorical_ids) all_features_df = pd.concat([numerical_df, categorical_df], axis=1) From 600d12c4592cebb7efae2385e30ba8ac83ec9772 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:17:43 +0300 Subject: [PATCH 33/69] Fix data for tests --- .../test_data_operations_implementations.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/test/unit/data_operations/test_data_operations_implementations.py b/test/unit/data_operations/test_data_operations_implementations.py index 3f49b1b21e..9529b33316 100644 --- a/test/unit/data_operations/test_data_operations_implementations.py +++ b/test/unit/data_operations/test_data_operations_implementations.py @@ -139,7 +139,7 @@ def get_nan_inf_data(): [-np.inf, 5, 6, 7] ]), target=np.array([1, 2, 3, 4]), - numerical_idx=np.array([0, 1, 2, 4]), + numerical_idx=np.array([0, 1, 2, 3]), categorical_idx=np.array([]), task=Task(TaskTypesEnum.regression), data_type=DataTypesEnum.table, @@ -216,10 +216,14 @@ def get_nan_binary_data(task=None): [1, '1', 1], [5, '1', 1]], dtype=object) - input_data = InputData(idx=[0, 1, 2, 3], features=features, - target=np.array([[0], [0], [1], [1]]), - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + input_data = InputData( + idx=np.array([0, 1, 2, 3]), + features=features, + target=np.array([[0], [0], [1], [1]]), + categorical_idx=np.array([1]), + task=task, data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return input_data @@ -266,9 +270,19 @@ def data_with_binary_int_features_and_equal_categories(): [np.nan, np.nan], [0, 0]]) target = np.array([['not-nan'], ['nan'], ['nan'], ['not-nan']]) - train_input = InputData(idx=[0, 1, 2, 3], features=features, target=target, - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + train_input = InputData( + idx=np.array([0, 1, 2, 3]), + features=features, + target=target, + numerical_idx=np.array([0, 1]), + categorical_idx=np.array([]), + encoded_idx=np.array([]), + categorical_features=None, + features_names=None, + task=task, + data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return train_input From 91c24a481727161b463645793e594a1af6c0cd61 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:18:04 +0300 Subject: [PATCH 34/69] Fix test with adding new types --- test/integration/api/test_main_api.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/integration/api/test_main_api.py b/test/integration/api/test_main_api.py index 01700842e1..351dc6e24a 100644 --- a/test/integration/api/test_main_api.py +++ b/test/integration/api/test_main_api.py @@ -224,8 +224,14 @@ def test_categorical_preprocessing_unidata_predefined_linear(): pipeline.fit(train_data) prediction = pipeline.predict(test_data) + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + for i in range(prediction.features.shape[1]): - assert all(list(map(lambda x: isinstance(x, (int, float)), prediction.features[:, i]))) + assert all(list(map(lambda x: isinstance(x, types_encountered), prediction.features[:, i]))) def test_fill_nan_without_categorical(): From 313ad8a583b2b5fc6c0e0ef923d95f4d512a43d0 Mon Sep 
17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:34:41 +0300 Subject: [PATCH 35/69] Update test with deleting extra spaces --- test/unit/preprocessing/test_preprocessors.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py index 856f59f40d..6eda74361d 100644 --- a/test/unit/preprocessing/test_preprocessors.py +++ b/test/unit/preprocessing/test_preprocessors.py @@ -91,24 +91,24 @@ def data_with_complicated_types(): """ task = Task(TaskTypesEnum.classification) - features = np.array([[0, np.nan, 1, 1, 1, 'monday', 'a ', 'true', 1, '0', 'a'], + features = np.array([[0, np.nan, 1, 1, 1, 'monday', 'a', 'true', 1, '0', 'a'], [np.nan, 5, 2, 2, 0, 'tuesday', 'b', np.nan, 0, '1', np.inf], [2, np.nan, 3, 3, np.nan, 3, 'c', 'false', 1, '?', 'c'], - [3, np.nan, 4, 4, 3.0, 4, ' a ', 'true', 0, 'error', 'd'], - [4, np.nan, 5, 5.0, 0, 5, ' b ', np.nan, 0, '3', 'e'], - [5, np.nan, 6, 6, 0, 6, ' c ', 'false', 0, '4', 'f'], - [6, np.inf, 7, 7, 0, 7, ' a ', 'true', 1, '5', 'g'], - [7, np.inf, 8, 8, 1.0, 1, ' b ', np.nan, 0, '6', 'h'], + [3, np.nan, 4, 4, 3.0, 4, 'a', 'true', 0, 'error', 'd'], + [4, np.nan, 5, 5.0, 0, 5, 'b', np.nan, 0, '3', 'e'], + [5, np.nan, 6, 6, 0, 6, 'c', 'false', 0, '4', 'f'], + [6, np.inf, 7, 7, 0, 7, 'a', 'true', 1, '5', 'g'], + [7, np.inf, 8, 8, 1.0, 1, 'b', np.nan, 0, '6', 'h'], [np.inf, np.inf, '9', '9', 2, 2, np.nan, 'true', 1, '7', 'i'], - [9, np.inf, '10', '10', 2, 3, ' c ', 'false', 0, '8', 'j'], - [10, np.nan, 11.0, 11.0, 0, 4, 'c ', 'false', 0, '9', 'k'], + [9, np.inf, '10', '10', 2, 3, 'c', 'false', 0, '8', 'j'], + [10, np.nan, 11.0, 11.0, 0, 4, 'c', 'false', 0, '9', 'k'], [11, np.nan, 12, 12, 2.0, 5, np.nan, 'false', 1, '10', 'l'], - [12, np.nan, 1, 1.0, 1.0, 6, ' b ', 'false', 0, '11', 'm'], - [13, np.nan, 2, 2, 1, 7, ' c ', 'true', np.nan, '12', 'n'], + [12, np.nan, 1, 1.0, 1.0, 6, 'b', 'false', 0, '11', 'm'], + [13, np.nan, 2, 2, 1, 7, 'c', 'true', np.nan, '12', 'n'], [14, np.nan, 3, 3, 2.0, 1, 'a', 'false', np.nan, 'error', 'o'], - [15, np.nan, 4, 4, 1, 2, 'a ', 'false', np.nan, '13', 'p'], - [16, 2, 5, 12, 0, 3, ' d ', 'true', 1, '?', 'r'], - [17, 3, 6, 13, 0, 4, ' d ', 'false', 0, '17', 's']], + [15, np.nan, 4, 4, 1, 2, 'a', 'false', np.nan, '13', 'p'], + [16, 2, 5, 12, 0, 3, 'd', 'true', 1, '?', 'r'], + [17, 3, 6, 13, 0, 4, 'd', 'false', 0, '17', 's']], dtype=object) target = np.array([['no'], ['yes'], ['yes'], ['yes'], ['no'], ['no'], ['no'], ['no'], ['no'], ['yes'], ['yes'], ['yes'], ['yes'], ['yes'], ['no'], ['no'], ['yes'], ['no']]) From fa11d8bbbc747b3e2985e6d077a552d0bb88c3ea Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 18:39:27 +0300 Subject: [PATCH 36/69] Update test with adding extra types_encountered --- test/unit/preprocessing/test_preprocessors.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py index 6eda74361d..d4d52c4884 100644 --- a/test/unit/preprocessing/test_preprocessors.py +++ b/test/unit/preprocessing/test_preprocessors.py @@ -216,8 +216,14 @@ def test_binary_pseudo_string_column_process_correctly(): pipeline = correct_preprocessing_params(pipeline) train_predicted = pipeline.fit(train_data) + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + assert 
train_predicted.features.shape[1] == 1 - assert all(isinstance(el[0], float) for el in train_predicted.features) + assert all(isinstance(el[0], types_encountered) for el in train_predicted.features) def fit_predict_cycle_for_testing(idx: int): @@ -240,8 +246,15 @@ def test_mixed_column_with_str_and_float_values(): # column with index 1 must be converted to float and the gaps must be filled train_predicted = fit_predict_cycle_for_testing(idx=1) + + types_encountered = ( + int, float, + np.int8, np.int16, np.int32, np.int64, + np.float16, np.float32, np.float64, + ) + assert train_predicted.features.shape[1] == 1 - assert all(isinstance(el[0], float) for el in train_predicted.features) + assert all(isinstance(el[0], types_encountered) for el in train_predicted.features) # column with index 2 must be removed due to unclear type of data try: From e76cd93ad52fc15dd0878c9873fad7b2211ae5e6 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Mon, 19 Aug 2024 20:30:36 +0300 Subject: [PATCH 37/69] Fixes different tests --- fedot/core/data/data.py | 2 +- .../data_operations/sklearn_transformations.py | 2 +- test/unit/multimodal/data_generators.py | 8 ++++---- .../preprocessing/test_preprocessing_through_api.py | 13 +++++++++---- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index d8799ec555..41373743f5 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -541,7 +541,7 @@ def subset_features(self, feature_ids: np.array) -> Optional[InputData]: """ Return new :obj:`InputData` with subset of features based on non-empty ``features_ids`` list or `None` otherwise """ - if feature_ids.size == 0: + if feature_ids is None or feature_ids.size == 0: return None subsample_features = self.features[:, feature_ids] diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py index 3e127d09c6..ca6e59a8e9 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py @@ -186,7 +186,7 @@ def fit(self, input_data: InputData): if n_cols > self.th_columns: # Randomly choose subsample of features columns - 10 features column_indices = np.arange(n_cols) - self.columns_to_take = random.sample(list(column_indices), self.th_columns) + self.columns_to_take = np.array(random.sample(list(column_indices), self.th_columns)) input_data = input_data.subset_features(self.columns_to_take) return super().fit(input_data) diff --git a/test/unit/multimodal/data_generators.py b/test/unit/multimodal/data_generators.py index e5a390d0b5..f47732f758 100644 --- a/test/unit/multimodal/data_generators.py +++ b/test/unit/multimodal/data_generators.py @@ -27,10 +27,10 @@ def get_single_task_multimodal_tabular_data(): task = Task(TaskTypesEnum.classification) # Create features table - features_first = np.array([[0, ' a'], [1, ' a '], [2, ' b'], [3, np.nan], [4, ' a'], - [5, ' b'], [6, 'b '], [7, ' c'], [8, ' c ']], dtype=object) - features_second = np.array([[10, ' a'], [11, ' a '], [12, ' b'], [13, ' a '], [14, ' a'], - [15, ' b'], [16, 'b '], [17, ' c'], [18, ' c ']], dtype=object) + features_first = np.array([[0, 'a'], [1, 'a'], [2, 'b'], [3, np.nan], [4, 'a'], + [5, 'b'], [6, 'b'], [7, 'c'], [8, 'c']], dtype=object) + features_second = 
np.array([[10, 'a'], [11, 'a'], [12, 'b'], [13, 'a'], [14, 'a'], + [15, 'b'], [16, 'b'], [17, 'c'], [18, 'c']], dtype=object) target = np.array(['true', 'false', 'true', 'false', 'false', 'false', 'false', 'true', 'true'], dtype=str) diff --git a/test/unit/preprocessing/test_preprocessing_through_api.py b/test/unit/preprocessing/test_preprocessing_through_api.py index 6e42ee0975..cb2d2479b2 100644 --- a/test/unit/preprocessing/test_preprocessing_through_api.py +++ b/test/unit/preprocessing/test_preprocessing_through_api.py @@ -16,10 +16,15 @@ def data_with_only_categorical_features(): features = np.array([["'a'", "0", "1"], ["'b'", "1", "0"], ["'c'", "1", "0"]], dtype=object) - input_data = InputData(idx=np.array([0, 1, 2]), features=features, - target=np.array([0, 1, 2]), - task=task, data_type=DataTypesEnum.table, - supplementary_data=supp_data) + input_data = InputData( + idx=np.array([0, 1, 2]), + features=features, + target=np.array([0, 1, 2]), + categorical_idx=np.array([0, 1, 2]), + numerical_idx=np.array([]), + task=task, data_type=DataTypesEnum.table, + supplementary_data=supp_data + ) return input_data From 4085f5501eb3b9b7874f556940191652bda7fbfe Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Tue, 20 Aug 2024 15:09:07 +0300 Subject: [PATCH 38/69] Update expected_values for test_metrics test --- test/data/expected_metric_values.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/data/expected_metric_values.json b/test/data/expected_metric_values.json index 8a293325c6..4b81051a1d 100644 --- a/test/data/expected_metric_values.json +++ b/test/data/expected_metric_values.json @@ -13,11 +13,11 @@ "accuracy": -0.95 }, "multiclass": { - "roc_auc": -0.9832500832500832, + "roc_auc": -0.9881784881784883, "precision": -0.9777777777777779, "f1": -0.9719701552732407, "neg_log_loss": 0.17094588819131074, - "roc_auc_pen": -0.9789893328893329, + "roc_auc_pen": -0.9838963813963815, "accuracy": -0.9722222222222222 }, "regression": { From f9f8acfa75b0308075688545eb08396fc3211f29 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Tue, 20 Aug 2024 15:37:10 +0300 Subject: [PATCH 39/69] pep8 fixes --- fedot/api/api_utils/api_data.py | 2 +- fedot/core/data/data.py | 1 + fedot/core/data/data_preprocessing.py | 5 +++-- .../data_operations/categorical_encoders.py | 2 -- .../data_operations/sklearn_transformations.py | 4 ++-- fedot/preprocessing/categorical.py | 4 +--- fedot/preprocessing/data_types.py | 2 +- fedot/preprocessing/preprocessing.py | 6 ++++-- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index c9db8f641e..7d54a50745 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -6,7 +6,7 @@ from golem.core.log import default_log from fedot.api.api_utils.data_definition import data_strategy_selector, FeaturesType, TargetType -from fedot.core.data.data import InputData, OutputData, data_type_is_table, OptimisedFeature +from fedot.core.data.data import InputData, OutputData, data_type_is_table from fedot.core.data.data_preprocessing import convert_into_column from fedot.core.data.multi_modal import MultiModalData from fedot.core.pipelines.pipeline import Pipeline diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 41373743f5..eda9440313 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -681,6 +681,7 @@ class OutputData(Data): target: Optional[np.ndarray] = None encoded_idx: Optional[np.ndarray] = None + @dataclass 
class OptimisedFeature: _columns: list = field(default_factory=list, init=False) diff --git a/fedot/core/data/data_preprocessing.py b/fedot/core/data/data_preprocessing.py index 4323020da3..ff767bfde6 100644 --- a/fedot/core/data/data_preprocessing.py +++ b/fedot/core/data/data_preprocessing.py @@ -34,7 +34,8 @@ def convert_into_column(array: np.ndarray) -> np.ndarray: def divide_data_categorical_numerical(input_data: InputData, categorical_ids: np.ndarray, - non_categorical_ids: np.ndarray) -> Tuple[Optional[InputData], Optional[InputData]]: + non_categorical_ids: np.ndarray) -> \ + Tuple[Optional[InputData], Optional[InputData]]: """ Split tabular InputData into two parts: with numerical and categorical features using list with ids of categorical and numerical features. @@ -98,7 +99,7 @@ def data_has_categorical_features(data: InputData) -> bool: if data.data_type is not DataTypesEnum.table: return False - cat_ids, non_cat_ids = data.categorical_idx, data.numerical_idx + cat_ids, _ = data.categorical_idx, data.numerical_idx if len(cat_ids) > 0: data.categorical_features = data.features[:, cat_ids] diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py index 03b1ac70bb..c2ffac8e93 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py @@ -5,7 +5,6 @@ from sklearn.preprocessing import LabelEncoder, OneHotEncoder from fedot.core.data.data import InputData, OutputData, OptimisedFeature -from fedot.core.data.data_preprocessing import find_categorical_columns from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ( DataOperationImplementation ) @@ -34,7 +33,6 @@ def fit(self, input_data: InputData): :return encoder: trained encoder (optional output) """ features = input_data.features - feature_type_ids = input_data.supplementary_data.col_type_ids['features'] self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx # If there are categorical features - process it diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py index ca6e59a8e9..8367007e0c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_transformations.py @@ -9,8 +9,8 @@ from fedot.core.constants import PCA_MIN_THRESHOLD_TS from fedot.core.data.data import InputData, OutputData, data_type_is_table -from fedot.core.data.data_preprocessing import convert_into_column, data_has_categorical_features, \ - divide_data_categorical_numerical, find_categorical_columns, replace_inf_with_nans +from fedot.core.data.data_preprocessing import convert_into_column, divide_data_categorical_numerical, \ + replace_inf_with_nans from fedot.core.operations.evaluation.operation_implementations. 
\ implementation_interfaces import DataOperationImplementation, EncodedInvariantImplementation from fedot.core.operations.operation_parameters import OperationParameters diff --git a/fedot/preprocessing/categorical.py b/fedot/preprocessing/categorical.py index 07c70de0c9..9994c98167 100644 --- a/fedot/preprocessing/categorical.py +++ b/fedot/preprocessing/categorical.py @@ -5,7 +5,6 @@ from sklearn.preprocessing import LabelEncoder from fedot.core.data.data import InputData -from fedot.core.data.data_preprocessing import find_categorical_columns from fedot.preprocessing.data_types import FEDOT_STR_NAN, TYPE_TO_ID @@ -24,6 +23,7 @@ def fit(self, input_data: InputData): Find indices of columns which are contains categorical values. Binary features and at the same time has str objects. If there are such features - convert it into int """ + # TODO: Add log.message with binary ids if np.size(input_data.categorical_idx) != 0: categorical_columns = input_data.features[:, input_data.categorical_idx].T nan_matrix = pd.DataFrame(categorical_columns.T, columns=input_data.categorical_idx).isna().values.T @@ -57,8 +57,6 @@ def fit(self, input_data: InputData): input_data.categorical_idx = [idx for idx in input_data.categorical_idx if idx not in binary_ids_to_convert] input_data.categorical_idx = np.array(input_data.categorical_idx) self.binary_ids_to_convert = binary_ids_to_convert - - # TODO: Add log.message with binary ids return self diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py index 1628ead891..6005cff5a5 100644 --- a/fedot/preprocessing/data_types.py +++ b/fedot/preprocessing/data_types.py @@ -315,7 +315,7 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData): else: self.log.message(f'--- Preprocessing define next cols {all_cat_col_ids} as categorical') else: - self.log.message(f'--- Preprocessing was unable to define the categorical columns') + self.log.message('--- Preprocessing was unable to define the categorical columns') def _into_categorical_features_transformation_for_predict(self, data: InputData): """ Apply conversion into categorical string column for every signed column """ diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index d98c32da1a..434da34928 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -403,7 +403,7 @@ def _apply_categorical_encoding(self, data: InputData, source_name: str) -> Inpu self.features_encoders[source_name] = encoder self.log.message(f'--- {encoder.__class__.__name__} was chosen') - self.log.message(f'--- Fitting and transforming data') + self.log.message('--- Fitting and transforming data') output_data = encoder.transform_for_fit(data) output_data.predict = output_data.predict.astype(float) data.features = output_data.predict @@ -611,7 +611,9 @@ def reduce_mem_usage_np(arr, initial_types): return data def _update_num_and_cats_ids(self, feature_type_ids): - numerical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float], TYPE_TO_ID[bool]])) + numerical_idx = np.flatnonzero( + np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float], TYPE_TO_ID[bool]]) + ) categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]])) return numerical_idx, categorical_idx From fca7ef6e3c7e1c996c1ee8cd4cc5d70c31b7ce16 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Tue, 20 Aug 2024 19:28:09 +0300 Subject: [PATCH 40/69] Adding preprocessing copying to predefined models --- 
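For reviewers following the id bookkeeping: the split performed by _update_num_and_cats_ids (reformatted in the pep8 patch above) boils down to masking the per-column type ids. A tiny runnable sketch, with placeholder id values rather than the real TYPE_TO_ID mapping from fedot.preprocessing.data_types:

    import numpy as np

    # Placeholder ids for illustration only
    TYPE_TO_ID = {int: 0, float: 1, bool: 2, str: 3}

    def split_feature_ids(feature_type_ids: np.ndarray):
        # bool is treated as numerical, as PATCH 31 established
        numerical_idx = np.flatnonzero(
            np.isin(feature_type_ids, [TYPE_TO_ID[int], TYPE_TO_ID[float], TYPE_TO_ID[bool]])
        )
        categorical_idx = np.flatnonzero(np.isin(feature_type_ids, [TYPE_TO_ID[str]]))
        return numerical_idx, categorical_idx

    type_ids = np.array([TYPE_TO_ID[float], TYPE_TO_ID[str], TYPE_TO_ID[int]])
    print(split_feature_ids(type_ids))  # (array([0, 2]), array([1]))

Downstream consumers such as divide_data_categorical_numerical then slice the feature table with these two index arrays.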
fedot/api/api_utils/api_data.py | 11 ++++++++--- fedot/api/api_utils/predefined_model.py | 15 ++++++++++++--- fedot/api/main.py | 8 +++++--- .../implementation_interfaces.py | 2 +- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index 7d54a50745..9607ad40aa 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -33,14 +33,19 @@ def __init__(self, task: Task, use_input_preprocessing: bool = True): self.task = task self._recommendations = {} - self.preprocessor = DummyPreprocessor() + if use_input_preprocessing: self.preprocessor = DataPreprocessor() # Dictionary with recommendations (e.g. 'cut' for cutting dataset, 'label_encoded' # to encode features using label encoder). Parameters for transformation provided also - self._recommendations = {'cut': self.preprocessor.cut_dataset, - 'label_encoded': self.preprocessor.label_encoding_for_fit} + self._recommendations = { + 'cut': self.preprocessor.cut_dataset, + 'label_encoded': self.preprocessor.label_encoding_for_fit + } + + else: + self.preprocessor = DummyPreprocessor() self.log = default_log(self) diff --git a/fedot/api/api_utils/predefined_model.py b/fedot/api/api_utils/predefined_model.py index 1b50bd8d90..7ba9c288e7 100644 --- a/fedot/api/api_utils/predefined_model.py +++ b/fedot/api/api_utils/predefined_model.py @@ -8,26 +8,35 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.verification import verify_pipeline +from fedot.preprocessing.base_preprocessing import BasePreprocessor class PredefinedModel: def __init__(self, predefined_model: Union[str, Pipeline], data: InputData, log: LoggerAdapter, - use_input_preprocessing: bool = True): + use_input_preprocessing: bool = True, api_preprocessor: BasePreprocessor = None): self.predefined_model = predefined_model self.data = data self.log = log - self.pipeline = self._get_pipeline(use_input_preprocessing) + self.pipeline = self._get_pipeline(use_input_preprocessing, api_preprocessor) - def _get_pipeline(self, use_input_preprocessing: bool = True) -> Pipeline: + def _get_pipeline(self, use_input_preprocessing: bool = True, api_preprocessor: BasePreprocessor = None) -> Pipeline: if isinstance(self.predefined_model, Pipeline): pipelines = self.predefined_model elif self.predefined_model == 'auto': # Generate initial assumption automatically pipelines = AssumptionsBuilder.get(self.data).from_operations().build( use_input_preprocessing=use_input_preprocessing)[0] + + if use_input_preprocessing and api_preprocessor is not None: + pipelines.preprocessor = api_preprocessor + elif isinstance(self.predefined_model, str): model = PipelineNode(self.predefined_model) pipelines = Pipeline(model, use_input_preprocessing=use_input_preprocessing) + + if use_input_preprocessing and api_preprocessor is not None: + pipelines.preprocessor = api_preprocessor + else: raise ValueError(f'{type(self.predefined_model)} is not supported as Fedot model') diff --git a/fedot/api/main.py b/fedot/api/main.py index f389489acc..56f19c3b5c 100644 --- a/fedot/api/main.py +++ b/fedot/api/main.py @@ -169,9 +169,11 @@ def fit(self, with fedot_composer_timer.launch_fitting(): if predefined_model is not None: # Fit predefined model and return it without composing - self.current_pipeline = PredefinedModel(predefined_model, self.train_data, self.log, - use_input_preprocessing=self.params.get( - 'use_input_preprocessing')).fit() + 
self.current_pipeline = PredefinedModel( + predefined_model, self.train_data, self.log, + use_input_preprocessing=self.params.get('use_input_preprocessing'), + api_preprocessor=self.data_processor.preprocessor, + ).fit() else: self.current_pipeline, self.best_models, self.history = self.api_composer.obtain_model(self.train_data) diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index c4f60d1bbb..5a007bf33e 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -162,7 +162,7 @@ def _reasonability_check(features): # For every column in table make check for column_id in range(0, columns_amount): column = features[:, column_id] if columns_amount > 1 else features.copy() - if len(np.unique(column)) > 2: + if len(set(column)) > 2: non_bool_ids.append(column_id) else: bool_ids.append(column_id) From 5a7cd7aa9a1236ffa213ec2ef5f44807d6d8ace2 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Tue, 20 Aug 2024 19:33:09 +0300 Subject: [PATCH 41/69] Adding docstring to reduce memory and optimisedfeatures --- fedot/core/data/data.py | 7 +++++-- .../data_operations/categorical_encoders.py | 4 ++-- fedot/preprocessing/base_preprocessing.py | 14 ++++++++++++++ fedot/preprocessing/dummy_preprocessing.py | 3 +++ fedot/preprocessing/preprocessing.py | 6 +++--- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index eda9440313..8e475bc629 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -42,7 +42,7 @@ class Data: idx: np.ndarray task: Task data_type: DataTypesEnum - features: Union[np.ndarray, OptimisedFeature] + features: Union[np.ndarray, OptimisedFeatures] categorical_features: Optional[np.ndarray] = None categorical_idx: Optional[np.ndarray] = None numerical_idx: Optional[np.ndarray] = None @@ -683,7 +683,10 @@ class OutputData(Data): @dataclass -class OptimisedFeature: +class OptimisedFeatures: + """``Data`` type for optimised storage data. 
+    It is based on numpy ndarray, but the features are stored as a list of np.ndarray columns, each with its own optimal dtype
+    """
     _columns: list = field(default_factory=list, init=False)
     _shape: tuple = field(default=(0, 0), init=False)
     _nbytes: int = 0
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
index c2ffac8e93..057702c6ba 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
@@ -4,7 +4,7 @@
 import numpy as np
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 
-from fedot.core.data.data import InputData, OutputData, OptimisedFeature
+from fedot.core.data.data import InputData, OutputData, OptimisedFeatures
 from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import (
     DataOperationImplementation
 )
@@ -161,7 +161,7 @@ def _apply_label_encoder(self, data: np.ndarray):
         if isinstance(data, np.ndarray):
             data[:, column_id] = transformed_column
 
-        elif isinstance(data, OptimisedFeature):
+        elif isinstance(data, OptimisedFeatures):
             data._columns[column_id] = transformed_column
 
     def get_params(self) -> OperationParameters:
diff --git a/fedot/preprocessing/base_preprocessing.py b/fedot/preprocessing/base_preprocessing.py
index 7871af8fc4..56c238ffb9 100644
--- a/fedot/preprocessing/base_preprocessing.py
+++ b/fedot/preprocessing/base_preprocessing.py
@@ -192,6 +192,20 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD
         """
         raise AbstractMethodNotImplementError
 
+    @abstractmethod
+    def reduce_memory_size(self, data: InputData) -> InputData:
+        """
+        Method that reduces the memory consumption of InputData.
+
+        It works as follows:
+        - Getting the type defined for each feature during preprocessing (e.g. int);
+        - Finding the minimum and maximum values in this feature;
+        - Finding a suitable type and converting the column to it
+        (e.g.: Feature has unique values 0 and 1, the suitable type would be np.bool.
+ Feature has all values between 0 and 100, the suitable type would be np.int8); + """ + raise AbstractMethodNotImplementError + @staticmethod def mark_as_preprocessed(data: Union[InputData, MultiModalData], *, is_obligatory: bool = True): """ diff --git a/fedot/preprocessing/dummy_preprocessing.py b/fedot/preprocessing/dummy_preprocessing.py index d3c4206e34..4d0d1cd456 100644 --- a/fedot/preprocessing/dummy_preprocessing.py +++ b/fedot/preprocessing/dummy_preprocessing.py @@ -63,3 +63,6 @@ def restore_index(self, input_data: InputData, result: OutputData) -> OutputData def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalData] ) -> Union[InputData, MultiModalData]: return test_data + + def reduce_memory_size(self, data: InputData) -> InputData: + return data diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 434da34928..1f85e4e824 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -8,7 +8,7 @@ from golem.core.paths import copy_doc from sklearn.preprocessing import LabelEncoder -from fedot.core.data.data import InputData, np_datetime_to_numeric, OptimisedFeature +from fedot.core.data.data import InputData, np_datetime_to_numeric, OptimisedFeatures from fedot.core.data.data import OutputData, data_type_is_table, data_type_is_text, data_type_is_ts from fedot.core.data.data_preprocessing import ( data_has_categorical_features, @@ -558,10 +558,10 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD last_id = len(input_data.idx) input_data.idx = np.arange(last_id, last_id + input_data.task.task_params.forecast_length) return test_data - + @copy_doc(BasePreprocessor.reduce_memory_size) def reduce_memory_size(self, data: InputData) -> InputData: def reduce_mem_usage_np(arr, initial_types): - reduced_columns = OptimisedFeature() + reduced_columns = OptimisedFeatures() for i in range(arr.shape[1]): col = arr[:, i] From 25cbe7a135c1d0976c089b1310f23f9194ee337d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 Aug 2024 16:37:18 +0000 Subject: [PATCH 42/69] Automated autopep8 fixes --- fedot/api/api_utils/predefined_model.py | 3 ++- fedot/preprocessing/preprocessing.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fedot/api/api_utils/predefined_model.py b/fedot/api/api_utils/predefined_model.py index 7ba9c288e7..09ace672bf 100644 --- a/fedot/api/api_utils/predefined_model.py +++ b/fedot/api/api_utils/predefined_model.py @@ -19,7 +19,8 @@ def __init__(self, predefined_model: Union[str, Pipeline], data: InputData, log: self.log = log self.pipeline = self._get_pipeline(use_input_preprocessing, api_preprocessor) - def _get_pipeline(self, use_input_preprocessing: bool = True, api_preprocessor: BasePreprocessor = None) -> Pipeline: + def _get_pipeline(self, use_input_preprocessing: bool = True, + api_preprocessor: BasePreprocessor = None) -> Pipeline: if isinstance(self.predefined_model, Pipeline): pipelines = self.predefined_model elif self.predefined_model == 'auto': diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 1f85e4e824..f4d95a36e0 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -558,6 +558,7 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD last_id = len(input_data.idx) input_data.idx = np.arange(last_id, last_id + input_data.task.task_params.forecast_length) return test_data + 
From 25cbe7a135c1d0976c089b1310f23f9194ee337d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Tue, 20 Aug 2024 16:37:18 +0000
Subject: [PATCH 42/69] Automated autopep8 fixes

---
 fedot/api/api_utils/predefined_model.py | 3 ++-
 fedot/preprocessing/preprocessing.py    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fedot/api/api_utils/predefined_model.py b/fedot/api/api_utils/predefined_model.py
index 7ba9c288e7..09ace672bf 100644
--- a/fedot/api/api_utils/predefined_model.py
+++ b/fedot/api/api_utils/predefined_model.py
@@ -19,7 +19,8 @@ def __init__(self, predefined_model: Union[str, Pipeline], data: InputData, log:
         self.log = log
         self.pipeline = self._get_pipeline(use_input_preprocessing, api_preprocessor)
 
-    def _get_pipeline(self, use_input_preprocessing: bool = True, api_preprocessor: BasePreprocessor = None) -> Pipeline:
+    def _get_pipeline(self, use_input_preprocessing: bool = True,
+                      api_preprocessor: BasePreprocessor = None) -> Pipeline:
         if isinstance(self.predefined_model, Pipeline):
             pipelines = self.predefined_model
         elif self.predefined_model == 'auto':
diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 1f85e4e824..f4d95a36e0 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -558,6 +558,7 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD
         last_id = len(input_data.idx)
         input_data.idx = np.arange(last_id, last_id + input_data.task.task_params.forecast_length)
         return test_data
+
     @copy_doc(BasePreprocessor.reduce_memory_size)
     def reduce_memory_size(self, data: InputData) -> InputData:
         def reduce_mem_usage_np(arr, initial_types):

From 9053f9f546967f8b924e3f51a6a05cd9cd59b3e2 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Wed, 21 Aug 2024 16:48:44 +0300
Subject: [PATCH 43/69] Fix bug with unhashable np

---
 .../operation_implementations/implementation_interfaces.py   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
index 5a007bf33e..8822b8e436 100644
--- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
+++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
@@ -160,8 +160,8 @@ def 
_reasonability_check(features): non_bool_ids = [] # For every column in table make check - for column_id, column in enumerate(features._columns): + if isinstance(features, OptimisedFeatures): + features = features._columns + + for column_id, column in enumerate(features): # column = features[:, column_id] if columns_amount > 1 else features.copy() if len(set(column)) > 2: non_bool_ids.append(column_id) From 35446365fe944aa86f399ec37d9c0603b04e9955 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 21 Aug 2024 17:50:08 +0300 Subject: [PATCH 46/69] Fix test_regression_data_operations with inf data after poly_features --- .../operation_implementations/implementation_interfaces.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index cb04120612..4a1d2902cc 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -107,6 +107,8 @@ def transform(self, input_data: InputData) -> OutputData: else: transformed_features = features + transformed_features = np.nan_to_num(transformed_features, copy=False, nan=0, posinf=0, neginf=0) + # Update features and column types output_data = self._convert_to_output(input_data, transformed_features) self._update_column_types(source_features_shape, output_data) From 40aabd755b575f9374f214e107845b7463f15883 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 21 Aug 2024 17:57:58 +0300 Subject: [PATCH 47/69] Fix bug in tests with IndexError --- .../operation_implementations/implementation_interfaces.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index 4a1d2902cc..a08c9a9f12 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -164,6 +164,8 @@ def _reasonability_check(features): # For every column in table make check if isinstance(features, OptimisedFeatures): features = features._columns + elif isinstance(features, np.ndarray): + features = features.T for column_id, column in enumerate(features): # column = features[:, column_id] if columns_amount > 1 else features.copy() From 74293811eb6b5adb908a6af437c0e736477e2fe9 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 21 Aug 2024 20:35:49 +0300 Subject: [PATCH 48/69] Adding take by indecies method and to_numpy() in OptimisedFeatures --- fedot/core/data/data.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 8e475bc629..d56bddc044 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -733,9 +733,28 @@ def __setitem__(self, key, value): def __len__(self): return self._shape[0] if self._columns else 0 + def take(self, indices, axis=0): + output = OptimisedFeatures() + + if axis == 0: + # Takes rows + for col in self._columns: + output.add_column(np.take(col, indices, axis)) + elif axis == 1: + # Takes columns + for i in indices: + output.add_column(self._columns[i]) + else: + raise ValueError("Axis must be 0 (rows) or 1 (columns)") + + return output + def 
copy(self): return self._columns.copy() + def to_numpy(self): + return np.transpose(np.array(self._columns)) + @property def shape(self): return self._shape From b58993f30c3165397dd5b00b8e1fb2a308aa2f3f Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 21 Aug 2024 20:36:21 +0300 Subject: [PATCH 49/69] Update train_test_split for OptimisedFeatures --- fedot/core/data/data_split.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fedot/core/data/data_split.py b/fedot/core/data/data_split.py index 73b4f21da2..1c2f34e60a 100644 --- a/fedot/core/data/data_split.py +++ b/fedot/core/data/data_split.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.model_selection import train_test_split -from fedot.core.data.data import InputData +from fedot.core.data.data import InputData, OptimisedFeatures from fedot.core.data.multi_modal import MultiModalData from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import TaskTypesEnum @@ -30,8 +30,13 @@ def _split_input_data_by_indexes(origin_input_data: Union[InputData, MultiModalD return data elif isinstance(origin_input_data, InputData): idx = np.take(origin_input_data.idx, index, 0) - target = np.take(origin_input_data.target, index, 0) - features = np.take(origin_input_data.features, index, 0) + if isinstance(origin_input_data.features, OptimisedFeatures): + features = origin_input_data.features.take(index) + target = origin_input_data.target.take(index) + + else: + features = np.take(origin_input_data.features, index, 0) + target = np.take(origin_input_data.target, index, 0) if origin_input_data.categorical_features is not None: categorical_features = np.take(origin_input_data.categorical_features, index, 0) From 936635cb1b02b7ada4264a9f1ca7d0dddc52208c Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 21 Aug 2024 20:37:02 +0300 Subject: [PATCH 50/69] Transform target to numpy array during memory_reduce --- fedot/preprocessing/preprocessing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index f4d95a36e0..ca03a8eb29 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -608,6 +608,7 @@ def reduce_mem_usage_np(arr, initial_types): if data.target is not None: self.log.message('-- Reduce memory in target') data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target']) + data.target = data.target.to_numpy() return data From 47f214c2330b64f4edc7e4d309b9721a233981e9 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Thu, 22 Aug 2024 18:54:08 +0300 Subject: [PATCH 51/69] PR#1318 migration --- fedot/core/data/data.py | 163 +++- fedot/preprocessing/data_types.py | 27 +- test/data/melb_data.csv | 1001 +++++++++++++++++++++++ test/unit/data/test_data_categorical.py | 205 +++++ 4 files changed, 1352 insertions(+), 44 deletions(-) create mode 100644 test/data/melb_data.csv create mode 100644 test/unit/data/test_data_categorical.py diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index d56bddc044..b382eb6839 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -57,24 +57,29 @@ class Data: def from_numpy(cls, features_array: np.ndarray, target_array: np.ndarray, + features_names: np.ndarray[str] = None, + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, idx: Optional[np.ndarray] = None, task: Union[Task, str] = 'classification', data_type: Optional[DataTypesEnum] = 
DataTypesEnum.table) -> InputData:
         """Import data from numpy array.
 
-        Args:
-            features_array: numpy array with features.
-            target_array: numpy array with target.
-            idx: indices of arrays.
-            task: the :obj:`Task` to solve with the data.
-            data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
+            Args:
+                features_array: numpy array with features.
+                target_array: numpy array with target.
+                features_names: numpy array with the names of the features.
+                categorical_idx: a list or numpy array with indexes or names of the categorical features
+                    (names can be used only if features_names is provided).
+                idx: indices of arrays.
+                task: the :obj:`Task` to solve with the data.
+                data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
 
-        Returns:
-            data
-        """
+            Returns:
+                data: :InputData: representation of data in an internal data structure.
+            """
         if isinstance(task, str):
             task = Task(TaskTypesEnum(task))
-        return array_to_input_data(features_array, target_array, idx, task, data_type)
+        return array_to_input_data(features_array, target_array, features_names, categorical_idx, idx, task, data_type)
 
     @classmethod
     def from_numpy_time_series(cls,
@@ -85,16 +90,16 @@ def from_numpy_time_series(cls,
                                data_type: Optional[DataTypesEnum] = DataTypesEnum.ts) -> InputData:
         """Import time series from numpy array.
 
-        Args:
-            features_array: numpy array with features time series.
-            target_array: numpy array with target time series (if None same as features).
-            idx: indices of arrays.
-            task: the :obj:`Task` to solve with the data.
-            data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
+            Args:
+                features_array: numpy array with features time series.
+                target_array: numpy array with target time series (if None, same as features).
+                idx: indices of arrays.
+                task: the :obj:`Task` to solve with the data.
+                data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
 
-        Returns:
-            data
-        """
+            Returns:
+                data: :InputData: representation of data in an internal data structure.
+            """
         if isinstance(task, str):
             task = Task(TaskTypesEnum(task))
         if target_array is None:
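The new categorical_idx argument accepts either column indexes or column names, and names are resolved through features_names. A short usage sketch on synthetic data (the values and column names are made up; it assumes InputData inherits these classmethods from Data, as elsewhere in FEDOT):

import numpy as np

from fedot.core.data.data import InputData

features = np.array([[25, 0, 50000.0],
                     [32, 1, 64000.0],
                     [47, 1, 58000.0]])
target = np.array([0, 1, 1])

# Mark the second column as categorical by index ...
train_data = InputData.from_numpy(features, target, categorical_idx=[1])

# ... or by name, which requires features_names to be passed as well.
names = np.array(['age', 'gender', 'income'])
train_data = InputData.from_numpy(features, target,
                                  features_names=names,
                                  categorical_idx=['gender'])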
+ """ if isinstance(task, str): task = Task(TaskTypesEnum(task)) @@ -135,11 +142,34 @@ def from_dataframe(cls, categorical_features = None if categorical_idx is not None: - categorical_features = features_df.loc[:, categorical_idx].to_numpy() + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features[:, categorical_idx] + + data = InputData( + idx=idx, + features=features, + target=target, + task=task, + data_type=data_type, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features + ) - return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, - features_names=features_names, categorical_features=categorical_features, - categorical_idx=categorical_idx) + return data @classmethod def from_csv(cls, @@ -149,6 +179,7 @@ def from_csv(cls, data_type: DataTypesEnum = DataTypesEnum.table, columns_to_drop: Optional[List[Union[str, int]]] = None, target_columns: Union[str, List[Union[str, int]]] = '', + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, index_col: Optional[Union[str, int]] = None, possible_idx_keywords: Optional[List[str]] = None) -> InputData: """Import data from ``csv``. @@ -160,6 +191,8 @@ def from_csv(cls, task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. target_columns: name of the target column (the last column if empty and no target if ``None``). + categorical_idx: a list or numpy array with indexes or names of features that indicate that + the feature is categorical. 
index_col: name or index of the column to use as the :obj:`Data.idx`.\n If ``None``, then check the first column's name and use it as index if succeeded (see the param ``possible_idx_keywords``).\n @@ -184,8 +217,36 @@ def from_csv(cls, features, target = process_target_and_features(df, target_columns) - return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, - features_names=features_names) + categorical_features = None + if categorical_idx is not None: + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features[:, categorical_idx] + + data = InputData( + idx=idx, + features=features, + target=target, + task=task, + data_type=data_type, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features + ) + + return data @classmethod def from_csv_time_series(cls, @@ -852,6 +913,8 @@ def np_datetime_to_numeric(data: np.ndarray) -> np.ndarray: def array_to_input_data(features_array: np.ndarray, target_array: np.ndarray, + features_names: np.ndarray[str] = None, + categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, idx: Optional[np.ndarray] = None, task: Task = Task(TaskTypesEnum.classification), data_type: Optional[DataTypesEnum] = None) -> InputData: @@ -859,7 +922,37 @@ def array_to_input_data(features_array: np.ndarray, idx = np.arange(len(features_array)) if data_type is None: data_type = autodetect_data_type(task) - return InputData(idx=idx, features=features_array, target=target_array, task=task, data_type=data_type) + + categorical_features = None + if categorical_idx is not None: + if isinstance(categorical_idx, list): + categorical_idx = np.array(categorical_idx) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + raise ValueError( + 'Impossible to specify categorical features by name when the features_names are not specified' + ) + + if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + categorical_idx = np.array( + [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] + ) + + if categorical_idx.size != 0: + categorical_features = features_array[:, categorical_idx] + + data = InputData( + idx=idx, + features=features_array, + target=target_array, + features_names=features_names, + categorical_idx=categorical_idx, + categorical_features=categorical_features, + task=task, + data_type=data_type + ) + + return data def autodetect_data_type(task: Task) -> DataTypesEnum: diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py index 6005cff5a5..566cdafbde 100644 --- a/fedot/preprocessing/data_types.py +++ b/fedot/preprocessing/data_types.py @@ -286,17 +286,26 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData): num_df = pd.DataFrame(data.features[:, numeric_type_ids], columns=numeric_type_ids) nuniques = num_df.nunique(dropna=True) + # TODO: Improve the naive approach (with categorical_max_uniques_th) of 
identifying categorical data + # to a smarter approach (eg. numeric, features naming with llm) # reduce dataframe to include only categorical features num_df = num_df.loc[:, (2 < nuniques) & (nuniques < self.categorical_max_uniques_th)] - cat_col_from_heuristic_rule_ids = num_df.columns - - # Convert into string - data.features[:, cat_col_from_heuristic_rule_ids] = num_df.apply( - convert_num_column_into_string_array).to_numpy() - # Columns need to be transformed into categorical (string) ones - self.numerical_into_str.extend(cat_col_from_heuristic_rule_ids.difference(self.numerical_into_str)) - # Update information about column types (in-place) - feature_type_ids[cat_col_from_heuristic_rule_ids] = TYPE_TO_ID[str] + + if data.categorical_idx is not None: + # If cats features were defined take it + cat_col_ids = data.categorical_idx + else: + # Else cats features are selected by heuristic rule + cat_col_ids = num_df.columns + + if np.size(cat_col_ids) > 0: + # Convert into string + data.features[:, cat_col_ids] = num_df.apply( + convert_num_column_into_string_array).to_numpy() + # Columns need to be transformed into categorical (string) ones + self.numerical_into_str.extend(cat_col_ids.difference(self.numerical_into_str)) + # Update information about column types (in-place) + feature_type_ids[cat_col_ids] = TYPE_TO_ID[str] # Update cat cols idx in data is_cat_type = np.isin(feature_type_ids, [TYPE_TO_ID[str]]) diff --git a/test/data/melb_data.csv b/test/data/melb_data.csv new file mode 100644 index 0000000000..53d430c2c1 --- /dev/null +++ b/test/data/melb_data.csv @@ -0,0 +1,1001 @@ +Type,Method,Regionname,Rooms,Distance,Postcode,Bedroom2,Bathroom,Landsize,Lattitude,Longtitude,Propertycount,Price +t,VB,Western Metropolitan,2,11.7,3033.0,2.0,2.0,62.0,-37.73893,144.87661,5629.0,520000.0 +h,SP,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,670.0,-37.91855,145.02628,6795.0,1573000.0 +t,S,Western Metropolitan,4,7.5,3040.0,4.0,3.0,205.0,-37.74588,144.92700000000005,9264.0,1185000.0 +u,S,Southern Metropolitan,1,4.6,3122.0,2.0,1.0,0.0,-37.8264,145.02700000000004,11308.0,485000.0 +h,S,Eastern Metropolitan,3,23.0,3136.0,3.0,1.0,1082.0,-37.80618,145.27755,11925.0,1005000.0 +t,S,Southern Metropolitan,3,10.7,3187.0,3.0,2.0,119.0,-37.9084,145.0118,6938.0,972000.0 +h,VB,Eastern Metropolitan,4,10.3,3084.0,4.0,2.0,707.0,-37.762,145.0645,1651.0,1750000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,730.0,-37.9207,145.0479,6795.0,1550000.0 +h,VB,Northern Metropolitan,3,3.5,3068.0,3.0,2.0,240.0,-37.782,144.9834,6244.0,1700000.0 +u,PI,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8753,144.9902,8989.0,680000.0 +t,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,159.0,-37.7059,145.0115,21650.0,416000.0 +h,S,Eastern Metropolitan,3,21.3,3135.0,3.0,2.0,941.0,-37.81289,145.24213,4407.0,1400000.0 +u,SA,Southern Metropolitan,1,4.6,3142.0,1.0,1.0,0.0,-37.8421,145.0104,7217.0,247500.0 +h,S,South-Eastern Metropolitan,4,35.4,3198.0,4.0,2.0,542.0,-38.11161,145.15011,8077.0,768000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,17200.0,-37.83613,144.99661,14887.0,762500.0 +h,PI,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,193.0,-37.76418,144.95715,11918.0,1100000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8166,145.0163,11308.0,663000.0 +h,S,Northern Metropolitan,3,15.3,3074.0,3.0,2.0,545.0,-37.68403,144.99246000000005,7955.0,740000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,0.0,-37.7539,144.989,8870.0,1190000.0 +h,VB,Southern 
Metropolitan,3,5.9,3144.0,3.0,4.0,950.0,-37.85905,145.03229,4675.0,4900000.0 +h,S,Northern Metropolitan,3,5.2,3055.0,3.0,1.0,613.0,-37.76883,144.94592,7082.0,1445000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,713.0,-37.7301,144.8671,5629.0,900000.0 +h,S,Western Metropolitan,3,14.8,3023.0,3.0,2.0,461.0,-37.74518,144.74708,1607.0,580000.0 +h,PI,Western Metropolitan,3,11.1,3025.0,3.0,1.0,540.0,-37.8294,144.8378,5132.0,710000.0 +h,S,Eastern Metropolitan,4,13.9,3108.0,4.0,4.0,1157.0,-37.7779,145.127,9028.0,1924500.0 +h,S,Northern Victoria,3,26.1,3099.0,3.0,2.0,785.0,-37.637,145.20166,1345.0,600000.0 +h,S,Southern Metropolitan,4,4.6,3122.0,4.0,1.0,383.0,-37.8237,145.0311,11308.0,2100000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,1272.0,-37.84283,145.00015,14887.0,771000.0 +u,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8842,144.9829,8989.0,645000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,1.0,464.0,-37.8151,144.8638,6543.0,796000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,310.0,-37.7288,145.0224,21650.0,498000.0 +h,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,292.0,-37.7642,144.9195,6232.0,1055000.0 +t,S,Northern Metropolitan,2,5.2,3056.0,2.0,2.0,177.0,-37.7625,144.9653,11918.0,754000.0 +h,S,Southern Metropolitan,4,10.4,3125.0,4.0,2.0,514.0,-37.84248,145.10181,5678.0,1750000.0 +h,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,650.0,-37.699,144.9421,8870.0,690000.0 +h,S,Northern Metropolitan,2,17.9,3082.0,2.0,1.0,257.0,-37.65636,145.03996999999995,10529.0,421000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,710.0,-37.9348,145.0634,10969.0,1085000.0 +h,S,Northern Metropolitan,4,12.1,3083.0,4.0,3.0,331.0,-37.67998,145.07345,10175.0,758000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,134.0,-37.8987,145.0557,7822.0,765000.0 +u,S,Northern Metropolitan,3,1.6,3066.0,3.0,2.0,0.0,-37.8032,144.9842,4553.0,1326000.0 +t,PI,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,164.0,-37.9378,145.0385,2555.0,750000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,622.0,-37.68908,144.9116,851.0,626000.0 +h,S,Northern Metropolitan,4,3.6,3068.0,4.0,2.0,191.0,-37.79274,144.99863,2954.0,1662500.0 +h,S,Southern Metropolitan,4,11.8,3204.0,4.0,2.0,705.0,-37.9035,145.028,3578.0,1715000.0 +h,S,Northern Metropolitan,5,5.5,3070.0,4.0,3.0,490.0,-37.7726,145.0048,11364.0,2700000.0 +t,VB,Northern Metropolitan,2,1.9,3003.0,2.0,2.0,54.0,-37.8094,144.9479,2230.0,800000.0 +h,S,Southern Metropolitan,3,7.3,3146.0,3.0,2.0,0.0,-37.85139,145.05835,10412.0,1440000.0 +t,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,257.0,-37.813,144.8703,6543.0,1070000.0 +h,SP,Western Metropolitan,4,8.7,3032.0,4.0,2.0,215.0,-37.7817,144.8916,4918.0,770000.0 +h,PI,Southern Metropolitan,4,4.6,3142.0,4.0,2.0,237.0,-37.8507,145.0298,7217.0,2025000.0 +h,S,Eastern Metropolitan,5,12.4,3108.0,5.0,2.0,726.0,-37.78133,145.10833,9028.0,1540500.0 +h,S,Eastern Metropolitan,4,13.9,3108.0,4.0,3.0,657.0,-37.7954,145.1379,9028.0,1520000.0 +h,S,Southern Metropolitan,3,7.4,3144.0,3.0,2.0,258.0,-37.8644,145.0302,4675.0,1895000.0 +h,S,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,667.0,-37.8007,145.0327,10331.0,1507000.0 +h,S,Eastern Metropolitan,5,13.8,3084.0,5.0,3.0,531.0,-37.7378,145.0955,2698.0,1025000.0 +h,PI,Southern Metropolitan,3,4.6,3181.0,3.0,2.0,362.0,-37.85327,144.99947,4380.0,1970000.0 +h,S,Southern Metropolitan,4,11.0,3147.0,4.0,2.0,696.0,-37.8711,145.0746,3052.0,1860000.0 +h,S,Western Metropolitan,5,7.5,3040.0,5.0,3.0,590.0,-37.75511,144.90935,9264.0,2210000.0 +h,S,Western 
Metropolitan,4,10.8,3019.0,4.0,2.0,599.0,-37.7896,144.8559,3589.0,856500.0 +u,VB,Southern Metropolitan,1,2.1,3205.0,1.0,1.0,0.0,-37.8341,144.9713,5943.0,320000.0 +h,SA,Western Metropolitan,4,5.1,3011.0,4.0,2.0,180.0,-37.79686,144.908,7570.0,1000000.0 +u,S,Southern Metropolitan,1,5.0,3182.0,1.0,1.0,0.0,-37.85705,144.98699,13240.0,451000.0 +h,PI,Southern Metropolitan,2,13.9,3165.0,2.0,1.0,591.0,-37.9179,145.071,10969.0,702000.0 +h,S,Northern Metropolitan,3,5.9,3055.0,3.0,1.0,349.0,-37.7589,144.9368,7082.0,810000.0 +h,SP,Northern Metropolitan,3,2.6,3052.0,3.0,2.0,173.0,-37.7795,144.9413,2309.0,965000.0 +h,SP,Northern Metropolitan,4,9.2,3058.0,4.0,2.0,302.0,-37.7271,144.9842,3445.0,735000.0 +h,S,Southern Metropolitan,4,3.3,3206.0,4.0,1.0,306.0,-37.8459,144.9574,3280.0,2950000.0 +h,S,Southern Metropolitan,3,11.2,3127.0,0.0,2.0,335.0,-37.8165,145.0981,5457.0,1560000.0 +h,S,Northern Metropolitan,2,3.2,3054.0,3.0,1.0,100.0,-37.7879,144.9759,3106.0,910000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,1.0,263.0,-37.7753,144.9116,6567.0,955000.0 +u,SP,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,710.0,-37.8259,145.0483,6482.0,500000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,2.0,708.0,-37.7257,144.9418,7485.0,937000.0 +h,PI,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,756.0,-37.58885,144.90135,15510.0,665000.0 +h,S,Western Metropolitan,3,18.0,3037.0,3.0,2.0,666.0,-37.68381,144.73331000000005,5556.0,660000.0 +h,SP,Western Metropolitan,2,8.0,3016.0,2.0,1.0,278.0,-37.857,144.8906,6380.0,850000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,1.0,452.0,-37.7762,144.9174,6567.0,1215000.0 +h,S,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,363.0,-37.7621,144.9506,11918.0,1217000.0 +u,S,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,0.0,-37.8333,144.998,14949.0,695000.0 +h,S,Northern Metropolitan,3,17.9,3082.0,3.0,1.0,345.0,-37.67121,145.06246000000004,10529.0,665000.0 +u,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,166.0,-37.7624,144.9365,6232.0,600000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,210.0,-37.7947,144.8871,7570.0,831000.0 +t,S,Southern Metropolitan,3,8.4,3126.0,3.0,2.0,230.0,-37.81653,145.05971,3265.0,1381000.0 +u,SP,Western Metropolitan,2,12.8,3033.0,2.0,1.0,220.0,-37.7346,144.8584,5629.0,490000.0 +t,VB,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,163.0,-37.8425,144.9877,14887.0,2010000.0 +h,S,Southern Metropolitan,3,9.7,3103.0,3.0,1.0,281.0,-37.8013,145.0652,5682.0,905000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,452.0,-37.7271,145.0009,21650.0,623500.0 +h,S,Southern Metropolitan,3,12.3,3166.0,3.0,1.0,501.0,-37.90805,145.10683999999995,768.0,1093800.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,639.0,-37.7471,144.9157,9264.0,1720000.0 +u,S,Western Metropolitan,2,13.5,3020.0,2.0,1.0,235.0,-37.7847,144.8146,6763.0,350000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,473.0,-37.8217,144.8842,6543.0,1320000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,662.0,-37.738,144.869,5629.0,935000.0 +u,S,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,0.0,-37.88368,145.0515,7822.0,750000.0 +h,PI,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,374.0,-37.939,145.0533,2555.0,725000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,174.0,-37.8978,145.062,7822.0,650000.0 +u,S,Southern Metropolitan,2,2.1,3205.0,2.0,1.0,0.0,-37.8341,144.9713,5943.0,490000.0 +t,PI,Northern Metropolitan,3,7.8,3058.0,3.0,2.0,531.0,-37.7424,144.9571,11204.0,720000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,504.0,-37.61419,144.93448,5833.0,570000.0 +h,VB,Southern 
Metropolitan,5,9.7,3103.0,5.0,4.0,1437.0,-37.8058,145.0882,5682.0,4000000.0 +h,S,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,152.0,-37.7611,144.966,11918.0,770000.0 +h,S,Northern Metropolitan,2,12.1,3046.0,2.0,1.0,394.0,-37.7153,144.9507,2606.0,610000.0 +h,S,Southern Metropolitan,4,4.5,3181.0,4.0,1.0,305.0,-37.8493,144.9873,7717.0,1820000.0 +t,S,Northern Metropolitan,2,3.4,3031.0,2.0,1.0,105.0,-37.79244,144.92036000000004,5263.0,841000.0 +h,S,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,459.0,-37.7917,144.9251,5263.0,1335000.0 +u,S,Southern Metropolitan,3,5.4,3101.0,3.0,1.0,1096.0,-37.81207,145.0371,10331.0,660000.0 +h,PI,Southern Metropolitan,4,13.7,3188.0,4.0,3.0,684.0,-37.9436,145.0169,5454.0,2500000.0 +h,S,Eastern Metropolitan,5,10.5,3081.0,5.0,2.0,596.0,-37.7487,145.0522,2947.0,890000.0 +u,SA,Western Metropolitan,1,5.1,3011.0,1.0,1.0,1015.0,-37.78778,144.89037,7570.0,240000.0 +u,SP,Northern Metropolitan,1,2.0,3066.0,1.0,1.0,0.0,-37.79597,144.99108,4553.0,365000.0 +t,S,Eastern Metropolitan,2,10.6,3084.0,2.0,1.0,86.0,-37.7586,145.0629,2890.0,630000.0 +h,SA,Northern Metropolitan,4,11.2,3046.0,4.0,2.0,697.0,-37.72001,144.91683,2651.0,1138000.0 +u,S,Southern Metropolitan,2,2.7,3141.0,2.0,1.0,0.0,-37.83613,144.99661,14887.0,666000.0 +h,SP,Western Metropolitan,3,31.7,3429.0,3.0,1.0,639.0,-37.56291,144.72848,14092.0,455000.0 +h,PI,Southern Metropolitan,2,9.7,3103.0,2.0,0.0,1611.0,-37.8092,145.1016,5682.0,1010000.0 +u,VB,Southern Metropolitan,2,3.8,3207.0,2.0,2.0,2166.0,-37.8415,144.9412,8648.0,820000.0 +h,S,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,319.0,-37.7572,144.9686,11918.0,830000.0 +h,VB,Southern Metropolitan,3,3.3,3141.0,3.0,3.0,191.0,-37.836,144.9824,14887.0,3000000.0 +t,S,Southern Metropolitan,4,1.9,3008.0,4.0,2.0,0.0,-37.8141,144.9387,4707.0,1370000.0 +u,SP,Western Metropolitan,2,5.9,3032.0,2.0,1.0,301.0,-37.7791,144.914,6567.0,610000.0 +h,SP,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,589.0,-37.7006,144.9697,5070.0,650000.0 +t,VB,Western Metropolitan,3,6.4,3011.0,3.0,1.0,219.0,-37.796,144.8819,7570.0,760000.0 +u,S,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,109.0,-37.89578,145.06899,4442.0,660000.0 +h,S,Southern Metropolitan,3,3.8,3207.0,3.0,1.0,108.0,-37.8332,144.945,8648.0,1402000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,318.0,-37.7648,144.9633,11918.0,1200000.0 +h,S,South-Eastern Metropolitan,4,18.8,3170.0,4.0,2.0,492.0,-37.92299000000001,145.19156,7113.0,1030000.0 +h,S,Northern Metropolitan,4,3.4,3068.0,4.0,2.0,162.0,-37.7885,144.9994,2954.0,1506000.0 +h,SP,Eastern Metropolitan,2,10.9,3128.0,2.0,1.0,701.0,-37.82756,145.11897,4605.0,1820000.0 +u,PI,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.7911,144.89,7570.0,85000.0 +h,S,Southern Metropolitan,3,11.4,3163.0,3.0,2.0,603.0,-37.9023,145.0568,7822.0,1430000.0 +h,S,Northern Metropolitan,2,4.5,3057.0,2.0,1.0,263.0,-37.7661,144.9742,5533.0,1283000.0 +u,S,Northern Metropolitan,2,2.8,3000.0,2.0,2.0,1136.0,-37.8211,144.9559,17496.0,683000.0 +h,VB,Western Metropolitan,3,6.9,3039.0,3.0,1.0,572.0,-37.7683,144.9325,6232.0,1100000.0 +u,S,Northern Metropolitan,2,3.5,3068.0,2.0,2.0,4296.0,-37.7846,144.9785,6244.0,720000.0 +h,PI,Western Metropolitan,4,6.4,3011.0,4.0,2.0,369.0,-37.7914,144.8957,7570.0,815000.0 +h,S,Southern Metropolitan,4,7.5,3123.0,4.0,3.0,726.0,-37.8239,145.0553,6482.0,2920000.0 +u,VB,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,3084.0,-37.8414,145.0505,6482.0,500000.0 +u,S,Northern Metropolitan,2,1.6,3065.0,2.0,1.0,0.0,-37.7967,144.9836,5825.0,790000.0 +h,S,Southern 
Metropolitan,3,4.6,3122.0,3.0,2.0,264.0,-37.8163,145.0301,11308.0,1510000.0 +u,S,Southern Metropolitan,3,7.8,3124.0,3.0,1.0,112.0,-37.8378,145.0949,8920.0,835000.0 +h,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,268.0,-37.7113,145.0224,21650.0,510000.0 +h,S,Western Metropolitan,3,8.0,3040.0,3.0,1.0,175.0,-37.7393,144.894,9264.0,801500.0 +u,PI,Southern Metropolitan,1,3.3,3141.0,1.0,1.0,14500.0,-37.8372,144.9963,14887.0,315000.0 +h,S,Northern Metropolitan,3,2.3,3051.0,2.0,1.0,517.0,-37.7967,144.9472,6821.0,1635000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,504.0,-37.7476,144.9842,14577.0,1123000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,1.0,477.0,-37.8516,144.8949,6380.0,1360000.0 +u,S,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,1332.0,-37.8181,144.9901,14949.0,360000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,309.0,-37.7434,144.9697,11204.0,985000.0 +h,S,Southern Metropolitan,3,7.8,3124.0,3.0,1.0,970.0,-37.8344,145.0818,8920.0,1700000.0 +h,S,Southern Metropolitan,5,11.7,3125.0,5.0,2.0,544.0,-37.8525,145.1154,5678.0,1100000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,595.0,-37.8422,144.9855,14887.0,642000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,650.0,-37.70768,144.92586,8870.0,820000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,3.0,2.0,167.0,-37.7869,144.9228,3593.0,870000.0 +h,S,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,150.0,-37.7963,144.935,5263.0,775000.0 +u,S,Southern Metropolitan,2,9.2,3104.0,2.0,1.0,0.0,-37.7961,145.0808,7809.0,560000.0 +u,S,Western Metropolitan,2,6.2,3015.0,2.0,1.0,163.0,-37.84608,144.86525,5498.0,600000.0 +h,SP,Northern Metropolitan,1,8.8,3072.0,1.0,1.0,397.0,-37.7366,145.0115,14577.0,830000.0 +u,VB,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,0.0,-37.8555,145.0018,4380.0,380000.0 +h,S,Western Metropolitan,3,14.8,3023.0,3.0,3.0,585.0,-37.75907,144.75923999999995,6388.0,572000.0 +h,S,Northern Metropolitan,3,9.2,3058.0,3.0,1.0,565.0,-37.7283,144.969,3445.0,864000.0 +u,S,Northern Metropolitan,2,4.5,3057.0,2.0,1.0,0.0,-37.7786,144.9753,5533.0,665000.0 +h,S,Northern Metropolitan,3,19.6,3076.0,3.0,2.0,606.0,-37.62858,145.03735,10926.0,465000.0 +u,S,Southern Metropolitan,2,7.2,3184.0,2.0,1.0,837.0,-37.87307,144.98635,8989.0,930000.0 +h,S,Western Metropolitan,2,13.9,3020.0,2.0,1.0,497.0,-37.7806,144.8159,2185.0,541000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,258.0,-37.7584,144.9971,8870.0,925000.0 +h,S,Eastern Metropolitan,3,21.3,3135.0,3.0,1.0,756.0,-37.82909,145.233,3794.0,1040000.0 +h,SP,Northern Metropolitan,3,16.5,3049.0,3.0,1.0,532.0,-37.67949,144.88349,2474.0,540000.0 +h,S,Southern Metropolitan,5,9.2,3146.0,5.0,2.0,1339.0,-37.8649,145.0547,10412.0,3365000.0 +h,S,Southern Metropolitan,3,7.4,3144.0,3.0,2.0,246.0,-37.871,145.0364,4675.0,1970000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,584.0,-37.67159,144.94522,2246.0,408500.0 +h,VB,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,783.0,-37.8181,145.0913,5457.0,2500000.0 +u,S,Western Metropolitan,1,7.5,3040.0,1.0,1.0,0.0,-37.75121,144.91326,9264.0,291000.0 +t,VB,Northern Metropolitan,3,7.0,3071.0,3.0,2.0,120.0,-37.76343,145.02096,8870.0,900000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,292.0,-37.797,144.9051,7570.0,1003000.0 +h,S,Eastern Victoria,3,26.5,3138.0,3.0,1.0,864.0,-37.76983,145.31687,8280.0,760000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,253.0,-37.7566,144.9965,8870.0,1280000.0 +h,PI,Western Metropolitan,4,13.8,3018.0,4.0,1.0,655.0,-37.868,144.8154,5301.0,780000.0 +h,S,Northern 
Metropolitan,3,12.0,3073.0,3.0,1.0,606.0,-37.72057,145.02615,21650.0,760000.0 +h,S,South-Eastern Metropolitan,3,15.5,3167.0,3.0,1.0,640.0,-37.93646,145.08728,3692.0,945000.0 +h,PI,South-Eastern Metropolitan,3,15.5,3167.0,3.0,1.0,601.0,-37.93869,145.08441000000005,3692.0,840000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,321.0,-37.7242,144.9424,7485.0,708000.0 +h,S,Western Metropolitan,3,4.3,3032.0,3.0,1.0,196.0,-37.77552,144.92022,6567.0,990000.0 +u,S,Western Metropolitan,2,10.5,3034.0,2.0,1.0,263.0,-37.7697,144.8657,4502.0,440000.0 +h,PI,Northern Metropolitan,4,11.2,3046.0,4.0,1.0,587.0,-37.69897,144.90998000000005,8870.0,651000.0 +h,S,Southern Metropolitan,4,11.8,3204.0,4.0,2.0,618.0,-37.9051,145.0473,3578.0,1486000.0 +h,S,Western Metropolitan,3,14.7,3030.0,3.0,1.0,530.0,-37.90111,144.63302,16166.0,390000.0 +h,S,Eastern Metropolitan,3,13.4,3130.0,3.0,1.0,448.0,-37.82956,145.13868,5713.0,1070000.0 +t,S,Western Metropolitan,3,6.4,3011.0,3.0,2.0,102.0,-37.7987,144.8807,7570.0,757000.0 +h,S,Eastern Metropolitan,3,14.3,3109.0,3.0,1.0,696.0,-37.77973,145.16473,10999.0,1369000.0 +h,S,Western Metropolitan,3,14.0,3021.0,3.0,1.0,539.0,-37.74845,144.8144,14042.0,645000.0 +h,S,Northern Metropolitan,3,3.6,3068.0,3.0,1.0,192.0,-37.78992,144.99845,2954.0,1460000.0 +h,SP,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,597.0,-37.99232,145.08469,5087.0,1300000.0 +h,S,Northern Metropolitan,3,3.4,3068.0,3.0,2.0,201.0,-37.7884,145.0,2954.0,1680000.0 +u,PI,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8619,144.976,13240.0,400000.0 +h,PI,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,1069.0,-37.6931,144.9596,5070.0,1000000.0 +h,S,Southern Metropolitan,3,9.2,3146.0,3.0,1.0,652.0,-37.8552,145.0785,10412.0,1825000.0 +u,SP,Western Metropolitan,2,8.7,3032.0,2.0,1.0,5661.0,-37.773,144.8806,4918.0,420000.0 +h,S,Eastern Victoria,3,26.5,3138.0,3.0,1.0,484.0,-37.78454,145.33073000000005,8280.0,581000.0 +h,SP,Western Metropolitan,3,8.0,3040.0,3.0,1.0,578.0,-37.7526,144.9089,9264.0,1400000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,279.0,-37.77,145.0049,11364.0,980000.0 +h,VB,Northern Metropolitan,4,12.4,3060.0,4.0,2.0,254.0,-37.7082,144.9725,5070.0,540000.0 +h,SP,Western Metropolitan,3,7.7,3015.0,3.0,1.0,320.0,-37.8277,144.8841,1223.0,1016000.0 +t,SP,Eastern Metropolitan,3,14.2,3149.0,3.0,2.0,236.0,-37.88678,145.12748,13366.0,1035000.0 +u,S,Southern Metropolitan,2,11.2,3127.0,2.0,1.0,180.0,-37.8351,145.10299999999995,5457.0,825000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,2.0,559.0,-37.7236,144.9347,7485.0,1196000.0 +h,VB,Southern Metropolitan,4,7.7,3184.0,3.0,3.0,235.0,-37.8775,144.9808,8989.0,1875000.0 +t,VB,Northern Metropolitan,2,6.7,3058.0,2.0,2.0,143.0,-37.72037,144.97023000000004,3445.0,525000.0 +u,VB,Southern Metropolitan,2,13.9,3165.0,2.0,1.0,0.0,-37.9096,145.0843,10969.0,340000.0 +u,S,Southern Metropolitan,3,10.5,3186.0,3.0,2.0,257.0,-37.89886,144.99462,10579.0,1410000.0 +h,VB,Southern Metropolitan,4,7.4,3144.0,4.0,2.0,586.0,-37.866,145.039,4675.0,2400000.0 +u,S,Southern Metropolitan,1,4.6,3181.0,1.0,1.0,0.0,-37.85279,145.00811000000004,7717.0,399000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,673.0,-37.92837,145.03821000000005,6795.0,1700000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,2.0,497.0,-37.7958,144.9024,7570.0,1362000.0 +t,S,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,201.0,-37.8428,144.9877,14887.0,2030000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,317.0,-37.7606,144.9995,8870.0,1260000.0 +u,S,Northern 
Metropolitan,2,9.9,3044.0,3.0,1.0,219.0,-37.7283,144.9331,7485.0,488000.0 +u,PI,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,0.0,-37.7899,144.9231,5263.0,395000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,383.0,-37.61132,144.92075,5833.0,500000.0 +h,S,Southern Metropolitan,3,4.6,3122.0,3.0,2.0,254.0,-37.8287,145.0419,11308.0,1985000.0 +h,SP,Southern Metropolitan,3,5.6,3101.0,3.0,2.0,392.0,-37.8081,145.0263,10331.0,1878000.0 +h,PI,Northern Metropolitan,4,3.6,3068.0,4.0,3.0,193.0,-37.79395,144.98899,2954.0,1010000.0 +h,SP,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,529.0,-37.7427,144.9868,14577.0,900000.0 +h,VB,Western Metropolitan,3,6.6,3011.0,3.0,2.0,309.0,-37.807,144.898,2417.0,920000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,285.0,-37.7161,144.9662,5070.0,400000.0 +h,S,Eastern Metropolitan,5,16.7,3150.0,5.0,2.0,651.0,-37.8656,145.15034,15321.0,1285000.0 +h,S,Western Metropolitan,3,6.6,3011.0,3.0,2.0,229.0,-37.8032,144.8892,2417.0,1310000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,2.0,292.0,-37.8575,144.8922,6380.0,1245000.0 +h,PI,Western Metropolitan,4,8.2,3012.0,4.0,1.0,528.0,-37.7969,144.8647,5058.0,970000.0 +h,S,South-Eastern Metropolitan,3,14.7,3167.0,3.0,2.0,727.0,-37.9187,145.1098,3692.0,980500.0 +h,PI,Southern Metropolitan,3,4.5,3181.0,2.0,1.0,111.0,-37.8525,145.0071,7717.0,1100000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8895,144.9902,8989.0,451000.0 +u,S,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,1658.0,-37.7781,145.0157,2970.0,510000.0 +h,SP,Eastern Metropolitan,4,13.9,3108.0,4.0,2.0,840.0,-37.78,145.1145,9028.0,1460000.0 +h,VB,Southern Metropolitan,4,7.3,3146.0,4.0,1.0,1110.0,-37.85723,145.0547,10412.0,2250000.0 +u,S,Southern Metropolitan,3,13.6,3148.0,2.0,2.0,224.0,-37.8738,145.1054,3582.0,700000.0 +u,PI,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,258.0,-37.9244,145.0547,10969.0,740000.0 +t,S,Western Metropolitan,3,13.5,3020.0,3.0,3.0,134.0,-37.79,144.7886,6763.0,521000.0 +h,S,Eastern Metropolitan,5,13.8,3084.0,5.0,3.0,648.0,-37.7346,145.093,2698.0,895000.0 +h,S,Western Metropolitan,4,12.8,3033.0,4.0,2.0,659.0,-37.7467,144.8683,5629.0,970000.0 +t,S,Southern Metropolitan,3,7.3,3146.0,3.0,2.0,203.0,-37.86248,145.06682,10412.0,1160000.0 +h,PI,Western Metropolitan,1,9.1,3040.0,3.0,2.0,676.0,-37.7632,144.898,1543.0,1720000.0 +u,VB,Western Metropolitan,2,8.2,3012.0,2.0,1.0,781.0,-37.8079,144.8684,5058.0,420000.0 +u,S,Southern Metropolitan,3,6.3,3143.0,2.0,1.0,0.0,-37.853,145.0264,4836.0,869000.0 +h,S,South-Eastern Metropolitan,3,17.5,3169.0,3.0,1.0,602.0,-37.94146,145.11121,4734.0,800000.0 +u,PI,Southern Metropolitan,4,11.7,3125.0,4.0,2.0,0.0,-37.8507,145.109,5678.0,800000.0 +h,S,Western Metropolitan,3,11.1,3025.0,3.0,1.0,740.0,-37.8269,144.8455,5132.0,923000.0 +h,PI,Eastern Metropolitan,3,13.4,3130.0,3.0,2.0,567.0,-37.81684,145.14992,5713.0,1200000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,445.0,-37.7652,145.0123,11364.0,1190000.0 +h,VB,South-Eastern Metropolitan,4,38.0,3199.0,4.0,2.0,767.0,-38.16147,145.14285,17055.0,680000.0 +u,S,Northern Metropolitan,2,3.1,3003.0,2.0,1.0,17.0,-37.8118,144.95259,2230.0,670000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,1.0,578.0,-38.13743,145.16702,17055.0,506000.0 +h,S,Southern Metropolitan,3,9.3,3162.0,3.0,1.0,359.0,-37.8898,145.0196,5051.0,1170000.0 +u,SP,Southern Metropolitan,1,11.2,3145.0,1.0,1.0,0.0,-37.8728,145.0417,8801.0,373000.0 +u,S,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.8497,145.0466,10412.0,390000.0 +u,S,Northern 
Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7847,145.0109,2970.0,676000.0 +u,S,Northern Metropolitan,2,4.4,3031.0,2.0,1.0,0.0,-37.7825,144.9239,3593.0,391000.0 +u,SP,Southern Metropolitan,3,6.1,3182.0,3.0,1.0,0.0,-37.8612,144.985,13240.0,720000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,570.0,-37.718,144.9999,21650.0,800000.0 +h,SP,South-Eastern Metropolitan,2,24.7,3175.0,2.0,1.0,414.0,-37.99173,145.22308999999996,10894.0,488000.0 +u,S,Southern Metropolitan,1,3.3,3141.0,1.0,1.0,1369.0,-37.8405,145.0025,14887.0,411000.0 +u,S,Eastern Metropolitan,2,8.9,3084.0,2.0,1.0,2033.0,-37.75715,145.06463,2890.0,534000.0 +h,S,Western Metropolitan,4,31.7,3429.0,4.0,2.0,738.0,-37.58457,144.70005,14092.0,605000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,742.0,-37.8529,145.0962,5678.0,1460000.0 +u,S,Southern Metropolitan,1,3.3,3141.0,1.0,1.0,0.0,-37.8355,144.9884,14887.0,400000.0 +h,S,Northern Metropolitan,3,2.6,3121.0,3.0,2.0,115.0,-37.8163,144.9984,14949.0,1600000.0 +h,S,Northern Metropolitan,4,12.1,3046.0,4.0,2.0,672.0,-37.7083,144.9495,2606.0,815000.0 +h,S,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,553.0,-37.8464,145.0511,10412.0,1723000.0 +h,S,Western Metropolitan,3,5.9,3032.0,3.0,2.0,416.0,-37.7727,144.9055,6567.0,1381500.0 +h,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,153.0,-37.8535,144.9952,4380.0,1325000.0 +t,PI,Southern Metropolitan,3,13.0,3204.0,3.0,2.0,418.0,-37.9172,145.0421,6795.0,900000.0 +u,PI,Southern Metropolitan,3,13.6,3148.0,3.0,2.0,235.0,-37.8856,145.0934,3582.0,701000.0 +u,PI,Southern Metropolitan,1,13.9,3165.0,1.0,1.0,0.0,-37.9205,145.0528,10969.0,370000.0 +h,PI,Eastern Metropolitan,3,11.8,3105.0,3.0,2.0,728.0,-37.7765,145.1031,4480.0,1050000.0 +u,S,Southern Metropolitan,1,6.1,3182.0,1.0,1.0,1659.0,-37.8699,144.9764,13240.0,354000.0 +h,S,Western Metropolitan,2,12.8,3033.0,2.0,1.0,116.0,-37.74800000000001,144.8696,5629.0,380000.0 +u,VB,Southern Metropolitan,2,2.1,3205.0,2.0,2.0,0.0,-37.8361,144.9682,5943.0,690000.0 +u,VB,Southern Metropolitan,2,9.3,3162.0,2.0,1.0,90.0,-37.8996,145.0169,5051.0,500000.0 +h,S,Western Metropolitan,3,6.2,3039.0,3.0,1.0,619.0,-37.76996,144.93466,6232.0,1162000.0 +u,SP,Western Metropolitan,4,8.2,3012.0,4.0,2.0,0.0,-37.809,144.8686,5058.0,530000.0 +h,S,Western Metropolitan,3,9.2,3012.0,3.0,1.0,576.0,-37.7808,144.8678,3873.0,715000.0 +u,SP,Northern Metropolitan,2,12.4,3060.0,2.0,1.0,97.0,-37.7118,144.9689,5070.0,333000.0 +u,SP,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,0.0,-37.7977,145.0333,10331.0,700000.0 +u,PI,Northern Metropolitan,3,1.9,3003.0,3.0,2.0,0.0,-37.8118,144.9526,2230.0,660000.0 +h,S,Southern Metropolitan,3,14.6,3189.0,3.0,1.0,653.0,-37.9392,145.0481,2555.0,920000.0 +h,VB,Southern Metropolitan,4,11.2,3145.0,4.0,3.0,222.0,-37.878,145.0666,8801.0,1350000.0 +h,S,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,178.0,-37.8226,145.0064,14949.0,1210000.0 +h,S,Southern Metropolitan,3,7.7,3184.0,3.0,1.0,345.0,-37.8865,144.9891,8989.0,1675000.0 +h,SP,Western Metropolitan,3,5.9,3032.0,3.0,1.0,536.0,-37.7728,144.9076,6567.0,1050000.0 +u,VB,Southern Metropolitan,1,5.0,3182.0,1.0,1.0,0.0,-37.8614,144.97406999999995,13240.0,350000.0 +h,S,Northern Metropolitan,4,12.1,3083.0,4.0,2.0,525.0,-37.70765,145.05556,10175.0,815000.0 +h,S,Western Victoria,4,29.8,3338.0,4.0,2.0,587.0,-37.69392,144.57468,4718.0,347500.0 +u,S,Southern Metropolitan,1,11.4,3163.0,1.0,1.0,0.0,-37.8968,145.0609,7822.0,345000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,888.0,-37.8002,145.0949,7809.0,2251000.0 +h,SP,Western 
Metropolitan,4,8.0,3040.0,4.0,1.0,638.0,-37.7523,144.9052,9264.0,1535000.0 +h,S,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,120.0,-37.7715,145.0075,11364.0,695000.0 +u,S,Northern Metropolitan,2,2.5,3121.0,2.0,1.0,0.0,-37.8282,144.9897,1123.0,546000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,4.0,2.0,674.0,-37.9022,145.0125,6938.0,2504000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,414.0,-37.7432,144.9573,11204.0,990000.0 +t,S,Northern Metropolitan,3,1.8,3052.0,3.0,1.0,2429.0,-37.78033,144.95949,2309.0,850000.0 +u,S,Eastern Metropolitan,2,13.1,3128.0,2.0,1.0,151.0,-37.8234,145.1235,4605.0,636000.0 +t,PI,Western Metropolitan,3,10.5,3020.0,3.0,2.0,175.0,-37.77856,144.82443,2185.0,580000.0 +h,VB,South-Eastern Metropolitan,3,38.0,3199.0,3.0,2.0,539.0,-38.14406,145.16352,17055.0,550000.0 +u,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,118.0,-37.9245,145.0336,6795.0,635000.0 +h,SP,Northern Metropolitan,3,3.4,3068.0,3.0,3.0,142.0,-37.7928,145.0021,2954.0,1180000.0 +t,S,Western Metropolitan,4,12.8,3033.0,4.0,3.0,322.0,-37.7293,144.8659,5629.0,886000.0 +h,S,Southern Metropolitan,2,5.6,3101.0,2.0,1.0,381.0,-37.8033,145.036,10331.0,1275000.0 +u,S,Southern Metropolitan,2,9.3,3162.0,2.0,1.0,831.0,-37.8974,145.0294,5051.0,525500.0 +u,S,Southern Metropolitan,2,5.6,3101.0,2.0,2.0,0.0,-37.8099,145.0606,10331.0,562500.0 +h,S,Southern Metropolitan,4,13.0,3204.0,4.0,1.0,786.0,-37.9305,145.0449,6795.0,1420000.0 +h,SP,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,308.0,-37.7649,144.9641,11918.0,1020000.0 +h,S,Southern Metropolitan,5,7.4,3144.0,5.0,3.0,580.0,-37.8653,145.0304,4675.0,4240000.0 +h,VB,Southern Metropolitan,1,4.6,3122.0,1.0,1.0,0.0,-37.8143,145.0319,11308.0,300000.0 +h,S,Western Metropolitan,4,14.8,3023.0,4.0,2.0,709.0,-37.76336,144.7725,6388.0,650000.0 +h,S,Northern Metropolitan,3,1.6,3066.0,3.0,1.0,168.0,-37.7975,144.9924,4553.0,1309000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,1.0,0.0,-37.8235,144.9655,8400.0,590000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,165.0,-37.8927,145.0539,7822.0,737000.0 +h,S,Southern Metropolitan,2,11.2,3145.0,2.0,1.0,312.0,-37.8654,145.0412,8801.0,1400000.0 +h,S,Western Metropolitan,4,15.0,3021.0,4.0,3.0,654.0,-37.7392,144.8299,1202.0,1095000.0 +u,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,0.0,-37.8577,145.0002,4380.0,800000.0 +h,S,Western Metropolitan,5,9.7,3041.0,5.0,3.0,755.0,-37.7417,144.9133,3284.0,2650000.0 +h,S,Southern Metropolitan,4,13.7,3188.0,4.0,3.0,414.0,-37.9401,145.0083,5454.0,1740000.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,2.0,272.0,-37.8553,145.007,7717.0,1720000.0 +h,S,Southern Metropolitan,3,11.4,3163.0,2.0,1.0,461.0,-37.8988,145.0516,7822.0,1170000.0 +u,SP,Western Metropolitan,2,8.0,3016.0,2.0,1.0,0.0,-37.8504,144.8924,6380.0,375000.0 +h,S,Southern Metropolitan,4,9.2,3146.0,4.0,3.0,603.0,-37.8521,145.0657,10412.0,2725000.0 +h,S,Southern Metropolitan,3,7.3,3102.0,3.0,2.0,753.0,-37.79017,145.06381000000005,2671.0,2260000.0 +h,VB,Southern Metropolitan,3,8.4,3145.0,3.0,1.0,1128.0,-37.87923,145.08955,8801.0,2500000.0 +h,S,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,647.0,-37.7917,145.0868,7809.0,1905000.0 +h,S,Western Metropolitan,3,14.7,3030.0,3.0,1.0,239.0,-37.88831,144.63942,16166.0,372000.0 +h,S,Western Metropolitan,4,18.4,3029.0,4.0,2.0,546.0,-37.85146,144.67109,13830.0,472000.0 +u,SP,Southern Metropolitan,1,4.6,3142.0,1.0,1.0,0.0,-37.8428,145.0081,7217.0,611000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,713.0,-37.7982,145.0816,7809.0,1903000.0 +t,S,Southern 
Metropolitan,3,4.6,3122.0,3.0,2.0,299.0,-37.8299,145.0387,11308.0,1400000.0 +h,S,Eastern Metropolitan,4,7.9,3079.0,4.0,2.0,631.0,-37.7771,145.0448,5549.0,1720000.0 +u,VB,Southern Metropolitan,3,1.2,3006.0,3.0,2.0,0.0,-37.828,144.9683,8400.0,740000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,1.0,780.0,-37.8198,145.0465,6482.0,2151000.0 +u,S,Eastern Metropolitan,3,16.1,3111.0,3.0,2.0,294.0,-37.79937,145.18164,4790.0,775000.0 +h,SP,Western Metropolitan,2,5.1,3011.0,2.0,1.0,126.0,-37.80473,144.89095,2417.0,945000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,506.0,-37.7187,144.9433,7485.0,800000.0 +h,S,Western Metropolitan,3,18.4,3029.0,3.0,1.0,592.0,-37.88157,144.69426,13830.0,575000.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,3.0,2.0,626.0,-37.8197,145.1106,2079.0,2000000.0 +u,S,Western Metropolitan,2,6.4,3011.0,2.0,1.0,0.0,-37.8006,144.881,7570.0,490000.0 +h,S,Northern Metropolitan,3,14.5,3087.0,3.0,2.0,447.0,-37.71627,145.08526,2329.0,835000.0 +h,VB,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,763.0,-37.8166,145.1215,4605.0,4000000.0 +h,VB,Southern Metropolitan,5,13.0,3204.0,5.0,3.0,772.0,-37.9168,145.04,6795.0,1600000.0 +h,PI,Southern Metropolitan,5,9.2,3146.0,5.0,2.0,400.0,-37.8572,145.0555,10412.0,1650000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8142,145.0308,11308.0,715000.0 +h,S,Eastern Metropolitan,3,14.2,3149.0,3.0,1.0,810.0,-37.86838,145.14664,13366.0,1530000.0 +h,VB,Northern Metropolitan,3,3.2,3054.0,3.0,1.0,203.0,-37.7824,144.9733,3106.0,1280000.0 +h,PI,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,715.0,-37.7943,145.083,7809.0,1702000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,280.0,-37.7719,144.9661,11918.0,1210000.0 +t,S,Southern Metropolitan,3,9.7,3103.0,3.0,2.0,306.0,-37.8106,145.0848,5682.0,1472000.0 +u,VB,Southern Metropolitan,2,8.1,3161.0,2.0,1.0,0.0,-37.861,145.0136,6923.0,420000.0 +h,SP,Eastern Metropolitan,4,25.0,3155.0,4.0,1.0,730.0,-37.87377,145.28688,9704.0,783000.0 +h,S,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,453.0,-37.7666,145.0132,11364.0,1170000.0 +h,S,Southern Metropolitan,4,6.3,3143.0,4.0,3.0,421.0,-37.8593,145.0275,4836.0,3660000.0 +u,S,Southern Metropolitan,2,3.5,3207.0,2.0,2.0,0.0,-37.84158,144.93809,8648.0,760000.0 +h,S,Western Metropolitan,2,6.4,3012.0,2.0,1.0,369.0,-37.79221,144.86408,5058.0,749000.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,340.0,-37.9205,145.0007,10579.0,1390000.0 +u,SP,Northern Metropolitan,3,12.1,3046.0,3.0,1.0,225.0,-37.7068,144.9467,2606.0,465000.0 +h,S,Southern Metropolitan,2,7.8,3124.0,2.0,1.0,633.0,-37.8427,145.0824,8920.0,1900000.0 +u,VB,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8562,144.9844,13240.0,470000.0 +h,S,Southern Metropolitan,5,15.2,3191.0,5.0,3.0,545.0,-37.94953,145.00607,4497.0,2220000.0 +h,S,Southern Metropolitan,2,12.2,3147.0,2.0,1.0,583.0,-37.8693,145.1082,2894.0,995000.0 +h,S,Western Metropolitan,3,13.8,3018.0,3.0,1.0,604.0,-37.8631,144.8195,5301.0,740000.0 +u,S,Northern Metropolitan,2,1.5,3002.0,0.0,0.0,0.0,-37.8154,144.9851,3040.0,872000.0 +h,S,Southern Metropolitan,3,14.0,3166.0,3.0,1.0,548.0,-37.8951,145.101,3224.0,1033000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,529.0,-37.72017,144.99873,21650.0,935000.0 +h,VB,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,266.0,-37.9241,145.0028,10579.0,1600000.0 +h,S,Northern Metropolitan,2,9.9,3044.0,2.0,1.0,629.0,-37.7312,144.9399,7485.0,915000.0 +t,VB,Northern Metropolitan,2,8.5,3044.0,2.0,2.0,74.0,-37.72184,144.92969,7485.0,500000.0 +t,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,230.0,-37.7547,144.9239,9264.0,1162500.0 
+h,PI,Southern Metropolitan,4,5.4,3101.0,4.0,2.0,399.0,-37.80526,145.04294,10331.0,2450000.0 +h,S,Western Metropolitan,3,6.9,3039.0,3.0,2.0,253.0,-37.7708,144.9234,6232.0,1260000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,3.0,590.0,-37.7302,144.9357,7485.0,1395000.0 +h,SP,Western Metropolitan,3,14.7,3030.0,3.0,2.0,312.0,-37.89273,144.72558999999995,15542.0,520000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,2.0,1.0,583.0,-37.6949,144.9619,5070.0,550000.0 +u,S,Western Metropolitan,1,4.3,3032.0,1.0,1.0,887.0,-37.76878,144.89197,4918.0,301000.0 +u,S,Southern Metropolitan,2,0.7,3006.0,2.0,1.0,0.0,-37.8281,144.96627,8400.0,600000.0 +h,PI,Western Metropolitan,4,8.4,3015.0,3.0,3.0,217.0,-37.8344,144.8764,5498.0,860000.0 +u,SP,Western Metropolitan,2,8.7,3032.0,2.0,1.0,3967.0,-37.7706,144.8805,4918.0,400000.0 +h,S,Southern Metropolitan,4,9.7,3104.0,4.0,2.0,605.0,-37.79466,145.06564,7809.0,1755000.0 +h,S,Northern Metropolitan,4,6.4,3078.0,4.0,2.0,606.0,-37.7743,145.0316,2211.0,1830000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,1.0,584.0,-37.96911,145.07271,9758.0,1145000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,600.0,-37.70067,144.92689,8870.0,721000.0 +h,S,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,734.0,-37.8286,145.092,5457.0,2200000.0 +h,S,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,662.0,-37.8246,145.1269,4605.0,1351000.0 +h,S,Eastern Metropolitan,2,9.4,3081.0,2.0,1.0,650.0,-37.7446,145.0404,2674.0,686000.0 +h,S,Southern Metropolitan,4,13.9,3165.0,4.0,2.0,592.0,-37.9351,145.0572,10969.0,1472500.0 +t,S,Northern Metropolitan,3,4.0,3057.0,3.0,2.0,138.0,-37.76292,144.97975,5533.0,959000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,1.0,570.0,-37.96258,145.08038,9758.0,948000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,2.0,2.0,98.0,-37.8005,144.9952,4019.0,1135000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,233.0,-37.7549,144.9611,11204.0,1370000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,560.0,-37.59943,144.91439,15510.0,600000.0 +h,S,Southern Metropolitan,5,7.2,3185.0,5.0,2.0,538.0,-37.89989,145.00883000000005,534.0,1880000.0 +u,SP,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,1882.0,-37.768,144.9895,11364.0,418000.0 +h,S,Northern Metropolitan,3,15.3,3074.0,3.0,1.0,573.0,-37.67939000000001,145.00143,7955.0,720000.0 +h,S,Southern Metropolitan,3,2.1,3205.0,3.0,1.0,276.0,-37.838,144.9489,5943.0,2633000.0 +h,S,Northern Metropolitan,2,2.4,3121.0,2.0,1.0,135.0,-37.82241,145.00235,14949.0,1341000.0 +h,S,Eastern Metropolitan,2,7.8,3079.0,2.0,1.0,697.0,-37.77345,145.06098,1554.0,1601000.0 +h,S,Southern Metropolitan,2,3.8,3207.0,2.0,1.0,111.0,-37.835,144.9373,8648.0,825000.0 +h,S,Northern Metropolitan,4,11.2,3073.0,4.0,1.0,855.0,-37.6997,145.0043,21650.0,770000.0 +h,S,Northern Metropolitan,4,5.9,3055.0,4.0,2.0,299.0,-37.764,144.9454,7082.0,930000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,2679.0,-37.8261,145.0269,11308.0,635000.0 +u,PI,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8817,144.984,8989.0,630000.0 +h,S,Northern Metropolitan,2,4.0,3057.0,2.0,1.0,71.0,-37.77169,144.97557,5533.0,501000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,227.0,-37.7445,144.9917,14577.0,1000000.0 +h,S,Southern Metropolitan,3,10.2,3127.0,3.0,2.0,385.0,-37.82886,145.10093,5457.0,1385000.0 +h,S,Eastern Metropolitan,3,13.4,3130.0,3.0,2.0,887.0,-37.8429,145.14895,4387.0,905000.0 +h,S,Western Victoria,4,31.7,3337.0,4.0,2.0,643.0,-37.68834,144.56803,3600.0,400000.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,3.0,482.0,-37.912,144.9994,10579.0,2450000.0 +h,S,South-Eastern 
Metropolitan,4,18.8,3170.0,4.0,2.0,790.0,-37.91375,145.16438,7113.0,992000.0 +h,PI,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,490.0,-37.7413,145.0203,14577.0,1100000.0 +u,PI,Southern Metropolitan,2,7.8,3124.0,2.0,2.0,188.0,-37.8442,145.0668,8920.0,805000.0 +t,S,Southern Metropolitan,3,4.6,3181.0,3.0,2.0,15.0,-37.84969,145.00056999999995,7717.0,1463000.0 +h,S,Southern Metropolitan,4,16.7,3168.0,4.0,3.0,727.0,-37.90779000000001,145.15042,902.0,1211000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,0.0,-37.7217,144.8783,3464.0,805000.0 +h,S,Northern Metropolitan,1,3.2,3054.0,1.0,1.0,93.0,-37.7888,144.9698,3106.0,885000.0 +h,S,Northern Metropolitan,3,4.5,3057.0,3.0,1.0,104.0,-37.7723,144.9761,5533.0,998000.0 +h,S,Northern Metropolitan,3,12.1,3046.0,3.0,1.0,570.0,-37.7106,144.9491,2606.0,730000.0 +h,PI,Southern Metropolitan,5,9.7,3103.0,5.0,5.0,651.0,-37.8071,145.0908,5682.0,3250000.0 +u,SP,Southern Metropolitan,2,7.4,3144.0,2.0,1.0,0.0,-37.8633,145.0338,4675.0,566000.0 +t,S,Western Metropolitan,3,6.4,3012.0,3.0,2.0,209.0,-37.78707,144.87608,3873.0,810000.0 +h,S,Western Metropolitan,4,9.7,3041.0,4.0,2.0,607.0,-37.7376,144.9154,3284.0,1430000.0 +h,SP,Northern Metropolitan,3,2.5,3067.0,3.0,2.0,134.0,-37.8093,144.9944,4019.0,1465000.0 +u,S,Southern Metropolitan,2,4.6,3181.0,2.0,2.0,0.0,-37.85184,145.01049,7717.0,660000.0 +h,S,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,630.0,-37.96543,145.20338999999996,8322.0,560000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,1.0,713.0,-38.16483,145.16383000000005,17055.0,565000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,603.0,-37.91655,145.02448,6795.0,1479000.0 +h,S,Southern Metropolitan,3,6.6,3183.0,3.0,1.0,178.0,-37.8663,144.9948,2952.0,1193000.0 +u,S,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.857,144.9867,13240.0,482000.0 +t,S,Northern Metropolitan,3,8.8,3072.0,3.0,2.0,242.0,-37.7506,145.0185,14577.0,880000.0 +u,SP,Southern Metropolitan,2,4.6,3181.0,2.0,1.0,0.0,-37.85924,145.00563,7717.0,500000.0 +u,S,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,564.0,-37.7665,144.9425,7082.0,471000.0 +h,S,Southern Metropolitan,2,14.0,3166.0,2.0,1.0,553.0,-37.9001,145.0981,3224.0,1010000.0 +h,SP,Eastern Metropolitan,4,11.8,3105.0,4.0,3.0,604.0,-37.7621,145.086,4480.0,1300000.0 +t,PI,Western Metropolitan,4,8.4,3015.0,4.0,3.0,278.0,-37.8468,144.874,5498.0,930000.0 +u,PI,Southern Metropolitan,3,4.6,3122.0,3.0,3.0,0.0,-37.8144,145.0153,11308.0,1000000.0 +u,VB,Southern Metropolitan,1,1.2,3006.0,1.0,1.0,546.0,-37.8274,144.9587,8400.0,370000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,1.0,0.0,-37.828,144.9683,8400.0,623000.0 +h,S,Western Metropolitan,3,9.2,3012.0,3.0,2.0,260.0,-37.7825,144.8833,3873.0,725000.0 +u,S,Southern Metropolitan,2,7.2,3184.0,2.0,1.0,797.0,-37.87449,144.99059,8989.0,720000.0 +h,SP,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,551.0,-37.7194,145.0015,21650.0,801000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,694.0,-37.72089,145.0153,21650.0,900000.0 +h,S,Western Metropolitan,3,6.8,3016.0,3.0,2.0,297.0,-37.85537,144.87578,802.0,1190000.0 +h,S,Western Metropolitan,4,8.0,3016.0,4.0,2.0,897.0,-37.8679,144.899,6380.0,3130000.0 +h,S,Northern Metropolitan,4,7.8,3058.0,4.0,1.0,531.0,-37.7473,144.9528,11204.0,1075000.0 +h,S,Western Metropolitan,3,4.3,3032.0,3.0,2.0,359.0,-37.78606,144.88459,4918.0,983000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,2.0,0.0,-37.828,144.9683,8400.0,600000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,2.0,1.0,313.0,-37.7853,144.9235,3593.0,1315000.0 +u,S,Southern 
Metropolitan,2,16.0,3190.0,2.0,1.0,158.0,-37.95147,145.04904,4794.0,680500.0 +h,SP,Western Metropolitan,3,8.4,3015.0,3.0,2.0,360.0,-37.835,144.8814,5498.0,900000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8268,145.0257,11308.0,605000.0 +h,SA,Eastern Victoria,4,35.2,3806.0,4.0,4.0,603.0,-38.06788,145.33966999999996,17093.0,950000.0 +h,S,South-Eastern Metropolitan,5,20.0,3194.0,5.0,2.0,720.0,-37.98439000000001,145.06812,6162.0,1485000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,3.0,2.0,754.0,-37.786,145.0886,7809.0,1700000.0 +u,SP,Southern Metropolitan,2,2.7,3141.0,0.0,1.0,0.0,-37.84468,145.00368,14887.0,535000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,2.0,1.0,537.0,-37.7443,144.9494,11204.0,1194500.0 +h,PI,Western Metropolitan,3,5.9,3032.0,3.0,1.0,555.0,-37.7767,144.9125,6567.0,1060000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,460.0,-37.6917,144.9755,21650.0,601000.0 +h,VB,Northern Metropolitan,3,5.8,3078.0,3.0,1.0,553.0,-37.7728,145.0214,2970.0,1350000.0 +h,S,Northern Metropolitan,3,7.8,3058.0,3.0,1.0,527.0,-37.7359,144.9502,11204.0,1061000.0 +h,S,Northern Metropolitan,4,2.6,3121.0,4.0,1.0,434.0,-37.8185,145.0061,14949.0,1775000.0 +h,PI,Southern Metropolitan,2,2.1,3205.0,2.0,2.0,141.0,-37.8389,144.9612,5943.0,2000000.0 +u,S,Southern Metropolitan,2,3.8,3207.0,2.0,2.0,0.0,-37.8444,144.9421,8648.0,2250000.0 +h,S,Eastern Victoria,2,36.9,3782.0,2.0,1.0,1000.0,-37.93064,145.4453,2259.0,655000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,4679.0,-37.7093,145.0012,21650.0,800000.0 +h,S,Eastern Metropolitan,3,13.8,3107.0,3.0,1.0,799.0,-37.7698,145.1028,5420.0,1002000.0 +h,SP,Western Victoria,4,31.7,3337.0,4.0,2.0,547.0,-37.69026,144.57866,3600.0,320000.0 +h,S,Southern Metropolitan,2,3.3,3206.0,2.0,1.0,149.0,-37.8443,144.9481,3280.0,1322500.0 +h,S,Southern Metropolitan,3,13.0,3204.0,3.0,1.0,700.0,-37.9228,145.0476,6795.0,1327500.0 +h,S,Eastern Metropolitan,4,19.9,3134.0,4.0,2.0,1016.0,-37.78372,145.25311000000005,7785.0,1725000.0 +h,SP,Western Metropolitan,3,15.5,3038.0,3.0,2.0,749.0,-37.72321,144.80882,3656.0,735000.0 +u,S,Southern Metropolitan,2,13.8,3165.0,2.0,2.0,212.0,-37.91786,145.08243000000004,10969.0,690000.0 +t,S,Southern Metropolitan,3,14.6,3189.0,3.0,2.0,472.0,-37.9387,145.0461,2555.0,917000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,494.0,-37.71477,144.98235,21650.0,685000.0 +h,S,Northern Metropolitan,3,2.6,3121.0,3.0,1.0,209.0,-37.8206,144.9913,14949.0,1345000.0 +h,S,Northern Metropolitan,3,4.5,3057.0,3.0,1.0,812.0,-37.7673,144.9782,5533.0,1600000.0 +u,VB,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,861.0,-37.8539,145.0284,4836.0,550000.0 +h,SP,Southern Metropolitan,3,10.2,3147.0,3.0,1.0,648.0,-37.8614,145.08436,3052.0,1840000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,2.0,630.0,-37.7378,144.8705,5629.0,825000.0 +h,PI,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,95.0,-37.8193,144.9976,14949.0,900000.0 +h,S,Eastern Metropolitan,2,10.5,3081.0,2.0,1.0,620.0,-37.7377,145.0541,2947.0,650000.0 +h,PI,Southern Metropolitan,5,9.2,3146.0,5.0,2.0,654.0,-37.8515,145.0922,10412.0,2410000.0 +h,PI,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,543.0,-37.6946,144.9962,21650.0,510000.0 +h,S,Eastern Metropolitan,4,13.8,3084.0,4.0,2.0,648.0,-37.7363,145.0851,2698.0,1042500.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,275.0,-37.9153,144.9924,10579.0,1875000.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,1.0,766.0,-37.8126,144.8905,6543.0,1870000.0 +u,SP,Southern Metropolitan,1,10.4,3163.0,1.0,1.0,1162.0,-37.8911,145.0451,2403.0,260000.0 +u,SP,Southern 
Metropolitan,2,11.4,3163.0,2.0,1.0,0.0,-37.8893,145.0589,7822.0,525000.0 +u,S,Eastern Metropolitan,3,9.0,3079.0,3.0,2.0,356.0,-37.7749,145.0568,1554.0,1260000.0 +h,PI,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,302.0,-37.758,144.9724,11918.0,835000.0 +u,S,Southern Metropolitan,3,5.6,3101.0,3.0,2.0,0.0,-37.8064,145.0182,10331.0,882000.0 +u,S,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,0.0,-37.7752,144.9463,7082.0,400000.0 +h,SP,Western Metropolitan,3,9.2,3012.0,3.0,1.0,300.0,-37.7896,144.8752,3873.0,832000.0 +h,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,249.0,-37.8778,144.9866,8989.0,1180000.0 +h,VB,Southern Metropolitan,3,9.2,3104.0,3.0,2.0,763.0,-37.7935,145.0865,7809.0,1700000.0 +u,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,304.0,-37.70733,144.94041,2606.0,500000.0 +t,PI,Western Metropolitan,5,8.7,3032.0,5.0,3.0,98.0,-37.7843,144.8939,4918.0,1000000.0 +h,S,Western Metropolitan,4,18.4,3029.0,4.0,2.0,872.0,-37.87217,144.68746000000004,13830.0,723000.0 +h,S,Southern Metropolitan,4,13.7,3188.0,3.0,1.0,1504.0,-37.944,145.0015,5454.0,2600000.0 +h,S,Western Metropolitan,3,7.5,3040.0,3.0,3.0,226.0,-37.75485,144.9119,9264.0,1315000.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,858.0,-37.7442,144.8934,9264.0,1500000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,461.0,-37.72499000000001,145.00038,21650.0,1040000.0 +u,S,Southern Metropolitan,2,8.5,3185.0,2.0,1.0,99.0,-37.8951,145.0085,4898.0,700000.0 +h,S,Southern Metropolitan,4,13.0,3166.0,3.0,2.0,678.0,-37.898,145.0815,3145.0,1167500.0 +h,S,Southern Metropolitan,4,11.0,3147.0,4.0,3.0,480.0,-37.8726,145.0716,3052.0,2220000.0 +t,S,Northern Metropolitan,4,17.9,3082.0,4.0,1.0,199.0,-37.677,145.05658,10529.0,492000.0 +h,VB,Western Metropolitan,3,12.9,3043.0,3.0,1.0,529.0,-37.69485,144.89092,3285.0,650000.0 +u,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,119.0,-37.8958,144.9994,10579.0,826000.0 +u,S,Northern Metropolitan,2,9.9,3044.0,2.0,1.0,139.0,-37.7315,144.9301,7485.0,485000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,4.0,1.0,701.0,-37.9178,145.0815,10969.0,1140000.0 +h,S,Southern Metropolitan,4,5.1,3181.0,4.0,1.0,230.0,-37.8549,144.9948,4380.0,1605000.0 +h,S,Northern Metropolitan,3,2.3,3051.0,3.0,2.0,230.0,-37.7987,144.9434,6821.0,2161000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,587.0,-37.7007,144.9632,5070.0,670000.0 +h,S,Northern Metropolitan,4,16.1,3088.0,4.0,3.0,807.0,-37.70077,145.12743,8524.0,900000.0 +h,S,Southern Metropolitan,4,11.8,3204.0,3.0,1.0,805.0,-37.9066,145.0354,3578.0,1920000.0 +t,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,120.0,-37.7239,145.0083,21650.0,568000.0 +h,S,Northern Metropolitan,2,1.6,3066.0,3.0,1.0,282.0,-37.7985,144.9883,4553.0,1265000.0 +h,S,Southern Metropolitan,4,11.4,3204.0,4.0,2.0,567.0,-37.90998,145.03082,2397.0,1830000.0 +u,VB,Southern Metropolitan,1,8.1,3161.0,1.0,1.0,952.0,-37.8744,145.0371,6923.0,290000.0 +h,PI,Western Metropolitan,3,13.3,3020.0,3.0,1.0,541.0,-37.7669,144.8308,4217.0,490000.0 +h,S,Southern Metropolitan,3,13.7,3188.0,3.0,2.0,766.0,-37.9346,145.0049,5454.0,2513000.0 +h,S,Southern Metropolitan,5,13.0,3204.0,5.0,2.0,664.0,-37.9241,145.048,6795.0,1385000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,3.0,1.0,220.0,-37.801,144.9989,4019.0,1097000.0 +h,PI,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,319.0,-37.7442,144.9958,14577.0,630000.0 +t,S,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,146.0,-37.8331,144.9665,5943.0,1450000.0 +h,VB,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,440.0,-37.7663,145.0118,11364.0,1000000.0 +u,S,South-Eastern 
Metropolitan,2,21.5,3195.0,2.0,1.0,0.0,-38.00765,145.0935,3650.0,600000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,2.0,255.0,-37.63382,144.92774,5833.0,425000.0 +h,S,Eastern Metropolitan,3,23.0,3136.0,3.0,1.0,850.0,-37.78297,145.28774,11925.0,840500.0 +h,VB,Southern Metropolitan,3,7.2,3184.0,3.0,2.0,225.0,-37.87614,144.99081,8989.0,1400000.0 +h,VB,Western Metropolitan,4,12.9,3043.0,4.0,2.0,620.0,-37.70377,144.90216999999996,1071.0,820000.0 +h,SP,Western Metropolitan,4,14.7,3030.0,4.0,2.0,531.0,-37.89556,144.64268,16166.0,510000.0 +h,PI,Southern Metropolitan,3,3.3,3141.0,3.0,2.0,358.0,-37.8425,145.0031,14887.0,2200000.0 +h,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,414.0,-37.7466,144.8941,9264.0,953000.0 +h,PI,Northern Metropolitan,2,5.5,3070.0,2.0,1.0,326.0,-37.7681,145.0119,11364.0,970000.0 +h,S,Southern Metropolitan,3,10.1,3163.0,3.0,2.0,602.0,-37.90248,145.05943,7822.0,1570000.0 +h,S,Eastern Metropolitan,3,7.9,3079.0,3.0,1.0,630.0,-37.7635,145.039,5549.0,1465000.0 +h,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,378.0,-37.70175,144.93578,8870.0,605000.0 +h,S,Northern Metropolitan,3,4.4,3031.0,3.0,1.0,320.0,-37.7844,144.9359,3593.0,940000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,1.0,576.0,-37.9362,145.0691,10969.0,1000000.0 +h,PI,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,234.0,-37.7923,144.9332,5263.0,1010000.0 +u,SP,Northern Metropolitan,1,3.5,3068.0,1.0,1.0,0.0,-37.7925,144.9786,6244.0,358500.0 +h,S,Western Metropolitan,3,6.2,3015.0,3.0,1.0,343.0,-37.84502,144.88612,5498.0,1250000.0 +h,S,South-Eastern Metropolitan,4,16.7,3150.0,4.0,2.0,650.0,-37.91026,145.19298,7392.0,1200000.0 +h,VB,Northern Metropolitan,3,11.5,3046.0,3.0,1.0,730.0,-37.7167,144.9216,2651.0,1400000.0 +h,S,Eastern Metropolitan,3,13.1,3128.0,3.0,1.0,702.0,-37.827,145.1209,4605.0,1325000.0 +u,SP,Western Metropolitan,2,14.0,3021.0,2.0,1.0,218.0,-37.74839,144.7735,1899.0,415000.0 +u,SP,Southern Metropolitan,2,7.7,3184.0,2.0,2.0,0.0,-37.8744,144.9888,8989.0,1122000.0 +h,SP,South-Eastern Metropolitan,4,34.9,3201.0,3.0,1.0,646.0,-38.10346,145.18159,8060.0,546000.0 +t,S,Western Metropolitan,3,4.3,3032.0,3.0,2.0,231.0,-37.77373,144.9311,6567.0,1130000.0 +t,S,Western Metropolitan,3,12.8,3033.0,3.0,2.0,224.0,-37.7426,144.868,5629.0,745000.0 +h,S,Southern Metropolitan,3,4.6,3122.0,3.0,1.0,199.0,-37.8172,145.0243,11308.0,1551000.0 +h,S,Western Metropolitan,3,8.4,3015.0,2.0,2.0,281.0,-37.8431,144.8845,5498.0,950000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8452,145.0001,14887.0,499000.0 +h,PI,Western Metropolitan,3,6.4,3011.0,3.0,1.0,413.0,-37.7931,144.893,7570.0,930000.0 +t,S,Western Metropolitan,3,6.4,3012.0,3.0,2.0,262.0,-37.78284,144.88098,3873.0,895000.0 +t,S,Western Metropolitan,3,10.4,3042.0,3.0,1.0,242.0,-37.72893,144.88859,3464.0,730000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,594.0,-37.7512,145.017,14577.0,690000.0 +h,VB,Eastern Metropolitan,3,23.0,3136.0,3.0,2.0,815.0,-37.7969,145.29273999999995,11925.0,720000.0 +h,S,Northern Metropolitan,2,3.4,3031.0,2.0,1.0,193.0,-37.78749000000001,144.93203,3593.0,1100000.0 +h,SP,Eastern Metropolitan,3,13.8,3084.0,3.0,1.0,698.0,-37.7343,145.0837,2698.0,870000.0 +h,S,Western Metropolitan,3,11.7,3033.0,3.0,3.0,326.0,-37.74009,144.87787,5629.0,1295000.0 +u,S,Southern Metropolitan,2,7.7,3184.0,2.0,1.0,0.0,-37.8758,144.9874,8989.0,717000.0 +h,PI,Western Metropolitan,3,8.0,3016.0,3.0,2.0,289.0,-37.8593,144.8881,6380.0,975000.0 +u,S,Northern Metropolitan,2,2.6,3121.0,2.0,1.0,0.0,-37.823,144.9982,14949.0,712000.0 +h,S,Southern 
Metropolitan,3,9.2,3104.0,3.0,1.0,464.0,-37.7945,145.0667,7809.0,1300000.0 +t,SP,Western Metropolitan,3,8.9,3016.0,3.0,2.0,146.0,-37.857,144.8846,802.0,720000.0 +h,S,Southern Metropolitan,4,11.7,3125.0,4.0,2.0,438.0,-37.8599,145.1101,5678.0,1255000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,520.0,-37.7362,145.0232,14577.0,810000.0 +h,S,Eastern Metropolitan,2,8.9,3084.0,2.0,1.0,1313.0,-37.74694,145.07048,3540.0,1310000.0 +h,S,Eastern Metropolitan,5,8.9,3084.0,5.0,3.0,694.0,-37.73968,145.07973,3540.0,1170000.0 +h,PI,Western Metropolitan,3,8.0,3040.0,3.0,1.0,477.0,-37.7499,144.9127,9264.0,825000.0 +h,PI,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,678.0,-37.75967,144.97214,11918.0,1400000.0 +h,PI,Eastern Metropolitan,4,13.8,3084.0,4.0,2.0,780.0,-37.7308,145.0932,2698.0,1155000.0 +h,S,Southern Metropolitan,6,6.3,3143.0,5.0,3.0,1491.0,-37.8602,145.013,4836.0,5525000.0 +h,S,Eastern Metropolitan,3,16.7,3150.0,3.0,2.0,648.0,-37.88255,145.14727,15321.0,1550000.0 +h,PI,Northern Metropolitan,5,13.0,3046.0,5.0,3.0,700.0,-37.7095,144.9253,8870.0,1150000.0 +t,S,Southern Metropolitan,3,5.6,3101.0,3.0,1.0,111.0,-37.7941,145.0238,10331.0,690000.0 +h,VB,Western Metropolitan,5,7.5,3040.0,5.0,3.0,607.0,-37.75148,144.88519,588.0,2200000.0 +h,S,Southern Metropolitan,3,8.5,3185.0,3.0,2.0,492.0,-37.8789,145.0048,4898.0,1800000.0 +h,S,South-Eastern Metropolitan,4,15.5,3167.0,4.0,2.0,564.0,-37.93543,145.08408,3692.0,1120000.0 +u,S,Southern Metropolitan,2,4.6,3142.0,2.0,1.0,1119.0,-37.8498,145.0173,7217.0,620000.0 +u,S,Eastern Metropolitan,2,8.8,3081.0,2.0,1.0,94.0,-37.74432,145.04721,2674.0,444000.0 +h,S,Northern Metropolitan,4,5.5,3070.0,4.0,2.0,440.0,-37.7852,144.9975,11364.0,2270000.0 +h,S,Northern Metropolitan,3,16.3,3075.0,3.0,1.0,852.0,-37.67601,145.02955,8279.0,801000.0 +h,S,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,650.0,-37.9338,145.0143,5454.0,1789000.0 +h,S,Western Metropolitan,4,6.8,3016.0,4.0,3.0,650.0,-37.85925,144.88761,6380.0,2200000.0 +h,S,Southern Metropolitan,5,5.6,3101.0,5.0,3.0,853.0,-37.8151,145.0555,10331.0,4350000.0 +u,S,Northern Metropolitan,3,4.2,3031.0,3.0,1.0,4440.0,-37.7898,144.9233,5263.0,551000.0 +h,S,Southern Metropolitan,3,6.2,3123.0,3.0,2.0,304.0,-37.83277,145.04373,6482.0,1768000.0 +h,S,South-Eastern Metropolitan,3,31.2,3197.0,3.0,2.0,355.0,-38.07196,145.13036,1989.0,825000.0 +h,S,Southern Metropolitan,4,10.2,3147.0,4.0,1.0,778.0,-37.86301,145.11158999999995,2894.0,1425000.0 +u,S,Southern Metropolitan,2,1.2,3006.0,2.0,2.0,2955.0,-37.8299,144.9679,8400.0,622500.0 +t,S,Western Metropolitan,3,8.7,3032.0,3.0,2.0,225.0,-37.7867,144.8906,4918.0,1011000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,501.0,-37.7771,145.0116,11364.0,1610000.0 +u,PI,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7755,145.0178,2970.0,505000.0 +t,S,Northern Metropolitan,4,3.0,3067.0,4.0,2.0,102.0,-37.80116,145.00066,4019.0,1525000.0 +h,PI,Southern Metropolitan,5,9.2,3104.0,3.0,4.0,886.0,-37.804,145.0951,7809.0,3250000.0 +u,PI,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,252.0,-37.9085,145.0683,10969.0,800000.0 +u,VB,Southern Metropolitan,2,4.5,3181.0,2.0,1.0,0.0,-37.8588,145.0024,7717.0,570000.0 +h,VB,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,0.0,-37.7711,144.9487,7082.0,545000.0 +h,SP,Northern Metropolitan,4,11.2,3073.0,4.0,1.0,605.0,-37.7051,145.0331,21650.0,800000.0 +h,VB,Western Metropolitan,6,8.0,3040.0,7.0,2.0,870.0,-37.7517,144.9018,9264.0,1800000.0 +t,S,Western Metropolitan,3,8.2,3012.0,3.0,1.0,203.0,-37.7928,144.8811,5058.0,815000.0 +t,S,Northern 
Metropolitan,5,13.0,3046.0,5.0,1.0,227.0,-37.6979,144.941,8870.0,645000.0 +h,SP,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,401.0,-37.76491,144.95253,11918.0,1405000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,614.0,-37.7194,144.8827,3464.0,790000.0 +h,SP,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,247.0,-37.70952,145.02823999999995,21650.0,635000.0 +h,PI,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,597.0,-37.98265,145.22696000000005,10894.0,620000.0 +h,S,Western Metropolitan,4,6.2,3039.0,4.0,2.0,465.0,-37.76306,144.92851000000005,6232.0,1450000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,931.0,-37.854,145.1195,5678.0,1755000.0 +h,SP,Southern Metropolitan,5,9.7,3103.0,5.0,3.0,678.0,-37.8063,145.0705,5682.0,3600000.0 +h,S,Western Metropolitan,2,6.9,3039.0,2.0,1.0,428.0,-37.773,144.9332,6232.0,940000.0 +h,S,Western Metropolitan,3,8.4,3015.0,3.0,1.0,588.0,-37.8485,144.8909,5498.0,1530000.0 +h,S,South-Eastern Metropolitan,3,38.0,3199.0,3.0,2.0,595.0,-38.16135,145.13374,17055.0,690000.0 +h,PI,Northern Metropolitan,5,13.0,3046.0,5.0,3.0,487.0,-37.7088,144.92600000000004,8870.0,850000.0 +h,PI,Northern Metropolitan,4,7.0,3071.0,4.0,2.0,348.0,-37.75582,144.98951,8870.0,1605000.0 +h,SP,Northern Metropolitan,3,12.1,3083.0,3.0,1.0,541.0,-37.70603,145.05423000000005,10175.0,715000.0 +t,SP,Southern Metropolitan,3,12.1,3163.0,3.0,2.0,0.0,-37.8939,145.0715,4442.0,860000.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,1.0,277.0,-37.7592,144.97321000000005,11918.0,905000.0 +t,S,Western Metropolitan,4,7.7,3015.0,3.0,3.0,278.0,-37.8261,144.8718,1223.0,840000.0 +u,S,Western Metropolitan,3,10.5,3020.0,3.0,1.0,284.0,-37.77933,144.81679,2185.0,675000.0 +u,VB,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,747.0,-37.8287,145.035,11308.0,750000.0 +h,S,Western Metropolitan,3,7.8,3012.0,2.0,1.0,259.0,-37.8046,144.8831,1808.0,817000.0 +h,S,Eastern Metropolitan,4,10.5,3081.0,4.0,1.0,722.0,-37.7477,145.0565,2947.0,856000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,274.0,-37.9178,145.0413,6795.0,910000.0 +t,VB,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.8506,145.0465,10412.0,600000.0 +h,S,Southern Metropolitan,4,2.1,3205.0,4.0,3.0,577.0,-37.8357,144.9502,5943.0,1615000.0 +h,S,Western Metropolitan,6,18.0,3037.0,6.0,4.0,694.0,-37.68178,144.73779,5556.0,935000.0 +u,S,Southern Metropolitan,2,7.2,3185.0,2.0,1.0,538.0,-37.88295,145.00083,4898.0,834500.0 +h,S,Northern Metropolitan,4,8.8,3072.0,4.0,2.0,530.0,-37.7416,145.016,14577.0,900000.0 +h,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,367.0,-37.842,144.9873,14887.0,2430000.0 +u,VB,Western Metropolitan,2,12.8,3033.0,2.0,1.0,218.0,-37.7337,144.8617,5629.0,470000.0 +h,PI,Northern Metropolitan,4,8.8,3072.0,9.0,8.0,1254.0,-37.7367,144.9895,14577.0,760000.0 +h,S,Western Metropolitan,1,14.0,3021.0,1.0,1.0,617.0,-37.73385,144.80535,14042.0,565000.0 +h,VB,Eastern Metropolitan,4,23.0,3136.0,4.0,2.0,655.0,-37.79725,145.29651,11925.0,730000.0 +h,S,Northern Metropolitan,2,1.6,3065.0,3.0,1.0,95.0,-37.8032,144.9825,5825.0,1440000.0 +t,S,Western Metropolitan,3,13.8,3018.0,3.0,2.0,197.0,-37.8579,144.8181,5301.0,705000.0 +u,S,Southern Metropolitan,2,6.1,3182.0,2.0,1.0,0.0,-37.8679,144.9813,13240.0,509000.0 +u,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,0.0,-37.8268,145.0344,11308.0,560000.0 +h,SP,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,88.0,-37.8363,144.9628,5943.0,1600000.0 +u,PI,Northern Metropolitan,2,5.2,3056.0,2.0,1.0,0.0,-37.7738,144.9585,11918.0,1180000.0 +h,S,Southern Metropolitan,4,10.2,3147.0,4.0,2.0,668.0,-37.86096,145.10026000000005,2894.0,1540000.0 
+h,S,Southern Metropolitan,3,9.2,3146.0,3.0,2.0,668.0,-37.8489,145.0711,10412.0,1880000.0 +h,S,Western Metropolitan,3,8.4,3015.0,3.0,1.0,752.0,-37.8481,144.8721,5498.0,1370000.0 +h,S,Northern Metropolitan,3,7.0,3071.0,3.0,1.0,612.0,-37.76063,145.02139,8870.0,1031000.0 +h,S,Western Metropolitan,4,5.1,3011.0,4.0,2.0,457.0,-37.80803,144.89601000000005,2417.0,1870000.0 +u,S,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.7907,144.8924,7570.0,227000.0 +h,S,Northern Metropolitan,4,3.4,3068.0,4.0,1.0,306.0,-37.7888,144.9929,2954.0,1825000.0 +h,SP,Northern Metropolitan,3,5.3,3070.0,3.0,1.0,375.0,-37.76397,144.99481,11364.0,1305000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,623.0,-37.7184,144.882,3464.0,805000.0 +h,SP,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,171.0,-37.8391,144.9501,5943.0,1595000.0 +h,S,Northern Metropolitan,4,4.5,3057.0,4.0,2.0,227.0,-37.7776,144.9726,5533.0,1900000.0 +h,S,Western Metropolitan,2,8.4,3015.0,2.0,1.0,255.0,-37.8469,144.8766,5498.0,760000.0 +u,S,Southern Metropolitan,2,8.5,3185.0,2.0,1.0,0.0,-37.8817,145.0032,4898.0,632500.0 +h,S,Northern Metropolitan,5,20.5,3752.0,5.0,5.0,700.0,-37.6236,145.10629,7969.0,905000.0 +h,PI,Southern Metropolitan,3,4.6,3142.0,3.0,2.0,224.0,-37.8445,145.0124,7217.0,2400000.0 +u,SP,Northern Metropolitan,2,8.8,3072.0,2.0,1.0,0.0,-37.7499,145.0031,14577.0,413000.0 +u,S,Southern Metropolitan,2,5.1,3181.0,2.0,1.0,598.0,-37.8564,144.9971,4380.0,611000.0 +h,S,Western Metropolitan,4,6.4,3011.0,4.0,2.0,154.0,-37.7983,144.8911,7570.0,977000.0 +h,S,Western Metropolitan,3,13.3,3020.0,3.0,1.0,535.0,-37.757,144.8313,4217.0,520000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,2.0,297.0,-37.6994,144.9686,5070.0,631000.0 +t,S,Eastern Metropolitan,3,24.8,3156.0,3.0,2.0,239.0,-37.886,145.28646,10788.0,670000.0 +u,SP,Northern Metropolitan,1,1.5,3002.0,2.0,1.0,0.0,-37.8136,144.9892,3040.0,750000.0 +h,VB,Northern Metropolitan,2,2.4,3121.0,2.0,2.0,189.0,-37.81684,145.00606000000005,14949.0,1200000.0 +h,S,South-Eastern Metropolitan,4,20.0,3194.0,4.0,2.0,408.0,-37.98707,145.0609,6162.0,2025000.0 +h,S,Northern Metropolitan,2,1.6,3066.0,2.0,1.0,118.0,-37.7976,144.993,4553.0,670000.0 +h,S,Eastern Metropolitan,3,9.4,3081.0,2.0,1.0,413.0,-37.7399,145.0369,2674.0,465000.0 +h,S,South-Eastern Metropolitan,3,14.7,3167.0,3.0,1.0,774.0,-37.9178,145.0872,3692.0,1260500.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,2.0,431.0,-37.8186,144.8764,6543.0,1028000.0 +u,SP,Southern Metropolitan,2,5.3,3122.0,2.0,1.0,279.0,-37.82676,145.04443999999995,11308.0,525000.0 +h,PI,Northern Metropolitan,4,2.6,3121.0,4.0,2.0,0.0,-37.8206,145.0099,14949.0,1510000.0 +h,S,Southern Metropolitan,4,4.1,3206.0,4.0,2.0,182.0,-37.8518,144.9663,2019.0,3120000.0 +h,S,Southern Metropolitan,3,14.0,3166.0,3.0,1.0,602.0,-37.8923,145.1013,3224.0,1045000.0 +h,S,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,407.0,-37.7799,144.9937,11364.0,1715000.0 +h,PI,Southern Metropolitan,3,7.5,3123.0,3.0,1.0,477.0,-37.8257,145.0514,6482.0,1890000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,2.0,648.0,-37.7798,145.1368,9028.0,1152500.0 +u,SP,Eastern Metropolitan,1,7.9,3079.0,1.0,1.0,0.0,-37.7638,145.0458,5549.0,385000.0 +h,SA,Southern Metropolitan,4,14.6,3189.0,4.0,2.0,318.0,-37.9443,145.0448,2555.0,1140000.0 +h,SP,Western Metropolitan,4,12.8,3033.0,4.0,2.0,745.0,-37.7479,144.8805,5629.0,920000.0 +u,S,Southern Metropolitan,2,7.5,3123.0,2.0,1.0,172.0,-37.8437,145.0536,6482.0,706000.0 +h,SP,Northern Metropolitan,4,7.8,3058.0,4.0,2.0,398.0,-37.7364,144.9787,11204.0,901000.0 +u,S,Northern 
Metropolitan,3,1.9,3003.0,3.0,2.0,0.0,-37.8117,144.9518,2230.0,650000.0 +h,S,Northern Metropolitan,3,17.9,3082.0,3.0,1.0,290.0,-37.6669,145.04234,10529.0,470000.0 +t,SP,Western Metropolitan,3,8.0,3016.0,3.0,2.0,233.0,-37.8573,144.8911,6380.0,935000.0 +u,S,Southern Metropolitan,2,15.2,3191.0,2.0,1.0,0.0,-37.95142,145.01977,4497.0,760000.0 +u,SP,Southern Metropolitan,2,5.1,3181.0,2.0,2.0,0.0,-37.8563,144.9916,4380.0,1200000.0 +u,S,Southern Metropolitan,1,5.4,3101.0,1.0,1.0,0.0,-37.80468,145.03614,10331.0,515000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,2.0,654.0,-37.6038,144.9226,15510.0,607500.0 +u,SP,Southern Metropolitan,1,4.6,3181.0,1.0,1.0,0.0,-37.85012,144.99225,7717.0,440000.0 +h,SP,Eastern Metropolitan,3,24.8,3156.0,3.0,1.0,968.0,-37.88523,145.28553,10788.0,740000.0 +h,S,Northern Metropolitan,3,5.9,3055.0,3.0,1.0,275.0,-37.76300000000001,144.9439,7082.0,793000.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,1.0,231.0,-37.8498,145.0039,7717.0,1530000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,5.0,2.0,999.0,-37.9135,145.0189,6938.0,2840000.0 +u,S,Southern Metropolitan,2,8.1,3161.0,2.0,1.0,0.0,-37.8625,145.0103,6923.0,570000.0 +u,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,204.0,-37.7234,144.9497,7485.0,600000.0 +h,S,Western Metropolitan,2,8.0,3016.0,2.0,1.0,265.0,-37.853,144.8928,6380.0,769000.0 +h,S,Southern Metropolitan,4,9.0,3126.0,4.0,2.0,796.0,-37.8266,145.072,3265.0,3145000.0 +u,S,Western Metropolitan,1,6.4,3011.0,1.0,1.0,0.0,-37.8006,144.881,7570.0,375000.0 +h,PI,Northern Metropolitan,3,4.5,3057.0,3.0,2.0,138.0,-37.7658,144.9823,5533.0,930000.0 +h,S,Southern Metropolitan,4,10.1,3163.0,4.0,2.0,632.0,-37.89465,145.06984,4442.0,1820000.0 +h,SP,Western Metropolitan,5,10.5,3034.0,5.0,2.0,574.0,-37.7639,144.8615,4502.0,850000.0 +u,S,Eastern Metropolitan,2,23.2,3153.0,2.0,1.0,418.0,-37.83777,145.26345,5030.0,520000.0 +h,S,Eastern Metropolitan,4,21.3,3135.0,4.0,2.0,842.0,-37.81109,145.25738,4407.0,1175000.0 +h,S,Southern Metropolitan,3,17.9,3192.0,3.0,2.0,638.0,-37.95569,145.06588,9758.0,1201000.0 +h,S,Western Metropolitan,3,14.5,3036.0,3.0,2.0,847.0,-37.72286,144.83252,2339.0,930000.0 +h,SA,Southern Metropolitan,4,10.7,3187.0,5.0,3.0,598.0,-37.9075,145.0248,6938.0,1650000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,2.0,301.0,-37.72839000000001,145.00105,21650.0,700000.0 +u,PI,Southern Metropolitan,2,9.2,3104.0,2.0,2.0,196.0,-37.785,145.0961,7809.0,800000.0 +h,S,Southern Metropolitan,4,7.3,3146.0,4.0,1.0,632.0,-37.86024000000001,145.04674,10412.0,2305000.0 +u,SP,Southern Metropolitan,1,8.7,3162.0,1.0,1.0,811.0,-37.89614,145.01323,5051.0,295000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,437.0,-37.7291,145.02100000000004,21650.0,705000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8839,144.9903,8989.0,347000.0 +h,PI,Southern Metropolitan,6,7.9,3103.0,6.0,3.0,845.0,-37.80708,145.09698999999995,5682.0,2100000.0 +h,SP,Western Metropolitan,3,9.2,3012.0,3.0,1.0,285.0,-37.7813,144.8678,3873.0,673000.0 +h,PI,Southern Metropolitan,4,10.7,3187.0,4.0,2.0,0.0,-37.9292,145.0252,6938.0,1755000.0 +h,S,Eastern Metropolitan,2,10.5,3081.0,2.0,1.0,586.0,-37.7435,145.0486,2947.0,590000.0 +u,SP,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,183.0,-37.7078,145.033,21650.0,440000.0 +h,S,Western Metropolitan,4,15.5,3038.0,4.0,2.0,660.0,-37.72086,144.79399999999995,3656.0,720000.0 +h,S,Northern Metropolitan,4,3.4,3031.0,4.0,2.0,616.0,-37.78474,144.9322,3593.0,2268000.0 +h,S,Western Metropolitan,3,6.4,3011.0,3.0,1.0,0.0,-37.7962,144.8851,7570.0,853000.0 +h,PI,Southern 
Metropolitan,5,7.9,3103.0,5.0,3.0,646.0,-37.81369,145.09486,5682.0,2180000.0 +h,S,Northern Metropolitan,4,9.9,3044.0,4.0,1.0,733.0,-37.7194,144.9307,7485.0,975000.0 +u,S,Northern Metropolitan,3,1.8,3053.0,3.0,1.0,0.0,-37.8052,144.9604,6786.0,875000.0 +u,SP,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8394,144.9886,14887.0,775000.0 +u,S,Southern Metropolitan,2,4.6,3142.0,2.0,1.0,1087.0,-37.8373,145.0106,7217.0,867000.0 +h,SP,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,296.0,-37.9243,145.0376,6795.0,855000.0 +h,S,Southern Metropolitan,4,13.9,3165.0,4.0,2.0,633.0,-37.9272,145.0644,10969.0,1100000.0 +h,S,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,447.0,-37.7622,145.0074,8870.0,1200000.0 +h,S,Northern Metropolitan,3,6.4,3078.0,3.0,1.0,592.0,-37.7755,145.0288,2211.0,1540000.0 +h,S,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,625.0,-37.94027,145.03593,2356.0,1335000.0 +h,S,Northern Metropolitan,3,12.0,3073.0,3.0,1.0,601.0,-37.72943,145.01904,21650.0,911000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,642.0,-37.7988,145.0726,7809.0,1900000.0 +u,S,Northern Metropolitan,2,16.3,3075.0,2.0,1.0,143.0,-37.67222,145.0231,8279.0,410000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,1.0,814.0,-37.7909,145.1309,9028.0,1355000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,428.0,-37.8167,144.8965,6543.0,871000.0 +t,VB,Southern Metropolitan,4,10.2,3147.0,4.0,2.0,338.0,-37.86058,145.08542,3052.0,1650000.0 +u,SP,Southern Metropolitan,2,9.8,3185.0,2.0,1.0,0.0,-37.8971,145.0093,534.0,393000.0 +h,SP,Southern Metropolitan,2,11.4,3204.0,2.0,1.0,368.0,-37.91304,145.0373,2397.0,1250000.0 +h,S,Northern Metropolitan,4,5.9,3055.0,4.0,1.0,384.0,-37.7733,144.9491,7082.0,988000.0 +h,S,Southern Metropolitan,3,3.8,3207.0,3.0,2.0,214.0,-37.8341,144.9458,8648.0,3705000.0 +h,S,Western Metropolitan,4,7.5,3040.0,4.0,2.0,951.0,-37.74997000000001,144.90752,9264.0,2165000.0 +h,SP,Western Metropolitan,3,13.3,3020.0,3.0,3.0,221.0,-37.7708,144.8401,4217.0,620000.0 +h,S,Northern Metropolitan,4,7.8,3058.0,4.0,1.0,856.0,-37.7432,144.9481,11204.0,1400000.0 +u,S,Southern Metropolitan,3,6.3,3143.0,3.0,2.0,1803.0,-37.8544,145.0164,4836.0,1635000.0 +h,S,Northern Metropolitan,3,9.2,3058.0,3.0,1.0,576.0,-37.7269,144.9654,3445.0,782000.0 +h,PI,Southern Metropolitan,4,13.0,3204.0,4.0,2.0,793.0,-37.9232,145.0502,6795.0,1260000.0 +h,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,460.0,-37.80646,145.04651,10331.0,1950000.0 +h,S,Eastern Metropolitan,3,11.4,3084.0,3.0,1.0,583.0,-37.7346,145.0715,3540.0,860000.0 +u,S,Southern Metropolitan,1,7.7,3184.0,1.0,1.0,0.0,-37.8761,144.9871,8989.0,520000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,1.0,563.0,-37.7258,145.0129,21650.0,787000.0 +h,VB,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,400.0,-37.93434,145.00816,5454.0,1325000.0 +h,S,Northern Metropolitan,2,6.5,3071.0,2.0,1.0,333.0,-37.7642,145.0192,8870.0,874000.0 +h,PI,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,530.0,-38.00429000000001,145.10286000000005,3650.0,1400000.0 +u,S,Southern Metropolitan,2,11.2,3145.0,2.0,1.0,92.0,-37.8706,145.0578,8801.0,720000.0 +h,S,Western Metropolitan,3,18.4,3029.0,3.0,2.0,913.0,-37.85152,144.70802,13830.0,650000.0 +t,S,Western Metropolitan,2,8.7,3032.0,2.0,2.0,0.0,-37.7692,144.8905,4918.0,470000.0 +h,S,Northern Metropolitan,4,12.0,3073.0,4.0,1.0,870.0,-37.70433,145.02068,21650.0,905000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,2.0,224.0,-37.8368,145.0508,6482.0,1890000.0 +h,VB,Western Metropolitan,4,6.3,3013.0,4.0,2.0,277.0,-37.8215,144.89614,6543.0,800000.0 +h,S,Southern 
Metropolitan,3,11.2,3145.0,3.0,1.0,615.0,-37.8846,145.0861,8801.0,1782500.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,4.0,2.0,609.0,-37.8149,145.11,2079.0,1865000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,3.0,197.0,-37.8197,144.8875,6543.0,910000.0 +h,S,Southern Metropolitan,3,13.8,3188.0,3.0,1.0,1469.0,-37.93433,145.02983999999995,2356.0,1140000.0 +h,S,Southern Metropolitan,4,12.1,3163.0,4.0,2.0,660.0,-37.8932,145.0681,4442.0,1610000.0 +h,S,Southern Metropolitan,3,11.2,3186.0,3.0,1.0,366.0,-37.9038,145.0001,10579.0,1635000.0 +h,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,141.0,-37.8408,144.9977,14887.0,1500000.0 +h,S,Western Metropolitan,3,9.1,3040.0,3.0,2.0,311.0,-37.7603,144.8921,1543.0,1860000.0 +h,SP,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,472.0,-37.7765,145.0027,11364.0,1655000.0 +h,S,Northern Metropolitan,4,11.5,3046.0,4.0,1.0,780.0,-37.7182,144.9246,2651.0,1005000.0 +u,S,Western Metropolitan,3,9.1,3015.0,3.0,1.0,144.0,-37.8268,144.8675,984.0,508000.0 +u,S,Southern Metropolitan,2,4.1,3142.0,2.0,2.0,586.0,-37.84266,145.02355,7217.0,1140000.0 +h,S,Western Metropolitan,4,12.6,3020.0,4.0,2.0,504.0,-37.7952,144.8325,3755.0,930000.0 +h,S,Southern Metropolitan,5,9.0,3126.0,5.0,3.0,645.0,-37.8317,145.0738,3265.0,2930000.0 +u,PI,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,0.0,-37.9436,145.0027,5454.0,500000.0 +h,SP,Western Metropolitan,4,11.1,3025.0,3.0,1.0,559.0,-37.8448,144.8529,5132.0,875000.0 +t,PI,Northern Metropolitan,3,6.4,3078.0,3.0,2.0,528.0,-37.7817,145.0319,2211.0,1285000.0 +h,S,Eastern Metropolitan,3,15.4,3131.0,3.0,1.0,590.0,-37.84308,145.168,4385.0,1011000.0 +h,S,Eastern Metropolitan,4,22.2,3179.0,4.0,2.0,960.0,-37.88992,145.22123,2206.0,1120000.0 +u,VB,Southern Metropolitan,1,4.6,3122.0,1.0,1.0,0.0,-37.8216,145.0343,11308.0,310000.0 +t,S,Southern Metropolitan,3,11.2,3186.0,3.0,2.0,352.0,-37.9139,145.0035,10579.0,1750000.0 +h,S,South-Eastern Metropolitan,4,38.0,3199.0,4.0,2.0,2716.0,-38.17488,145.1234,7566.0,1350000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7494,144.921,9264.0,453000.0 +h,S,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,585.0,-37.91722,145.04836,6795.0,1640000.0 +u,S,Eastern Metropolitan,4,10.6,3084.0,4.0,2.0,1180.0,-37.7551,145.0646,2890.0,830000.0 +u,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,0.0,-37.7112,144.9989,21650.0,335000.0 +h,S,Northern Metropolitan,4,5.7,3078.0,4.0,3.0,720.0,-37.77928,145.02993,2211.0,1680000.0 +u,S,Southern Metropolitan,1,5.3,3122.0,1.0,1.0,0.0,-37.81823,145.03972,11308.0,455000.0 +h,S,Northern Metropolitan,3,25.9,3754.0,3.0,2.0,400.0,-37.60697,145.09208,5812.0,517000.0 +u,S,Eastern Metropolitan,2,16.7,3150.0,2.0,1.0,345.0,-37.89919,145.14856,15321.0,656000.0 +u,S,Western Metropolitan,2,7.5,3040.0,2.0,1.0,0.0,-37.74444,144.92084,9264.0,420000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,2.0,742.0,-37.8357,144.9867,14887.0,863000.0 +h,SP,Northern Metropolitan,3,12.4,3060.0,4.0,2.0,620.0,-37.7034,144.9663,5070.0,662000.0 +u,VB,Southern Metropolitan,2,10.1,3163.0,2.0,1.0,830.0,-37.88414,145.0528,7822.0,480000.0 +h,S,South-Eastern Metropolitan,3,28.8,3177.0,3.0,1.0,680.0,-37.99581,145.24098,3533.0,508000.0 +h,S,Northern Metropolitan,2,14.9,3087.0,2.0,1.0,605.0,-37.7118,145.088,2329.0,662000.0 +t,PI,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,83.0,-37.7171,145.0019,21650.0,380000.0 +u,S,Western Metropolitan,3,11.2,3042.0,3.0,2.0,203.0,-37.7465,144.8867,2291.0,815000.0 +h,S,Western Metropolitan,3,6.3,3013.0,3.0,1.0,433.0,-37.81183,144.88568999999995,6543.0,1230000.0 +h,PI,Northern 
Metropolitan,4,19.6,3076.0,4.0,2.0,571.0,-37.63762,145.03538999999995,10926.0,658000.0 +h,PI,Southern Metropolitan,3,5.9,3144.0,3.0,2.0,312.0,-37.86162,145.03065,4675.0,1700000.0 +h,S,Northern Metropolitan,3,3.5,3068.0,3.0,2.0,177.0,-37.7851,144.9934,6244.0,2170000.0 +h,SP,Western Metropolitan,3,13.6,3043.0,3.0,2.0,691.0,-37.7089,144.8927,1071.0,700000.0 +h,S,Southern Metropolitan,3,12.2,3147.0,3.0,1.0,723.0,-37.8642,145.1109,2894.0,1120000.0 +u,S,Southern Metropolitan,3,13.8,3188.0,3.0,2.0,0.0,-37.93538,145.00224,5454.0,970000.0 +h,S,Northern Metropolitan,3,2.4,3121.0,3.0,2.0,93.0,-37.82921,145.00826,438.0,1837500.0 +u,S,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,1111.0,-37.7896,144.9321,5263.0,666000.0 +h,PI,Northern Metropolitan,4,5.2,3056.0,4.0,1.0,264.0,-37.7611,144.9677,11918.0,1115000.0 +u,S,Southern Metropolitan,2,13.7,3188.0,2.0,1.0,0.0,-37.9419,145.0019,5454.0,587000.0 +h,S,Western Metropolitan,3,8.0,3040.0,3.0,1.0,687.0,-37.7585,144.9316,9264.0,1330000.0 +h,SP,Northern Metropolitan,5,5.5,3070.0,4.0,4.0,569.0,-37.7763,144.9964,11364.0,4300000.0 +t,SP,Northern Metropolitan,3,4.5,3057.0,3.0,2.0,116.0,-37.7667,144.9821,5533.0,899000.0 +h,SP,Eastern Victoria,3,36.9,3200.0,3.0,1.0,624.0,-38.1187,145.14973,2500.0,452000.0 +h,S,Western Metropolitan,2,10.5,3020.0,2.0,1.0,630.0,-37.76701,144.84444,4217.0,732500.0 +h,S,Northern Metropolitan,3,5.2,3056.0,3.0,2.0,470.0,-37.7788,144.9688,11918.0,1691500.0 +h,S,Northern Metropolitan,4,11.2,3073.0,3.0,1.0,771.0,-37.705,145.0035,21650.0,801000.0 +h,S,Western Metropolitan,2,8.2,3012.0,2.0,1.0,0.0,-37.7956,144.8762,5058.0,450000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7412,144.8974,9264.0,736000.0 +h,S,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,495.0,-37.7521,145.0114,14577.0,890000.0 +u,S,Eastern Metropolitan,3,13.8,3107.0,3.0,1.0,349.0,-37.7694,145.1082,5420.0,641000.0 +h,S,Southern Metropolitan,2,4.6,3122.0,2.0,1.0,243.0,-37.8198,145.0245,11308.0,1535000.0 +h,VB,Southern Metropolitan,3,11.2,3127.0,3.0,1.0,763.0,-37.8148,145.0965,5457.0,1700000.0 +h,S,Northern Metropolitan,4,12.0,3073.0,4.0,2.0,650.0,-37.71556,145.01129,21650.0,1190000.0 +h,S,Western Metropolitan,6,8.0,3040.0,6.0,4.0,860.0,-37.7484,144.9025,9264.0,2620000.0 +h,S,Southern Metropolitan,6,5.6,3101.0,6.0,6.0,1334.0,-37.8029,145.0267,10331.0,6500000.0 +h,SP,South-Eastern Metropolitan,5,27.0,3196.0,5.0,6.0,900.0,-38.04105,145.1216,2546.0,1260000.0 +h,S,Northern Metropolitan,3,9.9,3044.0,3.0,1.0,735.0,-37.7282,144.9304,7485.0,1305000.0 +h,S,South-Eastern Metropolitan,3,24.7,3175.0,3.0,1.0,713.0,-37.98997,145.2254,10894.0,669000.0 +h,S,Southern Metropolitan,3,9.2,3104.0,3.0,1.0,635.0,-37.7982,145.0839,7809.0,1320000.0 +h,VB,Southern Metropolitan,3,3.3,3206.0,3.0,3.0,0.0,-37.8399,144.9577,3280.0,2800000.0 +h,PI,Southern Metropolitan,4,7.7,3184.0,4.0,2.0,560.0,-37.8822,144.9913,8989.0,2400000.0 +u,SP,Northern Metropolitan,1,2.5,3067.0,1.0,1.0,0.0,-37.8016,144.9988,4019.0,426000.0 +h,S,Northern Metropolitan,2,2.5,3067.0,2.0,1.0,195.0,-37.8084,144.9973,4019.0,1172500.0 +h,VB,Eastern Metropolitan,4,7.8,3079.0,4.0,2.0,585.0,-37.75722,145.04399999999995,5549.0,1200000.0 +t,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,325.0,-37.81614,145.05056000000005,10331.0,1405000.0 +h,S,Western Metropolitan,4,12.9,3043.0,4.0,2.0,462.0,-37.69953,144.89941000000005,1071.0,931000.0 +u,S,Northern Metropolitan,3,2.6,3052.0,3.0,2.0,0.0,-37.7818,144.9576,2309.0,1360000.0 +u,S,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,0.0,-37.8127,145.0094,14949.0,327000.0 +h,S,Western 
Metropolitan,3,12.6,3020.0,3.0,2.0,286.0,-37.7896,144.8369,3755.0,590000.0 +u,S,Southern Metropolitan,2,10.4,3163.0,2.0,2.0,0.0,-37.8878,145.0407,2403.0,695000.0 +u,SP,Western Metropolitan,2,6.4,3011.0,2.0,1.0,0.0,-37.8017,144.8957,7570.0,433000.0 +h,PI,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,627.0,-37.7848,145.0788,7809.0,2000000.0 +h,S,Southern Metropolitan,5,13.8,3165.0,5.0,2.0,611.0,-37.93228,145.07026000000005,10969.0,1220000.0 +t,S,Southern Metropolitan,3,12.1,3163.0,1.0,2.0,242.0,-37.9017,145.0739,4442.0,1100000.0 +h,SP,Eastern Metropolitan,4,18.0,3095.0,4.0,2.0,816.0,-37.72308,145.14011000000005,6990.0,1065000.0 +u,SP,Northern Metropolitan,1,4.4,3031.0,1.0,1.0,2077.0,-37.7852,144.9216,3593.0,380000.0 +h,S,Eastern Metropolitan,4,17.2,3132.0,4.0,1.0,842.0,-37.81235,145.19756,6871.0,1302000.0 +u,VB,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,151.0,-37.7126,144.99,21650.0,340000.0 +h,S,Western Metropolitan,3,13.5,3042.0,3.0,1.0,594.0,-37.7203,144.8757,3464.0,872500.0 +u,S,Northern Metropolitan,2,4.4,3031.0,2.0,1.0,0.0,-37.783,144.93200000000004,3593.0,476000.0 +h,S,Eastern Metropolitan,4,11.8,3105.0,4.0,2.0,723.0,-37.7724,145.1033,4480.0,1225000.0 +h,S,Eastern Metropolitan,4,13.8,3107.0,4.0,2.0,650.0,-37.7694,145.1222,5420.0,1421000.0 +h,SP,Eastern Metropolitan,4,14.3,3109.0,4.0,3.0,654.0,-37.79739,145.14671,10999.0,1320000.0 +u,VB,Southern Metropolitan,2,7.3,3146.0,2.0,1.0,200.0,-37.85743,145.0468,10412.0,580000.0 +h,S,Northern Metropolitan,3,5.7,3078.0,3.0,2.0,292.0,-37.77788,145.01963999999995,2970.0,1492000.0 +u,SP,Northern Metropolitan,2,5.9,3055.0,2.0,1.0,501.0,-37.7741,144.9449,7082.0,537000.0 +t,S,Northern Metropolitan,3,12.4,3060.0,3.0,3.0,247.0,-37.7133,144.975,5070.0,543500.0 +h,S,Southern Metropolitan,3,4.5,3181.0,3.0,1.0,199.0,-37.8494,145.005,7717.0,1442000.0 +u,S,Northern Metropolitan,2,6.4,3078.0,2.0,1.0,0.0,-37.7773,145.0314,2211.0,465000.0 +h,SP,Northern Metropolitan,4,3.4,3031.0,4.0,2.0,302.0,-37.7845,144.93582,3593.0,1340000.0 +t,S,Northern Metropolitan,2,13.0,3046.0,2.0,1.0,125.0,-37.7042,144.9211,8870.0,434500.0 +h,VB,Southern Metropolitan,5,11.2,3145.0,5.0,3.0,488.0,-37.8733,145.0507,8801.0,2500000.0 +h,SP,Northern Metropolitan,5,2.6,3121.0,5.0,3.0,618.0,-37.8157,145.0073,14949.0,3200000.0 +u,VB,Southern Metropolitan,2,6.4,3183.0,2.0,1.0,2283.0,-37.87078,144.99898000000005,2952.0,600000.0 +h,S,Western Metropolitan,3,8.0,3040.0,3.0,1.0,551.0,-37.7571,144.9335,9264.0,1134000.0 +h,S,Northern Metropolitan,4,12.4,3060.0,4.0,2.0,647.0,-37.7089,144.9695,5070.0,771000.0 +u,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,187.0,-37.7098,144.9163,8870.0,470000.0 +u,S,Western Metropolitan,2,7.5,3040.0,2.0,1.0,0.0,-37.75465,144.9107,9264.0,468000.0 +u,PI,Southern Metropolitan,2,13.9,3165.0,3.0,2.0,0.0,-37.9212,145.0674,10969.0,380000.0 +h,S,Western Metropolitan,3,12.8,3033.0,3.0,1.0,412.0,-37.7459,144.8643,5629.0,583000.0 +h,SP,Western Metropolitan,4,8.0,3040.0,4.0,3.0,519.0,-37.7517,144.9086,9264.0,1600000.0 +u,SP,Northern Metropolitan,2,2.3,3051.0,2.0,1.0,0.0,-37.7988,144.9475,6821.0,528500.0 +t,S,Southern Metropolitan,3,13.9,3165.0,3.0,2.0,343.0,-37.9112,145.0738,10969.0,920000.0 +h,S,Northern Metropolitan,3,5.8,3078.0,3.0,1.0,235.0,-37.7698,145.0183,2970.0,1064000.0 +t,S,Southern Metropolitan,3,7.2,3184.0,3.0,2.0,797.0,-37.8867,144.99141,8989.0,1360000.0 +u,SP,Southern Metropolitan,2,9.2,3146.0,2.0,1.0,0.0,-37.85,145.0461,10412.0,411000.0 +h,S,Eastern Metropolitan,4,11.8,3127.0,4.0,2.0,813.0,-37.8118,145.1065,2079.0,2335000.0 +h,S,Northern 
Metropolitan,4,3.5,3068.0,4.0,2.0,233.0,-37.7786,144.9818,6244.0,1950000.0 +h,S,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,697.0,-37.7097,144.9256,8870.0,895000.0 +t,S,Eastern Metropolitan,3,7.9,3079.0,3.0,0.0,219.0,-37.7679,145.0466,5549.0,1060000.0 +u,S,Southern Metropolitan,3,11.8,3204.0,3.0,2.0,199.0,-37.9034,145.0409,3578.0,960000.0 +h,PI,Eastern Metropolitan,3,16.7,3150.0,4.0,2.0,1590.0,-37.86957,145.17543999999995,15321.0,3056000.0 +h,VB,Southern Metropolitan,4,9.2,3104.0,4.0,2.0,610.0,-37.7843,145.0891,7809.0,1740000.0 +h,S,Northern Metropolitan,3,20.6,3064.0,3.0,1.0,368.0,-37.63693,144.92581,5833.0,400000.0 +t,SP,Northern Metropolitan,3,13.0,3046.0,3.0,1.0,258.0,-37.7047,144.9087,8870.0,595000.0 +h,PI,Southern Metropolitan,3,13.0,3204.0,3.0,1.0,694.0,-37.9315,145.0445,6795.0,1165000.0 +h,S,Southern Metropolitan,2,4.6,3181.0,2.0,1.0,136.0,-37.85542,144.99571,4380.0,957500.0 +h,SP,Western Metropolitan,3,6.4,3012.0,3.0,1.0,377.0,-37.79285,144.86969,5058.0,879000.0 +h,SA,Northern Metropolitan,3,16.3,3075.0,3.0,1.0,535.0,-37.67324,145.03513,8279.0,690000.0 +u,S,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,141.0,-37.7149,145.0009,21650.0,424000.0 +h,PI,Southern Metropolitan,3,11.2,3145.0,3.0,2.0,700.0,-37.8807,145.0547,8801.0,1915000.0 +u,S,Western Metropolitan,2,14.7,3030.0,2.0,1.0,471.0,-37.88275,144.66661000000005,16166.0,302500.0 +h,SP,Western Metropolitan,4,8.0,3016.0,4.0,3.0,338.0,-37.8633,144.8963,6380.0,2910000.0 +u,SP,Northern Metropolitan,1,3.4,3031.0,1.0,1.0,0.0,-37.78993,144.92306000000005,5263.0,392000.0 +u,S,Western Metropolitan,2,8.0,3040.0,2.0,1.0,0.0,-37.7598,144.9128,9264.0,606000.0 +h,S,Southern Metropolitan,4,13.8,3188.0,4.0,2.0,743.0,-37.94176,145.00931,5454.0,3150000.0 +h,SP,Northern Metropolitan,3,5.5,3070.0,3.0,1.0,318.0,-37.7733,145.0078,11364.0,1090000.0 +h,S,Western Metropolitan,4,18.4,3029.0,4.0,2.0,572.0,-37.87373,144.69131000000004,13830.0,537000.0 +u,S,Northern Metropolitan,2,5.8,3078.0,2.0,1.0,0.0,-37.7762,145.0213,2970.0,438000.0 +h,S,Southern Metropolitan,3,14.6,3189.0,2.0,1.0,576.0,-37.9363,145.0495,2555.0,975000.0 +u,SP,Western Metropolitan,3,4.3,3032.0,3.0,2.0,3215.0,-37.78344,144.92183,6567.0,870000.0 +h,S,Southern Metropolitan,3,13.9,3165.0,3.0,1.0,721.0,-37.9244,145.0665,10969.0,1200000.0 +u,S,Southern Metropolitan,1,10.1,3163.0,1.0,1.0,0.0,-37.89166,145.06718,4442.0,330000.0 +h,S,Eastern Metropolitan,3,13.9,3108.0,3.0,1.0,651.0,-37.7827,145.1051,9028.0,1300000.0 +u,SP,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,116.0,-37.9245,145.0336,6795.0,650000.0 +t,S,Southern Metropolitan,3,13.0,3204.0,3.0,3.0,338.0,-37.9166,145.0418,6795.0,1375000.0 +h,S,Northern Metropolitan,3,11.2,3073.0,3.0,2.0,527.0,-37.7238,144.9964,21650.0,921500.0 +h,S,Southern Metropolitan,3,5.6,3101.0,3.0,1.0,633.0,-37.7916,145.0352,10331.0,1211000.0 +h,S,Northern Metropolitan,2,7.8,3058.0,2.0,1.0,457.0,-37.7505,144.9725,11204.0,1008000.0 +h,SP,Northern Metropolitan,5,20.4,3059.0,5.0,4.0,602.0,-37.65039,144.89948,4864.0,830000.0 +h,VB,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,204.0,-37.8357,144.9531,5943.0,1650000.0 +h,S,Southern Metropolitan,3,7.8,3124.0,3.0,3.0,417.0,-37.831,145.0621,8920.0,3300000.0 +h,VB,Eastern Metropolitan,2,8.8,3081.0,2.0,1.0,588.0,-37.73764,145.05323,2947.0,780000.0 +h,S,Southern Metropolitan,3,11.7,3125.0,3.0,1.0,695.0,-37.8551,145.1121,5678.0,1151000.0 +u,S,Southern Metropolitan,2,16.0,3190.0,2.0,1.0,130.0,-37.94874,145.02631,4794.0,677500.0 +h,S,Southern Metropolitan,3,13.0,3166.0,2.0,1.0,715.0,-37.9072,145.0762,3145.0,1316000.0 +h,SP,Northern 
Metropolitan,2,12.1,3046.0,2.0,1.0,591.0,-37.7128,144.9471,2606.0,545000.0 +h,S,Western Metropolitan,3,7.0,3013.0,3.0,2.0,230.0,-37.8124,144.8875,6543.0,1414000.0 +h,PI,Western Metropolitan,4,12.6,3020.0,4.0,1.0,603.0,-37.7945,144.8267,3755.0,670000.0 +h,S,Southern Metropolitan,3,2.1,3205.0,3.0,2.0,197.0,-37.8395,144.9489,5943.0,2240000.0 +u,SP,Northern Metropolitan,3,6.5,3071.0,3.0,1.0,242.0,-37.7639,145.0135,8870.0,880000.0 +t,S,Southern Metropolitan,3,12.1,3163.0,3.0,2.0,0.0,-37.8969,145.0654,4442.0,785000.0 +h,S,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,181.0,-37.8542,145.01506,4836.0,1460000.0 +h,S,South-Eastern Metropolitan,3,33.3,3976.0,3.0,2.0,646.0,-38.03378,145.2621,8256.0,561000.0 +h,S,Western Metropolitan,4,7.0,3013.0,4.0,1.0,445.0,-37.8228,144.8769,6543.0,985500.0 +h,S,Southern Metropolitan,3,8.5,3185.0,3.0,2.0,439.0,-37.892,145.0103,4898.0,1940000.0 +u,S,Southern Metropolitan,2,11.2,3186.0,2.0,1.0,122.0,-37.9188,144.9942,10579.0,905500.0 +h,S,Southern Metropolitan,3,13.7,3188.0,3.0,1.0,495.0,-37.9428,145.0092,5454.0,1403000.0 +u,PI,Northern Metropolitan,1,4.2,3031.0,1.0,1.0,0.0,-37.7896,144.9321,5263.0,370000.0 +t,S,Southern Metropolitan,4,10.7,3187.0,4.0,3.0,898.0,-37.9176,145.0087,6938.0,2570000.0 +t,VB,Eastern Metropolitan,3,9.0,3079.0,3.0,2.0,180.0,-37.772,145.0538,1554.0,1050000.0 +h,S,South-Eastern Metropolitan,3,22.2,3172.0,3.0,2.0,533.0,-37.986,145.12035,3940.0,816000.0 +h,S,Northern Metropolitan,2,11.2,3046.0,2.0,1.0,716.0,-37.71589,144.92176,2651.0,1006000.0 +h,S,Western Metropolitan,3,6.6,3011.0,3.0,3.0,168.0,-37.8066,144.887,2417.0,1330000.0 +h,S,Western Metropolitan,3,13.5,3020.0,3.0,1.0,700.0,-37.7845,144.8131,6763.0,660000.0 +h,S,Northern Metropolitan,4,20.6,3064.0,4.0,2.0,484.0,-37.58012,144.91998,15510.0,523000.0 +h,PI,Northern Metropolitan,3,20.4,3059.0,3.0,2.0,775.0,-37.64385,144.89221,4864.0,770000.0 +u,S,Southern Metropolitan,2,3.3,3141.0,2.0,1.0,0.0,-37.8406,145.0035,14887.0,550000.0 +h,S,Eastern Metropolitan,4,16.7,3150.0,4.0,2.0,768.0,-37.89965,145.17135,15321.0,1292000.0 +h,S,Western Metropolitan,3,9.7,3041.0,3.0,2.0,371.0,-37.7229,144.9055,3284.0,1021000.0 +h,PI,Northern Metropolitan,3,8.8,3072.0,3.0,1.0,244.0,-37.7444,145.0202,14577.0,485000.0 +h,PI,Western Metropolitan,3,9.2,3012.0,3.0,1.0,584.0,-37.7858,144.8757,3873.0,760000.0 +h,S,Western Metropolitan,3,8.0,3016.0,3.0,1.0,470.0,-37.8587,144.8871,6380.0,1270000.0 +h,SP,Western Metropolitan,3,6.4,3012.0,3.0,1.0,275.0,-37.81167,144.88346,1808.0,900000.0 +h,S,Western Metropolitan,3,10.5,3020.0,3.0,1.0,631.0,-37.78956,144.84526,3755.0,805000.0 +h,S,Southern Metropolitan,2,6.3,3143.0,2.0,1.0,249.0,-37.8543,145.016,4836.0,1210000.0 +u,S,Southern Metropolitan,2,11.4,3163.0,2.0,1.0,0.0,-37.8981,145.0619,7822.0,635000.0 +h,S,Eastern Metropolitan,3,17.2,3132.0,3.0,1.0,684.0,-37.82771,145.20763,6871.0,1071000.0 +h,S,Southern Metropolitan,4,11.2,3127.0,4.0,2.0,486.0,-37.8258,145.1116,5457.0,1530000.0 +h,S,Western Metropolitan,4,14.7,3030.0,4.0,2.0,612.0,-37.88177,144.74221,15542.0,765000.0 +h,SP,Southern Metropolitan,3,11.4,3204.0,3.0,1.0,401.0,-37.92999,145.04932,6795.0,1045000.0 +h,S,South-Eastern Metropolitan,4,21.5,3195.0,4.0,2.0,695.0,-38.00477,145.10078,3650.0,1440000.0 +h,PI,Northern Metropolitan,2,11.2,3073.0,2.0,1.0,101.0,-37.7091,145.0259,21650.0,375000.0 +h,S,Eastern Metropolitan,3,18.0,3095.0,3.0,2.0,795.0,-37.72503,145.15142,6990.0,915000.0 +h,S,Southern Metropolitan,3,7.5,3123.0,3.0,2.0,185.0,-37.8255,145.0481,6482.0,1405000.0 +u,VB,Southern 
Metropolitan,1,11.4,3163.0,1.0,1.0,0.0,-37.8983,145.0627,7822.0,260000.0 +h,S,Western Metropolitan,4,5.9,3032.0,4.0,2.0,418.0,-37.7766,144.9187,6567.0,1430000.0 +h,S,Southern Metropolitan,3,9.2,3146.0,3.0,2.0,217.0,-37.8639,145.0641,10412.0,1425000.0 +u,PI,Western Metropolitan,2,5.1,3011.0,2.0,1.0,0.0,-37.78895,144.89014,7570.0,270000.0 +h,S,Northern Metropolitan,3,11.2,3046.0,3.0,1.0,655.0,-37.70608,144.92541,8870.0,830000.0 +h,S,Western Metropolitan,4,8.0,3040.0,3.0,2.0,725.0,-37.7551,144.9047,9264.0,1775000.0 +t,SP,Southern Metropolitan,3,11.2,3127.0,3.0,2.0,302.0,-37.8289,145.1005,5457.0,1325000.0 +h,S,Western Metropolitan,3,27.2,3024.0,3.0,2.0,300.0,-37.88162,144.62082,5262.0,475000.0 +h,PI,Northern Metropolitan,4,5.5,3070.0,4.0,2.0,282.0,-37.7709,145.0057,11364.0,1310000.0 +h,S,Northern Metropolitan,3,12.4,3060.0,3.0,1.0,531.0,-37.7022,144.9669,5070.0,605000.0 +u,VB,Eastern Metropolitan,3,23.2,3153.0,3.0,2.0,535.0,-37.82481,145.26519,3598.0,700000.0 +h,S,Southern Metropolitan,2,13.0,3204.0,2.0,1.0,599.0,-37.925,145.046,6795.0,1275000.0 +h,S,Northern Metropolitan,3,3.1,3003.0,3.0,1.0,121.0,-37.80973,144.9472,2230.0,1370000.0 +h,S,Northern Metropolitan,4,4.5,3057.0,4.0,2.0,470.0,-37.7735,144.9833,5533.0,1550000.0 +u,PI,Northern Metropolitan,1,2.6,3121.0,1.0,1.0,0.0,-37.8127,145.0094,14949.0,380000.0 +h,S,Western Metropolitan,3,31.7,3429.0,3.0,1.0,582.0,-37.58025,144.71759,14092.0,487000.0 +h,SP,Northern Metropolitan,3,6.5,3071.0,2.0,2.0,417.0,-37.7595,145.0017,8870.0,1400000.0 +h,S,Eastern Metropolitan,5,15.4,3131.0,5.0,3.0,477.0,-37.84252,145.17258,4385.0,1205000.0 +u,VB,Eastern Metropolitan,2,8.9,3084.0,2.0,1.0,2020.0,-37.75692,145.06426000000005,2890.0,450000.0 +h,PI,Northern Metropolitan,3,2.6,3121.0,3.0,2.0,345.0,-37.8159,144.994,14949.0,2180000.0 +t,VB,Southern Metropolitan,3,11.7,3125.0,3.0,2.0,267.0,-37.8463,145.1071,5678.0,975000.0 +h,S,Western Metropolitan,4,8.0,3040.0,4.0,2.0,291.0,-37.7544,144.9325,9264.0,1670000.0 +h,SP,Western Metropolitan,3,6.4,3011.0,3.0,1.0,242.0,-37.7956,144.8848,7570.0,775000.0 +h,SP,South-Eastern Metropolitan,4,14.7,3167.0,4.0,2.0,539.0,-37.9413,145.0957,3692.0,937500.0 +h,PI,Western Metropolitan,5,12.6,3020.0,4.0,2.0,690.0,-37.7933,144.8408,3755.0,895000.0 +h,S,Southern Metropolitan,4,10.7,3187.0,2.0,2.0,676.0,-37.9198,145.0137,6938.0,1720000.0 +u,S,Southern Metropolitan,3,14.3,3189.0,3.0,2.0,301.0,-37.94511,145.04426999999995,2555.0,828000.0 +u,S,Southern Metropolitan,2,7.8,3124.0,2.0,1.0,0.0,-37.8357,145.0595,8920.0,810000.0 +h,PI,Eastern Metropolitan,4,13.8,3107.0,4.0,2.0,654.0,-37.7639,145.1145,5420.0,1100000.0 +h,PI,Eastern Metropolitan,4,14.3,3109.0,4.0,2.0,775.0,-37.80126,145.1665,10999.0,1470000.0 +h,SP,Northern Metropolitan,2,4.2,3031.0,2.0,1.0,183.0,-37.7959,144.9342,5263.0,1211000.0 +t,S,Eastern Metropolitan,4,13.9,3108.0,4.0,2.0,234.0,-37.7932,145.1275,9028.0,973500.0 +h,S,Western Metropolitan,3,13.3,3020.0,3.0,1.0,568.0,-37.7727,144.8417,4217.0,790000.0 +u,PI,Western Metropolitan,1,13.9,3020.0,2.0,1.0,36.0,-37.7833,144.8266,2185.0,145000.0 +h,PI,Eastern Metropolitan,4,13.1,3128.0,3.0,2.0,729.0,-37.8233,145.1267,4605.0,1560000.0 +u,S,Northern Metropolitan,2,3.6,3068.0,2.0,1.0,0.0,-37.78745,145.00061000000005,2954.0,556000.0 +h,S,Northern Metropolitan,3,14.0,3047.0,3.0,1.0,341.0,-37.6886,144.92281,4294.0,386000.0 +h,S,Eastern Metropolitan,4,14.7,3151.0,4.0,2.0,592.0,-37.84839,145.14299,4048.0,1600000.0 +u,VB,Northern Metropolitan,2,1.8,3053.0,2.0,1.0,0.0,-37.7939,144.9663,6786.0,480000.0 +u,S,Eastern 
Metropolitan,2,17.2,3132.0,2.0,1.0,300.0,-37.80574,145.18832,6871.0,695000.0 +h,S,Southern Metropolitan,3,5.4,3101.0,3.0,2.0,650.0,-37.80359,145.06002,10331.0,2200000.0 +h,S,Northern Metropolitan,3,5.3,3070.0,3.0,1.0,507.0,-37.77124,145.002,11364.0,1605000.0 +h,S,Northern Metropolitan,2,5.2,3055.0,2.0,1.0,539.0,-37.77279,144.94069,7082.0,1150000.0 +h,S,South-Eastern Metropolitan,3,18.8,3170.0,3.0,1.0,656.0,-37.93271,145.17792,7113.0,840000.0 +u,S,Northern Metropolitan,3,2.8,3000.0,2.0,2.0,0.0,-37.8095,144.9691,17496.0,760000.0 diff --git a/test/unit/data/test_data_categorical.py b/test/unit/data/test_data_categorical.py new file mode 100644 index 0000000000..c7742b954c --- /dev/null +++ b/test/unit/data/test_data_categorical.py @@ -0,0 +1,205 @@ +import os + +import numpy as np +import pandas as pd +import pytest + +from fedot.api.api_utils.api_data import ApiDataProcessor +from fedot.core.data.data import InputData +from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import fedot_project_root + + +def get_dataset_with_cats(output_mode: str = None): + path_to_csv = fedot_project_root().joinpath('test/data/melb_data.csv') + df = pd.read_csv(path_to_csv) + + if output_mode == 'path': + return path_to_csv, 'Price' + + elif output_mode == 'dataframe': + return df.drop(['Price'], axis=1), df['Price'] + + elif output_mode == 'numpy': + return df.drop(['Price'], axis=1).to_numpy(), df.Price.to_numpy(), df.columns.values + + +def get_dataset_without_cats(output_mode: str = None): + path_to_csv = fedot_project_root().joinpath('test/data/scoring/scoring_train.csv') + df = pd.read_csv(path_to_csv) + df = df.drop(['ID'], axis=1) + + if output_mode == 'path': + return path_to_csv, 'target' + + elif output_mode == 'dataframe': + return df.drop(['target'], axis=1), df['target'] + + elif output_mode == 'numpy': + return df.drop(['target'], axis=1).to_numpy(), df.target.to_numpy(), df.columns.values + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 1, 2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_numpy_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X, y, features_names = get_dataset_with_cats(output_mode='numpy') + + input_data = InputData.from_numpy( + features_array=X, + target_array=y, + features_names=features_names, + categorical_idx=categorical_idx, + task='regression' + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 1, 
2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_dataframe_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X_df, y_df = get_dataset_with_cats(output_mode='dataframe') + + input_data = InputData.from_dataframe( + features_df=X_df, + target_df=y_df, + categorical_idx=categorical_idx, + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([0, 1, 2, 3, 6, 7])), + ([], np.array([]), np.array([0, 1, 2])), + (np.array([]), np.array([]), np.array([0, 1, 2])), + (['Type', 'Method', 'Regionname'], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array(['Type', 'Method', 'Regionname']), np.array([0, 1, 2]), np.array([0, 1, 2])), + ([0, 1, 2], np.array([0, 1, 2]), np.array([0, 1, 2])), + (np.array([0, 1, 2]), np.array([0, 1, 2]), np.array([0, 1, 2])) +]) +def test_from_csv_with_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + path, target_columns = get_dataset_with_cats(output_mode='path') + + input_data = InputData.from_csv( + file_path=path, + target_columns=target_columns, + categorical_idx=categorical_idx + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_numpy_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X, y, features_names = get_dataset_without_cats(output_mode='numpy') + + input_data = InputData.from_numpy( + features_array=X, + target_array=y, + features_names=features_names, + categorical_idx=categorical_idx, + task='regression' + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, 
np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_dataframe_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + X_df, y_df = get_dataset_without_cats(output_mode='dataframe') + + input_data = InputData.from_dataframe( + features_df=X_df, + target_df=y_df, + categorical_idx=categorical_idx, + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() + + +@pytest.mark.parametrize('categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing', [ + (None, None, np.array([2, 6, 7, 9])), + ([], np.array([]), np.array([])), + (np.array([]), np.array([]), np.array([])), +]) +def test_from_csv_without_cats(categorical_idx, expected_idx_after_opening, expected_idx_after_preprocessing): + path, target_columns = get_dataset_without_cats(output_mode='path') + + input_data = InputData.from_csv( + file_path=path, + target_columns=target_columns, + categorical_idx=categorical_idx + ) + + if isinstance(input_data.categorical_idx, np.ndarray): + assert (input_data.categorical_idx == expected_idx_after_opening).all() + else: + assert input_data.categorical_idx == expected_idx_after_opening + + data_preprocessor = ApiDataProcessor(task=Task(TaskTypesEnum.classification)) + preprocessed_input_data = data_preprocessor.fit_transform(input_data) + + assert (preprocessed_input_data.categorical_idx == expected_idx_after_preprocessing).all() From b1cfadc4c81c2a10f76845a4b6e4383e716cdd13 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Thu, 22 Aug 2024 19:08:57 +0300 Subject: [PATCH 52/69] Fixing for test_metrics with py3.10 --- test/data/expected_metric_values.json | 12 ++++++------ test/unit/composer/test_metrics.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/test/data/expected_metric_values.json b/test/data/expected_metric_values.json index 4b81051a1d..5018aa8d3f 100644 --- a/test/data/expected_metric_values.json +++ b/test/data/expected_metric_values.json @@ -31,14 +31,14 @@ "rmse_pen": 52.64510049434378 }, "multitarget": { - "rmse": 15.753366859480218, - "mse": 377.5025166058113, + "rmse": [15.753366859480218, 15.715344581042293], + "mse": [377.5025166058113, 375.07708740234375], "neg_mean_squared_log_error": 0.030627538521796293, "mape": 0.15337090733886807, - "smape": 14.144394353302935, - "mae": 13.50645038033778, - "r2": -2.9713973901034954, - "rmse_pen": 15.784873593199178 + "smape": [14.144394353302935, 14.117428843762253], + "mae": [13.50645038033778, 13.459635416666666], + "r2": [-2.9713973901034954, -2.960510176151834], + "rmse_pen": [15.784873593199178, 15.746775270204378] }, "ts": { "mase": 0.6080909603204148, diff --git a/test/unit/composer/test_metrics.py b/test/unit/composer/test_metrics.py index b8b868a9e7..d16bb40aac 100644 --- a/test/unit/composer/test_metrics.py +++ b/test/unit/composer/test_metrics.py @@ -134,7 +134,17 @@ def test_metrics(metric: ClassificationMetricsEnum, pipeline_func: Callable[[], if not update_expected_values: expected_value = expected_values[task_type][str(metric)] - assert np.isclose(metric_value, 
expected_value, rtol=0.001, atol=0.001) + + if isinstance(expected_value, list): + expression_expected_value = [] + + for value in expected_value: + expression_expected_value.append(np.isclose(metric_value, value, rtol=0.001, atol=0.001)) + assert any(expression_expected_value) + + else: + assert np.isclose(metric_value, expected_value, rtol=0.001, atol=0.001) + assert not np.isclose(metric_value, metric_class.default_value, rtol=0.01, atol=0.01) else: with open(fedot_project_root() / 'test/data/expected_metric_values.json', 'w') as f: From 888f484250ae9699bf86b2eeaf4f749894a9a9da Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 23 Aug 2024 14:25:10 +0300 Subject: [PATCH 53/69] Fix test_from_ ... with broadcast --- fedot/core/data/data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index b382eb6839..fa684e7a3b 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -145,12 +145,12 @@ def from_dataframe(cls, if isinstance(categorical_idx, list): categorical_idx = np.array(categorical_idx) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: raise ValueError( 'Impossible to specify categorical features by name when the features_names are not specified' ) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): categorical_idx = np.array( [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] ) @@ -222,12 +222,12 @@ def from_csv(cls, if isinstance(categorical_idx, list): categorical_idx = np.array(categorical_idx) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: raise ValueError( 'Impossible to specify categorical features by name when the features_names are not specified' ) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): categorical_idx = np.array( [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] ) @@ -928,12 +928,12 @@ def array_to_input_data(features_array: np.ndarray, if isinstance(categorical_idx, list): categorical_idx = np.array(categorical_idx) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str) and features_names is None: + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str) and features_names is None: raise ValueError( 'Impossible to specify categorical features by name when the features_names are not specified' ) - if categorical_idx != np.array([]) and isinstance(categorical_idx[0], str): + if categorical_idx.size != 0 and isinstance(categorical_idx[0], str): categorical_idx = np.array( [idx for idx, column in enumerate(features_names) if column in set(categorical_idx)] ) From f963d099af55fa59e904237692718cd633885eaf Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Fri, 23 Aug 2024 17:42:24 +0300 Subject: [PATCH 54/69] Hide preprocessing messages under debug logging (2) --- fedot/api/api_utils/api_data.py | 8 +++--- fedot/core/data/data.py | 9 ++++--- fedot/preprocessing/data_types.py | 17 +++++++----- fedot/preprocessing/preprocessing.py | 40 ++++++++++++++-------------- 
4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index 9607ad40aa..7ecd150249 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -143,16 +143,16 @@ def fit_transform(self, train_data: InputData) -> InputData: self.log.message( f'Train Data (Original) Memory Usage: {memory_usage} Data Shapes: {features_shape, target_shape}') - self.log.message('- Obligatory preprocessing started') + self.log.debug('- Obligatory preprocessing started') train_data = self.preprocessor.obligatory_prepare_for_fit(data=train_data) - self.log.message('- Optional preprocessing started') + self.log.debug('- Optional preprocessing started') train_data = self.preprocessor.optional_prepare_for_fit(pipeline=Pipeline(), data=train_data) - self.log.message('- Converting indexes for fitting started') + self.log.debug('- Converting indexes for fitting started') train_data = self.preprocessor.convert_indexes_for_fit(pipeline=Pipeline(), data=train_data) - self.log.message('- Reducing memory started') + self.log.debug('- Reducing memory started') train_data = self.preprocessor.reduce_memory_size(data=train_data) train_data.supplementary_data.is_auto_preprocessed = True diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index fa684e7a3b..43160ac2eb 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -178,7 +178,7 @@ def from_csv(cls, task: Union[Task, str] = 'classification', data_type: DataTypesEnum = DataTypesEnum.table, columns_to_drop: Optional[List[Union[str, int]]] = None, - target_columns: Union[str, List[Union[str, int]]] = '', + target_columns: Union[str, List[Union[str, int]], None] = '', categorical_idx: Union[list[int, str], np.ndarray[int, str]] = None, index_col: Optional[Union[str, int]] = None, possible_idx_keywords: Optional[List[str]] = None) -> InputData: @@ -210,11 +210,12 @@ def from_csv(cls, df = get_df_from_csv(file_path, delimiter, index_col, possible_idx_keywords, columns_to_drop=columns_to_drop) idx = df.index.to_numpy() - if not target_columns: - features_names = df.columns.to_numpy()[:-1] - else: + if target_columns: features_names = df.drop(target_columns, axis=1).columns.to_numpy() + else: + features_names = df.columns.to_numpy() + features, target = process_target_and_features(df, target_columns) categorical_features = None diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py index 566cdafbde..52ed706648 100644 --- a/fedot/preprocessing/data_types.py +++ b/fedot/preprocessing/data_types.py @@ -88,7 +88,7 @@ def convert_data_for_fit(self, data: InputData): column_types_info = self.prepare_column_types_info(predictors=data.features, target=data.target, task=data.task) data.supplementary_data.col_type_ids = column_types_info col_types_info_message = prepare_log_message_with_cols_types(column_types_info, data.features_names) - self.log.message(f'--- The information about types of each feature are {col_types_info_message}') + self.log.message(f'The detected types of data are as follows: {col_types_info_message}') self._into_numeric_features_transformation_for_fit(data) # Launch conversion float and integer features into categorical self._into_categorical_features_transformation_for_fit(data) @@ -320,11 +320,15 @@ def _into_categorical_features_transformation_for_fit(self, data: InputData): if np.size(all_cat_col_ids) > 0: if data.features_names is not None: cat_features_names = data.features_names[all_cat_col_ids] - 
self.log.message(f'--- Preprocessing define next cols {cat_features_names} as categorical') + self.log.message( + f'Preprocessing defines the following columns as categorical: {cat_features_names}' + ) else: - self.log.message(f'--- Preprocessing define next cols {all_cat_col_ids} as categorical') + self.log.message( + f'Preprocessing defines the following columns as categorical: {all_cat_col_ids}' + ) else: - self.log.message('--- Preprocessing was unable to define the categorical columns') + self.log.message('Preprocessing was unable to define the categorical columns') def _into_categorical_features_transformation_for_predict(self, data: InputData): """ Apply conversion into categorical string column for every signed column """ @@ -534,13 +538,14 @@ def _process_predict_column_values_one_by_one(value, current_type: type): def prepare_log_message_with_cols_types(col_types_info, features_names): - message = '\n' + message = '\n' + 'Features\n' for type_name, type_id in TYPE_TO_ID.items(): count_types = np.count_nonzero(col_types_info['features'] == type_id) features_idx = np.where(col_types_info['features'] == type_id)[0] names_or_indexes = features_names[features_idx] if features_names is not None else features_idx - message += f'TYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \ + message += f'\tTYPE {type_name} - count {count_types} - features {names_or_indexes} \n' \ + message += '-' * 10 + '\n' message += f'Target: TYPE {_convertable_types[col_types_info["target"][0]]}' return message diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index ca03a8eb29..a9303f903b 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -193,7 +193,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str, return data # Convert datetime data to numerical - self.log.message('-- Converting datetime data to numerical') + self.log.debug('-- Converting datetime data to numerical') data.features = np_datetime_to_numeric(data.features) if data.target is not None: data.target = np_datetime_to_numeric(data.target) @@ -202,39 +202,39 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str, data.idx = np.asarray(data.idx) # Fix tables / time series sizes - self.log.message('-- Fixing table / time series shapes') + self.log.debug('-- Fixing table / time series shapes') data = self._correct_shapes(data) replace_inf_with_nans(data) # Find incorrect features which must be removed if is_fit_stage: - self.log.message('-- Finding incorrect features') + self.log.debug('-- Finding incorrect features') self._find_features_lacking_nans(data, source_name) - self.log.message('-- Removing incorrect features') + self.log.debug('-- Removing incorrect features') self._take_only_correct_features(data, source_name) if is_fit_stage: - self.log.message('-- Dropping rows with NaN-values in target') + self.log.debug('-- Dropping rows with NaN-values in target') data = self._drop_rows_with_nan_in_target(data) # Column types processing - launch after correct features selection - self.log.message('-- Features types processing') + self.log.debug('-- Features types processing') self.types_correctors[source_name].convert_data_for_fit(data) if self.types_correctors[source_name].target_converting_has_errors: - self.log.message('-- Dropping rows with NaN-values in target') + self.log.debug('-- Dropping rows with NaN-values in target') data = self._drop_rows_with_nan_in_target(data) # Train Label Encoder 
for categorical target if necessary and apply it - self.log.message('-- Applying the Label Encoder to Target due to the presence of categories') + self.log.debug('-- Applying the Label Encoder to Target due to the presence of categories') if source_name not in self.target_encoders: self._train_target_encoder(data, source_name) data.target = self._apply_target_encoding(data, source_name) else: - self.log.message('-- Converting data for predict') + self.log.debug('-- Converting data for predict') self.types_correctors[source_name].convert_data_for_predict(data) feature_type_ids = data.supplementary_data.col_type_ids['features'] @@ -247,7 +247,7 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str, elif data_type_is_table(data): if is_fit_stage: - self.log.message('-- Searching binary categorical features to encode them') + self.log.debug('-- Searching binary categorical features to encode them') data = self.binary_categorical_processors[source_name].fit_transform(data) else: data = self.binary_categorical_processors[source_name].transform(data) @@ -273,9 +273,9 @@ def _prepare_optional(self, pipeline, data: InputData, source_name: str): (data_has_missing_values, 'imputation', self._apply_imputation_unidata), (data_has_categorical_features, 'encoding', self._apply_categorical_encoding) ]: - self.log.message(f'-- Deciding to apply {tag_to_check} for data') + self.log.debug(f'Deciding to apply {tag_to_check} for data') if has_problems(data): - self.log.message(f'-- Finding {tag_to_check} is required and trying to apply') + self.log.debug(f'Finding {tag_to_check} is required and trying to apply') # Data contains missing values has_tag = PipelineStructureExplorer.check_structure_by_tag( pipeline, tag_to_check=tag_to_check, source_name=source_name) @@ -366,17 +366,17 @@ def _apply_imputation_unidata(self, data: InputData, source_name: str) -> InputD Returns: imputed ``data`` """ - self.log.message('--- Initialising imputer') + self.log.debug('--- Initialising imputer') imputer = self.features_imputers.get(source_name) if not imputer: imputer = ImputationImplementation() - self.log.message('--- Fitting and transforming imputer for missings') + self.log.debug('--- Fitting and transforming imputer for missings') output_data = imputer.fit_transform(data) self.features_imputers[source_name] = imputer else: - self.log.message('--- Transforming imputer for missings') + self.log.debug('--- Transforming imputer for missings') output_data = imputer.transform(data) data.features = output_data.predict @@ -394,7 +394,7 @@ def _apply_categorical_encoding(self, data: InputData, source_name: str) -> Inpu Returns: encoded ``data`` """ - self.log.message('--- Initialising categorical encoder') + self.log.debug('--- Initialising categorical encoder') encoder = self.features_encoders.get(source_name) if encoder is None: @@ -402,8 +402,8 @@ def _apply_categorical_encoding(self, data: InputData, source_name: str) -> Inpu encoder.fit(data) self.features_encoders[source_name] = encoder - self.log.message(f'--- {encoder.__class__.__name__} was chosen') - self.log.message('--- Fitting and transforming data') + self.log.debug(f'--- {encoder.__class__.__name__} was chosen as categorical encoder') + self.log.debug('--- Fitting and transforming data') output_data = encoder.transform_for_fit(data) output_data.predict = output_data.predict.astype(float) data.features = output_data.predict @@ -602,11 +602,11 @@ def reduce_mem_usage_np(arr, initial_types): # It required to add this to reduce memory for them 
pass else: - self.log.message('-- Reduce memory in features') + self.log.debug('-- Reduce memory in features') data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features']) if data.target is not None: - self.log.message('-- Reduce memory in target') + self.log.debug('-- Reduce memory in target') data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target']) data.target = data.target.to_numpy() From a542088a8307769fc826e6e3fd646d539330be00 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 25 Aug 2024 22:12:31 +0300 Subject: [PATCH 55/69] Fix TypeError with float16, rejection from this type --- fedot/core/data/data.py | 9 +++++++++ fedot/preprocessing/preprocessing.py | 4 +--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 43160ac2eb..3780207894 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -792,6 +792,15 @@ def __setitem__(self, key, value): else: raise NotImplementedError("Setting values by index without specifying a column is not supported.") + def __get__(self): + output = np.empty(self._shape, dtype=np.object_) + + for i in range(self._shape[0]): + for j, col in enumerate(self._columns): + output[i, j] = col[i] + + return output + def __len__(self): return self._shape[0] if self._columns else 0 diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index a9303f903b..b04b6fbc85 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -585,9 +585,7 @@ def reduce_mem_usage_np(arr, initial_types): reduced_columns.add_column(col.astype(np.int64)) elif np.issubdtype(col.dtype, np.floating): - if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max: - reduced_columns.add_column(col.astype(np.float16)) - elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: + if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: reduced_columns.add_column(col.astype(np.float32)) else: reduced_columns.add_column(col.astype(np.float64)) From 776d7f5a5d5e0c3cbc4283bdbc8a32fd5cc0f27b Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 4 Sep 2024 01:27:00 +0300 Subject: [PATCH 56/69] Refactoring OptimisedFeatures - _columns: np.ndarray -> _columns: pd.DataFrame --- fedot/core/data/data.py | 110 ++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 3780207894..d68ed664ed 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -528,6 +528,17 @@ def from_json_files(files_path: str, return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) + @property + def features(self): + if isinstance(self._features, OptimisedFeatures): + return self._features.items + + return self._features + + @features.setter + def features(self, value): + self._features = value + def to_csv(self, path_to_save): dataframe = pd.DataFrame(data=self.features, index=self.idx) if self.target is not None: @@ -539,6 +550,9 @@ def to_csv(self, path_to_save): class InputData(Data): """Data class for input data for the nodes """ + def __init__(self, features, *args, **kwargs): + super().__init__(*args, **kwargs) + self._features = features def __post_init__(self): if self.numerical_idx is None: @@ -749,90 +763,74 @@ class OptimisedFeatures: """``Data`` type for optimised storage data. 
It based on numpy ndarray, but the features storages in list of np.ndarray with own optimal dtype """ - _columns: list = field(default_factory=list, init=False) - _shape: tuple = field(default=(0, 0), init=False) - _nbytes: int = 0 + _columns: pd.DataFrame = field(default_factory=pd.DataFrame, init=False) + _cols_names: list = field(default_factory=list, init=False) ndim: int = 2 - def add_column(self, data: np.ndarray): - if not isinstance(data, np.ndarray): - raise ValueError("Data should be a NumPy array.") + def set_data(self, data: pd.DataFrame): + if isinstance(data, pd.DataFrame): + self._columns = data.copy(deep=True) + self._cols_names = list(range(0, len(self._columns.columns))) - if self._shape == (0, 0): - self._shape = (data.shape[0], 1) else: - if data.shape[0] != self._shape[0]: - raise ValueError("All columns must have the same number of rows.") - - self._shape = (self._shape[0], self._shape[1] + 1) + raise ValueError("data in set_data should be a pandas DataFrame.") - self._columns.append(data) - self._nbytes += data.nbytes + def add_column(self, arr: np.ndarray): + if isinstance(arr, np.ndarray): + if self._columns.empty: + self._cols_names = [0] + self._columns = pd.DataFrame(arr, columns=self._cols_names) - def __getitem__(self, key): - if isinstance(key, tuple): - row_idx, col_idx = key - if isinstance(col_idx, int): - return self._columns[col_idx][row_idx] else: - selected_columns = [self._columns[i] for i in col_idx] - return np.column_stack(selected_columns)[row_idx] + self._cols_names.append(self._cols_names[-1] + 1) + self._columns.insert(self._cols_names[-1], self._cols_names[-1], arr) else: - result = np.column_stack(self._columns)[key] - return result if result.ndim > 1 else result.ravel() + raise ValueError("arr in add_column should be a NumPy array.") - def __setitem__(self, key, value): + def __getitem__(self, key: Union[tuple[int, int], int]) -> Union[pd.DataFrame, pd.Series]: if isinstance(key, tuple): row_idx, col_idx = key - if isinstance(col_idx, int): - self._columns[col_idx][row_idx] = value - else: - for i, col in zip(col_idx, value): - self._columns[i][row_idx] = col - else: - raise NotImplementedError("Setting values by index without specifying a column is not supported.") + return self._columns.iloc[row_idx, col_idx] - def __get__(self): - output = np.empty(self._shape, dtype=np.object_) - - for i in range(self._shape[0]): - for j, col in enumerate(self._columns): - output[i, j] = col[i] - - return output + else: + return self._columns.iloc[key] - def __len__(self): - return self._shape[0] if self._columns else 0 + def __len__(self) -> int: + return self._columns.shape[0] if self._columns else 0 - def take(self, indices, axis=0): + def take(self, indices: np.ndarray[int], axis: int = 0) -> OptimisedFeatures: output = OptimisedFeatures() + # Takes rows if axis == 0: - # Takes rows - for col in self._columns: - output.add_column(np.take(col, indices, axis)) + output.set_data(self._columns.iloc[indices, :]) + + # Takes columns elif axis == 1: - # Takes columns - for i in indices: - output.add_column(self._columns[i]) + output.set_data(self._columns.iloc[:, indices]) + else: raise ValueError("Axis must be 0 (rows) or 1 (columns)") return output - def copy(self): - return self._columns.copy() + def copy(self) -> pd.DataFrame: + return self._columns.copy(deep=True) - def to_numpy(self): - return np.transpose(np.array(self._columns)) + def to_numpy(self) -> np.ndarray: + return self._columns.to_numpy() + + @property + def items(self): + return 
self._columns @property - def shape(self): - return self._shape + def shape(self) -> tuple[int, int]: + return self._columns.shape @property - def nbytes(self): - return self._nbytes + def nbytes(self) -> int: + return self._columns.memory_usage(index=True, deep=True).sum() def _resize_image(file_path: str, target_size: Tuple[int, int]): From 4cc8a3ddbdc6ea4a4a373ad62e4f7264810c50e3 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 4 Sep 2024 22:38:59 +0300 Subject: [PATCH 57/69] Revert changes with features property --- fedot/core/data/data.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index d68ed664ed..141bb9b2c8 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -528,17 +528,6 @@ def from_json_files(files_path: str, return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) - @property - def features(self): - if isinstance(self._features, OptimisedFeatures): - return self._features.items - - return self._features - - @features.setter - def features(self, value): - self._features = value - def to_csv(self, path_to_save): dataframe = pd.DataFrame(data=self.features, index=self.idx) if self.target is not None: @@ -550,10 +539,6 @@ def to_csv(self, path_to_save): class InputData(Data): """Data class for input data for the nodes """ - def __init__(self, features, *args, **kwargs): - super().__init__(*args, **kwargs) - self._features = features - def __post_init__(self): if self.numerical_idx is None: if self.features is not None and isinstance(self.features, np.ndarray) and self.features.ndim > 1: From 762f89225b9ac73a856e94674d0d6a08fc02b2f1 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Wed, 4 Sep 2024 22:57:28 +0300 Subject: [PATCH 58/69] Fixes various tests --- fedot/core/data/data.py | 2 +- .../operations/evaluation/classification.py | 16 +++++++++++++--- .../evaluation/evaluation_interfaces.py | 8 ++++++-- .../implementation_interfaces.py | 18 +++++++++++------- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 141bb9b2c8..a56a3dcbc6 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -781,7 +781,7 @@ def __getitem__(self, key: Union[tuple[int, int], int]) -> Union[pd.DataFrame, p return self._columns.iloc[key] def __len__(self) -> int: - return self._columns.shape[0] if self._columns else 0 + return self._columns.shape[0] if self._columns.size > 0 else 0 def take(self, indices: np.ndarray[int], axis: int = 0) -> OptimisedFeatures: output = OptimisedFeatures() diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index a6bdf15069..8c3b14a05c 100644 --- a/fedot/core/operations/evaluation/classification.py +++ b/fedot/core/operations/evaluation/classification.py @@ -1,7 +1,7 @@ import warnings from typing import Optional -from fedot.core.data.data import InputData, OutputData +from fedot.core.data.data import InputData, OutputData, OptimisedFeatures from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \ import DecomposerClassImplementation @@ -35,8 +35,18 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: :return: prediction target """ - prediction = self._sklearn_compatible_prediction(trained_operation=trained_operation, - 
features=predict_data.features) + if isinstance(predict_data.features, OptimisedFeatures): + prediction = self._sklearn_compatible_prediction( + trained_operation=trained_operation, + features=predict_data.features.items + ) + + else: + prediction = self._sklearn_compatible_prediction( + trained_operation=trained_operation, + features=predict_data.features + ) + converted = self._convert_to_output(prediction, predict_data) return converted diff --git a/fedot/core/operations/evaluation/evaluation_interfaces.py b/fedot/core/operations/evaluation/evaluation_interfaces.py index 5849ab3f17..ab11190aee 100644 --- a/fedot/core/operations/evaluation/evaluation_interfaces.py +++ b/fedot/core/operations/evaluation/evaluation_interfaces.py @@ -27,7 +27,7 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from xgboost import XGBClassifier, XGBRegressor -from fedot.core.data.data import InputData, OutputData +from fedot.core.data.data import InputData, OutputData, OptimisedFeatures from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operation_type_from_id @@ -228,7 +228,11 @@ def fit(self, train_data: InputData): operation_implementation = convert_to_multivariate_model(operation_implementation, train_data) else: - operation_implementation.fit(train_data.features, train_data.target) + if isinstance(train_data.features, OptimisedFeatures): + operation_implementation.fit(train_data.features.items, train_data.target) + + else: + operation_implementation.fit(train_data.features, train_data.target) return operation_implementation @abstractmethod diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index a08c9a9f12..b92397eddb 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -163,16 +163,20 @@ def _reasonability_check(features): # For every column in table make check if isinstance(features, OptimisedFeatures): - features = features._columns + features = features.items + + bool_ids = features.select_dtypes('bool').columns.values.tolist() + non_bool_ids = [col_idx for col_idx in features.columns.values.tolist() if col_idx not in bool_ids] + elif isinstance(features, np.ndarray): features = features.T - for column_id, column in enumerate(features): - # column = features[:, column_id] if columns_amount > 1 else features.copy() - if len(set(column)) > 2: - non_bool_ids.append(column_id) - else: - bool_ids.append(column_id) + for column_id, column in enumerate(features): + # column = features[:, column_id] if columns_amount > 1 else features.copy() + if len(set(column)) > 2: + non_bool_ids.append(column_id) + else: + bool_ids.append(column_id) return bool_ids, non_bool_ids From 4efdad54d60b85636ea86ed78b94a50e4431f120 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 17:34:59 +0300 Subject: [PATCH 59/69] Global refactoring - Rejection from separate class --- fedot/api/api_utils/api_data.py | 8 +- fedot/core/data/data.py | 96 +++---------------- fedot/core/data/data_split.py | 10 +- .../operations/evaluation/classification.py | 16 +--- .../evaluation/evaluation_interfaces.py | 12 +-- 
.../data_operations/categorical_encoders.py | 26 +++-- .../implementation_interfaces.py | 25 ++--- .../models/boostings_implementations.py | 7 +- fedot/preprocessing/data_types.py | 1 + fedot/preprocessing/preprocessing.py | 53 +++++----- 10 files changed, 80 insertions(+), 174 deletions(-) diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index 7ecd150249..3a5aaf5e31 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -137,7 +137,7 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod def fit_transform(self, train_data: InputData) -> InputData: start_time = datetime.now() self.log.message('Preprocessing data') - memory_usage = convert_memory_size(train_data.features.nbytes) + memory_usage = convert_memory_size(train_data.features.memory_usage) features_shape = train_data.features.shape target_shape = train_data.target.shape self.log.message( @@ -157,7 +157,7 @@ def fit_transform(self, train_data: InputData) -> InputData: train_data.supplementary_data.is_auto_preprocessed = True - memory_usage = convert_memory_size(train_data.features.nbytes) + memory_usage = convert_memory_size(train_data.features.memory_usage) features_shape = train_data.features.shape target_shape = train_data.target.shape @@ -170,7 +170,7 @@ def fit_transform(self, train_data: InputData) -> InputData: def transform(self, test_data: InputData, current_pipeline) -> InputData: start_time = datetime.now() self.log.message('Preprocessing data') - memory_usage = convert_memory_size(test_data.features.nbytes) + memory_usage = convert_memory_size(test_data.features.memory_usage) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( @@ -184,7 +184,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData: test_data = self.preprocessor.reduce_memory_size(data=test_data) - memory_usage = convert_memory_size(test_data.features.nbytes) + memory_usage = convert_memory_size(test_data.features.memory_usages) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index a56a3dcbc6..f1ba556bf4 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -42,7 +42,7 @@ class Data: idx: np.ndarray task: Task data_type: DataTypesEnum - features: Union[np.ndarray, OptimisedFeatures] + features: Union[np.ndarray, pd.DataFrame] categorical_features: Optional[np.ndarray] = None categorical_idx: Optional[np.ndarray] = None numerical_idx: Optional[np.ndarray] = None @@ -439,7 +439,7 @@ def from_text_meta_file(meta_file_path: str = None, features = np.array(messages) target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) @@ -457,7 +457,7 @@ def from_text_files(files_path: str, features = np.array(df_text['text']) target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type) @@ -523,7 +523,7 @@ def from_json_files(files_path: str, else: target = np.array(df_data[label]) - idx = [index for index in range(len(target))] + idx = np.array([index for index in range(len(target))]) return InputData(idx=idx, 
features=features, target=target, task=task, data_type=data_type) @@ -534,6 +534,13 @@ def to_csv(self, path_to_save): dataframe['target'] = self.target dataframe.to_csv(path_to_save) + @property + def memory_usage(self): + if isinstance(self.features, np.ndarray): + return self.features.nbytes + else: + return self.features.memory_usage().sum() + @dataclass class InputData(Data): @@ -642,7 +649,7 @@ def convert_non_int_indexes_for_fit(self, pipeline): copied_data = deepcopy(self) is_timestamp = isinstance(copied_data.idx[0], pd._libs.tslibs.timestamps.Timestamp) is_numpy_datetime = isinstance(copied_data.idx[0], np.datetime64) - # if fit stage- just creating range of integers + # if fit stage-just creating range of integers if is_timestamp or is_numpy_datetime: copied_data.supplementary_data.non_int_idx = copy(copied_data.idx) copied_data.idx = np.array(range(len(copied_data.idx))) @@ -684,7 +691,7 @@ def get_not_encoded_data(self): num_features_names, cat_features_names = None, None # Checking numerical data exists - if self.numerical_idx.size != 0: + if self.numerical_idx is not None and self.numerical_idx.size != 0: num_features = self.features[:, self.numerical_idx] if self.features_names is not None and np.size(self.features_names): @@ -693,7 +700,7 @@ def get_not_encoded_data(self): num_features_names = np.array([f'num_feature_{i}' for i in range(1, num_features.shape[1] + 1)]) # Checking categorical data exists - if self.categorical_idx.size != 0: + if self.categorical_idx is not None and self.categorical_idx.size != 0: cat_features = self.categorical_features if self.features_names is not None and np.size(self.features_names): @@ -743,81 +750,6 @@ class OutputData(Data): encoded_idx: Optional[np.ndarray] = None -@dataclass -class OptimisedFeatures: - """``Data`` type for optimised storage data. 
- It based on numpy ndarray, but the features storages in list of np.ndarray with own optimal dtype - """ - _columns: pd.DataFrame = field(default_factory=pd.DataFrame, init=False) - _cols_names: list = field(default_factory=list, init=False) - ndim: int = 2 - - def set_data(self, data: pd.DataFrame): - if isinstance(data, pd.DataFrame): - self._columns = data.copy(deep=True) - self._cols_names = list(range(0, len(self._columns.columns))) - - else: - raise ValueError("data in set_data should be a pandas DataFrame.") - - def add_column(self, arr: np.ndarray): - if isinstance(arr, np.ndarray): - if self._columns.empty: - self._cols_names = [0] - self._columns = pd.DataFrame(arr, columns=self._cols_names) - - else: - self._cols_names.append(self._cols_names[-1] + 1) - self._columns.insert(self._cols_names[-1], self._cols_names[-1], arr) - else: - raise ValueError("arr in add_column should be a NumPy array.") - - def __getitem__(self, key: Union[tuple[int, int], int]) -> Union[pd.DataFrame, pd.Series]: - if isinstance(key, tuple): - row_idx, col_idx = key - return self._columns.iloc[row_idx, col_idx] - - else: - return self._columns.iloc[key] - - def __len__(self) -> int: - return self._columns.shape[0] if self._columns.size > 0 else 0 - - def take(self, indices: np.ndarray[int], axis: int = 0) -> OptimisedFeatures: - output = OptimisedFeatures() - - # Takes rows - if axis == 0: - output.set_data(self._columns.iloc[indices, :]) - - # Takes columns - elif axis == 1: - output.set_data(self._columns.iloc[:, indices]) - - else: - raise ValueError("Axis must be 0 (rows) or 1 (columns)") - - return output - - def copy(self) -> pd.DataFrame: - return self._columns.copy(deep=True) - - def to_numpy(self) -> np.ndarray: - return self._columns.to_numpy() - - @property - def items(self): - return self._columns - - @property - def shape(self) -> tuple[int, int]: - return self._columns.shape - - @property - def nbytes(self) -> int: - return self._columns.memory_usage(index=True, deep=True).sum() - - def _resize_image(file_path: str, target_size: Tuple[int, int]): """Function resizes and rewrites the input image """ diff --git a/fedot/core/data/data_split.py b/fedot/core/data/data_split.py index 1c2f34e60a..a000c6e46b 100644 --- a/fedot/core/data/data_split.py +++ b/fedot/core/data/data_split.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.model_selection import train_test_split -from fedot.core.data.data import InputData, OptimisedFeatures +from fedot.core.data.data import InputData from fedot.core.data.multi_modal import MultiModalData from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import TaskTypesEnum @@ -30,13 +30,9 @@ def _split_input_data_by_indexes(origin_input_data: Union[InputData, MultiModalD return data elif isinstance(origin_input_data, InputData): idx = np.take(origin_input_data.idx, index, 0) - if isinstance(origin_input_data.features, OptimisedFeatures): - features = origin_input_data.features.take(index) - target = origin_input_data.target.take(index) - else: - features = np.take(origin_input_data.features, index, 0) - target = np.take(origin_input_data.target, index, 0) + features = np.take(origin_input_data.features, index, 0) + target = np.take(origin_input_data.target, index, 0) if origin_input_data.categorical_features is not None: categorical_features = np.take(origin_input_data.categorical_features, index, 0) diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index 
8c3b14a05c..04cc061c69 100644 --- a/fedot/core/operations/evaluation/classification.py +++ b/fedot/core/operations/evaluation/classification.py @@ -1,7 +1,7 @@ import warnings from typing import Optional -from fedot.core.data.data import InputData, OutputData, OptimisedFeatures +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, SkLearnEvaluationStrategy from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose \ import DecomposerClassImplementation @@ -35,17 +35,11 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: :return: prediction target """ - if isinstance(predict_data.features, OptimisedFeatures): - prediction = self._sklearn_compatible_prediction( - trained_operation=trained_operation, - features=predict_data.features.items - ) - else: - prediction = self._sklearn_compatible_prediction( - trained_operation=trained_operation, - features=predict_data.features - ) + prediction = self._sklearn_compatible_prediction( + trained_operation=trained_operation, + features=predict_data.features + ) converted = self._convert_to_output(prediction, predict_data) return converted diff --git a/fedot/core/operations/evaluation/evaluation_interfaces.py b/fedot/core/operations/evaluation/evaluation_interfaces.py index ab11190aee..60e00f297c 100644 --- a/fedot/core/operations/evaluation/evaluation_interfaces.py +++ b/fedot/core/operations/evaluation/evaluation_interfaces.py @@ -27,7 +27,7 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from xgboost import XGBClassifier, XGBRegressor -from fedot.core.data.data import InputData, OutputData, OptimisedFeatures +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operation_type_from_id @@ -225,14 +225,10 @@ def fit(self, train_data: InputData): with ImplementationRandomStateHandler(implementation=operation_implementation): if is_model_not_support_multi and is_multi_target: # Manually wrap the regressor into multi-output model - operation_implementation = convert_to_multivariate_model(operation_implementation, - train_data) - else: - if isinstance(train_data.features, OptimisedFeatures): - operation_implementation.fit(train_data.features.items, train_data.target) + operation_implementation = convert_to_multivariate_model(operation_implementation, train_data) + + operation_implementation.fit(train_data.features, train_data.target) - else: - operation_implementation.fit(train_data.features, train_data.target) return operation_implementation @abstractmethod diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py index 057702c6ba..182fd346aa 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.preprocessing import LabelEncoder, OneHotEncoder -from fedot.core.data.data import InputData, OutputData, OptimisedFeatures +from fedot.core.data.data import InputData, OutputData from 
fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ( DataOperationImplementation ) @@ -21,16 +21,16 @@ def __init__(self, params: Optional[OperationParameters] = None): 'handle_unknown': 'ignore' } self.encoder = OneHotEncoder(**{**default_params, **self.params.to_dict()}) - self.categorical_ids: List[int] = [] - self.non_categorical_ids: List[int] = [] - self.encoded_ids: List[int] = [] - self.new_numerical_idx: List[int] = [] + self.categorical_ids: np.ndarray = np.array([]) + self.non_categorical_ids: np.ndarray = np.array([]) + self.encoded_ids: np.ndarray = np.array([]) + self.new_numerical_idx: np.ndarray = np.array([]) def fit(self, input_data: InputData): """ Method for fit encoder with automatic determination of categorical features - :param input_data: data with features, target and ids for encoder training - :return encoder: trained encoder (optional output) + :param input_data: data with features, target and ids for encoder fitting + :return encoder: encoder (optional output) """ features = input_data.features self.categorical_ids, self.non_categorical_ids = input_data.categorical_idx, input_data.numerical_idx @@ -152,17 +152,13 @@ def _apply_label_encoder(self, data: np.ndarray): column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column))) transformed_column = column_encoder.transform(column) - nan_idxs = np.flatnonzero(column == 'nan') - if len(nan_idxs): + nan_indices = np.flatnonzero(column == 'nan') + if len(nan_indices): # Store np.nan values transformed_column = transformed_column.astype(object) - transformed_column[nan_idxs] = np.nan - - if isinstance(data, np.ndarray): - data[:, column_id] = transformed_column + transformed_column[nan_indices] = np.nan - elif isinstance(data, OptimisedFeatures): - data._columns[column_id] = transformed_column + data[:, column_id] = transformed_column def get_params(self) -> OperationParameters: """ Due to LabelEncoder has no parameters - return empty set """ diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index b92397eddb..ed27670f92 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -5,7 +5,7 @@ import numpy as np from golem.core.log import default_log -from fedot.core.data.data import InputData, OutputData, OptimisedFeatures +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum from fedot.utilities.custom_errors import AbstractMethodNotImplementError @@ -162,21 +162,14 @@ def _reasonability_check(features): non_bool_ids = [] # For every column in table make check - if isinstance(features, OptimisedFeatures): - features = features.items - - bool_ids = features.select_dtypes('bool').columns.values.tolist() - non_bool_ids = [col_idx for col_idx in features.columns.values.tolist() if col_idx not in bool_ids] - - elif isinstance(features, np.ndarray): - features = features.T - - for column_id, column in enumerate(features): - # column = features[:, column_id] if columns_amount > 1 else features.copy() - if len(set(column)) > 2: - non_bool_ids.append(column_id) - else: - bool_ids.append(column_id) + features = features.T + + for column_id, column in 
enumerate(features): + # column = features[:, column_id] if columns_amount > 1 else features.copy() + if len(set(column)) > 2: + non_bool_ids.append(column_id) + else: + bool_ids.append(column_id) return bool_ids, non_bool_ids diff --git a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py index cfd6a37cbd..143b686fa0 100644 --- a/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py +++ b/fedot/core/operations/evaluation/operation_implementations/models/boostings_implementations.py @@ -80,6 +80,9 @@ def check_and_update_params(self): if booster == 'gblinear' and enable_categorical: self.params.update(enable_categorical=False) + if booster == 'gbtree' and enable_categorical: + self.params.update(enable_categorical=False) + def get_feature_importance(self) -> list: return self.model.features_importances_ @@ -91,7 +94,7 @@ def plot_feature_importance(self, importance_type='weight'): @staticmethod def convert_to_dataframe(data: Optional[InputData], identify_cats: bool): dataframe = pd.DataFrame(data=data.features) - if data.target is not None: + if data.target is not None and data.target.size > 0: dataframe['target'] = np.ravel(data.target) else: # TODO: temp workaround in case data.target is set to None intentionally @@ -236,7 +239,7 @@ def set_eval_metric(n_classes): @staticmethod def convert_to_dataframe(data: Optional[InputData], identify_cats: bool): dataframe = pd.DataFrame(data=data.features, columns=data.features_names) - if data.target is not None: + if data.target is not None and data.target.size > 0: dataframe['target'] = np.ravel(data.target) else: # TODO: temp workaround in case data.target is set to None intentionally diff --git a/fedot/preprocessing/data_types.py b/fedot/preprocessing/data_types.py index 52ed706648..6c8e52e8cf 100644 --- a/fedot/preprocessing/data_types.py +++ b/fedot/preprocessing/data_types.py @@ -16,6 +16,7 @@ _type_ids = range(len(_convertable_types)) TYPE_TO_ID = dict(zip(_convertable_types, _type_ids)) +ID_TO_TYPE = dict(zip(_type_ids, _convertable_types)) _TYPES = 'types' _FLOAT_NUMBER = 'float_number' diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index b04b6fbc85..c6056eee49 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -8,7 +8,7 @@ from golem.core.paths import copy_doc from sklearn.preprocessing import LabelEncoder -from fedot.core.data.data import InputData, np_datetime_to_numeric, OptimisedFeatures +from fedot.core.data.data import InputData, np_datetime_to_numeric from fedot.core.data.data import OutputData, data_type_is_table, data_type_is_text, data_type_is_ts from fedot.core.data.data_preprocessing import ( data_has_categorical_features, @@ -30,7 +30,7 @@ from fedot.preprocessing.base_preprocessing import BasePreprocessor from fedot.preprocessing.categorical import BinaryCategoricalPreprocessor from fedot.preprocessing.data_type_check import exclude_image, exclude_multi_ts, exclude_ts -from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, _convertable_types +from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, _convertable_types, ID_TO_TYPE from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME, PipelineStructureExplorer # The allowed percent of empty samples in features. 
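The reduce_memory_size hunk below restores each column to its detected type and then downcasts it to the narrowest dtype that can hold its value range. A minimal standalone sketch of that downcasting idea, with illustrative names rather than code from the patch (float16 is skipped on purpose, as PATCH 55 above established):

import numpy as np
import pandas as pd


def downcast_numeric(df: pd.DataFrame) -> pd.DataFrame:
    # Cast every numeric column to the narrowest dtype that holds its range
    for col in df.columns:
        if not np.issubdtype(df[col].dtype, np.number):
            continue
        c_min, c_max = df[col].min(), df[col].max()
        if np.issubdtype(df[col].dtype, np.integer):
            for int_type in (np.int8, np.int16, np.int32, np.int64):
                if np.iinfo(int_type).min < c_min and c_max < np.iinfo(int_type).max:
                    df[col] = df[col].astype(int_type)
                    break
        elif np.finfo(np.float32).min < c_min and c_max < np.finfo(np.float32).max:
            # float16 is deliberately avoided: it costs precision downstream
            df[col] = df[col].astype(np.float32)
        else:
            df[col] = df[col].astype(np.float64)
    return df


frame = pd.DataFrame({'small_int': [1, 2, 3], 'ratio': [0.25, 0.5, 0.75]})
print(downcast_numeric(frame).dtypes)  # small_int -> int8, ratio -> float32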
@@ -561,38 +561,33 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD @copy_doc(BasePreprocessor.reduce_memory_size) def reduce_memory_size(self, data: InputData) -> InputData: - def reduce_mem_usage_np(arr, initial_types): - reduced_columns = OptimisedFeatures() - - for i in range(arr.shape[1]): - col = arr[:, i] - init_type = _convertable_types[initial_types[i]] - col = col.astype(init_type) - col_type = col.dtype.name - - if col_type not in ['object'] and not bool(re.match(r'str\d*$', col_type)): - c_min = col.max() - c_max = col.max() - - if np.issubdtype(col.dtype, np.integer): + def reduce_mem_usage(features, initial_types): + df = pd.DataFrame(features) + types_array = [ID_TO_TYPE[_type] for _type in initial_types] + + for index, col in enumerate(df.columns): + df[col] = df[col].astype(types_array[index]) + col_type = df[col].dtype.name + + if col_type not in ['object', 'category', 'datetime64[ns, UTC]']: + c_min = df[col].min() + c_max = df[col].max() + if str(col_type)[:3] == 'int': if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: - reduced_columns.add_column(col.astype(np.int8)) + df[col] = df[col].astype(np.int8) elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: - reduced_columns.add_column(col.astype(np.int16)) + df[col] = df[col].astype(np.int16) elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max: - reduced_columns.add_column(col.astype(np.int32)) + df[col] = df[col].astype(np.int32) elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max: - reduced_columns.add_column(col.astype(np.int64)) - - elif np.issubdtype(col.dtype, np.floating): + df[col] = df[col].astype(np.int64) + else: if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max: - reduced_columns.add_column(col.astype(np.float32)) + df[col] = df[col].astype(np.float32) else: - reduced_columns.add_column(col.astype(np.float64)) - else: - reduced_columns.add_column(col) + df[col] = df[col].astype(np.float64) - return reduced_columns + return df if isinstance(data, InputData): if data.task.task_type == TaskTypesEnum.ts_forecasting: @@ -601,11 +596,11 @@ def reduce_mem_usage_np(arr, initial_types): pass else: self.log.debug('-- Reduce memory in features') - data.features = reduce_mem_usage_np(data.features, data.supplementary_data.col_type_ids['features']) + data.features = reduce_mem_usage(data.features, data.supplementary_data.col_type_ids['features']) if data.target is not None: self.log.debug('-- Reduce memory in target') - data.target = reduce_mem_usage_np(data.target, data.supplementary_data.col_type_ids['target']) + data.target = reduce_mem_usage(data.target, data.supplementary_data.col_type_ids['target']) data.target = data.target.to_numpy() return data From bfe617d1535feef33bb814878f1fe769553576c7 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 17:55:51 +0300 Subject: [PATCH 60/69] Fix pep8, wrong code correction & test --- fedot/core/operations/evaluation/classification.py | 1 - fedot/core/operations/evaluation/evaluation_interfaces.py | 4 ++-- .../operation_implementations/implementation_interfaces.py | 6 +++++- fedot/preprocessing/preprocessing.py | 4 ++-- test/unit/data/test_data_categorical.py | 2 -- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fedot/core/operations/evaluation/classification.py b/fedot/core/operations/evaluation/classification.py index 04cc061c69..2765f21b3f 100644 --- a/fedot/core/operations/evaluation/classification.py +++ 
b/fedot/core/operations/evaluation/classification.py @@ -35,7 +35,6 @@ def predict(self, trained_operation, predict_data: InputData) -> OutputData: :return: prediction target """ - prediction = self._sklearn_compatible_prediction( trained_operation=trained_operation, features=predict_data.features diff --git a/fedot/core/operations/evaluation/evaluation_interfaces.py b/fedot/core/operations/evaluation/evaluation_interfaces.py index 60e00f297c..e0b21e0c1a 100644 --- a/fedot/core/operations/evaluation/evaluation_interfaces.py +++ b/fedot/core/operations/evaluation/evaluation_interfaces.py @@ -226,8 +226,8 @@ def fit(self, train_data: InputData): if is_model_not_support_multi and is_multi_target: # Manually wrap the regressor into multi-output model operation_implementation = convert_to_multivariate_model(operation_implementation, train_data) - - operation_implementation.fit(train_data.features, train_data.target) + else: + operation_implementation.fit(train_data.features, train_data.target) return operation_implementation diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index ed27670f92..86b7b7dd3c 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -165,7 +165,11 @@ def _reasonability_check(features): features = features.T for column_id, column in enumerate(features): - # column = features[:, column_id] if columns_amount > 1 else features.copy() + if isinstance(features, np.ndarray): + column = features[:, column_id] if columns_amount > 1 else features.copy() + else: + column = features.iloc[:, column_id] if columns_amount > 1 else features.copy() + if len(set(column)) > 2: non_bool_ids.append(column_id) else: diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index c6056eee49..650b98a171 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -1,4 +1,4 @@ -import re +from copy import copy from copy import copy from typing import Optional, Union @@ -30,7 +30,7 @@ from fedot.preprocessing.base_preprocessing import BasePreprocessor from fedot.preprocessing.categorical import BinaryCategoricalPreprocessor from fedot.preprocessing.data_type_check import exclude_image, exclude_multi_ts, exclude_ts -from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, _convertable_types, ID_TO_TYPE +from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, ID_TO_TYPE from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME, PipelineStructureExplorer # The allowed percent of empty samples in features. 
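The _reasonability_check fix above rests on one convention: positional column access is features[:, i] for a NumPy array but features.iloc[:, i] for a pandas DataFrame. A self-contained sketch of that dual access pattern; the helper name is hypothetical and not part of the patches:

import numpy as np
import pandas as pd


def split_bool_columns(features):
    # Columns with at most two unique values are treated as boolean-like
    bool_ids, non_bool_ids = [], []
    for column_id in range(features.shape[1]):
        if isinstance(features, np.ndarray):
            column = features[:, column_id]
        else:
            # positional indexing on a DataFrame must go through .iloc
            column = features.iloc[:, column_id]
        if len(set(column)) > 2:
            non_bool_ids.append(column_id)
        else:
            bool_ids.append(column_id)
    return bool_ids, non_bool_ids


df = pd.DataFrame({'flag': [0, 1, 0, 1], 'value': [0.1, 2.3, 4.5, 6.7]})
assert split_bool_columns(df) == split_bool_columns(df.to_numpy())  # ([0], [1])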
diff --git a/test/unit/data/test_data_categorical.py b/test/unit/data/test_data_categorical.py index c7742b954c..01cd66245a 100644 --- a/test/unit/data/test_data_categorical.py +++ b/test/unit/data/test_data_categorical.py @@ -1,5 +1,3 @@ -import os - import numpy as np import pandas as pd import pytest From 68e7610123aa0f3122d85443a0dcb6e64aea0d69 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 18:04:58 +0300 Subject: [PATCH 61/69] Fixes bug with memory_usage & test --- fedot/api/api_utils/api_data.py | 8 ++++---- fedot/core/data/data.py | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fedot/api/api_utils/api_data.py b/fedot/api/api_utils/api_data.py index 3a5aaf5e31..7776c32aad 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -137,7 +137,7 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod def fit_transform(self, train_data: InputData) -> InputData: start_time = datetime.now() self.log.message('Preprocessing data') - memory_usage = convert_memory_size(train_data.features.memory_usage) + memory_usage = convert_memory_size(train_data.memory_usage) features_shape = train_data.features.shape target_shape = train_data.target.shape self.log.message( @@ -157,7 +157,7 @@ def fit_transform(self, train_data: InputData) -> InputData: train_data.supplementary_data.is_auto_preprocessed = True - memory_usage = convert_memory_size(train_data.features.memory_usage) + memory_usage = convert_memory_size(train_data.memory_usage) features_shape = train_data.features.shape target_shape = train_data.target.shape @@ -170,7 +170,7 @@ def fit_transform(self, train_data: InputData) -> InputData: def transform(self, test_data: InputData, current_pipeline) -> InputData: start_time = datetime.now() self.log.message('Preprocessing data') - memory_usage = convert_memory_size(test_data.features.memory_usage) + memory_usage = convert_memory_size(test_data.memory_usage) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( @@ -184,7 +184,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData: test_data = self.preprocessor.reduce_memory_size(data=test_data) - memory_usage = convert_memory_size(test_data.features.memory_usages) + memory_usage = convert_memory_size(test_data.memory_usages) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index f1ba556bf4..d68a5b9702 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -692,7 +692,10 @@ def get_not_encoded_data(self): # Checking numerical data exists if self.numerical_idx is not None and self.numerical_idx.size != 0: - num_features = self.features[:, self.numerical_idx] + if isinstance(self.features, np.ndarray): + num_features = self.features[:, self.numerical_idx] + else: + num_features = self.features.iloc[:, self.numerical_idx] if self.features_names is not None and np.size(self.features_names): num_features_names = self.features_names[self.numerical_idx] @@ -709,7 +712,11 @@ def get_not_encoded_data(self): cat_features_names = np.array([f'cat_feature_{i}' for i in range(1, cat_features.shape[1] + 1)]) if num_features is not None and cat_features is not None: - new_features = np.hstack((num_features, cat_features)) + if isinstance(self.features, np.ndarray): + new_features = np.hstack((num_features, cat_features)) + else: + new_features = 
pd.concat([num_features, cat_features], axis=1)
+
                 new_features_names = np.hstack((num_features_names, cat_features_names))
                 new_features_idx = np.array(range(new_features.shape[1]))
                 new_num_idx = new_features_idx[:num_features.shape[1]]
@@ -727,6 +734,9 @@ def get_not_encoded_data(self):
         else:
             raise ValueError('There is no features')
 
+        if isinstance(new_features, pd.DataFrame):
+            new_features.columns = new_features_names
+
         return InputData(idx=self.idx, features=new_features, features_names=new_features_names,
                          numerical_idx=new_num_idx, categorical_idx=new_cat_idx, target=self.target,
                          task=self.task, data_type=self.data_type)

From bef6bf2bad3079012e8742fd785adf475394d6cc Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Sun, 8 Sep 2024 18:13:30 +0300
Subject: [PATCH 62/69] Fixes bug with invalid slice

---
 .../operation_implementations/implementation_interfaces.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
index 86b7b7dd3c..9b62113f7c 100644
--- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
+++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
@@ -89,7 +89,10 @@ def fit(self, input_data: InputData):
         self.ids_to_process = ids_to_process
         self.bool_ids = bool_ids
         if len(ids_to_process) > 0:
-            features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features
+            if isinstance(features, np.ndarray):
+                features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features
+            else:
+                features_to_process = np.array(features.iloc[:, ids_to_process]) if features.ndim > 1 else features
         self.operation.fit(features_to_process)
         return self.operation

From bc1681ded755bcccdc14f113aaf7e617b1212220 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Sun, 8 Sep 2024 18:13:45 +0300
Subject: [PATCH 63/69] pep8 fix

---
 fedot/preprocessing/preprocessing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py
index 650b98a171..299c697c92 100644
--- a/fedot/preprocessing/preprocessing.py
+++ b/fedot/preprocessing/preprocessing.py
@@ -1,5 +1,4 @@
 from copy import copy
-from copy import copy
 from typing import Optional, Union
 
 import numpy as np

From 4843f7b24a6a454b88bc278f2d9f50fddf0ac88e Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Sun, 8 Sep 2024 18:33:03 +0300
Subject: [PATCH 64/69] test fixes

---
 .../implementation_interfaces.py              | 18 ++++++++++++++----
 test/data/expected_metric_values.json         |  6 +++---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
index 9b62113f7c..9156da89b6 100644
--- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
+++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 import numpy as np
+import pandas as pd
 from golem.core.log import default_log
 
 from fedot.core.data.data import InputData, OutputData
@@ -92,7 +93,7 @@ def fit(self, input_data: InputData):
             if isinstance(features, np.ndarray):
                 features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 
else features else: - features_to_process = np.array(features.iloc[:, ids_to_process]) if features.ndim > 1 else features + features_to_process = np.array(features[ids_to_process]) if features.ndim > 1 else features self.operation.fit(features_to_process) return self.operation @@ -125,7 +126,11 @@ def _make_new_table(self, features): :param features: tabular data for processing :return transformed_features: transformed features table """ - features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy() + if isinstance(features, np.ndarray): + features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy() + else: + features_to_process = np.array(features[self.ids_to_process]) if features.ndim > 1 else features.copy() + transformed_part = self.operation.transform(features_to_process) # If there are no binary features in the dataset @@ -133,7 +138,11 @@ def _make_new_table(self, features): transformed_features = transformed_part else: # Stack transformed features and bool features - bool_features = np.array(features[:, self.bool_ids]) + if isinstance(features, np.ndarray): + bool_features = np.array(features[:, self.bool_ids]) + else: + bool_features = np.array(features[self.bool_ids]) + frames = (bool_features, transformed_part) transformed_features = np.hstack(frames) @@ -165,7 +174,8 @@ def _reasonability_check(features): non_bool_ids = [] # For every column in table make check - features = features.T + if isinstance(features, np.ndarray): + features = features.T for column_id, column in enumerate(features): if isinstance(features, np.ndarray): diff --git a/test/data/expected_metric_values.json b/test/data/expected_metric_values.json index 5018aa8d3f..102c0ca31b 100644 --- a/test/data/expected_metric_values.json +++ b/test/data/expected_metric_values.json @@ -13,11 +13,11 @@ "accuracy": -0.95 }, "multiclass": { - "roc_auc": -0.9881784881784883, + "roc_auc": [-0.9881784881784883, -0.9832500832500832], "precision": -0.9777777777777779, "f1": -0.9719701552732407, - "neg_log_loss": 0.17094588819131074, - "roc_auc_pen": -0.9838963813963815, + "neg_log_loss": [0.17094588819131074, 0.1732861818492787], + "roc_auc_pen": [-0.9838963813963815, -0.9789893328893329], "accuracy": -0.9722222222222222 }, "regression": { From a066f31875e84d696e13e17f565718770cfa7a03 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 18:41:24 +0300 Subject: [PATCH 65/69] pep8 fix --- .../operation_implementations/implementation_interfaces.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index 9156da89b6..389b9e8b90 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -3,7 +3,6 @@ from typing import Optional import numpy as np -import pandas as pd from golem.core.log import default_log from fedot.core.data.data import InputData, OutputData From 8aac969d89fbf1c7dc1b80560c22565ac27bd816 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 18:41:48 +0300 Subject: [PATCH 66/69] fix bug with memory_usage --- fedot/api/api_utils/api_data.py | 2 +- fedot/core/data/data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fedot/api/api_utils/api_data.py 
b/fedot/api/api_utils/api_data.py index 7776c32aad..5a421397eb 100644 --- a/fedot/api/api_utils/api_data.py +++ b/fedot/api/api_utils/api_data.py @@ -184,7 +184,7 @@ def transform(self, test_data: InputData, current_pipeline) -> InputData: test_data = self.preprocessor.reduce_memory_size(data=test_data) - memory_usage = convert_memory_size(test_data.memory_usages) + memory_usage = convert_memory_size(test_data.memory_usage) features_shape = test_data.features.shape target_shape = test_data.target.shape self.log.message( diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index d68a5b9702..6153e44af5 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -537,7 +537,7 @@ def to_csv(self, path_to_save): @property def memory_usage(self): if isinstance(self.features, np.ndarray): - return self.features.nbytes + return sum([feature.nbytes for feature in self.features.T]) else: return self.features.memory_usage().sum() From 1039392f2cab77db4123da3fbe661a01cf4d7846 Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Sun, 8 Sep 2024 19:45:19 +0300 Subject: [PATCH 67/69] reduce_memory_usage in utils, fix test with operations --- .../data_operations/categorical_encoders.py | 33 ++++++++++++++----- .../sklearn_imbalanced_class.py | 33 ++++++++++++++++--- .../implementation_interfaces.py | 17 ++++++---- fedot/preprocessing/preprocessing.py | 29 +--------------- fedot/utilities/memory.py | 33 +++++++++++++++++++ 5 files changed, 99 insertions(+), 46 deletions(-) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py index 182fd346aa..62328b9f99 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py @@ -1,7 +1,8 @@ from copy import deepcopy -from typing import List, Optional +from typing import List, Optional, Union import numpy as np +import pandas as pd from sklearn.preprocessing import LabelEncoder, OneHotEncoder from fedot.core.data.data import InputData, OutputData @@ -10,6 +11,7 @@ ) from fedot.core.operations.operation_parameters import OperationParameters from fedot.preprocessing.data_types import TYPE_TO_ID +from fedot.utilities.memory import reduce_mem_usage class OneHotEncodingImplementation(DataOperationImplementation): @@ -37,7 +39,11 @@ def fit(self, input_data: InputData): # If there are categorical features - process it if self.categorical_ids.size > 0: - updated_cat_features = features[:, self.categorical_ids].astype(str) + if isinstance(features, np.ndarray): + updated_cat_features = features[:, self.categorical_ids].astype(str) + else: + updated_cat_features = features.iloc[:, self.categorical_ids].astype(str) + self.encoder.fit(updated_cat_features) return self.encoder @@ -58,9 +64,15 @@ def transform(self, input_data: InputData) -> OutputData: transformed_features = self._apply_one_hot_encoding(transformed_features) # Update features - output_data = self._convert_to_output(copied_data, - transformed_features) + output_data = self._convert_to_output(copied_data, transformed_features) self._update_column_types(output_data) + + if isinstance(output_data.features, pd.DataFrame): + output_data.predict = reduce_mem_usage( + transformed_features, + output_data.supplementary_data.col_type_ids['features'] + ) + return output_data def 
_update_column_types(self, output_data: OutputData): @@ -77,17 +89,22 @@ def _update_column_types(self, output_data: OutputData): output_data.encoded_idx = self.encoded_ids output_data.supplementary_data.col_type_ids['features'] = numerical_columns - def _apply_one_hot_encoding(self, features: np.ndarray) -> np.ndarray: + def _apply_one_hot_encoding(self, features: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: """ The method creates a table based on categorical and real features after One Hot Encoding transformation :param features: tabular data for processing :return transformed_features: transformed features table """ - transformed_categorical = self.encoder.transform(features[:, self.categorical_ids]).toarray() + if isinstance(features, np.ndarray): + transformed_categorical = self.encoder.transform(features[:, self.categorical_ids]).toarray() + # Stack transformed categorical and non-categorical data, ignore if none + non_categorical_features = features[:, self.non_categorical_ids] + + else: + transformed_categorical = self.encoder.transform(features.iloc[:, self.categorical_ids]).toarray() + non_categorical_features = features.iloc[:, self.non_categorical_ids].to_numpy() - # Stack transformed categorical and non-categorical data, ignore if none - non_categorical_features = features[:, self.non_categorical_ids] frames = (non_categorical_features, transformed_categorical) transformed_features = np.hstack(frames) self.encoded_ids = np.array(range(non_categorical_features.shape[1], transformed_features.shape[1])) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py index 641996dd7b..3f838cd4e5 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_imbalanced_class.py @@ -1,7 +1,8 @@ from copy import copy -from typing import Optional +from typing import Optional, Union import numpy as np +import pandas as pd from golem.core.log import default_log from sklearn.utils import resample @@ -10,6 +11,7 @@ DataOperationImplementation ) from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.memory import reduce_mem_usage GLOBAL_PREFIX = 'sklearn_imbalanced_class:' @@ -93,6 +95,12 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: # If number of elements of each class are equal that transformation is not required return self._convert_to_output(input_data, input_data.features) + if isinstance(copied_data.features, pd.DataFrame): + copied_data.features = copied_data.features.to_numpy() + + if isinstance(copied_data.target, pd.DataFrame): + copied_data.target = copied_data.target.to_numpy() + min_data, maj_data = self._get_data_by_target(copied_data.features, copied_data.target, unique_class, number_of_elements) @@ -116,18 +124,35 @@ def transform_for_fit(self, input_data: InputData) -> OutputData: transformed_data = np.concatenate((min_data, maj_data), axis=0).transpose() + if isinstance(input_data.features, pd.DataFrame): + predict = reduce_mem_usage( + transformed_data[:-1].transpose(), + input_data.supplementary_data.col_type_ids['features'] + ) + + target = reduce_mem_usage( + transformed_data[-1], + input_data.supplementary_data.col_type_ids['target'] + ) + + else: + predict = transformed_data[:-1].transpose() + target = 
transformed_data[-1] + output_data = OutputData( idx=np.arange(transformed_data.shape[1]), features=input_data.features, - predict=transformed_data[:-1].transpose(), + predict=predict, task=input_data.task, - target=transformed_data[-1], + target=target, data_type=input_data.data_type, supplementary_data=input_data.supplementary_data) + return output_data @staticmethod - def _get_data_by_target(features: np.array, target: np.array, unique: np.array, + def _get_data_by_target(features: Union[np.array, pd.DataFrame], target: Union[np.array, pd.DataFrame], + unique: np.array, number_of_elements: np.array) -> np.array: """Unify features and target in one array and split into classes """ diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py index 389b9e8b90..5ecb41b0a9 100644 --- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py +++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py @@ -3,6 +3,7 @@ from typing import Optional import numpy as np +import pandas as pd from golem.core.log import default_log from fedot.core.data.data import InputData, OutputData @@ -82,7 +83,10 @@ def fit(self, input_data: InputData): :return operation: trained transformer (optional output) """ - features = input_data.features + if input_data.task.task_type.name == 'ts_forecasting' and input_data.features.ndim == 2: + features = input_data.features.ravel() + else: + features = input_data.features # Find boolean columns in features table bool_ids, ids_to_process = self._reasonability_check(features) @@ -90,6 +94,9 @@ def fit(self, input_data: InputData): self.bool_ids = bool_ids if len(ids_to_process) > 0: if isinstance(features, np.ndarray): + if input_data.task.task_type.name == 'ts_forecasting' and input_data.features.ndim == 2: + features = features.reshape(-1, 1) + features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features else: features_to_process = np.array(features[ids_to_process]) if features.ndim > 1 else features @@ -173,16 +180,14 @@ def _reasonability_check(features): non_bool_ids = [] # For every column in table make check - if isinstance(features, np.ndarray): - features = features.T - - for column_id, column in enumerate(features): + for column_id in range(columns_amount): if isinstance(features, np.ndarray): column = features[:, column_id] if columns_amount > 1 else features.copy() else: column = features.iloc[:, column_id] if columns_amount > 1 else features.copy() - if len(set(column)) > 2: + if (isinstance(column, pd.DataFrame) and len(set(column)) > 2) or \ + (isinstance(column, np.ndarray) and len(np.unique(column)) > 2): non_bool_ids.append(column_id) else: bool_ids.append(column_id) diff --git a/fedot/preprocessing/preprocessing.py b/fedot/preprocessing/preprocessing.py index 299c697c92..5b0c03d7a2 100644 --- a/fedot/preprocessing/preprocessing.py +++ b/fedot/preprocessing/preprocessing.py @@ -31,6 +31,7 @@ from fedot.preprocessing.data_type_check import exclude_image, exclude_multi_ts, exclude_ts from fedot.preprocessing.data_types import TYPE_TO_ID, TableTypesCorrector, ID_TO_TYPE from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME, PipelineStructureExplorer +from fedot.utilities.memory import reduce_mem_usage # The allowed percent of empty samples in features. 
# Example: 90% objects in features are 'nan', then drop this feature from data.
@@ -560,34 +561,6 @@ def update_indices_for_time_series(self, test_data: Union[InputData, MultiModalD
 
     @copy_doc(BasePreprocessor.reduce_memory_size)
     def reduce_memory_size(self, data: InputData) -> InputData:
-        def reduce_mem_usage(features, initial_types):
-            df = pd.DataFrame(features)
-            types_array = [ID_TO_TYPE[_type] for _type in initial_types]
-
-            for index, col in enumerate(df.columns):
-                df[col] = df[col].astype(types_array[index])
-                col_type = df[col].dtype.name
-
-                if col_type not in ['object', 'category', 'datetime64[ns, UTC]']:
-                    c_min = df[col].min()
-                    c_max = df[col].max()
-                    if str(col_type)[:3] == 'int':
-                        if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
-                            df[col] = df[col].astype(np.int8)
-                        elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
-                            df[col] = df[col].astype(np.int16)
-                        elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
-                            df[col] = df[col].astype(np.int32)
-                        elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
-                            df[col] = df[col].astype(np.int64)
-                    else:
-                        if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
-                            df[col] = df[col].astype(np.float32)
-                        else:
-                            df[col] = df[col].astype(np.float64)
-
-            return df
-
         if isinstance(data, InputData):
             if data.task.task_type == TaskTypesEnum.ts_forecasting:
                 # TODO: TS data has col_type_ids['features'] = None.
diff --git a/fedot/utilities/memory.py b/fedot/utilities/memory.py
index b25eb9d757..53091fd02e 100644
--- a/fedot/utilities/memory.py
+++ b/fedot/utilities/memory.py
@@ -2,8 +2,12 @@ import logging
 import tracemalloc
 from typing import Optional
 
+import numpy as np
+import pandas as pd
 from golem.core.log import default_log
 
+from fedot.preprocessing.data_types import ID_TO_TYPE
+
 
 class MemoryAnalytics:
     is_active = False
@@ -55,3 +59,32 @@ def log(cls, logger: Optional[logging.LoggerAdapter] = None,
             logger = default_log(prefix=cls.__name__)
         logger.log(logging_level, message)
         return message
+
+
+def reduce_mem_usage(features, initial_types):
+    df = pd.DataFrame(features)
+    types_array = [ID_TO_TYPE[_type] for _type in initial_types]
+
+    for index, col in enumerate(df.columns):
+        df[col] = df[col].astype(types_array[index])
+        col_type = df[col].dtype.name
+
+        if col_type not in ['object', 'category', 'datetime64[ns, UTC]']:
+            c_min = df[col].min()
+            c_max = df[col].max()
+            if str(col_type)[:3] == 'int':
+                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
+                    df[col] = df[col].astype(np.int8)
+                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
+                    df[col] = df[col].astype(np.int16)
+                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
+                    df[col] = df[col].astype(np.int32)
+                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
+                    df[col] = df[col].astype(np.int64)
+            else:
+                if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
+                    df[col] = df[col].astype(np.float32)
+                else:
+                    df[col] = df[col].astype(np.float64)
+
+    return df
\ No newline at end of file
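
With `reduce_mem_usage` now exposed from `fedot/utilities/memory.py` by the patch above, its downcasting behaviour can be exercised directly. A hedged usage sketch follows; it assumes `TYPE_TO_ID` in `fedot.preprocessing.data_types` maps plain `int` and `float` to the ids the helper expects (the `ID_TO_TYPE[_type]` lookup in its body suggests the two tables are inverses):

    import numpy as np
    from fedot.preprocessing.data_types import TYPE_TO_ID
    from fedot.utilities.memory import reduce_mem_usage

    # Two float64 columns: the first holds small integers, the second real floats
    features = np.array([[1.0, 2.5],
                         [120.0, 3.5],
                         [7.0, 9.0]])
    initial_types = [TYPE_TO_ID[int], TYPE_TO_ID[float]]

    df = reduce_mem_usage(features, initial_types)
    print(df.dtypes)  # expected: int8 for column 0, float32 for column 1

Downcasting right after the original column types are restored is what produces the memory savings reported around `reduce_memory_size` in the `fit_transform` and `transform` log messages.
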
From 9a0ccabac0f67a814d2f03b6551fbd2fe43286b0 Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Sun, 8 Sep 2024 20:53:12 +0300
Subject: [PATCH 68/69] fix tests

---
 fedot/core/data/data.py                       |  5 +-
 .../data_operations/categorical_encoders.py   | 68 +++++++++++++------
 .../data_operations/sklearn_selectors.py      |  6 +-
 .../implementation_interfaces.py              |  8 ++-
 test/unit/preprocessing/test_preprocessors.py |  6 +-
 5 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index 6153e44af5..e8b16cd953 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -611,8 +611,11 @@ def subset_features(self, feature_ids: np.array) -> Optional[InputData]:
         """
         if feature_ids is None or feature_ids.size == 0:
             return None
+        if isinstance(self.features, np.ndarray):
+            subsample_features = self.features[:, feature_ids]
+        else:
+            subsample_features = self.features.iloc[:, feature_ids]
 
-        subsample_features = self.features[:, feature_ids]
         subsample_input = InputData(
             features=subsample_features,
             data_type=self.data_type,
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
index 62328b9f99..4f9c759c24 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/categorical_encoders.py
@@ -148,34 +148,62 @@ def _update_column_types(self, output_data: OutputData):
         feature_type_ids = output_data.supplementary_data.col_type_ids['features']
         feature_type_ids[self.categorical_ids] = TYPE_TO_ID[int]
 
-    def _fit_label_encoders(self, data: np.ndarray):
+    def _fit_label_encoders(self, data: Union[np.ndarray, pd.DataFrame]):
         """ Fit LabelEncoder for every categorical column in the dataset """
-        categorical_columns = data[:, self.categorical_ids].astype(str)
-        for column_id, column in zip(self.categorical_ids, categorical_columns.T):
-            le = LabelEncoder()
-            le.fit(column)
-            self.encoders[column_id] = le
+        if isinstance(data, np.ndarray):
+            categorical_columns = data[:, self.categorical_ids].astype(str)
 
-    def _apply_label_encoder(self, data: np.ndarray):
+            for column_id, column in zip(self.categorical_ids, categorical_columns.T):
+                le = LabelEncoder()
+                le.fit(column)
+                self.encoders[column_id] = le
+
+        else:
+            categorical_columns = data.iloc[:, self.categorical_ids].astype(str)
+
+            for column_id in self.categorical_ids:
+                le = LabelEncoder()
+                le.fit(categorical_columns[column_id])
+                self.encoders[column_id] = le
+
+    def _apply_label_encoder(self, data: Union[np.ndarray, pd.DataFrame]):
         """ Applies fitted LabelEncoder for all categorical features inplace
 
         Args:
             data: numpy array with all features
         """
-        categorical_columns = data[:, self.categorical_ids].astype(str)
-        for column_id, column in zip(self.categorical_ids, categorical_columns.T):
-            column_encoder = self.encoders[column_id]
-            column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column)))
-
-            transformed_column = column_encoder.transform(column)
-            nan_indices = np.flatnonzero(column == 'nan')
-            if len(nan_indices):
-                # Store np.nan values
-                transformed_column = transformed_column.astype(object)
-                transformed_column[nan_indices] = np.nan
-
-            data[:, column_id] = transformed_column
+        if isinstance(data, np.ndarray):
+            categorical_columns = data[:, self.categorical_ids].astype(str)
+
+            for column_id, column in zip(self.categorical_ids, categorical_columns.T):
+                column_encoder = self.encoders[column_id]
+                column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column)))
+
+                transformed_column = column_encoder.transform(column)
+                nan_indices = np.flatnonzero(column == 'nan')
+                if len(nan_indices):
+                    # Store np.nan values
+                    transformed_column = transformed_column.astype(object)
+                    transformed_column[nan_indices] = np.nan
+
+                data[:, column_id] = transformed_column
+        else:
+            categorical_columns = data.iloc[:, self.categorical_ids].astype(str)
+
+            for column_id in self.categorical_ids:
+                column_encoder = self.encoders[column_id]
+                column = categorical_columns[column_id]
+                column_encoder.classes_ = np.unique(np.concatenate((column_encoder.classes_, column)))
+
+                transformed_column = column_encoder.transform(column)
+                nan_indices = np.flatnonzero(column == 'nan')
+                if len(nan_indices):
+                    # Store np.nan values
+                    transformed_column = transformed_column.astype(object)
+                    transformed_column[nan_indices] = np.nan
+
+                data.iloc[:, column_id] = transformed_column
 
     def get_params(self) -> OperationParameters:
         """ Due to LabelEncoder has no parameters - return empty set """
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py
index fa880ae7fd..51cf3a28ff 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/sklearn_selectors.py
@@ -97,7 +97,11 @@ def _make_new_table(self, features):
 
         # Bool vector - mask for columns
         self.remain_features_mask = self.operation.support_
-        transformed_features = features[:, self.remain_features_mask]
+        if isinstance(features, np.ndarray):
+            transformed_features = features[:, self.remain_features_mask]
+        else:
+            transformed_features = features.iloc[:, self.remain_features_mask]
+
         return transformed_features
 
     @staticmethod
diff --git a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
index 5ecb41b0a9..0573139643 100644
--- a/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
+++ b/fedot/core/operations/evaluation/operation_implementations/implementation_interfaces.py
@@ -99,7 +99,7 @@ def fit(self, input_data: InputData):
 
                 features_to_process = np.array(features[:, ids_to_process]) if features.ndim > 1 else features
             else:
-                features_to_process = np.array(features[ids_to_process]) if features.ndim > 1 else features
+                features_to_process = np.array(features.iloc[:, ids_to_process]) if features.ndim > 1 else features
         self.operation.fit(features_to_process)
         return self.operation
 
@@ -135,7 +135,9 @@ def _make_new_table(self, features):
         if isinstance(features, np.ndarray):
             features_to_process = np.array(features[:, self.ids_to_process]) if features.ndim > 1 else features.copy()
         else:
-            features_to_process = np.array(features[self.ids_to_process]) if features.ndim > 1 else features.copy()
+            features_to_process = np.array(
+                features.iloc[:, self.ids_to_process]
+            ) if features.ndim > 1 else features.copy()
 
         transformed_part = self.operation.transform(features_to_process)
 
@@ -186,7 +188,7 @@ def _reasonability_check(features):
             else:
                 column = features.iloc[:, column_id] if columns_amount > 1 else features.copy()
 
-            if (isinstance(column, pd.DataFrame) and len(set(column)) > 2) or \
+            if (isinstance(column, pd.Series) and len(set(column)) > 2) or \
                 (isinstance(column, np.ndarray) and len(np.unique(column)) > 2):
                 non_bool_ids.append(column_id)
             else:
diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py
index d4d52c4884..f436ff008f 100644
--- a/test/unit/preprocessing/test_preprocessors.py
+++ b/test/unit/preprocessing/test_preprocessors.py
@@ -242,7 +242,8 @@ def test_mixed_column_with_str_and_float_values():
     # column with index 0 must be converted to string and encoded with OHE
     train_predicted = fit_predict_cycle_for_testing(idx=0)
     assert train_predicted.features.shape[1] == 5
-    assert all(isinstance(el, np.ndarray) for el in train_predicted.features)
+    assert isinstance(train_predicted.features, pd.DataFrame) or \
+        all(isinstance(el, np.ndarray) for el in train_predicted.features)
 
     # column with index 1 must be converted to float and the gaps must be filled
     train_predicted = fit_predict_cycle_for_testing(idx=1)
@@ -254,7 +255,8 @@
     )
 
     assert train_predicted.features.shape[1] == 1
-    assert all(isinstance(el[0], types_encountered) for el in train_predicted.features)
+    features_array = np.asarray(train_predicted.features)
+    assert all(isinstance(el[0], types_encountered) for el in features_array)
 
     # column with index 2 must be removed due to unclear type of data
     try:
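
Patch 68 repeats the same `isinstance(features, np.ndarray)` guard around positional column selection in categorical_encoders.py, sklearn_selectors.py and implementation_interfaces.py. The pattern could be captured once; a hypothetical helper (not part of the patch series) for illustration:

    import numpy as np
    import pandas as pd

    def take_columns(features, column_ids):
        """Positional column selection for either a 2D ndarray or a DataFrame."""
        if isinstance(features, np.ndarray):
            return features[:, column_ids]
        return features.iloc[:, column_ids]

    X = np.arange(12).reshape(3, 4)
    assert np.array_equal(take_columns(X, [0, 2]),
                          take_columns(pd.DataFrame(X), [0, 2]).to_numpy())

Centralising the guard would shrink the duplicated branches that patches 62, 64 and 68 each had to correct separately.
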
From 0d8796d9a81de2bb31a3720c8f037b9563d0295b Mon Sep 17 00:00:00 2001
From: Andrey Stebenkov
Date: Sun, 8 Sep 2024 22:41:25 +0300
Subject: [PATCH 69/69] fix tests in main api

---
 fedot/core/data/data.py                       | 2 +-
 test/integration/api/test_main_api.py         | 3 ++-
 test/unit/preprocessing/test_preprocessors.py | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index e8b16cd953..93dc628bc8 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -757,7 +757,7 @@ class OutputData(Data):
     """``Data`` type for data prediction in the node
     """
 
-    features: Optional[np.ndarray] = None
+    features: Optional[Union[np.ndarray, pd.DataFrame]] = None
     predict: Optional[np.ndarray] = None
     target: Optional[np.ndarray] = None
     encoded_idx: Optional[np.ndarray] = None
diff --git a/test/integration/api/test_main_api.py b/test/integration/api/test_main_api.py
index 351dc6e24a..7b57c0b240 100644
--- a/test/integration/api/test_main_api.py
+++ b/test/integration/api/test_main_api.py
@@ -231,7 +231,8 @@ def test_categorical_preprocessing_unidata_predefined_linear():
     )
 
     for i in range(prediction.features.shape[1]):
-        assert all(list(map(lambda x: isinstance(x, types_encountered), prediction.features[:, i])))
+        assert all(list(map(lambda x: isinstance(x, types_encountered),
+                            np.asarray(prediction.features)[:, i])))
 
 
 def test_fill_nan_without_categorical():
diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py
index f436ff008f..4b0b9ed41e 100644
--- a/test/unit/preprocessing/test_preprocessors.py
+++ b/test/unit/preprocessing/test_preprocessors.py
@@ -223,7 +223,8 @@ def test_binary_pseudo_string_column_process_correctly():
     )
 
     assert train_predicted.features.shape[1] == 1
-    assert all(isinstance(el[0], types_encountered) for el in train_predicted.features)
+    features_array = np.asarray(train_predicted.features)
+    assert all(isinstance(el[0], types_encountered) for el in features_array)
 
 
 def fit_predict_cycle_for_testing(idx: int):
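
A closing note on the annotation widened in the last patch: `Optional` accepts exactly one type parameter, so the field must be spelled `Optional[Union[np.ndarray, pd.DataFrame]]`. A standalone check (not part of the patches) makes the failure mode visible:

    from typing import Optional, Union

    import numpy as np
    import pandas as pd

    # Valid: Optional takes a single parameter, Union bundles the two table types
    FeatureTable = Optional[Union[np.ndarray, pd.DataFrame]]

    try:
        BadFeatureTable = Optional[np.ndarray, pd.DataFrame]
    except TypeError as exc:
        print(exc)  # CPython reports roughly: "Optional requires a single type"

Since `OutputData` fields are evaluated when `fedot.core.data.data` is imported, the malformed spelling would fail at import time unless postponed annotation evaluation is enabled.
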