From 49d6e5a6d3892c2e412ea76a83a32aa8bb4d15fd Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova Date: Fri, 21 Jul 2023 18:57:58 +0300 Subject: [PATCH 1/5] Fix params setting --- fedot/core/pipelines/node.py | 21 ++++++++----------- fedot/core/pipelines/tuning/search_space.py | 7 +++++-- fedot/core/utils.py | 1 + .../pipelines/tuning/test_pipeline_tuning.py | 4 ++-- test/unit/pipelines/test_node.py | 16 ++++++++++++++ 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/fedot/core/pipelines/node.py b/fedot/core/pipelines/node.py index 6be58464a1..ed415032be 100644 --- a/fedot/core/pipelines/node.py +++ b/fedot/core/pipelines/node.py @@ -15,7 +15,7 @@ from fedot.core.operations.operation import Operation from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.operation_types_repository import OperationTypesRepository -from fedot.core.utils import DEFAULT_PARAMS_STUB +from fedot.core.utils import DEFAULT_PARAMS_STUB, NESTED_PARAMS_LABEL @register_serializable @@ -57,9 +57,6 @@ def __init__(self, operation_type: Optional[Union[str, Operation]] = None, # Define operation, based on content dictionary operation = self._process_content_init(passed_content) params = passed_content.get('params', {}) - # The check for "default_params" is needed for backward compatibility. - if params == DEFAULT_PARAMS_STUB: - params = {} self.metadata = passed_content.get('metadata', NodeMetadata()) else: # There is no content for node @@ -72,9 +69,9 @@ def __init__(self, operation_type: Optional[Union[str, Operation]] = None, self.fit_time_in_seconds = 0 self.inference_time_in_seconds = 0 - self._parameters = OperationParameters.from_operation_type(operation.operation_type, **params) - super().__init__(content={'name': operation, - 'params': self._parameters.to_dict()}, nodes_from=nodes_from) + super().__init__(content={'name': operation}, nodes_from=nodes_from) + self.parameters = params + self.log = default_log(self) self._fitted_operation = None self.rating = None @@ -325,18 +322,18 @@ def parameters(self) -> dict: @parameters.setter def parameters(self, params: dict): - """Sets custom parameters of the node + """Sets custom parameters of the node or set default Args: params: new parameters to be placed instead of existing """ - if params: + if params is not None: # The check for "default_params" is needed for backward compatibility. if params == DEFAULT_PARAMS_STUB: params = {} - # take nested composer params if they appeared - if 'nested_space' in params: - params = params['nested_space'] + # take nested params if they appeared (mostly used for tuning) + if NESTED_PARAMS_LABEL in params: + params = params[NESTED_PARAMS_LABEL] self._parameters = OperationParameters.from_operation_type(self.operation.operation_type, **params) self.content['params'] = self._parameters.to_dict() diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index b6c383d2b0..90c453fb20 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -4,6 +4,8 @@ from golem.core.tuning.search_space import SearchSpace, OperationParametersMapping from hyperopt import hp +from fedot.core.utils import NESTED_PARAMS_LABEL + class PipelineSearchSpace(SearchSpace): """ @@ -19,9 +21,10 @@ def __init__(self, replace_default_search_space: bool = False): self.custom_search_space = custom_search_space self.replace_default_search_space = replace_default_search_space - parameters_per_operation = self.get_parameters_dict() + parameters_per_operation = self.get_parameters_dict super().__init__(parameters_per_operation) + @property def get_parameters_dict(self): parameters_per_operation = { 'kmeans': { @@ -325,7 +328,7 @@ def get_parameters_dict(self): 'type': 'continuous'} }, 'glm': { - 'nested_space': { + NESTED_PARAMS_LABEL: { 'hyperopt-dist': hp.choice, 'sampling-scope': [[ { diff --git a/fedot/core/utils.py b/fedot/core/utils.py index 9ca374b602..40ad8bb39a 100644 --- a/fedot/core/utils.py +++ b/fedot/core/utils.py @@ -11,6 +11,7 @@ from sklearn.model_selection import train_test_split DEFAULT_PARAMS_STUB = 'default_params' +NESTED_PARAMS_LABEL = 'nested_space' def fedot_project_root() -> Path: diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index ff0a7df811..b9960bf6aa 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -22,7 +22,7 @@ from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum, ClassificationMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum -from fedot.core.utils import fedot_project_root +from fedot.core.utils import fedot_project_root, NESTED_PARAMS_LABEL from test.unit.tasks.test_forecasting import get_ts_data seed(1) @@ -489,7 +489,7 @@ def test_complex_search_space(): space = PipelineSearchSpace() for i in range(20): operation_parameters = space.parameters_per_operation.get("glm") - new_value = hp_sample(operation_parameters["nested_space"]) + new_value = hp_sample(operation_parameters[NESTED_PARAMS_LABEL]) for params in new_value['sampling-scope'][0]: assert params['link'] in GLMImplementation.family_distribution[params['family']]['available_links'] diff --git a/test/unit/pipelines/test_node.py b/test/unit/pipelines/test_node.py index e8cec6d323..39169eb8b4 100644 --- a/test/unit/pipelines/test_node.py +++ b/test/unit/pipelines/test_node.py @@ -12,6 +12,7 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import DEFAULT_PARAMS_STUB, NESTED_PARAMS_LABEL @pytest.fixture() @@ -108,3 +109,18 @@ def test_node_return_correct_operation_info(): correct_tags = ["simple", "imputation"] assert all(correct_tag in operation_tags for correct_tag in correct_tags) + + +@pytest.mark.parametrize('params, expected_params', [(DEFAULT_PARAMS_STUB, {"family": "gaussian", "link": "identity"}), + ({}, {"family": "gaussian", "link": "identity"}), + ({NESTED_PARAMS_LABEL: {'family': 'gaussian', 'link': 'log'}}, + {'family': 'gaussian', 'link': 'log'}), + ({'family': 'inverse_gaussian', 'link': 'inverse_power'}, + {'family': 'inverse_gaussian', 'link': 'inverse_power'})]) +def test_init_and_set_pipeline_node_with_params(params, expected_params): + node = PipelineNode(content={'name': 'glm', 'params': params}) + assert node.parameters == expected_params + + node = PipelineNode('glm') + node.parameters = params + assert node.parameters == expected_params From 1c3b8a8d827a6750510c434f3afe7aef113239bb Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova Date: Wed, 26 Jul 2023 11:48:51 +0300 Subject: [PATCH 2/5] Minor --- ...multitask_classification_regression_api.py | 52 ------------------- fedot/core/pipelines/tuning/search_space.py | 3 +- 2 files changed, 1 insertion(+), 54 deletions(-) delete mode 100644 examples/simple/multitask_classification_regression_api.py diff --git a/examples/simple/multitask_classification_regression_api.py b/examples/simple/multitask_classification_regression_api.py deleted file mode 100644 index 9ed2587cbf..0000000000 --- a/examples/simple/multitask_classification_regression_api.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -from typing import Tuple - -import numpy as np -import pandas as pd - -from fedot.api.main import Fedot -from fedot.core.utils import fedot_project_root - - -def load_train_test_dataframes() -> Tuple[dict, dict, dict]: - """ Load data for multitask regression / classification problem """ - data_path = fedot_project_root().joinpath('examples/data') - train_df = pd.read_csv(os.path.join(data_path, 'train_synthetic_regression_classification.csv')) - test_df = pd.read_csv(os.path.join(data_path, 'test_synthetic_regression_classification.csv')) - - # Prepare numpy arrays into dictionaries - # For regression and classification features are the same - mm_train_features = {'regression': np.array(train_df[['feature_1', 'feature_2']]), - 'classification': np.array(train_df[['feature_1', 'feature_2']])} - # Target is different for train - mm_train_target = {'regression': np.array(train_df['concentration']), - 'classification': np.array(train_df['class'])} - # For test features the same - mm_test_features = {'regression': np.array(test_df[['feature_1', 'feature_2']]), - 'classification': np.array(test_df[['feature_1', 'feature_2']])} - return mm_train_features, mm_train_target, mm_test_features - - -def launch_multitask_api_example(): - """ - Demonstration of an example with running a multitask pipeline composing procedure through FEDOT API. - Synthetic data is used. Task: predict the category of the substance (column "class") <- classification, - and then predict the concentration based on the predicted category (column "concentration") <- regression. - """ - train_features, train_target, test_features = load_train_test_dataframes() - - # The priority of the task is determined by the order. So, main task is regression - problem = 'regression/classification' - - # TODO finish this example - it is not working now and represents the desired interface for multitask - model = Fedot(problem=problem, timeout=5) - model.fit(features=train_features, - target=train_target) - - predicted_concentrations, predicted_classes = model.predict(features=test_features) - print(f'Predicted classes: {predicted_classes}') - print(f'Predicted concentrations: {predicted_concentrations}') - - -if __name__ == '__main__': - launch_multitask_api_example() diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index 90c453fb20..aacff36019 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -21,10 +21,9 @@ def __init__(self, replace_default_search_space: bool = False): self.custom_search_space = custom_search_space self.replace_default_search_space = replace_default_search_space - parameters_per_operation = self.get_parameters_dict + parameters_per_operation = self.get_parameters_dict() super().__init__(parameters_per_operation) - @property def get_parameters_dict(self): parameters_per_operation = { 'kmeans': { From 4e6d94bdfce854fc78a3761243e94fa63ea44c2c Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova Date: Wed, 26 Jul 2023 12:01:35 +0300 Subject: [PATCH 3/5] PEP 8 --- test/unit/pipelines/test_node.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/pipelines/test_node.py b/test/unit/pipelines/test_node.py index 39169eb8b4..726187e977 100644 --- a/test/unit/pipelines/test_node.py +++ b/test/unit/pipelines/test_node.py @@ -1,6 +1,5 @@ import numpy as np import pytest -from golem.core.dag.graph_utils import ordered_subnodes_hierarchy from golem.core.dag.linked_graph_node import LinkedGraphNode from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression From 15c2d97cada5681fc269f16b3479e763661e6f28 Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova Date: Fri, 28 Jul 2023 13:03:48 +0300 Subject: [PATCH 4/5] Correct test --- .../pipelines/tuning/test_pipeline_tuning.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index b9960bf6aa..5509d5662b 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -497,15 +497,19 @@ def test_complex_search_space(): @pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner]) def test_complex_search_space_tuning_correct(tuner): """ Tests SimultaneousTuner for time series forecasting task with GLM model that has a complex glm search space""" - train_data, test_data = get_ts_data(n_steps=200, forecast_length=5) + train_data, test_data = get_ts_data(n_steps=700, forecast_length=20) glm_pipeline = Pipeline(PipelineNode('glm')) - glm_custom_params = glm_pipeline.nodes[0].parameters + initial_parameters = glm_pipeline.nodes[0].parameters tuner = TunerBuilder(train_data.task) \ .with_tuner(tuner) \ .with_metric(RegressionMetricsEnum.MSE) \ .with_iterations(100) \ .build(train_data) tuned_glm_pipeline = tuner.tune(glm_pipeline) - new_custom_params = tuned_glm_pipeline.nodes[0].parameters - assert glm_custom_params == new_custom_params + found_parameters = tuned_glm_pipeline.nodes[0].parameters + if tuner.init_metric == tuner.obtained_metric: + # TODO: (YamLyubov) Remove the check when IOptTuner will be able to tune categorical parameters. + assert initial_parameters == found_parameters + else: + assert initial_parameters != found_parameters From 8c053dfecf20c13b197190362912623f0a1da3b7 Mon Sep 17 00:00:00 2001 From: Lyubov Yamshchikova Date: Fri, 28 Jul 2023 14:10:56 +0300 Subject: [PATCH 5/5] Add comment --- fedot/core/pipelines/node.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fedot/core/pipelines/node.py b/fedot/core/pipelines/node.py index ed415032be..bd05c87ebb 100644 --- a/fedot/core/pipelines/node.py +++ b/fedot/core/pipelines/node.py @@ -70,6 +70,7 @@ def __init__(self, operation_type: Optional[Union[str, Operation]] = None, self.inference_time_in_seconds = 0 super().__init__(content={'name': operation}, nodes_from=nodes_from) + # use parameters.setter to process input parameters correctly self.parameters = params self.log = default_log(self)