From 85e1f0670f11fb21e568d21070e64dfc2926b34a Mon Sep 17 00:00:00 2001 From: Andrey Stebenkov Date: Thu, 22 Feb 2024 17:45:49 +0300 Subject: [PATCH] @andreygetmanov requested fixes (2) --- fedot/api/api_utils/api_composer.py | 33 ++++++++-------- fedot/api/main.py | 48 +++++++++++++++--------- fedot/utilities/composer_timer.py | 58 ++++++++++++++++++----------- 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/fedot/api/api_utils/api_composer.py b/fedot/api/api_utils/api_composer.py index 184aa2bc7e..cc3bae010d 100644 --- a/fedot/api/api_utils/api_composer.py +++ b/fedot/api/api_utils/api_composer.py @@ -53,26 +53,29 @@ def init_cache(self): def obtain_model(self, train_data: InputData) -> Tuple[Pipeline, Sequence[Pipeline], OptHistory]: """ Function for composing FEDOT pipeline model """ - timeout: float = self.params.timeout - with_tuning = self.params.get('with_tuning') - self.timer = ApiTime(time_for_automl=timeout, with_tuning=with_tuning) + with fedot_composer_timer.launch_composing(): + timeout: float = self.params.timeout + with_tuning = self.params.get('with_tuning') - initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data) + self.timer = ApiTime(time_for_automl=timeout, with_tuning=with_tuning) - multi_objective = len(self.metrics) > 1 - self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective) + initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data) - self.log.message(f"AutoML configured." - f" Parameters tuning: {with_tuning}." - f" Time limit: {timeout} min." - f" Set of candidate models: {self.params.get('available_operations')}.") + multi_objective = len(self.metrics) > 1 + self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective) + + self.log.message(f"AutoML configured." + f" Parameters tuning: {with_tuning}." + f" Time limit: {timeout} min." + f" Set of candidate models: {self.params.get('available_operations')}.") + + best_pipeline, best_pipeline_candidates, gp_composer = self.compose_pipeline( + train_data, + initial_assumption, + fitted_assumption + ) - best_pipeline, best_pipeline_candidates, gp_composer = self.compose_pipeline( - train_data, - initial_assumption, - fitted_assumption - ) if with_tuning: with fedot_composer_timer.launch_tuning('composing'): best_pipeline = self.tune_final_pipeline(train_data, best_pipeline) diff --git a/fedot/api/main.py b/fedot/api/main.py index 6400d68946..0f5f322dbf 100644 --- a/fedot/api/main.py +++ b/fedot/api/main.py @@ -148,18 +148,17 @@ def fit(self, self.params.update_available_operations_by_preset(self.train_data) - with fedot_composer_timer.launch_applying_recommendations('fit'): - if self.params.get('use_input_preprocessing'): - # Launch data analyser - it gives recommendations for data preprocessing - recommendations_for_data, recommendations_for_params = \ - self.data_analyser.give_recommendations(input_data=self.train_data, - input_params=self.params) - self.data_processor.accept_and_apply_recommendations(input_data=self.train_data, - recommendations=recommendations_for_data) - self.params.accept_and_apply_recommendations(input_data=self.train_data, - recommendations=recommendations_for_params) - else: - recommendations_for_data = None + if self.params.get('use_input_preprocessing'): + # Launch data analyser - it gives recommendations for data preprocessing + recommendations_for_data, recommendations_for_params = \ + self.data_analyser.give_recommendations(input_data=self.train_data, + input_params=self.params) + self.data_processor.accept_and_apply_recommendations(input_data=self.train_data, + recommendations=recommendations_for_data) + self.params.accept_and_apply_recommendations(input_data=self.train_data, + recommendations=recommendations_for_params) + else: + recommendations_for_data = None self._init_remote_if_necessary() @@ -181,11 +180,13 @@ def fit(self, full_train_not_preprocessed = deepcopy(self.train_data) # Final fit for obtained pipeline on full dataset - if self.history and not self.history.is_empty() or not self.current_pipeline.is_fitted: - self._train_pipeline_on_full_dataset(recommendations_for_data, full_train_not_preprocessed) - self.log.message('Final pipeline was fitted') - else: - self.log.message('Already fitted initial pipeline is used') + + with fedot_composer_timer.launch_train_inference(): + if self.history and not self.history.is_empty() or not self.current_pipeline.is_fitted: + self._train_pipeline_on_full_dataset(recommendations_for_data, full_train_not_preprocessed) + self.log.message('Final pipeline was fitted') + else: + self.log.message('Already fitted initial pipeline is used') # Merge API & pipelines encoders if it is required self.current_pipeline.preprocessor = BasePreprocessor.merge_preprocessors( @@ -512,6 +513,19 @@ def explain(self, features: FeaturesType = None, return explainer def return_report(self) -> pd.DataFrame: + """ Functions returns report of time-consuming. + + The following steps are presented in this report: + - 'Data Definition (fit)': Time spent on data definition in fit(). + - 'Data Preprocessing': Total time spent on preprocessing data, includes fitting and predicting stages. + - 'Fitting (summary)': Total time spent for Composing, Tuning and Training Inference. + - 'Composing': Time spent on searching best pipeline. + - 'Train Inference': Time spent on training the found pipeline during composing. + - 'Tuning (composing)': Time spent on hyperparameters tuning in whole fitting, if `with_tune` is True. + - 'Tuning (after)': Time spent on .tune() (hyperparameters tuning) after composing. + - 'Data Definition (predict)'. Time spent on data definition in predict(). + - 'Predicting'. Time spent for predicting (inference). + """ report = fedot_composer_timer.report if self.current_pipeline is None: diff --git a/fedot/utilities/composer_timer.py b/fedot/utilities/composer_timer.py index 4934ef333e..7f5906adda 100644 --- a/fedot/utilities/composer_timer.py +++ b/fedot/utilities/composer_timer.py @@ -6,26 +6,26 @@ class ComposerTimer: def __init__(self): self.data_definition_fit_spend_time = None self.data_definition_predict_spend_time = None - self.applying_recs_fit_spend_time = None - self.applying_recs_predict_spend_time = None self.preprocessing_spend_time = None self.fitting_spend_time = None self.predicting_spend_time = None self.tuning_composing_spend_time = None self.tuning_post_spend_time = None + self.train_on_full_dataset_time = None + self.compoising_spend_time = None self.reset_timer() def reset_timer(self): self.data_definition_fit_spend_time = datetime.timedelta(minutes=0) self.data_definition_predict_spend_time = datetime.timedelta(minutes=0) - self.applying_recs_fit_spend_time = datetime.timedelta(minutes=0) - self.applying_recs_predict_spend_time = datetime.timedelta(minutes=0) self.preprocessing_spend_time = datetime.timedelta(minutes=0) self.fitting_spend_time = datetime.timedelta(minutes=0) self.predicting_spend_time = datetime.timedelta(minutes=0) self.tuning_composing_spend_time = datetime.timedelta(minutes=0) self.tuning_post_spend_time = datetime.timedelta(minutes=0) + self.train_on_full_dataset_time = datetime.timedelta(minutes=0) + self.compoising_spend_time = datetime.timedelta(minutes=0) @contextmanager def launch_data_definition(self, stage: str): @@ -39,18 +39,6 @@ def launch_data_definition(self, stage: str): elif stage == 'predict': self.data_definition_predict_spend_time += ending_time - starting_time - @contextmanager - def launch_applying_recommendations(self, stage: str): - starting_time = datetime.datetime.now() - yield - - ending_time = datetime.datetime.now() - if stage == 'fit': - self.applying_recs_fit_spend_time += ending_time - starting_time - - elif stage == 'predict': - self.applying_recs_predict_spend_time += ending_time - starting_time - @contextmanager def launch_preprocessing(self): starting_time = datetime.datetime.now() @@ -84,17 +72,43 @@ def launch_tuning(self, stage: str): elif stage == 'post': self.tuning_post_spend_time += ending_time - starting_time + @contextmanager + def launch_train_inference(self): + starting_time = datetime.datetime.now() + yield + ending_time = datetime.datetime.now() + self.train_on_full_dataset_time += ending_time - starting_time + + @contextmanager + def launch_composing(self): + starting_time = datetime.datetime.now() + yield + ending_time = datetime.datetime.now() + self.compoising_spend_time += ending_time - starting_time + @property - def report(self): + def report(self) -> dict: + """ Return dict with the next columns: + - 'Data Definition (fit)': Time spent on data definition in fit(). + - 'Data Preprocessing': Total time spent on preprocessing data, includes fitting and predicting stages. + - 'Fitting (summary)': Total time spent for Composing, Tuning and Training Inference. + - 'Composing': Time spent on searching best pipeline. + - 'Train Inference': Time spent on training the found pipeline during composing. + - 'Tuning (composing)': Time spent on hyperparameters tuning in whole fitting, if `with_tune` is True. + - 'Tuning (after)': Time spent on .tune() (hyperparameters tuning) after composing. + - 'Data Definition (predict)'. Time spent on data definition in predict(). + - 'Predicting'. Time spent for predicting (inference). + """ + output = { 'Data Definition (fit)': self.data_definition_fit_spend_time, - 'Applying Recommendation (fit)': self.applying_recs_fit_spend_time, 'Data Preprocessing': self.preprocessing_spend_time, - 'Fitting': self.fitting_spend_time, - 'Tuning (fit)': self.tuning_composing_spend_time, - 'Tuning (post)': self.tuning_post_spend_time, + 'Fitting (summary)': self.fitting_spend_time, + 'Composing': self.compoising_spend_time, + 'Train Inference': self.train_on_full_dataset_time, + 'Tuning (composing)': self.tuning_composing_spend_time, + 'Tuning (after)': self.tuning_post_spend_time, 'Data Definition (predict)': self.data_definition_predict_spend_time, - 'Applying Recommendation (predict)': self.applying_recs_predict_spend_time, 'Predicting': self.predicting_spend_time, }