Skip to content

Commit

Permalink
@andreygetmanov requested fixes (2)
Browse files Browse the repository at this point in the history
  • Loading branch information
aPovidlo committed Feb 22, 2024
1 parent 25eec37 commit 85e1f06
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 54 deletions.
33 changes: 18 additions & 15 deletions fedot/api/api_utils/api_composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,29 @@ def init_cache(self):

def obtain_model(self, train_data: InputData) -> Tuple[Pipeline, Sequence[Pipeline], OptHistory]:
""" Function for composing FEDOT pipeline model """
timeout: float = self.params.timeout
with_tuning = self.params.get('with_tuning')

self.timer = ApiTime(time_for_automl=timeout, with_tuning=with_tuning)
with fedot_composer_timer.launch_composing():
timeout: float = self.params.timeout
with_tuning = self.params.get('with_tuning')

initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data)
self.timer = ApiTime(time_for_automl=timeout, with_tuning=with_tuning)

multi_objective = len(self.metrics) > 1
self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective)
initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data)

self.log.message(f"AutoML configured."
f" Parameters tuning: {with_tuning}."
f" Time limit: {timeout} min."
f" Set of candidate models: {self.params.get('available_operations')}.")
multi_objective = len(self.metrics) > 1
self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective)

self.log.message(f"AutoML configured."
f" Parameters tuning: {with_tuning}."
f" Time limit: {timeout} min."
f" Set of candidate models: {self.params.get('available_operations')}.")

best_pipeline, best_pipeline_candidates, gp_composer = self.compose_pipeline(
train_data,
initial_assumption,
fitted_assumption
)

best_pipeline, best_pipeline_candidates, gp_composer = self.compose_pipeline(
train_data,
initial_assumption,
fitted_assumption
)
if with_tuning:
with fedot_composer_timer.launch_tuning('composing'):
best_pipeline = self.tune_final_pipeline(train_data, best_pipeline)
Expand Down
48 changes: 31 additions & 17 deletions fedot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,17 @@ def fit(self,

self.params.update_available_operations_by_preset(self.train_data)

with fedot_composer_timer.launch_applying_recommendations('fit'):
if self.params.get('use_input_preprocessing'):
# Launch data analyser - it gives recommendations for data preprocessing
recommendations_for_data, recommendations_for_params = \
self.data_analyser.give_recommendations(input_data=self.train_data,
input_params=self.params)
self.data_processor.accept_and_apply_recommendations(input_data=self.train_data,
recommendations=recommendations_for_data)
self.params.accept_and_apply_recommendations(input_data=self.train_data,
recommendations=recommendations_for_params)
else:
recommendations_for_data = None
if self.params.get('use_input_preprocessing'):
# Launch data analyser - it gives recommendations for data preprocessing
recommendations_for_data, recommendations_for_params = \
self.data_analyser.give_recommendations(input_data=self.train_data,
input_params=self.params)
self.data_processor.accept_and_apply_recommendations(input_data=self.train_data,
recommendations=recommendations_for_data)
self.params.accept_and_apply_recommendations(input_data=self.train_data,
recommendations=recommendations_for_params)
else:
recommendations_for_data = None

self._init_remote_if_necessary()

Expand All @@ -181,11 +180,13 @@ def fit(self,

full_train_not_preprocessed = deepcopy(self.train_data)
# Final fit for obtained pipeline on full dataset
if self.history and not self.history.is_empty() or not self.current_pipeline.is_fitted:
self._train_pipeline_on_full_dataset(recommendations_for_data, full_train_not_preprocessed)
self.log.message('Final pipeline was fitted')
else:
self.log.message('Already fitted initial pipeline is used')

with fedot_composer_timer.launch_train_inference():
if self.history and not self.history.is_empty() or not self.current_pipeline.is_fitted:
self._train_pipeline_on_full_dataset(recommendations_for_data, full_train_not_preprocessed)
self.log.message('Final pipeline was fitted')
else:
self.log.message('Already fitted initial pipeline is used')

# Merge API & pipelines encoders if it is required
self.current_pipeline.preprocessor = BasePreprocessor.merge_preprocessors(
Expand Down Expand Up @@ -512,6 +513,19 @@ def explain(self, features: FeaturesType = None,
return explainer

def return_report(self) -> pd.DataFrame:
""" Functions returns report of time-consuming.
The following steps are presented in this report:
- 'Data Definition (fit)': Time spent on data definition in fit().
- 'Data Preprocessing': Total time spent on preprocessing data, includes fitting and predicting stages.
- 'Fitting (summary)': Total time spent for Composing, Tuning and Training Inference.
- 'Composing': Time spent on searching best pipeline.
- 'Train Inference': Time spent on training the found pipeline during composing.
- 'Tuning (composing)': Time spent on hyperparameters tuning in whole fitting, if `with_tune` is True.
- 'Tuning (after)': Time spent on .tune() (hyperparameters tuning) after composing.
- 'Data Definition (predict)'. Time spent on data definition in predict().
- 'Predicting'. Time spent for predicting (inference).
"""
report = fedot_composer_timer.report

if self.current_pipeline is None:
Expand Down
58 changes: 36 additions & 22 deletions fedot/utilities/composer_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@ class ComposerTimer:
def __init__(self):
    """Collects wall-clock timings for each stage of the FEDOT API run.

    All timer accumulators are declared here and then zeroed by
    ``reset_timer()``.
    """
    # NOTE(review): 'compoising_spend_time' looks like a typo of
    # 'composing_spend_time'; kept as-is because other code reads it.
    for timer_attr in (
        'data_definition_fit_spend_time',
        'data_definition_predict_spend_time',
        'applying_recs_fit_spend_time',
        'applying_recs_predict_spend_time',
        'preprocessing_spend_time',
        'fitting_spend_time',
        'predicting_spend_time',
        'tuning_composing_spend_time',
        'tuning_post_spend_time',
        'train_on_full_dataset_time',
        'compoising_spend_time',
    ):
        setattr(self, timer_attr, None)

    self.reset_timer()

def reset_timer(self):
    """Reset every stage accumulator to a zero timedelta."""
    zero = datetime.timedelta(minutes=0)
    for timer_attr in (
        'data_definition_fit_spend_time',
        'data_definition_predict_spend_time',
        'applying_recs_fit_spend_time',
        'applying_recs_predict_spend_time',
        'preprocessing_spend_time',
        'fitting_spend_time',
        'predicting_spend_time',
        'tuning_composing_spend_time',
        'tuning_post_spend_time',
        'train_on_full_dataset_time',
        'compoising_spend_time',
    ):
        setattr(self, timer_attr, zero)

@contextmanager
def launch_data_definition(self, stage: str):
Expand All @@ -39,18 +39,6 @@ def launch_data_definition(self, stage: str):
elif stage == 'predict':
self.data_definition_predict_spend_time += ending_time - starting_time

@contextmanager
def launch_applying_recommendations(self, stage: str):
    """Accumulate time spent applying preprocessing recommendations.

    Args:
        stage: either 'fit' or 'predict'; selects which accumulator
            receives the elapsed time. Any other value is silently ignored.
    """
    started = datetime.datetime.now()
    yield
    elapsed = datetime.datetime.now() - started
    if stage == 'fit':
        self.applying_recs_fit_spend_time += elapsed
    elif stage == 'predict':
        self.applying_recs_predict_spend_time += elapsed

@contextmanager
def launch_preprocessing(self):
starting_time = datetime.datetime.now()
Expand Down Expand Up @@ -84,17 +72,43 @@ def launch_tuning(self, stage: str):
elif stage == 'post':
self.tuning_post_spend_time += ending_time - starting_time

@contextmanager
def launch_train_inference(self):
    """Accumulate the time of the final pipeline fit on the full dataset."""
    started = datetime.datetime.now()
    yield
    self.train_on_full_dataset_time += datetime.datetime.now() - started

@contextmanager
def launch_composing(self):
    """Accumulate the time spent inside the pipeline composing stage."""
    # NOTE(review): attribute 'compoising_spend_time' appears to be a typo
    # of 'composing_spend_time'; preserved because other code reads it.
    started = datetime.datetime.now()
    yield
    self.compoising_spend_time += datetime.datetime.now() - started

@property
def report(self):
def report(self) -> dict:
""" Return dict with the next columns:
- 'Data Definition (fit)': Time spent on data definition in fit().
- 'Data Preprocessing': Total time spent on preprocessing data, includes fitting and predicting stages.
- 'Fitting (summary)': Total time spent for Composing, Tuning and Training Inference.
- 'Composing': Time spent on searching best pipeline.
- 'Train Inference': Time spent on training the found pipeline during composing.
- 'Tuning (composing)': Time spent on hyperparameters tuning in whole fitting, if `with_tune` is True.
- 'Tuning (after)': Time spent on .tune() (hyperparameters tuning) after composing.
- 'Data Definition (predict)'. Time spent on data definition in predict().
- 'Predicting'. Time spent for predicting (inference).
"""

output = {
'Data Definition (fit)': self.data_definition_fit_spend_time,
'Applying Recommendation (fit)': self.applying_recs_fit_spend_time,
'Data Preprocessing': self.preprocessing_spend_time,
'Fitting': self.fitting_spend_time,
'Tuning (fit)': self.tuning_composing_spend_time,
'Tuning (post)': self.tuning_post_spend_time,
'Fitting (summary)': self.fitting_spend_time,
'Composing': self.compoising_spend_time,
'Train Inference': self.train_on_full_dataset_time,
'Tuning (composing)': self.tuning_composing_spend_time,
'Tuning (after)': self.tuning_post_spend_time,
'Data Definition (predict)': self.data_definition_predict_spend_time,
'Applying Recommendation (predict)': self.applying_recs_predict_spend_time,
'Predicting': self.predicting_spend_time,
}

Expand Down

0 comments on commit 85e1f06

Please sign in to comment.