Return changes, fix bug in boostings
aPovidlo committed Jul 31, 2024
1 parent f684be4 commit afed3f9
Showing 2 changed files with 23 additions and 36 deletions.
@@ -37,31 +37,26 @@ def fit(self, input_data: InputData):
         if self.params.get('use_eval_set'):
             train_input, eval_input = train_test_data_setup(input_data)
 
-            train_input = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
-            eval_input = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
-
-            train_x, train_y = train_input.drop(columns=['target']), train_input['target']
-            eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']
+            X_train, y_train = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
+            X_eval, y_eval = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
 
             self.model.eval_metric = self.set_eval_metric(self.classes_)
 
-            self.model.fit(X=train_x, y=train_y, eval_set=[(eval_x, eval_y)], verbose=self.model_params['verbosity'])
+            self.model.fit(X=X_train, y=y_train, eval_set=[(X_eval, y_eval)], verbose=self.model_params['verbosity'])
         else:
-            train_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
+            X_train, y_train = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
             self.features_names = input_data.features_names
-            train_x, train_y = train_data.drop(columns=['target']), train_data['target']
 
-            self.model.fit(X=train_x, y=train_y, verbose=self.model_params['verbosity'])
+            self.model.fit(X=X_train, y=y_train, verbose=self.model_params['verbosity'])
 
         return self.model
 
     def predict(self, input_data: InputData):
         if self.params.get('enable_categorical'):
             input_data = input_data.get_not_encoded_data()
 
-        input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
-        train_x, _ = input_data.drop(columns=['target']), input_data['target']
-        prediction = self.model.predict(train_x)
+        X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
+        prediction = self.model.predict(X)
 
         return prediction
 
@@ -89,7 +84,7 @@ def plot_feature_importance(self, importance_type='weight'):
     @staticmethod
     def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
         dataframe = pd.DataFrame(data=data.features)
-        dataframe['target'] = data.target
+        dataframe['target'] = np.ravel(data.target)
 
         if identify_cats and data.categorical_idx is not None:
             for col in dataframe.columns[data.categorical_idx]:
@@ -99,7 +94,7 @@ def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
             for col in dataframe.columns[data.numerical_idx]:
                 dataframe[col] = dataframe[col].astype('float')
 
-        return dataframe
+        return dataframe.drop(columns=['target']), dataframe['target']
 
     @staticmethod
     def set_eval_metric(n_classes):
@@ -127,9 +122,8 @@ def predict_proba(self, input_data: InputData):
         if self.params.get('enable_categorical'):
             input_data = input_data.get_not_encoded_data()
 
-        input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
-        train_x = input_data.drop(columns=['target'])
-        prediction = self.model.predict_proba(train_x)
+        X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
+        prediction = self.model.predict_proba(X)
         return prediction
 
 
@@ -161,27 +155,23 @@ def fit(self, input_data: InputData):
         if self.params.get('use_eval_set'):
             train_input, eval_input = train_test_data_setup(input_data)
 
-            train_input = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
-            eval_input = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
-
-            train_x, train_y = train_input.drop(columns=['target']), train_input['target']
-            eval_x, eval_y = eval_input.drop(columns=['target']), eval_input['target']
+            X_train, y_train = self.convert_to_dataframe(train_input, identify_cats=self.params.get('enable_categorical'))
+            X_eval, y_eval = self.convert_to_dataframe(eval_input, identify_cats=self.params.get('enable_categorical'))
 
             eval_metric = self.set_eval_metric(self.classes_)
             callbacks = self.update_callbacks()
 
             self.model.fit(
-                X=train_x, y=train_y,
-                eval_set=[(eval_x, eval_y)], eval_metric=eval_metric,
+                X=X_train, y=y_train,
+                eval_set=[(X_eval, y_eval)], eval_metric=eval_metric,
                 callbacks=callbacks
             )
 
         else:
-            train_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
-            train_x, train_y = train_data.drop(columns=['target']), train_data['target']
+            X_train, y_train = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
 
             self.model.fit(
-                X=train_x, y=train_y,
+                X=X_train, y=y_train,
             )
 
         return self.model
@@ -190,9 +180,8 @@ def predict(self, input_data: InputData):
         if self.params.get('enable_categorical'):
             input_data = input_data.get_not_encoded_data()
 
-        input_data = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
-        train_x = input_data.drop(columns=['target'])
-        prediction = self.model.predict(train_x)
+        X, _ = self.convert_to_dataframe(input_data, identify_cats=self.params.get('enable_categorical'))
+        prediction = self.model.predict(X)
 
         return prediction
 
@@ -228,7 +217,7 @@ def set_eval_metric(n_classes):
     @staticmethod
     def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
         dataframe = pd.DataFrame(data=data.features, columns=data.features_names)
-        dataframe['target'] = data.target
+        dataframe['target'] = np.ravel(data.target)
 
         if identify_cats and data.categorical_idx is not None:
             for col in dataframe.columns[data.categorical_idx]:
@@ -238,7 +227,7 @@ def convert_to_dataframe(data: Optional[InputData], identify_cats: bool):
             for col in dataframe.columns[data.numerical_idx]:
                 dataframe[col] = dataframe[col].astype('float')
 
-        return dataframe
+        return dataframe.drop(columns=['target']), dataframe['target']
 
     def plot_feature_importance(self):
         plot_feature_importance(self.features_names, self.model.feature_importances_)
@@ -258,9 +247,8 @@ def predict_proba(self, input_data: InputData):
         if self.params.get('enable_categorical'):
             input_data = input_data.get_not_encoded_data()
 
-        input_data = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
-        train_x = input_data.drop(columns=['target'])
-        prediction = self.model.predict_proba(train_x)
+        X, _ = self.convert_to_dataframe(input_data, self.params.get('enable_categorical'))
+        prediction = self.model.predict_proba(X)
         return prediction
 
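Note on the hunks above: every change in this file follows the same refactor. convert_to_dataframe no longer returns a single DataFrame carrying a 'target' column; it now returns the feature frame and the target separately, with the target flattened via np.ravel, so fit/predict/predict_proba unpack X and y directly instead of dropping the column themselves. Below is a minimal sketch of the new helper contract pieced together from the diff; the body of the categorical loop and the guard around the numerical loop are collapsed in the hunks and are assumptions here, and the stand-in 'data' object only mimics the InputData fields used.

from typing import Tuple

import numpy as np
import pandas as pd


def convert_to_dataframe(data, identify_cats: bool) -> Tuple[pd.DataFrame, pd.Series]:
    # 'data' stands in for FEDOT's InputData (features, target, categorical_idx,
    # numerical_idx); simplified here to keep the sketch self-contained.
    dataframe = pd.DataFrame(data=data.features)
    # np.ravel flattens a column-shaped (n, 1) target into (n,)
    dataframe['target'] = np.ravel(data.target)

    if identify_cats and data.categorical_idx is not None:
        for col in dataframe.columns[data.categorical_idx]:
            dataframe[col] = dataframe[col].astype('category')  # assumed cast, hidden in the collapsed hunk

    if data.numerical_idx is not None:  # assumed guard, hidden in the collapsed hunk
        for col in dataframe.columns[data.numerical_idx]:
            dataframe[col] = dataframe[col].astype('float')

    # Features and target come back as a pair, so callers unpack them directly:
    #     X_train, y_train = self.convert_to_dataframe(train_input, identify_cats=...)
    return dataframe.drop(columns=['target']), dataframe['target']
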
1 change: 0 additions & 1 deletion fedot/preprocessing/preprocessing.py
@@ -221,7 +221,6 @@ def _prepare_obligatory_unimodal(self, data: InputData, source_name: str,
             data.target = self._apply_target_encoding(data, source_name)
         else:
             self.types_correctors[source_name].convert_data_for_predict(data)
-            data.target = self._apply_target_encoding(data, source_name)
 
         # TODO andreygetmanov target encoding must be obligatory for all data types
         if data_type_is_text(data):
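On the preprocessing side, the one-line deletion drops the second _apply_target_encoding call from the predict branch, so the target is encoded only while preparing data for fit (on the predict path a target may not be present at all). A compressed sketch of the resulting branch follows; the fit/predict switch and the fit-path corrector call sit outside the visible hunk, so those parts are assumptions for illustration, not taken from the diff.

def prepare_obligatory_unimodal_sketch(data, corrector, apply_target_encoding, is_fit_stage: bool):
    # Hypothetical, simplified stand-in for the touched branch of
    # _prepare_obligatory_unimodal: only the else-branch body reflects the diff.
    if is_fit_stage:
        corrector.convert_data_for_fit(data)        # assumed fit-path call
        data.target = apply_target_encoding(data)   # target encoded only while fitting
    else:
        corrector.convert_data_for_predict(data)
        # Target encoding is no longer applied here; re-encoding the target on
        # the predict path is the line removed by this commit.
    return data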