Skip to content

Commit

Permalink
Remove unnecessary Custom classes
Browse files Browse the repository at this point in the history
  • Loading branch information
diogomatoschaves committed Feb 25, 2024
1 parent fd17780 commit 02d9e21
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 75 deletions.
2 changes: 0 additions & 2 deletions stratestic/strategies/machine_learning/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,4 @@
from stratestic.strategies.machine_learning.helpers._pipeline_custom_classes import (
FeatureSelector,
CustomOneHotEncoder,
CustomStandardScaler,
CustomPolynomialFeatures
)
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder


class FeatureSelector(BaseEstimator, TransformerMixin):
Expand Down Expand Up @@ -90,69 +89,3 @@ def transform(self, X, y=None, **transform_params):
transformed_data = super(CustomOneHotEncoder, self).transform(X, **transform_params).toarray()

return pd.DataFrame(data=transformed_data, columns=self.columns)


class CustomStandardScaler(StandardScaler):
    """
    A StandardScaler that remembers the column labels of the data it was fitted on.

    Only `fit` is overridden; `transform` and every other method are inherited
    unchanged from `StandardScaler`.

    Parameters
    ----------
    None beyond those of `StandardScaler` (the constructor is not overridden).

    Attributes
    ----------
    features : column labels or None
        `X.columns` of the data passed to `fit`, or None when `X` has no
        `columns` attribute (e.g. a NumPy array).

    Methods
    -------
    fit(X, y=None, **fit_params)
        Stores the column labels of X and fits the scaler.
    """

    def fit(self, X, y=None, **fit_params):
        """
        Record the column labels of X, then fit the underlying scaler.

        Parameters
        ----------
        X : array-like
            Training data. When it exposes a `columns` attribute the labels
            are kept in `self.features`.
        y : ignored
            Forwarded unchanged to `StandardScaler.fit`.
        **fit_params
            Extra keyword arguments forwarded to `StandardScaler.fit`.

        Returns
        -------
        self
            Whatever `StandardScaler.fit` returns.
        """
        # Inputs without labels (plain arrays) used to raise AttributeError
        # here; fall back to None so the scaler is still usable on them.
        self.features = getattr(X, "columns", None)

        return super(CustomStandardScaler, self).fit(X, y, **fit_params)


class CustomPolynomialFeatures(PolynomialFeatures):
    """
    A PolynomialFeatures transformer that returns a labelled DataFrame.

    The input column labels are remembered at fit time and used to name the
    generated polynomial / interaction columns of the transformed output.

    Parameters
    ----------
    None beyond those of `PolynomialFeatures` (the constructor is not overridden).

    Attributes
    ----------
    columns : column labels or None
        `X.columns` of the data passed to `fit`, or None when `X` has no
        `columns` attribute (e.g. a NumPy array).

    Methods
    -------
    fit(X, y=None)
        Fits the transformer to X and stores the input column labels.
    transform(X, **transform_params)
        Expands X with polynomial features and returns a DataFrame with one
        named column per generated feature.
    """

    def fit(self, X, y=None, **kwargs):
        """
        Record the column labels of X, then fit the underlying transformer.

        Parameters
        ----------
        X : array-like
            Training data.
        y : ignored
            Forwarded unchanged to `PolynomialFeatures.fit`.
        **kwargs
            Accepted for call compatibility; not forwarded.

        Returns
        -------
        self
            Whatever `PolynomialFeatures.fit` returns.
        """
        # Tolerate unlabelled input instead of raising AttributeError.
        self.columns = getattr(X, "columns", None)

        return super(CustomPolynomialFeatures, self).fit(X, y)

    def transform(self, X, **transform_params):
        """
        Expand X with polynomial features and wrap the result in a DataFrame.

        Parameters
        ----------
        X : array-like
            Data to transform.
        **transform_params
            Accepted for call compatibility; not forwarded.

        Returns
        -------
        pandas.DataFrame
            The expanded features, one column per generated term.
        """
        transformed_data = super(CustomPolynomialFeatures, self).transform(X)

        # The expansion usually yields more columns than the input has, so
        # labelling with the raw input names fails on a shape mismatch for
        # degree > 1 or include_bias=True. Ask the transformer for the names
        # of the generated terms instead (None falls back to default names).
        feature_names = self.get_feature_names_out(self.columns)

        return pd.DataFrame(data=transformed_data, columns=feature_names)
17 changes: 12 additions & 5 deletions stratestic/strategies/machine_learning/helpers/_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sklearn.base import is_classifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

from stratestic.strategies.machine_learning.helpers._defaults import estimator_mapping, estimator_params
from stratestic.strategies.machine_learning.helpers._helpers import train_test_split_ts
Expand All @@ -13,8 +14,6 @@
from stratestic.strategies.machine_learning.helpers._pipeline_custom_classes import (
FeatureSelector,
CustomOneHotEncoder,
CustomStandardScaler,
CustomPolynomialFeatures
)

grid_search_params_defaults = {
Expand All @@ -27,7 +26,7 @@
configure_logger()


def build_pipeline(estimator, polynomial_degree=1):
def build_pipeline(estimator, polynomial_degree=1, interaction_only=False):
"""
Constructs a machine learning pipeline with optional grid search capability.
Expand All @@ -37,6 +36,10 @@ def build_pipeline(estimator, polynomial_degree=1):
The estimator (machine learning algorithm) to use.
polynomial_degree : int, optional
Degree of polynomial features to generate. Default is 1.
interaction_only : bool, optional
If `True`, only interaction features are produced: products of at most
`polynomial_degree` *distinct* input features, i.e. terms with power of 2
or higher of the same input feature are excluded. Default is False.
Returns
-------
Expand All @@ -52,8 +55,12 @@ def build_pipeline(estimator, polynomial_degree=1):
('features', FeatureUnion([
('num_features', Pipeline([
('selector', FeatureSelector('num')),
('feature_polynomials', CustomPolynomialFeatures(degree=polynomial_degree, include_bias=False)),
('scaling', CustomStandardScaler())
('feature_polynomials', PolynomialFeatures(
degree=polynomial_degree,
include_bias=False,
interaction_only=interaction_only
)),
('scaling', StandardScaler())
])),
('cat_features', Pipeline([
('selector', FeatureSelector('cat')),
Expand Down

0 comments on commit 02d9e21

Please sign in to comment.