Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Popular and PopInCategory configs #188

1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

### Added
- Configs for Popular, PopularInCategory models ([#188](https://github.com/MobileTeleSystems/RecTools/pull/188))
- Configs for EASE, Random, PureSVD models ([#178](https://github.com/MobileTeleSystems/RecTools/pull/178))
- Configs for implicit models ([#167](https://github.com/MobileTeleSystems/RecTools/pull/167))

Expand Down
138 changes: 105 additions & 33 deletions rectools/models/popular.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

import numpy as np
import pandas as pd
import typing_extensions as tpe
from pydantic import PlainSerializer, PlainValidator
from tqdm.auto import tqdm

from rectools import Columns, InternalIds
Expand All @@ -41,17 +43,89 @@ class Popularity(Enum):
SUM_WEIGHT = "sum_weight"


def _deserialize_timedelta(td: tp.Union[dict, timedelta]) -> timedelta:
    """
    Convert a config value into a ``timedelta``.

    Parameters
    ----------
    td : dict or timedelta
        Either a ready ``timedelta`` (returned unchanged) or a dict of
        ``timedelta`` keyword arguments, e.g. ``{"days": 7}``.

    Returns
    -------
    timedelta
        The value itself, or ``timedelta(**td)`` for a dict.

    Raises
    ------
    ValueError
        If `td` is neither a ``timedelta`` nor a dict.
    """
    if isinstance(td, timedelta):
        return td
    if isinstance(td, dict):
        return timedelta(**td)
    # This function is plugged in as a pydantic `PlainValidator`, which replaces
    # pydantic's own type validation. Anything returned from here is accepted
    # as-is, so unsupported inputs have to be rejected explicitly.
    raise ValueError(
        f"Expected `timedelta` or dict of `timedelta` keyword arguments, got {type(td).__name__}: {td!r}"
    )


def _serialize_timedelta(td: timedelta) -> tp.Dict[str, int]:
    """
    Convert a ``timedelta`` into a dict of its non-zero components.

    Parameters
    ----------
    td : timedelta
        Value to serialize.

    Returns
    -------
    dict
        Mapping with the keys ``"days"``, ``"seconds"`` and ``"microseconds"``;
        components equal to zero are omitted, so a zero ``timedelta`` gives ``{}``.
        The result can be passed back as ``timedelta(**result)``.
    """
    components = (
        ("days", td.days),
        ("seconds", td.seconds),
        ("microseconds", td.microseconds),
    )
    return {name: value for name, value in components if value != 0}


# `timedelta` annotated for use in pydantic configs: input is validated with
# `_deserialize_timedelta` (accepts a `timedelta` or a dict of `timedelta` kwargs)
# and output is serialized with `_serialize_timedelta` (dict of non-zero components).
TimeDelta = tpe.Annotated[
timedelta,
PlainValidator(func=_deserialize_timedelta),
PlainSerializer(func=_serialize_timedelta),
]


class PopularModelConfig(ModelConfig):
    """
    Config for `PopularModel`.

    Fields mirror the `PopularModel` constructor arguments:

    - ``popularity``: how item popularity is measured, ``Popularity.N_USERS`` by default.
    - ``period``: if set, only interactions not older than the latest interaction
      minus this period are used.
    - ``begin_from``: if set, only interactions from this moment onwards are used.
    - ``add_cold``, ``inverse``: boolean flags passed to the model unchanged,
      both ``False`` by default.
    """

    popularity: Popularity = Popularity.N_USERS
    # `TimeDelta` (not plain `timedelta`) so that the value can be given as a dict
    # of `timedelta` keyword arguments and is serialized back to such a dict.
    period: tp.Optional[TimeDelta] = None
    begin_from: tp.Optional[datetime] = None
    add_cold: bool = False
    inverse: bool = False


class PopularModel(FixedColdRecoModelMixin, ModelBase):
PopularityOptions = tp.Literal["n_users", "n_interactions", "mean_weight", "sum_weight"]


class PopularModelMixin:
    """Mixin for models based on popularity."""

    @classmethod
    def _validate_popularity(
        cls,
        popularity: PopularityOptions,
    ) -> Popularity:
        """
        Convert a popularity option given as a string into the `Popularity` enum.

        Raises
        ------
        ValueError
            If `popularity` is not a value of any `Popularity` member.
        """
        try:
            return Popularity(popularity)
        except ValueError as err:
            possible_values = {item.value for item in Popularity.__members__.values()}
            # Chain explicitly so the traceback shows the enum lookup as the direct cause.
            raise ValueError(f"`popularity` must be one of the {possible_values}. Got {popularity}.") from err

    @classmethod
    def _validate_time_attributes(
        cls,
        period: tp.Optional[TimeDelta],
        begin_from: tp.Optional[datetime],
    ) -> None:
        """
        Check that `period` and `begin_from` are not set together.

        Raises
        ------
        ValueError
            If both `period` and `begin_from` are not ``None``.
        """
        if period is not None and begin_from is not None:
            raise ValueError("Only one of `period` and `begin_from` can be set")

    @classmethod
    def _filter_interactions(
        cls, interactions: pd.DataFrame, period: tp.Optional[TimeDelta], begin_from: tp.Optional[datetime]
    ) -> pd.DataFrame:
        """
        Keep only interactions that fall into the requested time window.

        `begin_from` takes precedence: if it is set, rows with
        ``Columns.Datetime >= begin_from`` are kept. Otherwise, if `period` is set,
        the window starts at the latest interaction datetime minus `period`.
        If neither is set, `interactions` is returned unchanged.
        """
        if begin_from is None and period is not None:
            # Window is counted back from the most recent interaction in the data.
            begin_from = interactions[Columns.Datetime].max() - period
        if begin_from is None:
            return interactions
        return interactions.loc[interactions[Columns.Datetime] >= begin_from]

    @classmethod
    def _get_groupby_col_and_agg_func(cls, popularity: Popularity) -> tp.Tuple[str, str]:
        """
        Return the column to aggregate and the aggregation function name for `popularity`.

        Raises
        ------
        ValueError
            If `popularity` is not one of the handled `Popularity` members.
        """
        if popularity == Popularity.N_USERS:
            return Columns.User, "nunique"
        if popularity == Popularity.N_INTERACTIONS:
            return Columns.User, "count"
        if popularity == Popularity.MEAN_WEIGHT:
            return Columns.Weight, "mean"
        if popularity == Popularity.SUM_WEIGHT:
            return Columns.Weight, "sum"
        raise ValueError(f"Unexpected popularity {popularity}")


class PopularModel(FixedColdRecoModelMixin, PopularModelMixin, ModelBase[PopularModelConfig]):
"""
Model generating recommendations based on popularity of items.

Expand Down Expand Up @@ -87,25 +161,22 @@ class PopularModel(FixedColdRecoModelMixin, ModelBase):
recommends_for_warm = False
recommends_for_cold = True

config_class = PopularModelConfig

def __init__(
self,
popularity: tp.Literal["n_users", "n_interactions", "mean_weight", "sum_weight"] = "n_users",
popularity: PopularityOptions = "n_users",
period: tp.Optional[timedelta] = None,
begin_from: tp.Optional[datetime] = None,
add_cold: bool = False,
inverse: bool = False,
verbose: int = 0,
):
super().__init__(verbose=verbose)

try:
self.popularity = Popularity(popularity)
except ValueError:
feldlime marked this conversation as resolved.
Show resolved Hide resolved
possible_values = {item.value for item in Popularity.__members__.values()}
raise ValueError(f"`popularity` must be one of the {possible_values}. Got {popularity}.")

if period is not None and begin_from is not None:
raise ValueError("Only one of `period` and `begin_from` can be set")
super().__init__(
verbose=verbose,
)
self.popularity = self._validate_popularity(popularity)
self._validate_time_attributes(period, begin_from)
self.period = period
self.begin_from = begin_from

Expand All @@ -114,16 +185,29 @@ def __init__(

self.popularity_list: tp.Tuple[InternalIdsArray, ScoresArray]

def _filter_interactions(self, interactions: pd.DataFrame) -> pd.DataFrame:
if self.begin_from is not None:
interactions = interactions.loc[interactions[Columns.Datetime] >= self.begin_from]
elif self.period is not None:
begin_from = interactions[Columns.Datetime].max() - self.period
interactions = interactions.loc[interactions[Columns.Datetime] >= begin_from]
return interactions
def _get_config(self) -> PopularModelConfig:
    """Build a `PopularModelConfig` from the current attribute values of the model."""
    config = PopularModelConfig(
        popularity=self.popularity,
        period=self.period,
        begin_from=self.begin_from,
        add_cold=self.add_cold,
        inverse=self.inverse,
        verbose=self.verbose,
    )
    return config

@classmethod
def _from_config(cls, config: PopularModelConfig) -> tpe.Self:
    """Create a model instance from the values stored in `config`."""
    # The config holds `popularity` as an enum member; its string value is
    # passed on to the constructor.
    popularity_name = config.popularity.value
    model = cls(
        popularity=popularity_name,
        period=config.period,
        begin_from=config.begin_from,
        add_cold=config.add_cold,
        inverse=config.inverse,
        verbose=config.verbose,
    )
    return model

def _fit(self, dataset: Dataset) -> None: # type: ignore
interactions = self._filter_interactions(dataset.interactions.df)
interactions = self._filter_interactions(dataset.interactions.df, self.period, self.begin_from)

col, func = self._get_groupby_col_and_agg_func(self.popularity)
items_scores = interactions.groupby(Columns.Item)[col].agg(func).sort_values(ascending=False)
Expand All @@ -141,18 +225,6 @@ def _fit(self, dataset: Dataset) -> None: # type: ignore

self.popularity_list = (items, scores)

@classmethod
def _get_groupby_col_and_agg_func(cls, popularity: Popularity) -> tp.Tuple[str, str]:
if popularity == Popularity.N_USERS:
return Columns.User, "nunique"
if popularity == Popularity.N_INTERACTIONS:
return Columns.User, "count"
if popularity == Popularity.MEAN_WEIGHT:
return Columns.Weight, "mean"
if popularity == Popularity.SUM_WEIGHT:
return Columns.Weight, "sum"
raise ValueError(f"Unexpected popularity {popularity}")

def _recommend_u2i(
self,
user_ids: InternalIdsArray,
Expand Down
85 changes: 65 additions & 20 deletions rectools/models/popular_in_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@

import numpy as np
import pandas as pd
import typing_extensions as tpe

from rectools import Columns, InternalIds
from rectools.dataset import Dataset, Interactions, features
from rectools.types import InternalIdsArray

from .base import Scores
from .popular import PopularModel
from .base import ModelBase, Scores
from .popular import FixedColdRecoModelMixin, PopularModel, PopularModelConfig, PopularModelMixin, PopularityOptions


class MixingStrategy(Enum):
Expand All @@ -44,7 +45,18 @@ class RatioStrategy(Enum):
PROPORTIONAL = "proportional"


class PopularInCategoryModel(PopularModel):
class PopularInCategoryModelConfig(PopularModelConfig):
"""
Config for `PopularInCategoryModel`.

Adds category-related settings on top of the fields inherited from `PopularModelConfig`.
"""

# Required field: it has no default value.
category_feature: str
# `None` by default; the model constructor rejects non-positive numbers.
n_categories: tp.Optional[int] = None
mixing_strategy: MixingStrategy = MixingStrategy.ROTATE
ratio_strategy: RatioStrategy = RatioStrategy.PROPORTIONAL


class PopularInCategoryModel(
FixedColdRecoModelMixin, PopularModelMixin, ModelBase[PopularInCategoryModelConfig]
): # pylint: disable=too-many-instance-attributes
"""
Model generating recommendations based on popularity of items.

Expand Down Expand Up @@ -98,40 +110,34 @@ class PopularInCategoryModel(PopularModel):
recommends_for_warm = False
recommends_for_cold = True

config_class = PopularInCategoryModelConfig

def __init__(
self,
category_feature: str,
n_categories: tp.Optional[int] = None,
mixing_strategy: tp.Literal["rotate", "group"] = "rotate",
ratio_strategy: tp.Literal["proportional", "equal"] = "proportional",
popularity: tp.Literal["n_users", "n_interactions", "mean_weight", "sum_weight"] = "n_users",
popularity: PopularityOptions = "n_users",
period: tp.Optional[timedelta] = None,
begin_from: tp.Optional[datetime] = None,
add_cold: bool = False,
inverse: bool = False,
verbose: int = 0,
):
super().__init__(
popularity=popularity,
period=period,
begin_from=begin_from,
add_cold=add_cold,
inverse=inverse,
verbose=verbose,
)

self.category_feature = category_feature
self.category_columns: tp.List[int] = []
self.category_interactions: tp.Dict[int, pd.DataFrame] = {}
self.category_scores: pd.Series
self.models: tp.Dict[int, PopularModel] = {}
self.n_effective_categories: int
self.popularity = self._validate_popularity(popularity)
self._validate_time_attributes(period, begin_from)
self.period = period
self.begin_from = begin_from

if n_categories is None or n_categories > 0:
self.n_categories = n_categories
else:
raise ValueError(f"`n_categories` must be a positive number. Got {n_categories}")
self.add_cold = add_cold
self.inverse = inverse

self.category_feature = category_feature
try:
self.mixing_strategy = MixingStrategy(mixing_strategy)
except ValueError:
Expand All @@ -143,6 +149,45 @@ def __init__(
except ValueError:
possible_values = {item.value for item in RatioStrategy.__members__.values()}
raise ValueError(f"`ratio_strategy` must be one of the {possible_values}. Got {ratio_strategy}.")
self.category_columns: tp.List[int] = []
self.category_interactions: tp.Dict[int, pd.DataFrame] = {}
self.category_scores: pd.Series
self.models: tp.Dict[int, PopularModel] = {}
self.n_effective_categories: int

if n_categories is None or n_categories > 0:
self.n_categories = n_categories
else:
raise ValueError(f"`n_categories` must be a positive number. Got {n_categories}")

def _get_config(self) -> PopularInCategoryModelConfig:
    """Build a `PopularInCategoryModelConfig` from the current attribute values of the model."""
    config = PopularInCategoryModelConfig(
        category_feature=self.category_feature,
        n_categories=self.n_categories,
        mixing_strategy=self.mixing_strategy,
        ratio_strategy=self.ratio_strategy,
        popularity=self.popularity,
        period=self.period,
        begin_from=self.begin_from,
        add_cold=self.add_cold,
        inverse=self.inverse,
        verbose=self.verbose,
    )
    return config

@classmethod
def _from_config(cls, config: PopularInCategoryModelConfig) -> tpe.Self:
    """Create a model instance from the values stored in `config`."""
    # The config holds these options as enum members; their string values are
    # passed on to the constructor.
    mixing_name = config.mixing_strategy.value
    ratio_name = config.ratio_strategy.value
    popularity_name = config.popularity.value
    model = cls(
        category_feature=config.category_feature,
        n_categories=config.n_categories,
        mixing_strategy=mixing_name,
        ratio_strategy=ratio_name,
        popularity=popularity_name,
        period=config.period,
        begin_from=config.begin_from,
        add_cold=config.add_cold,
        inverse=config.inverse,
        verbose=config.verbose,
    )
    return model

def _check_category_feature(self, dataset: Dataset) -> None:
if not dataset.item_features:
Expand Down Expand Up @@ -200,7 +245,7 @@ def _fit(self, dataset: Dataset) -> None: # type: ignore
self.n_effective_categories = 0

self._check_category_feature(dataset)
interactions = self._filter_interactions(dataset.interactions.df)
interactions = self._filter_interactions(dataset.interactions.df, self.period, self.begin_from)
self._calc_category_scores(dataset, interactions)
self._define_categories_for_analysis()

Expand Down
2 changes: 1 addition & 1 deletion tests/model_selection/test_cross_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def setup_method(self) -> None:
"intersection": Intersection(1),
}

self.models = {
self.models: tp.Dict[str, ModelBase] = {
"popular": PopularModel(),
"random": RandomModel(random_state=42),
}
Expand Down
3 changes: 1 addition & 2 deletions tests/models/test_ease.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,7 @@ def test_get_config_and_from_config_compatibility(self, simple_types: bool) -> N
"num_threads": 1,
"verbose": 1,
}
model = EASEModel()
assert_get_config_and_from_config_compatibility(model, DATASET, initial_config, simple_types)
assert_get_config_and_from_config_compatibility(EASEModel, DATASET, initial_config, simple_types)

def test_default_config_and_default_model_params_are_the_same(self) -> None:
default_config: tp.Dict[str, int] = {}
Expand Down
3 changes: 1 addition & 2 deletions tests/models/test_implicit_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,7 @@ def test_get_config_and_from_config_compatibility(self, simple_types: bool) -> N
},
"verbose": 1,
}
model = ImplicitALSWrapperModel(model=AlternatingLeastSquares())
assert_get_config_and_from_config_compatibility(model, DATASET, initial_config, simple_types)
assert_get_config_and_from_config_compatibility(ImplicitALSWrapperModel, DATASET, initial_config, simple_types)

def test_default_config_and_default_model_params_are_the_same(self) -> None:
default_config: tp.Dict[str, tp.Any] = {"model": {}}
Expand Down
5 changes: 3 additions & 2 deletions tests/models/test_implicit_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,9 @@ def test_get_config_and_from_config_compatibility(self, simple_types: bool) -> N
},
"verbose": 1,
}
model = ImplicitItemKNNWrapperModel(model=ItemItemRecommender())
assert_get_config_and_from_config_compatibility(model, DATASET, initial_config, simple_types)
assert_get_config_and_from_config_compatibility(
ImplicitItemKNNWrapperModel, DATASET, initial_config, simple_types
)

def test_default_config_and_default_model_params_are_the_same(self) -> None:
default_config: tp.Dict[str, tp.Any] = {"model": {}}
Expand Down
Loading
Loading