Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configs for simple models #178

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

### Added
- Configs for EASE, Random, PureSVD models ([#178](https://github.com/MobileTeleSystems/RecTools/pull/178))
- Configs for implicit models ([#167](https://github.com/MobileTeleSystems/RecTools/pull/167))


Expand Down
15 changes: 15 additions & 0 deletions rectools/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
import pandas as pd
import typing_extensions as tpe
from pydantic import PlainSerializer
from pydantic_core import PydanticSerializationError

from rectools import AnyIds, Columns, InternalIds
Expand All @@ -40,6 +41,20 @@
RecoTriplet_T = tp.TypeVar("RecoTriplet_T", InternalRecoTriplet, SemiInternalRecoTriplet, RecoTriplet)


def _serialize_random_state(rs: tp.Optional[tp.Union[None, int, np.random.RandomState]]) -> tp.Union[None, int]:
if rs is None or isinstance(rs, int):
return rs

# NOBUG: We can add serialization using get/set_state, but it's not human readable
raise TypeError("`random_state` must be ``None`` or have ``int`` type to convert it to simple type")


# Annotated type for `random_state` config fields: accepts ``None``, an ``int`` or an ``np.random.RandomState``.
# The serializer is attached only for JSON-mode dumps (``when_used="json"``); there `_serialize_random_state`
# either returns a simple value or raises ``TypeError``.
RandomState = tpe.Annotated[
    tp.Union[None, int, np.random.RandomState],
    PlainSerializer(func=_serialize_random_state, when_used="json"),
]


class ModelConfig(BaseConfig):
"""Base model config."""

Expand Down
21 changes: 20 additions & 1 deletion rectools/models/ease.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,26 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from scipy import sparse

from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.models.base import ModelConfig
from rectools.types import InternalIdsArray

from .base import ModelBase, Scores
from .rank import Distance, ImplicitRanker


class EASEModel(ModelBase):
class EASEModelConfig(ModelConfig):
    """
    Config for `EASE` model.

    Field defaults match the keyword defaults of ``EASEModel.__init__``.
    """

    # Mirrors the `regularization` argument / attribute of ``EASEModel``
    regularization: float = 500.0
    # Mirrors the `num_threads` argument / attribute of ``EASEModel``
    num_threads: int = 1


class EASEModel(ModelBase[EASEModelConfig]):
"""
Embarrassingly Shallow Autoencoders for Sparse Data model.

Expand All @@ -51,17 +60,27 @@ class EASEModel(ModelBase):
recommends_for_warm = False
recommends_for_cold = False

config_class = EASEModelConfig

def __init__(
    self,
    regularization: float = 500.0,
    num_threads: int = 1,
    verbose: int = 0,
):
    """Store the hyperparameters; `weight` is only declared (annotation without a value) here."""
    super().__init__(verbose=verbose)
    self.num_threads = num_threads
    self.regularization = regularization
    self.weight: np.ndarray

def _get_config(self) -> EASEModelConfig:
    """Build a config object from the hyperparameters currently stored on the model."""
    config = EASEModelConfig(
        regularization=self.regularization,
        num_threads=self.num_threads,
        verbose=self.verbose,
    )
    return config

@classmethod
def _from_config(cls, config: EASEModelConfig) -> tpe.Self:
    """Create a model instance from the fields of a config object."""
    model = cls(
        regularization=config.regularization,
        num_threads=config.num_threads,
        verbose=config.verbose,
    )
    return model

def _fit(self, dataset: Dataset) -> None: # type: ignore
ui_csr = dataset.get_user_item_matrix(include_weights=True)

Expand Down
15 changes: 1 addition & 14 deletions rectools/models/implicit_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from rectools.utils.config import BaseConfig
from rectools.utils.misc import get_class_or_function_full_path, import_object

from .base import RandomState
from .rank import Distance
from .vector import Factors, VectorModel

Expand Down Expand Up @@ -68,20 +69,6 @@ def _serialize_alternating_least_squares_class(
]


def _serialize_random_state(rs: tp.Optional[tp.Union[None, int, np.random.RandomState]]) -> tp.Union[None, int]:
if rs is None or isinstance(rs, int):
return rs

# NOBUG: We can add serialization using get/set_state, but it's not human readable
raise TypeError("`random_state` must be ``None`` or have ``int`` type to convert it to simple type")


RandomState = tpe.Annotated[
tp.Union[None, int, np.random.RandomState],
PlainSerializer(func=_serialize_random_state, when_used="json"),
]


class AlternatingLeastSquaresParams(tpe.TypedDict):
"""Params for implicit `AlternatingLeastSquares` model."""

Expand Down
34 changes: 33 additions & 1 deletion rectools/models/pure_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from scipy.sparse.linalg import svds

from rectools.dataset import Dataset
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.models.rank import Distance
from rectools.models.vector import Factors, VectorModel


class PureSVDModel(VectorModel):
class PureSVDModelConfig(ModelConfig):
    """
    Config for `PureSVD` model.

    Each field is passed one-to-one to the ``PureSVDModel`` constructor argument of the same name.
    """

    factors: int = 10
    # NOTE(review): `tol`, `maxiter` and `random_state` look like `scipy.sparse.linalg.svds` arguments
    # (`svds` is imported in this module) - confirm against `PureSVDModel._fit`.
    tol: float = 0
    maxiter: tp.Optional[int] = None
    random_state: tp.Optional[int] = None


class PureSVDModel(VectorModel[PureSVDModelConfig]):
"""
PureSVD matrix factorization model.

Expand All @@ -51,6 +62,8 @@ class PureSVDModel(VectorModel):
u2i_dist = Distance.DOT
i2i_dist = Distance.COSINE

config_class = PureSVDModelConfig

def __init__(
self,
factors: int = 10,
Expand All @@ -69,6 +82,25 @@ def __init__(
self.user_factors: np.ndarray
self.item_factors: np.ndarray

def _get_config(self) -> PureSVDModelConfig:
    """Collect the hyperparameters currently stored on the model into a config object."""
    field_names = ("factors", "tol", "maxiter", "random_state", "verbose")
    params = {name: getattr(self, name) for name in field_names}
    return PureSVDModelConfig(**params)

@classmethod
def _from_config(cls, config: PureSVDModelConfig) -> tpe.Self:
    """Create a model instance, forwarding every config field as the same-named keyword argument."""
    field_names = ("factors", "tol", "maxiter", "random_state", "verbose")
    init_kwargs = {name: getattr(config, name) for name in field_names}
    return cls(**init_kwargs)

def _fit(self, dataset: Dataset) -> None: # type: ignore
ui_csr = dataset.get_user_item_matrix(include_weights=True)

Expand Down
19 changes: 18 additions & 1 deletion rectools/models/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from tqdm.auto import tqdm

from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.models.base import ModelConfig
from rectools.types import AnyIdsArray, InternalId, InternalIdsArray
from rectools.utils import fast_isin_for_sorted_test_elements

Expand Down Expand Up @@ -50,7 +52,13 @@ def sample(self, n: int) -> np.ndarray:
return sampled


class RandomModel(ModelBase):
class RandomModelConfig(ModelConfig):
    """
    Config for `Random` model.

    The field default matches the keyword default of ``RandomModel.__init__``.
    """

    # Mirrors the `random_state` argument / attribute of ``RandomModel``
    random_state: tp.Optional[int] = None


class RandomModel(ModelBase[RandomModelConfig]):
"""
Model generating random recommendations.

Expand All @@ -70,13 +78,22 @@ class RandomModel(ModelBase):
recommends_for_warm = False
recommends_for_cold = True

config_class = RandomModelConfig

def __init__(self, random_state: tp.Optional[int] = None, verbose: int = 0):
    """Remember `random_state` and create a `_RandomGen` from it; `all_item_ids` is only declared here."""
    super().__init__(verbose=verbose)
    self.all_item_ids: np.ndarray

    self.random_gen = _RandomGen(random_state)
    self.random_state = random_state

def _get_config(self) -> RandomModelConfig:
    """Build a config object from the `random_state` and `verbose` values stored on the model."""
    config = RandomModelConfig(
        random_state=self.random_state,
        verbose=self.verbose,
    )
    return config

@classmethod
def _from_config(cls, config: RandomModelConfig) -> tpe.Self:
    """Create a model instance from the fields of a config object."""
    model = cls(
        random_state=config.random_state,
        verbose=config.verbose,
    )
    return model

def _fit(self, dataset: Dataset) -> None: # type: ignore
self.all_item_ids = dataset.item_id_map.internal_ids

Expand Down
47 changes: 46 additions & 1 deletion tests/models/test_ease.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@
from rectools.models import EASEModel

from .data import DATASET, INTERACTIONS
from .utils import assert_second_fit_refits_model
from .utils import (
assert_default_config_and_default_model_params_are_the_same,
assert_get_config_and_from_config_compatibility,
assert_second_fit_refits_model,
)


class TestEASEModel:
Expand Down Expand Up @@ -220,3 +224,44 @@ def test_i2i_with_warm_and_cold_items(self, item_features: tp.Optional[pd.DataFr
dataset=dataset,
k=2,
)


class TestEASEModelConfiguration:
    """Checks that `EASEModel` can be created from a config and can report its config."""

    def test_from_config(self) -> None:
        """A model created from a config dict exposes the configured values as attributes."""
        params = {
            "regularization": 500,
            "num_threads": 1,
            "verbose": 1,
        }
        restored = EASEModel.from_config(params)
        assert restored.regularization == 500
        assert restored.num_threads == 1
        assert restored.verbose == 1

    def test_get_config(self) -> None:
        """`get_config` returns exactly the values the model was constructed with."""
        actual = EASEModel(regularization=500, num_threads=1, verbose=1).get_config()
        assert actual == {
            "regularization": 500,
            "num_threads": 1,
            "verbose": 1,
        }

    def test_get_config_and_from_config_compatibility(self) -> None:
        """Delegate the `get_config` / `from_config` round-trip check to the shared helper."""
        model = EASEModel()
        initial_config = {
            "regularization": 500,
            "num_threads": 1,
            "verbose": 1,
        }
        assert_get_config_and_from_config_compatibility(model, DATASET, initial_config)

    def test_default_config_and_default_model_params_are_the_same(self) -> None:
        """Delegate the comparison of an empty config with a default-constructed model to the shared helper."""
        model = EASEModel()
        default_config: tp.Dict[str, int] = {}
        assert_default_config_and_default_model_params_are_the_same(model, default_config)
28 changes: 10 additions & 18 deletions tests/models/test_implicit_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from rectools.models.utils import recommend_from_scores

from .data import DATASET
from .utils import assert_second_fit_refits_model
from .utils import (
assert_default_config_and_default_model_params_are_the_same,
assert_get_config_and_from_config_compatibility,
assert_second_fit_refits_model,
)


@pytest.mark.filterwarnings("ignore:Converting sparse features to dense")
Expand Down Expand Up @@ -451,28 +455,16 @@ def test_custom_model_class(self) -> None:

@pytest.mark.parametrize("simple_types", (False, True))
def test_get_config_and_from_config_compatibility(self, simple_types: bool) -> None:
def get_reco(model: ImplicitALSWrapperModel) -> pd.DataFrame:
return model.fit(DATASET).recommend(users=[10, 20], dataset=DATASET, k=2, filter_viewed=False)

initial_config = {
"model": {
"params": {"factors": 16, "num_threads": 2, "iterations": 3, "random_state": 42},
},
"verbose": 1,
}

model_1 = ImplicitALSWrapperModel.from_config(initial_config)
reco_1 = get_reco(model_1)
config_1 = model_1.get_config(simple_types=simple_types)

model_2 = ImplicitALSWrapperModel.from_config(config_1)
reco_2 = get_reco(model_2)
config_2 = model_2.get_config(simple_types=simple_types)

assert config_1 == config_2
pd.testing.assert_frame_equal(reco_1, reco_2)
model = ImplicitALSWrapperModel(model=AlternatingLeastSquares())
assert_get_config_and_from_config_compatibility(model, DATASET, initial_config, simple_types)

def test_default_config_and_default_model_params_are_the_same(self) -> None:
model_from_config = ImplicitALSWrapperModel.from_config({"model": {}})
model_from_params = ImplicitALSWrapperModel(model=AlternatingLeastSquares())
assert model_from_config.get_config() == model_from_params.get_config()
default_config: tp.Dict[str, tp.Any] = {"model": {}}
model = ImplicitALSWrapperModel(model=AlternatingLeastSquares())
assert_default_config_and_default_model_params_are_the_same(model, default_config)
28 changes: 10 additions & 18 deletions tests/models/test_implicit_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
from rectools.models import ImplicitItemKNNWrapperModel

from .data import DATASET, INTERACTIONS
from .utils import assert_second_fit_refits_model
from .utils import (
assert_default_config_and_default_model_params_are_the_same,
assert_get_config_and_from_config_compatibility,
assert_second_fit_refits_model,
)


class TestImplicitItemKNNWrapperModel:
Expand Down Expand Up @@ -309,29 +313,17 @@ def test_to_config(

@pytest.mark.parametrize("simple_types", (False, True))
def test_get_config_and_from_config_compatibility(self, simple_types: bool) -> None:
def get_reco(model: ImplicitItemKNNWrapperModel) -> pd.DataFrame:
return model.fit(DATASET).recommend(users=np.array([10, 20]), dataset=DATASET, k=2, filter_viewed=False)

initial_config = {
"model": {
"cls": TFIDFRecommender,
"params": {"K": 3},
},
"verbose": 1,
}

model_1 = ImplicitItemKNNWrapperModel.from_config(initial_config)
reco_1 = get_reco(model_1)
config_1 = model_1.get_config(simple_types=simple_types)

model_2 = ImplicitItemKNNWrapperModel.from_config(config_1)
reco_2 = get_reco(model_2)
config_2 = model_2.get_config(simple_types=simple_types)

assert config_1 == config_2
pd.testing.assert_frame_equal(reco_1, reco_2)
model = ImplicitItemKNNWrapperModel(model=ItemItemRecommender())
assert_get_config_and_from_config_compatibility(model, DATASET, initial_config, simple_types)

def test_default_config_and_default_model_params_are_the_same(self) -> None:
model_from_config = ImplicitItemKNNWrapperModel.from_config({"model": {}})
model_from_params = ImplicitItemKNNWrapperModel(model=ItemItemRecommender())
assert model_from_config.get_config() == model_from_params.get_config()
default_config: tp.Dict[str, tp.Any] = {"model": {}}
model = ImplicitItemKNNWrapperModel(model=ItemItemRecommender())
assert_default_config_and_default_model_params_are_the_same(model, default_config)
Loading
Loading