Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configs for simple models #178

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

### Added
- Configs for EASE, Random, PureSVD models ([#178](https://github.com/MobileTeleSystems/RecTools/pull/178))
- Configs for implicit models ([#167](https://github.com/MobileTeleSystems/RecTools/pull/167))


Expand Down
15 changes: 15 additions & 0 deletions rectools/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
import pandas as pd
import typing_extensions as tpe
from pydantic import PlainSerializer
from pydantic_core import PydanticSerializationError

from rectools import AnyIds, Columns, InternalIds
Expand All @@ -40,6 +41,20 @@
RecoTriplet_T = tp.TypeVar("RecoTriplet_T", InternalRecoTriplet, SemiInternalRecoTriplet, RecoTriplet)


def _serialize_random_state(rs: tp.Optional[tp.Union[None, int, np.random.RandomState]]) -> tp.Union[None, int]:
if rs is None or isinstance(rs, int):
return rs

# NOBUG: We can add serialization using get/set_state, but it's not human readable
raise TypeError("`random_state` must be ``None`` or have ``int`` type to convert it to simple type")


# Annotated type for `random_state` values: ``None``, an ``int`` or a ``np.random.RandomState``.
# The attached serializer is applied only in JSON mode (``when_used="json"``) and delegates to
# `_serialize_random_state`, which returns ``None``/``int`` as is and raises ``TypeError`` otherwise.
RandomState = tpe.Annotated[
    tp.Union[None, int, np.random.RandomState],
    PlainSerializer(func=_serialize_random_state, when_used="json"),
]


class ModelConfig(BaseConfig):
"""Base model config."""

Expand Down
25 changes: 24 additions & 1 deletion rectools/models/ease.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,26 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from scipy import sparse

from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.models.base import ModelConfig
from rectools.types import InternalIdsArray

from .base import ModelBase, Scores
from .rank import Distance, ImplicitRanker


class EASEModel(ModelBase):
class EASEModelConfig(ModelConfig):
    """Config for `EASE` model."""

    # Defaults below match the keyword defaults of `EASEModel.__init__`.
    regularization: float = 500.0
    num_threads: int = 1


class EASEModel(ModelBase[EASEModelConfig]):
"""
Embarrassingly Shallow Autoencoders for Sparse Data model.

Expand All @@ -51,17 +60,31 @@ class EASEModel(ModelBase):
recommends_for_warm = False
recommends_for_cold = False

config_class = EASEModelConfig

def __init__(
    self,
    regularization: float = 500.0,
    num_threads: int = 1,
    verbose: int = 0,
):
    # Keep the constructor arguments as a config object so `_get_config` can return them.
    self._config = self._make_config(regularization, num_threads, verbose)

    super().__init__(verbose=verbose)

    self.num_threads = num_threads
    self.regularization = regularization

    # Declared here only; no value is assigned in the constructor.
    self.weight: np.ndarray

def _make_config(self, regularization: float, num_threads: int, verbose: int) -> EASEModelConfig:
    """Build an `EASEModelConfig` from the given constructor arguments."""
    config = EASEModelConfig(
        regularization=regularization,
        num_threads=num_threads,
        verbose=verbose,
    )
    return config

def _get_config(self) -> EASEModelConfig:
    """Return the config object kept in `self._config`."""
    stored_config = self._config
    return stored_config

@classmethod
def _from_config(cls, config: EASEModelConfig) -> tpe.Self:
    """Create a model instance from the values held by `config`."""
    return cls(
        regularization=config.regularization,
        num_threads=config.num_threads,
        verbose=config.verbose,
    )

def _fit(self, dataset: Dataset) -> None: # type: ignore
ui_csr = dataset.get_user_item_matrix(include_weights=True)

Expand Down
15 changes: 1 addition & 14 deletions rectools/models/implicit_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from rectools.utils.config import BaseConfig
from rectools.utils.misc import get_class_or_function_full_path, import_object

from .base import RandomState
from .rank import Distance
from .vector import Factors, VectorModel

Expand Down Expand Up @@ -68,20 +69,6 @@ def _serialize_alternating_least_squares_class(
]


def _serialize_random_state(rs: tp.Optional[tp.Union[None, int, np.random.RandomState]]) -> tp.Union[None, int]:
if rs is None or isinstance(rs, int):
return rs

# NOBUG: We can add serialization using get/set_state, but it's not human readable
raise TypeError("`random_state` must be ``None`` or have ``int`` type to convert it to simple type")


# Annotated type for `random_state` values: ``None``, an ``int`` or a ``np.random.RandomState``.
# The attached serializer is applied only in JSON mode (``when_used="json"``) and delegates to
# `_serialize_random_state`, which returns ``None``/``int`` as is and raises ``TypeError`` otherwise.
RandomState = tpe.Annotated[
    tp.Union[None, int, np.random.RandomState],
    PlainSerializer(func=_serialize_random_state, when_used="json"),
]


class AlternatingLeastSquaresParams(tpe.TypedDict):
"""Params for implicit `AlternatingLeastSquares` model."""

Expand Down
34 changes: 33 additions & 1 deletion rectools/models/pure_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from scipy.sparse.linalg import svds

from rectools.dataset import Dataset
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.models.rank import Distance
from rectools.models.vector import Factors, VectorModel


class PureSVDModel(VectorModel):
class PureSVDModelConfig(ModelConfig):
    """Config for `PureSVD` model."""

    # These fields are filled by `PureSVDModel._make_config` from the constructor arguments.
    # `verbose` is passed there as well but is not declared here
    # (presumably it comes from `ModelConfig` — TODO confirm).
    factors: int = 10
    tol: float = 0
    maxiter: tp.Optional[int] = None
    random_state: tp.Optional[int] = None


class PureSVDModel(VectorModel[PureSVDModelConfig]):
"""
PureSVD matrix factorization model.

Expand All @@ -51,6 +62,8 @@ class PureSVDModel(VectorModel):
u2i_dist = Distance.DOT
i2i_dist = Distance.COSINE

config_class = PureSVDModelConfig

def __init__(
self,
factors: int = 10,
Expand All @@ -59,6 +72,7 @@ def __init__(
random_state: tp.Optional[int] = None,
verbose: int = 0,
):
self._config = self._make_config(factors, tol, maxiter, random_state, verbose)
feldlime marked this conversation as resolved.
Show resolved Hide resolved
super().__init__(verbose=verbose)

self.factors = factors
Expand All @@ -69,6 +83,24 @@ def __init__(
self.user_factors: np.ndarray
self.item_factors: np.ndarray

def _make_config(
    self,
    factors: int,
    tol: float,
    maxiter: tp.Optional[int],
    random_state: tp.Optional[int],
    verbose: int,
) -> PureSVDModelConfig:
    """Build a `PureSVDModelConfig` from the given constructor arguments."""
    config = PureSVDModelConfig(
        factors=factors,
        tol=tol,
        maxiter=maxiter,
        random_state=random_state,
        verbose=verbose,
    )
    return config

def _get_config(self) -> PureSVDModelConfig:
    """Return the config object kept in `self._config`."""
    stored_config = self._config
    return stored_config

@classmethod
def _from_config(cls, config: PureSVDModelConfig) -> tpe.Self:
    """Create a model instance from the values held by `config`."""
    model = cls(
        factors=config.factors,
        tol=config.tol,
        maxiter=config.maxiter,
        random_state=config.random_state,
        verbose=config.verbose,
    )
    return model

def _fit(self, dataset: Dataset) -> None: # type: ignore
ui_csr = dataset.get_user_item_matrix(include_weights=True)

Expand Down
23 changes: 22 additions & 1 deletion rectools/models/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
import typing as tp

import numpy as np
import typing_extensions as tpe
from tqdm.auto import tqdm

from rectools import InternalIds
from rectools.dataset import Dataset
from rectools.models.base import ModelConfig
from rectools.types import AnyIdsArray, InternalId, InternalIdsArray
from rectools.utils import fast_isin_for_sorted_test_elements

Expand Down Expand Up @@ -50,7 +52,13 @@ def sample(self, n: int) -> np.ndarray:
return sampled


class RandomModel(ModelBase):
class RandomModelConfig(ModelConfig):
    """Config for `Random` model."""

    # Default matches the `random_state` keyword default of `RandomModel.__init__`.
    random_state: tp.Optional[int] = None


class RandomModel(ModelBase[RandomModelConfig]):
"""
Model generating random recommendations.

Expand All @@ -70,13 +78,26 @@ class RandomModel(ModelBase):
recommends_for_warm = False
recommends_for_cold = True

config_class = RandomModelConfig

def __init__(
    self,
    random_state: tp.Optional[int] = None,
    verbose: int = 0,
):
    # Keep the constructor arguments as a config object so `_get_config` can return them.
    self._config = self._make_config(random_state, verbose)
    super().__init__(verbose=verbose)

    # Declared here only; the value is assigned in `_fit`.
    self.all_item_ids: np.ndarray

    self.random_state = random_state
    self.random_gen = _RandomGen(random_state)

def _make_config(self, random_state: tp.Optional[int], verbose: int) -> RandomModelConfig:
    """Build a `RandomModelConfig` from the given constructor arguments."""
    config = RandomModelConfig(
        random_state=random_state,
        verbose=verbose,
    )
    return config

def _get_config(self) -> RandomModelConfig:
    """Return the config object kept in `self._config`."""
    stored_config = self._config
    return stored_config

@classmethod
def _from_config(cls, config: RandomModelConfig) -> tpe.Self:
    """Create a model instance from the values held by `config`."""
    return cls(
        random_state=config.random_state,
        verbose=config.verbose,
    )

def _fit(self, dataset: Dataset) -> None: # type: ignore
self.all_item_ids = dataset.item_id_map.internal_ids

Expand Down
25 changes: 25 additions & 0 deletions tests/models/test_ease.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,31 @@


class TestEASEModel:
def test_from_config(self) -> None:
    # Values given in the config dict must show up as attributes of the created model.
    params = {"regularization": 500, "num_threads": 1, "verbose": 1}
    ease = EASEModel.from_config(params)
    assert ease.num_threads == 1
    assert ease.verbose == 1
    assert ease.regularization == 500

def test_get_config(self) -> None:
    # Constructor arguments must come back unchanged from `get_config`.
    expected = {
        "regularization": 500,
        "num_threads": 1,
        "verbose": 1,
    }
    model = EASEModel(regularization=500, num_threads=1, verbose=1)
    actual = model.get_config()
    assert actual == expected

@pytest.fixture
def dataset(self) -> Dataset:
    """Return `DATASET` as the `dataset` fixture."""
    fixture_dataset = DATASET
    return fixture_dataset
Expand Down
35 changes: 35 additions & 0 deletions tests/models/test_pure_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,41 @@


class TestPureSVDModel:

def test_from_config(self) -> None:
    # Values given in the config dict must show up as attributes of the created model.
    params = {"factors": 100, "tol": 0, "maxiter": 100, "random_state": 32, "verbose": 0}
    svd = PureSVDModel.from_config(params)
    assert svd.factors == 100
    assert svd.tol == 0
    assert svd.maxiter == 100
    assert svd.random_state == 32
    assert svd.verbose == 0

@pytest.mark.parametrize("random_state", (None, 42))
def test_get_config(self, random_state: tp.Optional[int]) -> None:
    # Constructor arguments must come back unchanged from `get_config`.
    expected = {
        "factors": 100,
        "tol": 1,
        "maxiter": 100,
        "random_state": random_state,
        "verbose": 1,
    }
    model = PureSVDModel(factors=100, tol=1, maxiter=100, random_state=random_state, verbose=1)
    actual = model.get_config()
    assert actual == expected

@pytest.fixture
def dataset(self) -> Dataset:
    """Return `DATASET` as the `dataset` fixture."""
    fixture_dataset = DATASET
    return fixture_dataset
Expand Down
23 changes: 23 additions & 0 deletions tests/models/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,29 @@


class TestRandomSampler:

def test_from_config(self) -> None:
    # Values given in the config dict must show up as attributes of the created model.
    params = {"random_state": 32, "verbose": 0}
    random_model = RandomModel.from_config(params)
    assert random_model.random_state == 32
    assert random_model.verbose == 0

@pytest.mark.parametrize("random_state", (None, 42))
def test_get_config(self, random_state: tp.Optional[int]) -> None:
    # Constructor arguments must come back unchanged from `get_config`.
    expected = {
        "random_state": random_state,
        "verbose": 1,
    }
    model = RandomModel(random_state=random_state, verbose=1)
    actual = model.get_config()
    assert actual == expected

def test_sample_small_n(self) -> None:
gen = _RandomGen(42)
sampler = _RandomSampler(np.arange(10), gen)
Expand Down
Loading