From 0e46d15eae9749a684d52d635a7580cb91c571e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 13:23:49 +0300 Subject: [PATCH 1/7] fixed mistake --- rectools/models/vector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rectools/models/vector.py b/rectools/models/vector.py index 10614e82..2f34d0e3 100644 --- a/rectools/models/vector.py +++ b/rectools/models/vector.py @@ -99,7 +99,8 @@ def calc(self, subject_id: int) -> np.ndarray: elif self.distance == Distance.EUCLIDEAN: subject_dot = self.subjects_dots[subject_id] dot = self.objects_factors @ subject_factors - scores = np.sqrt(self.objects_dots + subject_dot - 2 * dot) + d2 = self.objects_dots + subject_dot - 2 * dot + scores = np.sqrt(np.maximum(d2, 0)) # Theoretically d2 >= 0, but can be <0 because of rounding errors elif self.distance == Distance.COSINE: subject_norm = self.subjects_norms[subject_id] if subject_norm == 0: From 4a31a8228ce8b12feeac38c8cb95352aca581e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 13:24:54 +0300 Subject: [PATCH 2/7] added changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..72f2ac18 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,11 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ + +## Unreleased + +### Added From 5e509a31f51111b8a0493f05c5f131b5f42d414a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 13:51:17 +0300 Subject: [PATCH 3/7] improved ci naming --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6638238..34688c69 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,6 +28,7 @@ jobs: run: make lint test: + name: test (${{ matrix.python-version }}, old-deps - ${{ matrix.old-deps }}) runs-on: ubuntu-latest strategy: fail-fast: false From 03d9fd5375e670d9913f57378994085626ab77cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 15:45:44 +0300 Subject: [PATCH 4/7] renamed license file --- LICENSE.txt => LICENSE | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename LICENSE.txt => LICENSE (100%) diff --git a/LICENSE.txt b/LICENSE similarity index 100% rename from LICENSE.txt rename to LICENSE From a31d628bedb508253585ef4b5e4878a7a8fc3ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 15:46:29 +0300 Subject: [PATCH 5/7] small hotfix for tutorials docs --- docs/source/tutorials.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 942bc01e..e51d6a30 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -1,6 +1,8 @@ Tutorials ========= +See tutorials here: 
https://github.com/MobileTeleSystems/RecTools/tree/main/examples + .. toctree:: :maxdepth: 4 :glob: From c8f1f86f5831c3b2d1cfd8d9d700366dcf3e19e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A4=D0=B5=D0=BB=D1=8C=D0=B4=D0=BC=D0=B0=D0=BD=20=D0=AD?= =?UTF-8?q?=D0=BC=D0=B8=D0=BB=D0=B8=D0=B9=20=D0=9E=D0=BB=D0=B5=D0=B3=D0=BE?= =?UTF-8?q?=D0=B2=D0=B8=D1=87?= Date: Fri, 29 Jul 2022 15:47:07 +0300 Subject: [PATCH 6/7] completed pyproject.toml --- pyproject.toml | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e9375c09..a7e85514 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,33 @@ [tool.poetry] -name = "rectools" +name = "RecTools" version = "0.0.3" -repository = "https://github.com/MobileTeleSystems/RecTools" -readme = "README.md" description = "An easy-to-use Python library for building recommendation systems" license = "Apache-2.0" authors = [ - "Daniil Potapov ", + "Daniil Potapov ", + "Ildar Safilo ", + "Emiliy Feldman ", + "Julia Karamnova ", + "Artem Senin ", + "Mikhail Khasykov ", + "Daria Tikhonovich ", ] maintainers = [ - "Daniil Potapov ", + "Daniil Potapov ", + "Ildar Safilo ", + "Emiliy Feldman Date: Fri, 29 Jul 2022 15:47:45 +0300 Subject: [PATCH 7/7] added readme --- README.md | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bde52663..23541799 100644 --- a/README.md +++ b/README.md @@ -1 +1,110 @@ -# RecTools \ No newline at end of file +# RecTools + +[![Python versions](https://img.shields.io/pypi/pyversions/rectools.svg)](https://pypi.org/project/rectools) +[![PyPI](https://img.shields.io/pypi/v/rectools.svg)](https://pypi.org/project/rectools) +[![Docs](https://img.shields.io/github/workflow/status/MobileTeleSystems/RecTools/Publish?label=docs)](https://rectools.readthedocs.io) + 
+[![License](https://img.shields.io/github/license/MobileTeleSystems/RecTools.svg)](https://github.com/MobileTeleSystems/RecTools/blob/main/LICENSE) +[![Coverage](https://img.shields.io/codecov/c/github/MobileTeleSystems/RecTools.svg)](https://app.codecov.io/gh/MobileTeleSystems/RecTools) +[![Tests](https://img.shields.io/github/workflow/status/MobileTeleSystems/RecTools/Test/main?label=tests)](https://github.com/MobileTeleSystems/RecTools/actions/workflows/test.yml?query=branch%3Amain++) + +[![Contributors](https://img.shields.io/github/contributors/MobileTeleSystems/RecTools.svg)](https://github.com/MobileTeleSystems/RecTools/graphs/contributors) +[![Telegram](https://img.shields.io/badge/channel-telegram-blue)](https://t.me/RecTools_Support) + +RecTools is an easy-to-use Python library which makes the process of building recommendation systems easier, +faster and more structured than ever before. +It includes built-in toolkits for data processing and metrics calculation, +a variety of recommender models, some wrappers for already existing implementations of popular algorithms +and a model selection framework. +The aim is to collect ready-to-use solutions and best practices in one place to make the processes +of creating your first MVP and deploying a model to production as fast and easy as possible. + +RecTools allows you to easily work with dense and sparse features. +It features basic models, such as ones based on random suggestions or popularity, and more advanced ones, e.g. LightFM. +It also contains a wide variety of metrics to choose from to better suit the recommender system to your needs. + +For more details, see the [Documentation](https://rectools.readthedocs.io/) +and [Tutorials](https://github.com/MobileTeleSystems/RecTools/tree/main/examples). 
+ +## Get started + +Prepare data with + +```shell +wget https://files.grouplens.org/datasets/movielens/ml-1m.zip +unzip ml-1m.zip +``` + +```python +import pandas as pd +from implicit.nearest_neighbours import TFIDFRecommender + +from rectools import Columns +from rectools.dataset import Dataset +from rectools.models import ImplicitItemKNNWrapperModel + +# Read the data +ratings = pd.read_csv( + "ml-1m/ratings.dat", + sep="::", + engine="python", # Because of 2-chars separators + header=None, + names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime], +) + +# Create dataset +dataset = Dataset.construct(ratings) + +# Fit model +model = ImplicitItemKNNWrapperModel(TFIDFRecommender(K=10)) +model.fit(dataset) + +# Make recommendations +recos = model.recommend( + users=ratings[Columns.User].unique(), + dataset=dataset, + k=10, + filter_viewed=True, +) +``` + +## Installation + +RecTools is on PyPI, so you can use `pip` to install it. +``` +pip install rectools +``` + + +## Contribution + +To install all requirements run +``` +make install +``` +You must have `python3` and `poetry` installed. + +For autoformatting run +``` +make autoformat +``` + +For linters check run +``` +make lint +``` + +For tests run +``` +make test +``` + +For coverage run +``` +make coverage +``` + +To remove virtual environment run +``` +make clean +```