diff --git a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb index 572ace114..dd22a1378 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/01-Building-Recommender-Systems-with-Merlin.ipynb @@ -129,11 +129,11 @@ "outputs": [], "source": [ "# for running this example on GPU, install the following libraries\n", - "# %pip install \"feast<0.20\" faiss-gpu\n", + "# %pip install \"feast==0.31\" faiss-gpu\n", "\n", "# for running this example on CPU, uncomment the following lines\n", - "# %pip install tensorflow-cpu \"feast<0.20\" faiss-cpu\n", - "# %pip uninstall cudf\n" + "# %pip install tensorflow-cpu \"feast==0.31\" faiss-cpu\n", + "# %pip uninstall cudf" ] }, { @@ -146,14 +146,49 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-09-22 23:24:35.828030: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-20 23:45:23.539085: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-09-22 23:24:36.997030: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16249 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n" + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n", + "[INFO]: sparse_operation_kit is imported\n", + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n", + "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.1.4-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-20 23:45:31.002019: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-06-20 23:45:31.232986: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-06-20 23:45:31.233033: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-06-20 23:45:31.233242: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8192 MB memory: -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:06:00.0, compute capability: 7.0\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[SOK INFO] Initialize finished, communication tool: horovod\n" ] } ], "source": [ "import os\n", + "# for running this example on CPU, comment out the line below\n", + "os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"\n", + "\n", "import nvtabular as nvt\n", "from nvtabular.ops import Rename, Filter, Dropna, LambdaOp, Categorify, \\\n", " TagAsUserFeatures, TagAsUserID, TagAsItemFeatures, TagAsItemID, AddMetadata\n", @@ -165,8 +200,7 @@ "from merlin.datasets.ecommerce import transform_aliccp\n", "import tensorflow as tf\n", "\n", - "# for running this example on CPU, comment out the line below\n", - "os.environ[\"TF_GPU_ALLOCATOR\"] = \"cuda_malloc_async\"" + "import logging" ] }, { @@ -177,8 +211,6 @@ "outputs": [], "source": [ "# disable INFO and DEBUG logging everywhere\n", - "import logging\n", - "\n", "logging.disable(logging.WARNING)" ] }, @@ -219,18 +251,7 @@ "execution_count": 6, "id": "b44b3378-7297-4946-a271-742a9239bc3e", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "from merlin.datasets.synthetic import generate_data\n", "\n", @@ -326,16 +347,7 @@ "execution_count": 9, "id": "814e8438-642a-4f03-baaf-44dab8d1b5e5", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "transform_aliccp(\n", " (train_raw, valid_raw), output_path, nvt_workflow=outputs, workflow_name=\"workflow\"\n", @@ -494,17 +506,25 @@ "id": "4d47cb8b-e06a-4932-9a19-fb244ef43152", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 10s 412ms/step - loss: 8.9092 - recall_at_10: 0.0117 - ndcg_at_10: 0.0075 - regularization_loss: 0.0000e+00 - val_loss: 8.9037 - val_recall_at_10: 0.0191 - val_ndcg_at_10: 0.0144 - val_regularization_loss: 0.0000e+00\n" + "5/5 [==============================] - 18s 712ms/step - loss: 8.9090 - recall_at_10: 0.0069 - ndcg_at_10: 0.0045 - regularization_loss: 0.0000e+00 - loss_batch: 8.5771 - val_loss: 8.9027 - val_recall_at_10: 0.0113 - val_ndcg_at_10: 0.0072 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 8.7921\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 16, @@ -580,20 +600,7 @@ "execution_count": 18, "id": "cb870461-6ac2-49b2-ba6a-2da6ecb57f1d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.USER_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "# define train and valid dataset objects\n", "train = Dataset(os.path.join(output_path, \"train\", \"*.parquet\"), part_size=\"500MB\")\n", @@ -659,13 +666,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "5/5 [==============================] - 5s 251ms/step - loss: 0.6932 - auc: 0.4982 - regularization_loss: 0.0000e+00 - val_loss: 0.6932 - val_auc: 0.5000 - val_regularization_loss: 0.0000e+00\n" + "5/5 [==============================] - 9s 519ms/step - loss: 0.6932 - auc: 0.5008 - regularization_loss: 0.0000e+00 - loss_batch: 0.6931 - val_loss: 0.6932 - val_auc: 0.5034 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.6932\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 21, @@ -733,14 +740,14 @@ "output_type": "stream", "text": [ "\n", - "Creating a new Feast repository in \u001b[1m\u001b[32m/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo\u001b[0m.\n", + "Creating a new Feast repository in \u001b[1m\u001b[32m/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo\u001b[0m.\n", "\n" ] } ], "source": [ - "!rm -rf $BASE_DIR/feast\n", - "!cd $BASE_DIR && feast init feast" + "!rm -rf $BASE_DIR/feast_repo\n", + "!cd $BASE_DIR && feast init feast_repo" ] }, { @@ -758,7 +765,7 @@ "metadata": {}, "outputs": [], "source": [ - "feature_repo_path = os.path.join(BASE_DIR, \"feast/feature_repo\")\n", + "feature_repo_path = os.path.join(BASE_DIR, \"feast_repo/feature_repo\")\n", "if os.path.exists(f\"{feature_repo_path}/example_repo.py\"):\n", " os.remove(f\"{feature_repo_path}/example_repo.py\")\n", "if os.path.exists(f\"{feature_repo_path}/data/driver_stats.parquet\"):\n", @@ -834,83 +841,83 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", " 6\n", " \n", " \n", " 1\n", - " 2\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 2\n", - " 2\n", - " 2\n", + " 4\n", + " 4\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 4\n", + " 4\n", + " 4\n", " 8\n", " \n", " \n", " 2\n", + " 5\n", + " 5\n", + " 3\n", " 3\n", " 3\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", " 3\n", " 3\n", " 3\n", + " 3\n", + " 5\n", + " 5\n", + " 5\n", " 7\n", " \n", " \n", " 3\n", - " 4\n", - " 4\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 4\n", - " 4\n", - " 4\n", + " 6\n", + " 6\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 6\n", + " 6\n", + " 6\n", " 5\n", " \n", " \n", " 4\n", - " 5\n", - " 5\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 5\n", - " 5\n", - " 5\n", - " 10\n", + " 7\n", + " 7\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 7\n", + " 7\n", + " 7\n", + " 9\n", " \n", " \n", "\n", @@ -918,25 +925,25 @@ ], "text/plain": [ " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 1 1 1 1 1 1 \n", - "1 2 2 1 1 1 1 \n", - "2 3 3 1 1 1 1 \n", - "3 4 4 1 1 1 1 \n", - "4 5 5 1 1 1 1 \n", + "0 3 3 3 3 3 3 \n", + "1 4 4 3 3 3 3 \n", + "2 5 5 3 3 3 3 \n", + "3 6 6 3 3 3 3 \n", + "4 7 7 3 3 3 3 \n", "\n", " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 1 1 1 1 \n", - "1 1 1 1 2 \n", - "2 1 1 1 3 \n", - "3 1 1 1 4 \n", - "4 1 1 1 5 \n", + "0 3 3 3 3 \n", + "1 3 3 3 4 \n", + "2 3 3 3 5 \n", + "3 3 3 3 6 \n", + "4 3 3 3 7 \n", "\n", " user_brands user_categories user_id_raw \n", - "0 1 1 6 \n", - "1 2 2 8 \n", - "2 3 3 7 \n", - "3 4 4 5 \n", - "4 5 5 10 " + "0 3 3 6 \n", + "1 4 4 8 \n", + "2 5 5 7 \n", + "3 6 6 5 \n", + "4 7 7 9 " ] }, "execution_count": 26, @@ -1018,93 +1025,93 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", " 6\n", - " 2022-09-22 23:02:17.150145\n", - " 2022-09-22 23:02:17.152070\n", + " 2023-06-20 23:47:09.436667\n", + " 2023-06-20 23:47:09.438518\n", " \n", " \n", " 1\n", - " 2\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 2\n", - " 2\n", - " 2\n", + " 4\n", + " 4\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 4\n", + " 4\n", + " 4\n", " 8\n", - " 2022-09-22 23:02:17.150145\n", - " 2022-09-22 23:02:17.152070\n", + " 2023-06-20 23:47:09.436667\n", + " 2023-06-20 23:47:09.438518\n", " \n", " \n", " 2\n", + " 5\n", + " 5\n", + " 3\n", " 3\n", " 3\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", " 3\n", " 3\n", " 3\n", + " 3\n", + " 5\n", + " 5\n", + " 5\n", " 7\n", - " 2022-09-22 23:02:17.150145\n", - " 2022-09-22 23:02:17.152070\n", + " 2023-06-20 23:47:09.436667\n", + " 2023-06-20 23:47:09.438518\n", " \n", " \n", " 3\n", - " 4\n", - " 4\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 4\n", - " 4\n", - " 4\n", + " 6\n", + " 6\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 6\n", + " 6\n", + " 6\n", " 5\n", - " 2022-09-22 23:02:17.150145\n", - " 2022-09-22 23:02:17.152070\n", + " 2023-06-20 23:47:09.436667\n", + " 2023-06-20 23:47:09.438518\n", " \n", " \n", " 4\n", - " 5\n", - " 5\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 5\n", - " 5\n", - " 5\n", - " 10\n", - " 2022-09-22 23:02:17.150145\n", - " 2022-09-22 23:02:17.152070\n", + " 7\n", + " 7\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 3\n", + " 7\n", + " 7\n", + " 7\n", + " 9\n", + " 2023-06-20 23:47:09.436667\n", + " 2023-06-20 23:47:09.438518\n", " \n", " \n", "\n", @@ -1112,32 +1119,32 @@ ], "text/plain": [ " user_id user_shops user_profile user_group user_gender user_age \\\n", - "0 1 1 1 1 1 1 \n", - "1 2 2 1 1 1 1 \n", - "2 3 3 1 1 1 1 \n", - "3 4 4 1 1 1 1 \n", - "4 5 5 1 1 1 1 \n", + "0 3 3 3 3 3 3 \n", + "1 4 4 3 3 3 3 \n", + "2 5 5 3 3 3 3 \n", + "3 6 6 3 3 3 3 \n", + "4 7 7 3 3 3 3 \n", "\n", " user_consumption_2 user_is_occupied user_geography user_intentions \\\n", - "0 1 1 1 1 \n", - "1 1 1 1 2 \n", - "2 1 1 1 3 \n", - "3 1 1 1 4 \n", - "4 1 1 1 5 \n", + "0 3 3 3 3 \n", + "1 3 3 3 4 \n", + "2 3 3 3 5 \n", + "3 3 3 3 6 \n", + "4 3 3 3 7 \n", "\n", " user_brands user_categories user_id_raw datetime \\\n", - "0 1 1 6 2022-09-22 23:02:17.150145 \n", - "1 2 2 8 2022-09-22 23:02:17.150145 \n", - "2 3 3 7 2022-09-22 23:02:17.150145 \n", - "3 4 4 5 2022-09-22 23:02:17.150145 \n", - "4 5 5 10 2022-09-22 23:02:17.150145 \n", + "0 3 3 6 2023-06-20 23:47:09.436667 \n", + "1 4 4 8 2023-06-20 23:47:09.436667 \n", + "2 5 5 7 2023-06-20 23:47:09.436667 \n", + "3 6 6 5 2023-06-20 23:47:09.436667 \n", + "4 7 7 9 2023-06-20 23:47:09.436667 \n", "\n", " created \n", - "0 2022-09-22 23:02:17.152070 \n", - "1 2022-09-22 23:02:17.152070 \n", - "2 2022-09-22 23:02:17.152070 \n", - "3 2022-09-22 23:02:17.152070 \n", - "4 2022-09-22 23:02:17.152070 " + "0 2023-06-20 23:47:09.438518 \n", + "1 2023-06-20 23:47:09.438518 \n", + "2 2023-06-20 23:47:09.438518 \n", + "3 2023-06-20 23:47:09.438518 \n", + "4 2023-06-20 23:47:09.438518 " ] }, "execution_count": 28, @@ -1156,9 +1163,7 @@ "metadata": {}, "outputs": [], "source": [ - "user_features.to_parquet(\n", - " os.path.join(feature_repo_path, \"data\", \"user_features.parquet\")\n", - ")" + "user_features.to_parquet(os.path.join(feature_repo_path, \"data\", \"user_features.parquet\"))" ] }, { @@ -1227,53 +1232,53 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 5\n", - " 2022-09-22 23:02:17.245267\n", - " 2022-09-22 23:02:17.246515\n", - " \n", - " \n", - " 1\n", - " 2\n", - " 2\n", - " 2\n", - " 2\n", - " 7\n", - " 2022-09-22 23:02:17.245267\n", - " 2022-09-22 23:02:17.246515\n", - " \n", - " \n", - " 2\n", " 3\n", " 3\n", " 3\n", " 3\n", - " 10\n", - " 2022-09-22 23:02:17.245267\n", - " 2022-09-22 23:02:17.246515\n", + " 6\n", + " 2023-06-20 23:47:09.557793\n", + " 2023-06-20 23:47:09.559325\n", " \n", " \n", - " 3\n", + " 1\n", " 4\n", " 4\n", " 4\n", " 4\n", - " 6\n", - " 2022-09-22 23:02:17.245267\n", - " 2022-09-22 23:02:17.246515\n", + " 7\n", + " 2023-06-20 23:47:09.557793\n", + " 2023-06-20 23:47:09.559325\n", " \n", " \n", - " 4\n", + " 2\n", " 5\n", " 5\n", " 5\n", " 5\n", + " 10\n", + " 2023-06-20 23:47:09.557793\n", + " 2023-06-20 23:47:09.559325\n", + " \n", + " \n", + " 3\n", + " 6\n", + " 6\n", + " 6\n", + " 6\n", " 8\n", - " 2022-09-22 23:02:17.245267\n", - " 2022-09-22 23:02:17.246515\n", + " 2023-06-20 23:47:09.557793\n", + " 2023-06-20 23:47:09.559325\n", + " \n", + " \n", + " 4\n", + " 7\n", + " 7\n", + " 7\n", + " 7\n", + " 5\n", + " 2023-06-20 23:47:09.557793\n", + " 2023-06-20 23:47:09.559325\n", " \n", " \n", "\n", @@ -1281,18 +1286,18 @@ ], "text/plain": [ " item_id item_category item_shop item_brand item_id_raw \\\n", - "0 1 1 1 1 5 \n", - "1 2 2 2 2 7 \n", - "2 3 3 3 3 10 \n", - "3 4 4 4 4 6 \n", - "4 5 5 5 5 8 \n", + "0 3 3 3 3 6 \n", + "1 4 4 4 4 7 \n", + "2 5 5 5 5 10 \n", + "3 6 6 6 6 8 \n", + "4 7 7 7 7 5 \n", "\n", " datetime created \n", - "0 2022-09-22 23:02:17.245267 2022-09-22 23:02:17.246515 \n", - "1 2022-09-22 23:02:17.245267 2022-09-22 23:02:17.246515 \n", - "2 2022-09-22 23:02:17.245267 2022-09-22 23:02:17.246515 \n", - "3 2022-09-22 23:02:17.245267 2022-09-22 23:02:17.246515 \n", - "4 2022-09-22 23:02:17.245267 2022-09-22 23:02:17.246515 " + "0 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", + "1 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", + "2 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", + "3 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 \n", + "4 2023-06-20 23:47:09.557793 2023-06-20 23:47:09.559325 " ] }, "execution_count": 32, @@ -1330,16 +1335,7 @@ "execution_count": 34, "id": "00f1fe65-882e-4962-bb16-19a130fda215", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:148: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "item_embs = model_tt.item_embeddings(\n", " Dataset(item_features, schema=schema), batch_size=1024\n", @@ -1413,123 +1409,123 @@ " \n", " \n", " 0\n", - " 1\n", - " 0.030497\n", - " 0.029997\n", - " 0.012621\n", - " -0.001204\n", - " 0.012877\n", - " -0.031165\n", - " -0.009491\n", - " -0.024208\n", - " -0.011206\n", + " 3\n", + " -0.055164\n", + " -0.018832\n", + " -0.009478\n", + " -0.016874\n", + " 0.015988\n", + " -0.022928\n", + " 0.022611\n", + " -0.030984\n", + " -0.045701\n", " ...\n", - " 0.010395\n", - " -0.044563\n", - " 0.002028\n", - " -0.011641\n", - " -0.017367\n", - " -0.016538\n", - " 0.003312\n", - " -0.020471\n", - " 0.016938\n", - " 0.037699\n", + " 0.007060\n", + " 0.032204\n", + " 0.011515\n", + " 0.012811\n", + " 0.002650\n", + " 0.023448\n", + " 0.021759\n", + " -0.011316\n", + " -0.035275\n", + " -0.004572\n", " \n", " \n", " 1\n", - " 2\n", - " 0.014305\n", - " 0.004831\n", - " -0.006791\n", - " -0.010725\n", - " 0.002375\n", - " -0.010010\n", - " -0.006006\n", - " -0.016317\n", - " 0.019688\n", + " 4\n", + " -0.027412\n", + " -0.007417\n", + " -0.023730\n", + " -0.008385\n", + " 0.028241\n", + " -0.004143\n", + " 0.001301\n", + " -0.040613\n", + " -0.020645\n", " ...\n", - " -0.023776\n", - " -0.028429\n", - " -0.039675\n", - " 0.035854\n", - " 0.007236\n", - " -0.001316\n", - " 0.014094\n", - " 0.024848\n", - " 0.023687\n", - " 0.020931\n", + " 0.001835\n", + " 0.010697\n", + " 0.006311\n", + " 0.007290\n", + " -0.014959\n", + " 0.025217\n", + " 0.041697\n", + " -0.012126\n", + " -0.022523\n", + " -0.001903\n", " \n", " \n", " 2\n", - " 3\n", - " 0.026491\n", - " -0.011876\n", - " 0.023269\n", - " -0.004026\n", - " 0.038133\n", - " 0.016866\n", - " -0.037301\n", - " -0.014816\n", - " 0.018586\n", + " 5\n", + " -0.009581\n", + " 0.016263\n", + " -0.027931\n", + " -0.023079\n", + " 0.006483\n", + " 0.006133\n", + " -0.027449\n", + " 0.027797\n", + " 0.045743\n", " ...\n", - " -0.016928\n", - " -0.003044\n", - " 0.017992\n", - " -0.043302\n", - " 0.000884\n", - " -0.027940\n", - " 0.005639\n", - " -0.008831\n", - " -0.009807\n", - " -0.000746\n", + " -0.003662\n", + " 0.054940\n", + " 0.013501\n", + " -0.004127\n", + " -0.001858\n", + " -0.000462\n", + " -0.018047\n", + " 0.036427\n", + " 0.009524\n", + " 0.006689\n", " \n", " \n", " 3\n", - " 4\n", - " 0.046828\n", - " 0.017710\n", - " -0.033954\n", - " -0.039186\n", - " 0.014467\n", - " -0.056866\n", - " -0.011080\n", - " 0.001606\n", - " -0.000757\n", + " 6\n", + " -0.007599\n", + " -0.012074\n", + " 0.024879\n", + " -0.008080\n", + " -0.025010\n", + " -0.000266\n", + " 0.005489\n", + " -0.014263\n", + " -0.019343\n", " ...\n", - " -0.014907\n", - " -0.020841\n", - " -0.039584\n", - " 0.009472\n", - " -0.009085\n", - " -0.037578\n", - " 0.006459\n", - " 0.008231\n", - " 0.010318\n", - " -0.005625\n", + " -0.030220\n", + " 0.011863\n", + " -0.008515\n", + " 0.011286\n", + " -0.000907\n", + " 0.014882\n", + " 0.035699\n", + " -0.007068\n", + " 0.012995\n", + " 0.001644\n", " \n", " \n", " 4\n", - " 5\n", - " 0.050902\n", - " -0.001969\n", - " -0.003946\n", - " -0.050269\n", - " -0.011292\n", - " -0.016854\n", - " -0.031103\n", - " -0.010389\n", - " 0.007709\n", + " 7\n", + " -0.070002\n", + " 0.001031\n", + " -0.001309\n", + " -0.014118\n", + " -0.036672\n", + " -0.012943\n", + " 0.009711\n", + " -0.008856\n", + " -0.032054\n", " ...\n", - " 0.009147\n", - " -0.000667\n", - " 0.019289\n", - " -0.006992\n", - " 0.018633\n", - " 0.013128\n", - " -0.017529\n", - " 0.040066\n", - " 0.040147\n", - " 0.035671\n", + " -0.023113\n", + " 0.000600\n", + " -0.005711\n", + " 0.044277\n", + " -0.004765\n", + " 0.016184\n", + " 0.028223\n", + " 0.002914\n", + " 0.032516\n", + " 0.026521\n", " \n", " \n", "\n", @@ -1538,25 +1534,25 @@ ], "text/plain": [ " item_id 0 1 2 3 4 5 \\\n", - "0 1 0.030497 0.029997 0.012621 -0.001204 0.012877 -0.031165 \n", - "1 2 0.014305 0.004831 -0.006791 -0.010725 0.002375 -0.010010 \n", - "2 3 0.026491 -0.011876 0.023269 -0.004026 0.038133 0.016866 \n", - "3 4 0.046828 0.017710 -0.033954 -0.039186 0.014467 -0.056866 \n", - "4 5 0.050902 -0.001969 -0.003946 -0.050269 -0.011292 -0.016854 \n", + "0 3 -0.055164 -0.018832 -0.009478 -0.016874 0.015988 -0.022928 \n", + "1 4 -0.027412 -0.007417 -0.023730 -0.008385 0.028241 -0.004143 \n", + "2 5 -0.009581 0.016263 -0.027931 -0.023079 0.006483 0.006133 \n", + "3 6 -0.007599 -0.012074 0.024879 -0.008080 -0.025010 -0.000266 \n", + "4 7 -0.070002 0.001031 -0.001309 -0.014118 -0.036672 -0.012943 \n", "\n", " 6 7 8 ... 54 55 56 57 \\\n", - "0 -0.009491 -0.024208 -0.011206 ... 0.010395 -0.044563 0.002028 -0.011641 \n", - "1 -0.006006 -0.016317 0.019688 ... -0.023776 -0.028429 -0.039675 0.035854 \n", - "2 -0.037301 -0.014816 0.018586 ... -0.016928 -0.003044 0.017992 -0.043302 \n", - "3 -0.011080 0.001606 -0.000757 ... -0.014907 -0.020841 -0.039584 0.009472 \n", - "4 -0.031103 -0.010389 0.007709 ... 0.009147 -0.000667 0.019289 -0.006992 \n", + "0 0.022611 -0.030984 -0.045701 ... 0.007060 0.032204 0.011515 0.012811 \n", + "1 0.001301 -0.040613 -0.020645 ... 0.001835 0.010697 0.006311 0.007290 \n", + "2 -0.027449 0.027797 0.045743 ... -0.003662 0.054940 0.013501 -0.004127 \n", + "3 0.005489 -0.014263 -0.019343 ... -0.030220 0.011863 -0.008515 0.011286 \n", + "4 0.009711 -0.008856 -0.032054 ... -0.023113 0.000600 -0.005711 0.044277 \n", "\n", " 58 59 60 61 62 63 \n", - "0 -0.017367 -0.016538 0.003312 -0.020471 0.016938 0.037699 \n", - "1 0.007236 -0.001316 0.014094 0.024848 0.023687 0.020931 \n", - "2 0.000884 -0.027940 0.005639 -0.008831 -0.009807 -0.000746 \n", - "3 -0.009085 -0.037578 0.006459 0.008231 0.010318 -0.005625 \n", - "4 0.018633 0.013128 -0.017529 0.040066 0.040147 0.035671 \n", + "0 0.002650 0.023448 0.021759 -0.011316 -0.035275 -0.004572 \n", + "1 -0.014959 0.025217 0.041697 -0.012126 -0.022523 -0.001903 \n", + "2 -0.001858 -0.000462 -0.018047 0.036427 0.009524 0.006689 \n", + "3 -0.000907 0.014882 0.035699 -0.007068 0.012995 0.001644 \n", + "4 -0.004765 0.016184 0.028223 0.002914 0.032516 0.026521 \n", "\n", "[5 rows x 65 columns]" ] @@ -1711,9 +1707,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.3.1)\n", - "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.1.0)\n", - "Requirement already satisfied: emoji in /usr/local/lib/python3.8/dist-packages (from seedir) (2.0.0)\n" + "Requirement already satisfied: seedir in /usr/local/lib/python3.8/dist-packages (0.4.2)\n", + "Requirement already satisfied: natsort in /usr/local/lib/python3.8/dist-packages (from seedir) (8.4.0)\n" ] } ], @@ -1732,23 +1727,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "feature_repo/\n", + "feast_repo/\n", + "├─README.md\n", "├─__init__.py\n", - "├─data/\n", - "│ ├─item_features.parquet\n", - "│ └─user_features.parquet\n", - "├─feature_store.yaml\n", - "├─item_features.py\n", - "└─user_features.py\n" + "└─feature_repo/\n", + " ├─__init__.py\n", + " ├─__pycache__/\n", + " │ ├─__init__.cpython-38.pyc\n", + " │ ├─example_repo.cpython-38.pyc\n", + " │ └─test_workflow.cpython-38.pyc\n", + " ├─data/\n", + " │ ├─item_features.parquet\n", + " │ └─user_features.parquet\n", + " ├─feature_store.yaml\n", + " ├─item_features.py\n", + " ├─test_workflow.py\n", + " └─user_features.py\n" ] } ], "source": [ "import seedir as sd\n", "\n", - "feature_repo_path = os.path.join(BASE_DIR, \"feast\")\n", "sd.seedir(\n", - " feature_repo_path,\n", + " os.path.join(BASE_DIR, \"feast_repo\"),\n", " style=\"lines\",\n", " itemlimit=10,\n", " depthlimit=3,\n", diff --git a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb index ed0728a7e..e2b0e5470 100644 --- a/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb +++ b/examples/Building-and-deploying-multi-stage-RecSys/02-Deploying-multi-stage-RecSys-with-Merlin-Systems.ipynb @@ -75,20 +75,59 @@ "\n", "In case you need to install them for running this example on GPU, execute the following script in a cell.\n", "```\n", - "%pip install \"feast<0.20\" faiss-gpu\n", + "%pip install \"feast==0.31\" faiss-gpu\n", "```\n", "or the following script in a cell for CPU.\n", "```\n", - "%pip install tensorflow-cpu \"feast<0.20\" faiss-cpu\n", + "%pip install tensorflow-cpu \"feast==0.31\" faiss-cpu\n", "```" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "4db1b5f1-c8fa-4e03-8744-1197873c5bee", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:37: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/cudf/utils/metadata/orc_column_statistics_pb2.py:30: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _INTEGERSTATISTICS = _descriptor.Descriptor(\n", + "2023-06-20 23:49:49.177129: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:18: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:36: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/tensor_shape_pb2.py:29: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _TENSORSHAPEPROTO_DIM = _descriptor.Descriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/types_pb2.py:19: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/types_pb2.py:33: DeprecationWarning: Call to deprecated create function EnumValueDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.EnumValueDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/types_pb2.py:27: DeprecationWarning: Call to deprecated create function EnumDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _DATATYPE = _descriptor.EnumDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/types_pb2.py:287: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/types_pb2.py:280: DeprecationWarning: Call to deprecated create function Descriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _SERIALIZEDDTYPE = _descriptor.Descriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:20: DeprecationWarning: Call to deprecated create function FileDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " DESCRIPTOR = _descriptor.FileDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/core/framework/resource_handle_pb2.py:39: DeprecationWarning: Call to deprecated create function FieldDescriptor(). Note: Create unlinked descriptors is going to go away. Please use get/find descriptors from generated code or query the descriptor_pool.\n", + " _descriptor.FieldDescriptor(\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "/usr/local/lib/python3.8/dist-packages/nvtabular/loader/__init__.py:19: DeprecationWarning: The `nvtabular.loader` module has moved to a new repository, at https://github.com/NVIDIA-Merlin/dataloader . Support for importing from `nvtabular.loader` is deprecated, and will be removed in a future version. Please update your imports to refer to `merlinloader`.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "import os\n", "import numpy as np\n", @@ -132,7 +171,7 @@ "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/Merlin/examples/Building-and-deploying-multi-stage-RecSys/\")\n", "\n", "# define feature repo path\n", - "feast_repo_path = os.path.join(BASE_DIR, \"feast/feature_repo/\")" + "feast_repo_path = os.path.join(BASE_DIR, \"feast_repo/feature_repo/\")" ] }, { @@ -145,16 +184,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feature_repo\n", - "/usr/local/lib/python3.8/dist-packages/feast/feature_view.py:100: DeprecationWarning: The argument 'input' is being deprecated. Please use 'batch_source' instead. Feast 0.13 and onwards will not support the argument 'input'.\n", - " warnings.warn(\n", - "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", + "/Merlin/examples/Building-and-deploying-multi-stage-RecSys/feast_repo/feature_repo\n", "Created entity \u001b[1m\u001b[32muser_id_raw\u001b[0m\n", - "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", + "Created entity \u001b[1m\u001b[32mitem_id\u001b[0m\n", "Created feature view \u001b[1m\u001b[32muser_features\u001b[0m\n", + "Created feature view \u001b[1m\u001b[32mitem_features\u001b[0m\n", "\n", - "Created sqlite table \u001b[1m\u001b[32mfeature_repo_item_features\u001b[0m\n", - "Created sqlite table \u001b[1m\u001b[32mfeature_repo_user_features\u001b[0m\n", + "Created sqlite table \u001b[1m\u001b[32mfeast_repo_item_features\u001b[0m\n", + "Created sqlite table \u001b[1m\u001b[32mfeast_repo_user_features\u001b[0m\n", "\n" ] } @@ -190,10 +227,10 @@ "text": [ "Materializing \u001b[1m\u001b[32m2\u001b[0m feature views from \u001b[1m\u001b[32m1995-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", "\n", - "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 437/437 [00:00<00:00, 3870.31it/s]\n", "\u001b[1m\u001b[32muser_features\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████| 442/442 [00:00<00:00, 1423.30it/s]\n" + "100%|███████████████████████████████████████████████████████████| 456/456 [00:00<00:00, 1136.51it/s]\n", + "\u001b[1m\u001b[32mitem_features\u001b[0m:\n", + "100%|███████████████████████████████████████████████████████████| 436/436 [00:00<00:00, 2878.99it/s]\n" ] } ], @@ -219,22 +256,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "feature_repo/\n", + "feast_repo/\n", + "├─README.md\n", "├─__init__.py\n", - "├─data/\n", - "│ ├─item_features.parquet\n", - "│ ├─online_store.db\n", - "│ ├─registry.db\n", - "│ └─user_features.parquet\n", - "├─feature_store.yaml\n", - "├─item_features.py\n", - "└─user_features.py\n" + "└─feature_repo/\n", + " ├─__init__.py\n", + " ├─data/\n", + " │ ├─item_features.parquet\n", + " │ ├─online_store.db\n", + " │ ├─registry.db\n", + " │ └─user_features.parquet\n", + " ├─feature_store.yaml\n", + " ├─item_features.py\n", + " ├─test_workflow.py\n", + " └─user_features.py\n" ] } ], "source": [ "# set up the base dir to for feature store\n", - "sd.seedir(os.path.join(BASE_DIR, 'feast'), style='lines', itemlimit=10, depthlimit=5, exclude_folders=['.ipynb_checkpoints', '__pycache__'], sort=True)" + "sd.seedir(os.path.join(BASE_DIR, 'feast_repo'), style='lines', itemlimit=10, depthlimit=5, exclude_folders=['.ipynb_checkpoints', '__pycache__'], sort=True)" ] }, { @@ -304,7 +345,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING clustering 437 points to 32 centroids: please provide at least 1248 training points\n" + "WARNING clustering 436 points to 32 centroids: please provide at least 1248 training points\n" ] } ], @@ -348,7 +389,24 @@ "execution_count": 11, "id": "3decbe7b-03e3-4978-baac-03f6a0b078c9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-20 23:50:04+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "\n", + "\u001b[1m\u001b[32muser_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-20 23:50:04+00:00\u001b[0m:\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "0it [00:00, ?it/s]\n" + ] + } + ], "source": [ "from merlin.systems.dag.ops.feast import QueryFeast \n", "\n", @@ -402,10 +460,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-09-14 15:28:46.303447: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-06-20 23:50:06.005776: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-09-14 15:28:47.443330: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16249 MB memory: -> device: 0, name: Quadro GV100, pci bus id: 0000:2d:00.0, compute capability: 7.0\n", - "09/14/2022 03:28:49 PM WARNING:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n" + "2023-06-20 23:50:09.981326: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1621] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8192 MB memory: -> device: 0, name: Tesla V100-SXM2-16GB-N, pci bus id: 0000:06:00.0, compute capability: 7.0\n", + "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 52). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp7n9o9yv2/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp7n9o9yv2/assets\n" ] } ], @@ -433,7 +505,24 @@ "execution_count": 14, "id": "b270f663-0ae1-4356-acd4-5f8c986abf4d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2023-06-20 23:50:17+00:00\u001b[0m into the \u001b[1m\u001b[32msqlite\u001b[0m online store.\n", + "\n", + "\u001b[1m\u001b[32mitem_features\u001b[0m from \u001b[1m\u001b[32m2025-01-01 01:01:01+00:00\u001b[0m to \u001b[1m\u001b[32m2023-06-20 23:50:17+00:00\u001b[0m:\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "0it [00:00, ?it/s]\n" + ] + } + ], "source": [ "item_features = retrieval[\"candidate_ids\"] >> QueryFeast.from_feature_view(\n", " store=feature_store,\n", @@ -492,7 +581,29 @@ "execution_count": 16, "id": "ce31723e-af4d-4827-bb60-3a9fafcd9da6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as restored_function_body, restored_function_body, restored_function_body, restored_function_body, restored_function_body while saving (showing 5 of 98). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpbt6mf1gw/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpbt6mf1gw/assets\n" + ] + } + ], "source": [ "ranking = combined_features >> PredictTensorflow(ranking_model_path)" ] @@ -580,7 +691,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['ordered_ids']\n" + "['ordered_ids', 'ordered_scores']\n" ] } ], @@ -615,50 +726,38 @@ "output_type": "stream", "text": [ "poc_ensemble/\n", - "├─0_queryfeast/\n", - "│ ├─1/\n", - "│ │ └─model.py\n", - "│ └─config.pbtxt\n", - "├─1_predicttensorflow/\n", + "├─0_predicttensorflowtriton/\n", "│ ├─1/\n", "│ │ └─model.savedmodel/\n", "│ │ ├─assets/\n", + "│ │ ├─fingerprint.pb\n", "│ │ ├─keras_metadata.pb\n", "│ │ ├─saved_model.pb\n", "│ │ └─variables/\n", "│ │ ├─variables.data-00000-of-00001\n", "│ │ └─variables.index\n", "│ └─config.pbtxt\n", - "├─2_queryfaiss/\n", - "│ ├─1/\n", - "│ │ ├─index.faiss/\n", - "│ │ │ └─index.faiss\n", - "│ │ └─model.py\n", - "│ └─config.pbtxt\n", - "├─3_queryfeast/\n", - "│ ├─1/\n", - "│ │ └─model.py\n", - "│ └─config.pbtxt\n", - "├─4_unrollfeatures/\n", - "│ ├─1/\n", - "│ │ └─model.py\n", - "│ └─config.pbtxt\n", - "├─5_predicttensorflow/\n", + "├─1_predicttensorflowtriton/\n", "│ ├─1/\n", "│ │ └─model.savedmodel/\n", + "│ │ ├─.merlin/\n", + "│ │ │ ├─input_schema.json\n", + "│ │ │ └─output_schema.json\n", "│ │ ├─assets/\n", + "│ │ ├─fingerprint.pb\n", "│ │ ├─keras_metadata.pb\n", "│ │ ├─saved_model.pb\n", "│ │ └─variables/\n", "│ │ ├─variables.data-00000-of-00001\n", "│ │ └─variables.index\n", "│ └─config.pbtxt\n", - "├─6_softmaxsampling/\n", - "│ ├─1/\n", - "│ │ └─model.py\n", - "│ └─config.pbtxt\n", "└─executor_model/\n", " ├─1/\n", + " │ ├─ensemble/\n", + " │ │ ├─ensemble.pkl\n", + " │ │ ├─index.faiss\n", + " │ │ └─metadata.json\n", + " │ └─model.py\n", " └─config.pbtxt\n" ] } @@ -750,26 +849,20 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "id": "74ec62f2-5935-45c6-8058-e1cdade6f80f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ordered_ids': array([[117],\n", - " [415],\n", - " [228],\n", - " [985],\n", - " [ 76],\n", - " [410],\n", - " [193],\n", - " [120],\n", - " [ 87],\n", - " [139]], dtype=int32)}" + "{'ordered_ids': array([[266, 381, 145, 6, 232, 651, 83, 244, 107, 69]], dtype=int32),\n", + " 'ordered_scores': array([[0.50194645, 0.50282484, 0.50340647, 0.5027974 , 0.50236404,\n", + " 0.50230837, 0.50244445, 0.5022982 , 0.50169003, 0.50216776]],\n", + " dtype=float32)}" ] }, - "execution_count": 25, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py b/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py index fa6fdbe3c..be647c654 100644 --- a/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py +++ b/tests/integration/examples/test_ci_building_deploying_multi_stage_RecSys.py @@ -24,7 +24,7 @@ def test_func(tmpdir): import os os.system("mkdir -p {tmpdir / 'examples/'}") os.system("mkdir -p {tmpdir / 'data/'}") - os.system("mkdir -p {tmpdir / 'feast/feature_repo/data/'}") + os.system("mkdir -p {tmpdir / 'feast_repo/feature_repo/data/'}") os.environ["DATA_FOLDER"] = "{tmpdir / 'data/'}" os.environ["BASE_DIR"] = "{tmpdir / 'examples/'}" """ @@ -45,14 +45,14 @@ def test_func(tmpdir): ) tb1.execute_cell(list(range(28, NUM_OF_CELLS))) assert os.path.isdir(f"{tmpdir / 'examples/dlrm'}") - assert os.path.isdir(f"{tmpdir / 'examples/feast/feature_repo'}") + assert os.path.isdir(f"{tmpdir / 'examples/feast_repo/feature_repo'}") assert os.path.isdir(f"{tmpdir / 'examples/query_tower'}") assert os.path.isfile(f"{tmpdir / 'examples/item_embeddings.parquet'}") assert os.path.isfile( - f"{tmpdir / 'examples/feast/feature_repo/user_features.py'}" + f"{tmpdir / 'examples/feast_repo/feature_repo/user_features.py'}" ) assert os.path.isfile( - f"{tmpdir / 'examples/feast/feature_repo/item_features.py'}" + f"{tmpdir / 'examples/feast_repo/feature_repo/item_features.py'}" ) with testbook( @@ -81,7 +81,7 @@ def test_func(tmpdir): from merlin.systems.triton.utils import run_ensemble_on_tritonserver import pandas as pd configure_tensorflow() - user_features = pd.read_parquet("{tmpdir / 'examples/feast/feature_repo/data/user_features.parquet'}") + user_features = pd.read_parquet("{tmpdir / 'examples/feast_repo/feature_repo/data/user_features.parquet'}") request = user_features[["user_id_raw"]].sample(1) request["user_id_raw"] = request["user_id_raw"].astype(np.int32) response = run_ensemble_on_tritonserver( diff --git a/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py b/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py index ad5408524..435742499 100644 --- a/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py +++ b/tests/unit/examples/test_building_deploying_multi_stage_RecSys.py @@ -26,7 +26,7 @@ def test_func(tmpdir): import os os.system("mkdir -p {tmpdir / 'examples/'}") os.system("mkdir -p {tmpdir / 'data/'}") - os.system("mkdir -p {tmpdir / 'feast/feature_repo/data/'}") + os.system("mkdir -p {tmpdir / 'feast_repo/feature_repo/data/'}") os.environ["DATA_FOLDER"] = "{tmpdir / 'data/'}" os.environ["NUM_ROWS"] = "100000" os.environ["BASE_DIR"] = "{tmpdir / 'examples/'}" @@ -34,14 +34,14 @@ def test_func(tmpdir): ) tb1.execute() assert os.path.isdir(f"{tmpdir / 'examples/dlrm'}") - assert os.path.isdir(f"{tmpdir / 'examples/feast/feature_repo'}") + assert os.path.isdir(f"{tmpdir / 'examples/feast_repo/feature_repo'}") assert os.path.isdir(f"{tmpdir / 'examples/query_tower'}") assert os.path.isfile(f"{tmpdir / 'examples/item_embeddings.parquet'}") assert os.path.isfile( - f"{tmpdir / 'examples/feast/feature_repo/user_features.py'}" + f"{tmpdir / 'examples/feast_repo/feature_repo/user_features.py'}" ) assert os.path.isfile( - f"{tmpdir / 'examples/feast/feature_repo/item_features.py'}" + f"{tmpdir / 'examples/feast_repo/feature_repo/item_features.py'}" ) with testbook(