From 14a5b3b8e129083aaf094bc4bf3ff4c69b01a825 Mon Sep 17 00:00:00 2001 From: qqiao Date: Tue, 18 Jun 2024 03:22:51 -0700 Subject: [PATCH 1/6] Add arrow and protobuf --- docker/dockerfile.merlin.ctr | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index 29d5d5fe7..cb390eb1f 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -80,10 +80,9 @@ RUN ln -s /usr/bin/python3 /usr/bin/python # 2023-10-06: onnxruntime==1.15.1 the latest version changed api which is not compatible with hugectr RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake<3.25.0" ninja scikit-build pandas==1.5.2 \ nvidia-pyindex pybind11 pytest \ - tensorflow-metadata \ "scikit-learn<1.2" \ tritonclient[all] -RUN pip install --no-cache-dir protobuf==3.20.3 onnx onnxruntime==1.15.1 pycuda +RUN pip install --no-cache-dir onnx onnxruntime pycuda RUN pip install --no-cache-dir onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com # Triton Server @@ -220,8 +219,10 @@ ENV PYTHONPATH=$PYTHONPATH:/usr/local/lib/python${PYTHON_VERSION}/dist-packages/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libcudf* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libarrow* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libparquet* /usr/lib/ +COPY --chown=1000:1000 --from=dlfw /usr/lib/libprotobuf* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Arrow /usr/lib/cmake/Arrow/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Parquet /usr/lib/cmake/Parquet/ +COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/protobuf /usr/lib/cmake/protobuf/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libnvcomp* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/include/fmt /usr/include/fmt/ @@ -244,6 +245,7 @@ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-p COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf.dist-info/ # Triton TF backends COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backends/tensorflow/ @@ -253,7 +255,7 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backe # Need to install transformers after tensorflow has been pulled in, so it builds artifacts correctly. # upgrade to 4.36.0 due to GHSA-3863-2447-669p # Torch Metrics and Lightning (without torch) -RUN pip install --no-cache-dir tensorflow==2.14.0 protobuf==3.20.3 wrapt==1.14.0 transformers==4.36.0 \ +RUN pip install --no-cache-dir tensorflow==2.15.0 wrapt==1.14.0 transformers==4.36.0 \ && pip uninstall tensorflow keras -y \ && pip install --no-cache-dir --no-deps torch torchmetrics \ && rm -rf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch \ From 0f3a11fa41344c9db8ddcea4f3bd437232640ce1 Mon Sep 17 00:00:00 2001 From: qqiao Date: Tue, 18 Jun 2024 04:45:22 -0700 Subject: [PATCH 2/6] Fix a typo --- docker/dockerfile.merlin.ctr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index cb390eb1f..24c2f7462 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -245,7 +245,7 @@ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-p COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy.dist-info/ -COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf.dist-info/ # Triton TF backends COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backends/tensorflow/ From 6ed22b3cf723bd6f10c696affe6d1d3278b1d0db Mon Sep 17 00:00:00 2001 From: qqiao Date: Tue, 18 Jun 2024 20:33:16 -0700 Subject: [PATCH 3/6] Add absl package which needs by protobuf --- docker/dockerfile.merlin.ctr | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index 24c2f7462..88ce41582 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -220,9 +220,11 @@ COPY --chown=1000:1000 --from=dlfw /usr/lib/libcudf* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libarrow* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libparquet* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libprotobuf* /usr/lib/ +COPY --chown=1000:1000 --from=dlfw /usr/lib/libabsl* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Arrow /usr/lib/cmake/Arrow/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/Parquet /usr/lib/cmake/Parquet/ COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/protobuf /usr/lib/cmake/protobuf/ +COPY --chown=1000:1000 --from=dlfw /usr/lib/cmake/absl /usr/lib/cmake/absl/ COPY --chown=1000:1000 --from=dlfw /usr/lib/libnvcomp* /usr/lib/ COPY --chown=1000:1000 --from=dlfw /usr/include/fmt /usr/include/fmt/ @@ -239,13 +241,14 @@ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-p COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupyx /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupyx COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_backends /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_backends - +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/absl /usr/local/lib/python${PYTHON_VERSION}/dist-packages/absl COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cudf-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cudf.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pyarrow.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy_*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cupy.dist-info/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/protobuf.dist-info/ +COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/absl_py*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/absl_py.dist-info/ # Triton TF backends COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backends/tensorflow/ From d8b7dad9875fae2eca68ecc7c88808b23af8036b Mon Sep 17 00:00:00 2001 From: qqiao Date: Wed, 19 Jun 2024 00:23:42 -0700 Subject: [PATCH 4/6] add missing header files for absl --- docker/dockerfile.merlin.ctr | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index 88ce41582..9af6ffdab 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -233,6 +233,7 @@ COPY --chown=1000:1000 --from=dlfw /usr/include/rmm /usr/include/rmm/ COPY --chown=1000:1000 --from=dlfw /usr/include/parquet /usr/include/parquet/ COPY --chown=1000:1000 --from=dlfw /usr/include/arrow /usr/include/arrow/ COPY --chown=1000:1000 --from=dlfw /usr/include/cudf /usr/include/cudf/ +COPY --chown=1000:1000 --from=dlfw /usr/include/absl /usr/include/absl/ COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm /usr/local/lib/python${PYTHON_VERSION}/dist-packages/rmm COPY --chown=1000:1000 --from=dlfw /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cuda /usr/local/lib/python${PYTHON_VERSION}/dist-packages/cuda From 3249d460fc24e5a1fa649d9e40c19cb36cac0ec5 Mon Sep 17 00:00:00 2001 From: qqiao Date: Wed, 26 Jun 2024 06:24:22 -0700 Subject: [PATCH 5/6] Update for new torch --- docker/dockerfile.merlin.ctr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index 9af6ffdab..d797702fc 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -261,8 +261,7 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorflow backe # Torch Metrics and Lightning (without torch) RUN pip install --no-cache-dir tensorflow==2.15.0 wrapt==1.14.0 transformers==4.36.0 \ && pip uninstall tensorflow keras -y \ - && pip install --no-cache-dir --no-deps torch torchmetrics \ - && rm -rf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch \ + && pip install --no-cache-dir --no-deps torchmetrics \ && rm -rf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/caffe2 # DLFW Tensorflow packages @@ -285,6 +284,7 @@ RUN ln -s /opt/tritonserver/backends/pytorch/* /usr/local/lib/ # DLFW Python packages COPY --chown=1000:1000 --from=torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch COPY --chown=1000:1000 --from=torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch.dist-info/ +COPY --chown=1000:1000 --from=torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen COPY --chown=1000:1000 --from=torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorrt /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorrt COPY --chown=1000:1000 --from=torch /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorrt-*.dist-info /usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorrt.dist-info/ From 3f4e03185165f508c1ef18a27b0c06cc732aecbd Mon Sep 17 00:00:00 2001 From: qqiao Date: Sat, 6 Jul 2024 01:50:38 +0000 Subject: [PATCH 6/6] Add numpy because it is needed by hps_torch_plugin --- docker/dockerfile.merlin.ctr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/dockerfile.merlin.ctr b/docker/dockerfile.merlin.ctr index d797702fc..53275a008 100644 --- a/docker/dockerfile.merlin.ctr +++ b/docker/dockerfile.merlin.ctr @@ -82,7 +82,7 @@ RUN pip install --no-cache-dir --upgrade pip; pip install --no-cache-dir "cmake< nvidia-pyindex pybind11 pytest \ "scikit-learn<1.2" \ tritonclient[all] -RUN pip install --no-cache-dir onnx onnxruntime pycuda +RUN pip install --no-cache-dir numpy==1.22.4 onnx onnxruntime pycuda RUN pip install --no-cache-dir onnx_graphsurgeon --index-url https://pypi.ngc.nvidia.com # Triton Server