Merge pull request #14 from oracle-devrel/cw24

feat: added files from nvidia-nim-oke

WSPluta authored Sep 7, 2024
2 parents 0bf194f + 6a74070 commit d63c8b1

Showing 32 changed files with 895 additions and 0 deletions.
481 changes: 481 additions & 0 deletions nvidia-nim-oke/README.md

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions nvidia-nim-oke/helm/Chart.yaml
@@ -0,0 +1,15 @@
apiVersion: v2
name: nim-llm
description: A Helm chart for NVIDIA NIM for LLMs
type: application
kubeVersion: ">=v1.23.0-0"
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.2.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.0.0"
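
Before installing, the chart metadata above can be checked locally. A minimal sketch, assuming the commands run from the repository root and using an illustrative release name:

$ helm lint nvidia-nim-oke/helm
$ helm template nim-llm nvidia-nim-oke/helm -f nvidia-nim-oke/helm/values.yaml
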
27 changes: 27 additions & 0 deletions nvidia-nim-oke/helm/values.yaml
@@ -0,0 +1,27 @@
image:
  repository: nvcr.io/nim/meta/llama3-8b-instruct:latest
  pullPolicy: IfNotPresent

model: llama3-8b-instruct
numGpus: 1

service:
  type: ClusterIP
  openaiPort: 8000
  annotations: {}
  labels: {}
  name: ""  # override the default service name
  # the options below are deprecated
  # http_port: 8000  # exposes the HTTP interface used in health checks to the service
  # grpc_port: 8001  # exposes the Triton gRPC interface
  # metrics_port: 8002  # exposes metrics through the main service
  # openai_port: 8005
  # nemo_port: 8006

mount:
  name: /opt/nim/cache

imageCredentials:
  registry: nvcr.io
  username: $oauthtoken
  password: <YOUR_KEY_FROM_NVIDIA>
  email: <YOUR_EMAIL>
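
The imageCredentials block implies an image pull secret for nvcr.io. A minimal sketch of creating that secret and installing the chart with these values; the secret name registry-secret matches the pod specs later in this diff, and the release name is illustrative:

$ kubectl create secret docker-registry registry-secret \
    --docker-server=nvcr.io \
    --docker-username='$oauthtoken' \
    --docker-password="$NGC_API_KEY" \
    --docker-email='<YOUR_EMAIL>'
$ helm install nim-llm nvidia-nim-oke/helm -f nvidia-nim-oke/helm/values.yaml
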
17 changes: 17 additions & 0 deletions nvidia-nim-oke/helm/values_triton.yaml
@@ -0,0 +1,17 @@
image:
  imageName: nvcr.io/nvidia/tritonserver:latest
  pullPolicy: IfNotPresent

model: llama3-8b-instruct
numGpus: 1

service:
  type: LoadBalancer

mount:
  name: /opt/nim/cache

imageCredentials:
  registry: nvcr.io
  username: $oauthtoken
  password: <YOUR_KEY_FROM_NVIDIA>
  email: <YOUR_EMAIL>
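
With service type LoadBalancer, OKE provisions a public load balancer for the release. A sketch of installing with these values and waiting for the external address (release name illustrative; the rendered service name depends on the chart templates):

$ helm install nim-triton nvidia-nim-oke/helm -f nvidia-nim-oke/helm/values_triton.yaml
$ kubectl get svc -w   # wait for EXTERNAL-IP to change from <pending> to a public address
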
Binary file added nvidia-nim-oke/img/1.PNG
Binary file added nvidia-nim-oke/img/AccessCluster.png
Binary file added nvidia-nim-oke/img/api_key.PNG
Binary file added nvidia-nim-oke/img/cloud-init.PNG
Binary file added nvidia-nim-oke/img/completions_openai.PNG
Binary file added nvidia-nim-oke/img/creation_1.PNG
Binary file added nvidia-nim-oke/img/creation_2.PNG
Binary file added nvidia-nim-oke/img/creation_3.PNG
Binary file added nvidia-nim-oke/img/custom_ssh_key.PNG
Binary file added nvidia-nim-oke/img/custom_volume_size.PNG
Binary file added nvidia-nim-oke/img/get_nodes.PNG
Binary file added nvidia-nim-oke/img/inference.PNG
Binary file added nvidia-nim-oke/img/invoke_model_python.PNG
Binary file added nvidia-nim-oke/img/ngc_arch.PNG
Binary file added nvidia-nim-oke/img/nvidia_nim.PNG
Binary file added nvidia-nim-oke/img/similar_output.PNG
Binary file added nvidia-nim-oke/img/token_generations.PNG
19 changes: 19 additions & 0 deletions nvidia-nim-oke/kserve/kserve_llama3.yaml
@@ -0,0 +1,19 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    autoscaling.knative.dev/target: "10"
  name: llama3-8b-instruct-1xgpu
spec:
  predictor:
    minReplicas: 1
    model:
      modelFormat:
        name: nvidia-nim-llama3-8b-instruct
      resources:
        limits:
          nvidia.com/gpu: "1"
        requests:
          nvidia.com/gpu: "1"
      runtime: nvidia-nim-llama3-8b-instruct-24.05
      storageUri: pvc://nvidia-nim-pvc/
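
A sketch of deploying this InferenceService, assuming KServe and the nvidia-nim-llama3-8b-instruct-24.05 serving runtime are installed and the nvidia-nim-pvc persistent volume claim already holds the model cache:

$ kubectl apply -f nvidia-nim-oke/kserve/kserve_llama3.yaml
$ kubectl get inferenceservice llama3-8b-instruct-1xgpu   # wait for READY to report True
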
17 changes: 17 additions & 0 deletions nvidia-nim-oke/pod/docker-pod.yaml
@@ -0,0 +1,17 @@
apiVersion: v1
kind: Pod
metadata:
  name: docker
  labels:
    name: docker
spec:
  containers:
    - name: docker
      image: docker:latest
      securityContext:
        privileged: true
      command: ["tail", "-f", "/dev/null"]
      resources:
        limits:
          nvidia.com/gpu: 1
  hostNetwork: true
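
This privileged pod is a utility container for Docker experiments on a GPU node. A sketch of starting it and checking the Docker CLI inside (the pod name docker comes from the manifest; a daemon such as dockerd must still be started in the pod before containers can run):

$ kubectl apply -f nvidia-nim-oke/pod/docker-pod.yaml
$ kubectl exec -it docker -- docker --version
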
21 changes: 21 additions & 0 deletions nvidia-nim-oke/pod/llama3-pod.yaml
@@ -0,0 +1,21 @@
apiVersion: v1
kind: Pod
metadata:
  name: docker
  labels:
    name: docker
spec:
  containers:
    - name: docker
      image: nvcr.io/nim/meta/llama3-8b-instruct:latest
      securityContext:
        privileged: true
      command: ["tail", "-f", "/dev/null"]
      resources:
        limits:
          nvidia.com/gpu: 1
          #ephemeral-storage: "100Gi"
  hostNetwork: true
  imagePullSecrets:
    - name: registry-secret
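
A sketch of running this pod once the registry-secret pull secret exists, then confirming the GPU is visible from inside; this assumes the NVIDIA device plugin and container runtime expose nvidia-smi in the container:

$ kubectl apply -f nvidia-nim-oke/pod/llama3-pod.yaml
$ kubectl exec -it docker -- nvidia-smi
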
21 changes: 21 additions & 0 deletions nvidia-nim-oke/pod/testpod.yaml
@@ -0,0 +1,21 @@
apiVersion: v1
kind: Pod
metadata:
  name: docker
  labels:
    name: docker
spec:
  containers:
    - name: docker
      #image: ubuntu
      image: nvcr.io/nim/meta/llama3-8b-instruct:latest
      securityContext:
        privileged: true
      command: ["tail", "-f", "/dev/null"]
      resources:
        limits:
          nvidia.com/gpu: 1
  hostNetwork: true
  imagePullSecrets:
    - name: registry-secret
15 changes: 15 additions & 0 deletions nvidia-nim-oke/pod/time-slicing-config-all.yaml
@@ -0,0 +1,15 @@
# Configure GPU time-slicing if you have fewer than four GPUs.
apiVersion: v1
kind: ConfigMap
metadata:
  name: time-slicing-config-all
data:
  any: |-
    version: v1
    flags:
      migStrategy: none
    sharing:
      timeSlicing:
        resources:
          - name: nvidia.com/gpu
            replicas: 4
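
A sketch of enabling this ConfigMap through the NVIDIA GPU Operator, assuming the operator runs in the gpu-operator namespace; the patch points the device plugin at the config and selects the any profile:

$ kubectl apply -n gpu-operator -f nvidia-nim-oke/pod/time-slicing-config-all.yaml
$ kubectl patch clusterpolicies.nvidia.com/cluster-policy -n gpu-operator --type merge \
    -p '{"spec": {"devicePlugin": {"config": {"name": "time-slicing-config-all", "default": "any"}}}}'
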
18 changes: 18 additions & 0 deletions nvidia-nim-oke/pod/ubuntu-pod.yaml
@@ -0,0 +1,18 @@
apiVersion: v1
kind: Pod
metadata:
  name: docker
  labels:
    name: docker
spec:
  containers:
    - name: docker
      image: ubuntu:latest
      securityContext:
        privileged: true
      command: ["tail", "-f", "/dev/null"]
      resources:
        limits:
          nvidia.com/gpu: 1
          #ephemeral-storage: "100Gi"
  hostNetwork: true
128 changes: 128 additions & 0 deletions nvidia-nim-oke/scripts/alpine_cuda.txt
@@ -0,0 +1,128 @@
The author of this guide is Arto Bendiken, from https://arto.s3.amazonaws.com/

Drivers

https://developer.nvidia.com/vulkan-driver

$ lsmod | fgrep nvidia

$ nvidia-smi

Driver Installation

https://us.download.nvidia.com/XFree86/Linux-x86_64/390.77/README/
https://github.com/NVIDIA/nvidia-installer

Driver Installation on Alpine Linux

https://github.com/sgerrand/alpine-pkg-glibc
https://github.com/sgerrand/alpine-pkg-glibc/releases
https://wiki.alpinelinux.org/wiki/Running_glibc_programs

$ apk add sudo bash ca-certificates wget xz make gcc linux-headers

$ wget -q -O /etc/apk/keys/sgerrand.rsa.pub https://raw.githubusercontent.com/sgerrand/alpine-pkg-glibc/master/sgerrand.rsa.pub

$ wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.27-r0/glibc-2.27-r0.apk
$ wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.27-r0/glibc-bin-2.27-r0.apk
$ wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.27-r0/glibc-dev-2.27-r0.apk
$ wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.27-r0/glibc-i18n-2.27-r0.apk

$ apk add glibc-2.27-r0.apk glibc-bin-2.27-r0.apk glibc-dev-2.27-r0.apk glibc-i18n-2.27-r0.apk

$ /usr/glibc-compat/bin/localedef -i en_US -f UTF-8 en_US.UTF-8

$ bash NVIDIA-Linux-x86_64-390.77.run --check

$ bash NVIDIA-Linux-x86_64-390.77.run --extract-only

$ cd NVIDIA-Linux-x86_64-390.77 && ./nvidia-installer

Driver Uninstallation

$ nvidia-uninstall

Driver Troubleshooting

Uncompressing NVIDIA Accelerated Graphics Driver for Linux-x86_64 390.77
NVIDIA-Linux-x86_64-390.77.run: line 998: /tmp/makeself.XXX/xz: No such file or directory
Extraction failed.

$ apk add xz # Alpine Linux

bash: ./nvidia-installer: No such file or directory

Install the glibc compatibility layer package for Alpine Linux.

ERROR: You do not appear to have libc header files installed on your system. Please install your distribution's libc development package.

$ apk add musl-dev # Alpine Linux

ERROR: Unable to find the kernel source tree for the currently running kernel. Please make sure you have installed the kernel source files for your kernel and that they are properly configured.

$ apk add linux-vanilla-dev # Alpine Linux

ERROR: Failed to execute `/sbin/ldconfig`: The installer has encountered the following error during installation: 'Failed to execute `/sbin/ldconfig`'. Would you like to continue installation anyway?

Continue installation.

Toolkit

https://developer.nvidia.com/cuda-toolkit
https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/

Toolkit Download

https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&target_distro=Ubuntu&target_version=1604&target_type=runfilelocal

$ wget -c https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers/cuda_9.2.148_396.37_linux

Toolkit Installation

https://docs.nvidia.com/cuda/cuda-installation-guide-linux/

Toolkit Installation on Alpine Linux

$ apk add sudo bash

$ sudo bash cuda_9.2.148_396.37_linux

# You are attempting to install on an unsupported configuration. Do you wish to continue? y
# Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 396.37? y
# Do you want to install the OpenGL libraries? y
# Do you want to run nvidia-xconfig? n
# Install the CUDA 9.2 Toolkit? y
# Enter Toolkit Location: /opt/cuda-9.2
# Do you want to install a symbolic link at /usr/local/cuda? y
# Install the CUDA 9.2 Samples? y
# Enter CUDA Samples Location: /opt/cuda-9.2/samples

$ sudo ln -s cuda-9.2 /opt/cuda

$ export PATH="/opt/cuda/bin:$PATH"

Toolkit Uninstallation

$ sudo /opt/cuda-9.2/bin/uninstall_cuda_9.2.pl

Toolkit Troubleshooting

Cannot find termcap: Can't find a valid termcap file at /usr/share/perl5/core_perl/Term/ReadLine.pm line 377.

$ export PERL_RL="Perl o=0"

gcc: error trying to exec 'cc1plus': execvp: No such file or directory

$ apk add g++ # Alpine Linux

cicc: Relink `/usr/lib/libgcc_s.so.1' with `/usr/glibc-compat/lib/libc.so.6' for IFUNC symbol `memset'

https://github.com/sgerrand/alpine-pkg-glibc/issues/58

$ scp /lib/x86_64-linux-gnu/libgcc_s.so.1 root@alpine:/usr/glibc-compat/lib/libgcc_s.so.1

$ sudo /usr/glibc-compat/sbin/ldconfig /usr/glibc-compat/lib /lib /usr/lib

Compiler

https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/

$ nvcc -V
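
As an end-to-end check, one of the bundled samples can be compiled and run; a sketch assuming the samples were installed to /opt/cuda-9.2/samples as above:

$ cd /opt/cuda/samples/1_Utilities/deviceQuery
$ sudo make
$ ./deviceQuery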
46 changes: 46 additions & 0 deletions nvidia-nim-oke/scripts/fresh_ubuntu_install.sh
@@ -0,0 +1,46 @@
#!/bin/bash
# author @jasperan
# this script installs all required dependencies on a fresh Ubuntu image so that you can run NVIDIA container runtime workloads on Docker.

# install sudo, curl (to download docker), gnupg2
apt-get update -y && apt-get install sudo curl gnupg2 -y

# declare environment variable
export NGC_API_KEY=<YOUR_NVIDIA_NGC_API_KEY>

# download and install docker
curl -fsSL https://get.docker.com -o get-docker.sh
sh get-docker.sh

# install the NVIDIA Container Toolkit (required to run GPU-accelerated container images on NVIDIA GPUs)

curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list


sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update -y
sudo apt-get install -y nvidia-container-toolkit
sudo apt install nvidia-cuda-toolkit -y
sudo apt install nvidia-driver-525 -y # for Ubuntu 22.04; change this to the driver recommended for your GPU
# you can find the recommended driver for your GPU by running: sudo ubuntu-drivers devices

# run the NIM image as a Docker container.

# Choose a container name for bookkeeping
export CONTAINER_NAME=llama3-8b-instruct
export IMG_NAME="nvcr.io/nim/meta/llama3-8b-instruct:latest"
export LOCAL_NIM_CACHE="/home/ubuntu/nim/cache"
mkdir -p "$LOCAL_NIM_CACHE"

# log in to NVIDIA NGC so images can be pulled from nvcr.io.

echo "$NGC_API_KEY" | docker login nvcr.io --username '$oauthtoken' --password-stdin

# launch dockerd in the background if it is not already running.
nohup dockerd &
# Start the LLM NIM
docker run -it --privileged --rm --name=$CONTAINER_NAME --runtime=nvidia --gpus 1 --env NGC_API_KEY="$NGC_API_KEY" -v "$LOCAL_NIM_CACHE:/opt/nim/cache" -u $(id -u) -p 8000:8000 $IMG_NAME
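
Once the container logs show the server is ready, the OpenAI-compatible endpoint can be exercised from the same host. A minimal sketch; the model name follows NIM's meta/llama3-8b-instruct convention:

$ curl -s http://localhost:8000/v1/completions \
    -H 'Content-Type: application/json' \
    -d '{"model": "meta/llama3-8b-instruct", "prompt": "What is OKE?", "max_tokens": 32}'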