Release updates #1294

Merged: 12 commits, Aug 7, 2023
16 changes: 8 additions & 8 deletions config.example/group_vars/all.yml
@@ -122,7 +122,9 @@ sftp_chroot: false
 ################################################################################
 # NVIDIA GPU configuration
 # Playbook: nvidia-cuda
-cuda_version: cuda-toolkit-11-5
+# Install latest version by default,
+# if you want a specific version, use i.e. cuda-toolkit=12.2.0-1
+# cuda_version: cuda-toolkit

 # DGX-specific vars may be used to target specific models,
 # because available versions for DGX may differ from the generic repo
@@ -146,9 +148,9 @@ nvidia_driver_force_install: false
 # Docker configuration
 # Playbook: docker, nvidia-docker, k8s-cluster
 #
-# For supported Docker versions, see: kubespray/roles/container-engine/docker/vars/*
+# For supported Docker versions, see: submodules/kubespray/roles/container-engine/docker/vars/*
 docker_install: yes
-docker_version: '20.10'
+# docker_version: 'latest'
 docker_dns_servers_strict: no
 docker_storage_options: -s overlay2
 #docker_options: "--bip=192.168.99.1/24"
@@ -196,7 +198,9 @@ enroot_environ_config_files_dgx:
 # Singularity configuration
 # Playbook: singularity, slurm-cluster
 # Singularity target version
-singularity_version: "3.7.3"
+# set an alternate singularity version here;
+# see roles/singularity_wrapper/defaults/main.yml for default
+# singularity_version:
 singularity_conf_path: "/etc/singularity/singularity.conf"
 bind_paths: []
 # example:
@@ -275,10 +279,6 @@ deepops_dir: /opt/deepops
 # Roles: K8s GPU operator, GPU plugin, OpenShift/K8s
 deepops_venv: '{{ deepops_dir }}/venv'

-# OpenMPI
-# Playbook: openmpi
-openmpi_version: 4.0.3
-
 # Disable cloud-init
 deepops_disable_cloud_init: true

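Since the defaults above now track the latest packages, anyone who needs reproducible builds can still pin versions by uncommenting the variables in config/group_vars/all.yml. A minimal sketch, using only version strings that appear elsewhere in this PR (the exact values are illustrative, not recommendations):

    # config/group_vars/all.yml -- optional pins on top of the new "latest" defaults
    cuda_version: "cuda-toolkit=12.2.0-1"   # apt version pin, per the comment added above
    docker_version: '20.10'                 # the previously pinned default, for illustration
    singularity_version: "3.11.4"           # matches roles/singularity_wrapper/defaults/main.yml
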
14 changes: 7 additions & 7 deletions config.example/group_vars/slurm-cluster.yml
@@ -3,7 +3,7 @@
 ################################################################################
 # Slurm job scheduler configuration
 # Playbook: slurm, slurm-cluster, slurm-perf, slurm-perf-cluster, slurm-validation
-slurm_version: "22.05.2"
+# slurm_version: ""
 slurm_install_prefix: /usr/local
 pmix_install_prefix: /opt/deepops/pmix
 hwloc_install_prefix: /opt/deepops/hwloc
@@ -137,10 +137,10 @@ sm_install_host: "slurm-master[0]"
 slurm_install_hpcsdk: true

 # Select the version of HPC SDK to download
-hpcsdk_major_version: "22"
-hpcsdk_minor_version: "1"
-hpcsdk_file_cuda: "11.5"
-hpcsdk_arch: "x86_64"
+#hpcsdk_major_version: ""
+#hpcsdk_minor_version: ""
+#hpcsdk_file_cuda: ""
+#hpcsdk_arch: "x86_64"

 # In a Slurm cluster, default to setting up HPC SDK as modules rather than in
 # the default user environment
@@ -156,7 +156,7 @@ hpcsdk_install_in_path: false
 # this can help you get started.
 ################################################################################
 slurm_cluster_install_openmpi: false
-openmpi_version: 4.0.4
+#openmpi_version:
 openmpi_install_prefix: "/usr/local"
 openmpi_configure: "./configure --prefix={{ openmpi_install_prefix }} --disable-dependency-tracking --disable-getpwuid --with-pmix={{ pmix_install_prefix }} --with-hwloc={{ hwloc_install_prefix }} --with-pmi={{ slurm_install_prefix }} --with-slurm={{ slurm_install_prefix }} --with-libevent=/usr"

@@ -185,7 +185,7 @@ allow_user_set_gpu_clocks: no
 ################################################################################
 slurm_install_enroot: true
 slurm_install_pyxis: true
-slurm_pyxis_version: 0.11.1
+#slurm_pyxis_version:

 # /run is default partition of pyxis runtime_path
 resize_run_partition: false
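
As with all.yml, these variables now defer to the role defaults unless explicitly set. A hedged sketch of re-pinning them in config/group_vars/slurm-cluster.yml; the Slurm and HPC SDK values come from the role defaults updated later in this PR, while the openmpi and pyxis values are simply the previous pins shown for illustration:

    slurm_version: "23.02.4"         # matches roles/slurm/defaults/main.yml
    hpcsdk_major_version: "23"       # matches roles/nvidia_hpc_sdk/defaults/main.yml
    hpcsdk_minor_version: "7"
    hpcsdk_file_cuda: "12.2"
    # openmpi_version: 4.0.4         # previous pin, illustrative only
    # slurm_pyxis_version: 0.11.1    # previous pin, illustrative only
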
2 changes: 1 addition & 1 deletion roles/nhc/defaults/main.yml
@@ -1,5 +1,5 @@
 ---
-nhc_version: "1.4.2"
+nhc_version: "1.4.3"
 nhc_src_url: "https://github.com/mej/nhc/releases/download/{{ nhc_version }}/lbnl-nhc-{{ nhc_version }}.tar.xz"
 nhc_install_dir: "/usr"
 nhc_config_dir: "/etc"
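
For reference, substituting the bumped version into the nhc_src_url template above resolves to:

    # nhc_src_url with nhc_version "1.4.3"
    # https://github.com/mej/nhc/releases/download/1.4.3/lbnl-nhc-1.4.3.tar.xz
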
6 changes: 6 additions & 0 deletions roles/nhc/molecule/default/molecule.yml
@@ -16,6 +16,12 @@ platforms:
       - /sys/fs/cgroup:/sys/fs/cgroup:ro
     privileged: true
     pre_build_image: true
+  - name: nhc-ubuntu-2204
+    image: geerlingguy/docker-ubuntu2204-ansible
+    volumes:
+      - /sys/fs/cgroup:/sys/fs/cgroup:ro
+    privileged: true
+    pre_build_image: true
   - name: nhc-centos-7
     image: geerlingguy/docker-centos7-ansible
     volumes:
5 changes: 5 additions & 0 deletions roles/nhc/vars/ubuntu-22.04.yml
@@ -0,0 +1,5 @@
+---
+nhc_build_deps:
+  - build-essential
+
+nhc_ssh_daemon: "sshd:"
5 changes: 3 additions & 2 deletions roles/nvidia_cuda/defaults/main.yml
@@ -1,6 +1,7 @@
 ---
-# 'cuda' is the generic package and will pull the latest version
-cuda_version: "cuda-toolkit-11-7"
+# 'cuda-toolkit' is the generic package and will pull the latest version
+# if you want a specific version, use i.e. cuda-toolkit=12.2.0-1
+cuda_version: cuda-toolkit

 # DGX-specific vars may be used to target specific models,
 # because available versions for DGX may differ from the generic repo
8 changes: 4 additions & 4 deletions roles/nvidia_hpc_sdk/defaults/main.yml
@@ -15,15 +15,15 @@
 # See https://developer.nvidia.com/nvidia-hpc-sdk-downloads for more detail on available downloads.

 # Version strings used to construct download URL
-hpcsdk_major_version: "22"
-hpcsdk_minor_version: "1"
-hpcsdk_file_cuda: "11.5"
+hpcsdk_major_version: "23"
+hpcsdk_minor_version: "7"
+hpcsdk_file_cuda: "12.2"
 hpcsdk_arch: "x86_64"

 # We need to specify the default CUDA toolkit to use during installation.
 # This should usually be the latest CUDA included in the HPC SDK you are
 # installing.
-hpcsdk_default_cuda: "11.5"
+hpcsdk_default_cuda: "12.2"

 # Add HPC SDK modules to the MODULEPATH?
 hpcsdk_install_as_modules: false
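
These strings are spliced into the HPC SDK download URL by the role (the URL template itself is not part of this diff). Assuming NVIDIA's usual artifact naming, the 23.7 / CUDA 12.2 / x86_64 combination would fetch a bundle named roughly as sketched below; treat the file name as an assumption and check the role's tasks or the downloads page for the authoritative form:

    # Assumed artifact name for hpcsdk_major_version "23", minor "7", cuda "12.2", arch "x86_64":
    # nvhpc_2023_237_Linux_x86_64_cuda_12.2.tar.gz
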
2 changes: 1 addition & 1 deletion roles/singularity_wrapper/defaults/main.yml
@@ -1,6 +1,6 @@
 ---
 # vars for lecorguille.singularity
-singularity_version: "3.7.3"
+singularity_version: "3.11.4"
 singularity_conf_path: "/etc/singularity/singularity.conf"
 bind_paths: []

2 changes: 1 addition & 1 deletion roles/slurm/defaults/main.yml
@@ -7,7 +7,7 @@ hwloc_build_dir: /opt/deepops/build/hwloc
 pmix_build_dir: /opt/deepops/build/pmix

 slurm_workflow_build: yes
-slurm_version: "22.05.2"
+slurm_version: "23.02.4"
 slurm_src_url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar.bz2"
 slurm_build_make_clean: no
 slurm_build_dir_cleanup: no
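
Plugging the new version into the slurm_src_url template above gives the source tarball the build will pull, shown only to illustrate the substitution:

    # slurm_src_url with slurm_version "23.02.4"
    # https://download.schedmd.com/slurm/slurm-23.02.4.tar.bz2
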
3 changes: 2 additions & 1 deletion roles/slurm/vars/ubuntu.yml
@@ -5,8 +5,9 @@ slurm_build_deps:
   - build-essential
   - libmunge-dev
   - libmariadb-dev
-  - libmariadbclient-dev
+  - libmariadbclient-dev-compat
   - libpam0g-dev
+  - libdbus-1-dev
   - python3-minimal
   - ruby-dev
   - wget