Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEBUG only {2023.06}[2023a] PyTorch-Bundle v2.1.2 -- tweaked hooks employing LD_PRELOAD #688

Open
wants to merge 20 commits into
base: 2023.06-software.eessi.io
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ echo "bot/build.sh: EESSI_OS_TYPE='${EESSI_OS_TYPE}'"
declare -a COMMON_ARGS=()
COMMON_ARGS+=("--verbose")
COMMON_ARGS+=("--access" "rw")
COMMON_ARGS+=("--mode" "run")
[[ ! -z ${CONTAINER} ]] && COMMON_ARGS+=("--container" "${CONTAINER}")
[[ ! -z ${HTTP_PROXY} ]] && COMMON_ARGS+=("--http-proxy" "${HTTP_PROXY}")
[[ ! -z ${HTTPS_PROXY} ]] && COMMON_ARGS+=("--https-proxy" "${HTTPS_PROXY}")
Expand All @@ -179,6 +178,28 @@ fi
[[ ! -z ${BUILD_LOGS_DIR} ]] && INSTALL_SCRIPT_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
[[ ! -z ${SHARED_FS_PATH} ]] && INSTALL_SCRIPT_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")

# The following is only a proof-of-concept to replace the container's /bin/bash
# with the bash from the compat layer:
#   1. obtain the bash executable: run 'cp' inside the container (shell mode)
#      so that a copy of the compat layer's bash ends up in the current
#      working directory
#   2. use it: bind mount that copy over /bin/bash via SINGULARITY_BIND
#
# EESSI_EPREFIX points to the compat layer, so an alternative would be
#   CMD='cp ${EESSI_EPREFIX}/bin/bash .'
# For now the path to the 2023.06 compat layer is spelled out.
CMD="cp /cvmfs/software.eessi.io/versions/2023.06/compat/linux/${HOST_ARCH}/bin/bash ."
get_bash_outerr=$(mktemp get_bash.outerr.XXXX)
echo "Executing command to obtain bash executable from compat layer:"
# use [*] (not [@]) when embedding the array in a single string that is only printed
echo "./eessi_container.sh ${COMMON_ARGS[*]} --mode shell --storage ${STORAGE} <<< ${CMD} 2>&1 | tee -a ${get_bash_outerr}"
./eessi_container.sh "${COMMON_ARGS[@]}" --mode shell --storage "${STORAGE}" <<< "${CMD}" 2>&1 | tee -a "${get_bash_outerr}"

compat_bash="${PWD}/bash"
# Only bind mount the copy if the 'cp' above actually produced an executable;
# a bind mount with a missing source would make all later container runs fail.
if [[ -x ${compat_bash} ]]; then
    if [[ -z ${SINGULARITY_BIND} ]]; then
        export SINGULARITY_BIND="${compat_bash}:/bin/bash"
    else
        # keep bind mounts that were already requested
        export SINGULARITY_BIND="${SINGULARITY_BIND},${compat_bash}:/bin/bash"
    fi
else
    echo "bot/build.sh: WARNING: no executable '${compat_bash}' obtained from compat layer (see ${get_bash_outerr}); not replacing /bin/bash in the container" >&2
fi

# all container invocations that follow use run mode
COMMON_ARGS+=("--mode" "run")
# determine if the removal step has to be run
# assume there's only one diff file that corresponds to the PR patch file
pr_diff=$(ls [0-9]*.diff | head -1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ easyconfigs:
options:
# see https://github.com/easybuilders/easybuild-easyconfigs/pull/20880
from-commit: bc6e08f89759b8b70166de5bfcb5056b9db8ec90
- libmad-0.15.1b-GCCcore-12.3.0.eb
- PyTorch-bundle-2.1.2-foss-2023a.eb
53 changes: 53 additions & 0 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,26 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix):
raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!")


def parse_hook_sentencepiece(ec, eprefix):
    """
    Add LD_PRELOAD to the second sanity check command of SentencePiece on AARCH64-based systems.

    Preloading `libtcmalloc_minimal.so` from gperftools avoids the
    "libtcmalloc_minimal.so.4: cannot allocate memory in static TLS block" error.
    See https://github.com/EESSI/software-layer/pull/585/#issuecomment-2286068465

    :param ec: parsed easyconfig; ec['sanity_check_commands'][1] is updated in place
    :param eprefix: not used by this hook; accepted because parse hooks are called with (ec, eprefix)
    :return: the easyconfig that was passed in (updated only for SentencePiece on AARCH64)
    :raises EasyBuildError: if the easyconfig has no sanity check command at index 1
    """
    # index of the sanity check command ("python -c 'import sentencepiece'") that needs LD_PRELOAD
    cmd_idx = 1

    if ec.name == "SentencePiece" and get_cpu_architecture() == AARCH64:
        # only report the change when it is actually made
        msg = f"Adding LD_PRELOAD to single sanity check command for {ec.name}"
        print(msg)
        ec.log.info(msg)

        # Shell variables do not seem to get expanded in sanity check commands,
        # so a hardcoded path to the gperftools installation is used for now.
        eessi_software_path = get_eessi_envvar('EESSI_SOFTWARE_PATH')
        libtcmalloc_path = os.path.join(
            eessi_software_path,
            'software',
            'gperftools',
            '2.12-GCCcore-12.3.0',
            'lib64',
            'libtcmalloc_minimal.so',
        )

        scc = ec['sanity_check_commands']
        if len(scc) <= cmd_idx:
            raise EasyBuildError(
                "SentencePiece-specific hook expected at least %d sanity check commands, found %d",
                cmd_idx + 1, len(scc),
            )
        ec['sanity_check_commands'][cmd_idx] = f"LD_PRELOAD={libtcmalloc_path} {scc[cmd_idx]}"

    return ec


def parse_hook_ucx_eprefix(ec, eprefix):
"""Make UCX aware of compatibility layer via additional configuration options."""
if ec.name == 'UCX':
Expand Down Expand Up @@ -725,6 +745,34 @@ def post_sanitycheck_cuda(self, *args, **kwargs):
raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!")


def post_sanitycheck_sentencepiece(self, *args, **kwargs):
    """
    Add LD_PRELOAD of `libtcmalloc_minimal.so` to the module footer of SentencePiece on AARCH64-based systems.

    Avoids "libtcmalloc_minimal.so.4: cannot allocate memory in static TLS block" error
    See https://github.com/EESSI/software-layer/pull/585/#issuecomment-2286068465

    :param self: easyblock instance; self.cfg['modluafooter'] is extended in place
    :param args: not used by this hook
    :param kwargs: not used by this hook
    """
    if self.name == "SentencePiece" and get_cpu_architecture() == AARCH64:
        # only report the change when it is actually made
        msg = f"Adding LD_PRELOAD to modluafooter for {self.name}"
        print(msg)
        self.log.info(msg)

        # We want LD_PRELOAD to load the libtcmalloc_minimal.so library from gperftools.
        # If LD_PRELOAD is already set we need to prepend to it rather than replace it;
        # colon separation is allowed for LD_PRELOAD, so prepend_path in the Lua footer
        # of the module is the easiest way to do that.
        ld_preload_footer = "\n".join([
            '',
            'libtcmalloc = pathJoin(os.getenv("EBROOTGPERFTOOLS"), "lib64", "libtcmalloc_minimal.so")',
            'prepend_path("LD_PRELOAD", libtcmalloc)',
            '',
        ])

        # do not throw away a footer that was set earlier (in the easyconfig or by another hook)
        current_footer = self.cfg['modluafooter']
        if not current_footer:
            self.cfg['modluafooter'] = ld_preload_footer
        elif 'prepend_path("LD_PRELOAD", libtcmalloc)' not in current_footer:
            self.cfg['modluafooter'] = "\n".join([current_footer, ld_preload_footer])


def pre_sanitycheck_hook(self, *args, **kwargs):
    """
    Main pre-sanitycheck hook.

    Looks up the software name in PRE_SANITYCHECK_HOOKS and, if a custom
    function is registered for it, calls that function with the same arguments.
    """
    software_name = self.name
    if software_name not in PRE_SANITYCHECK_HOOKS:
        # nothing registered for this software
        return

    custom_hook = PRE_SANITYCHECK_HOOKS[software_name]
    custom_hook(self, *args, **kwargs)


def inject_gpu_property(ec):
"""
Add 'gpu' property, via modluafooter easyconfig parameter
Expand Down Expand Up @@ -762,6 +810,7 @@ def inject_gpu_property(ec):
'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors,
'pybind11': parse_hook_pybind11_replace_catch2,
'Qt5': parse_hook_qt5_check_qtwebengine_disable,
'SentencePiece': parse_hook_sentencepiece,
'UCX': parse_hook_ucx_eprefix,
}

Expand Down Expand Up @@ -804,6 +853,10 @@ def inject_gpu_property(ec):
'numpy': post_single_extension_numpy,
}

# Custom functions triggered by the main pre-sanitycheck hook, keyed by software name
# (none are registered at the moment).
PRE_SANITYCHECK_HOOKS = dict()

# Custom post-sanitycheck functions, keyed by software name.
POST_SANITYCHECK_HOOKS = dict(
    CUDA=post_sanitycheck_cuda,
    SentencePiece=post_sanitycheck_sentencepiece,
)
Loading