Skip to content

Commit

Permalink
Merge pull request #201 from bedroge/map_bucket_to_cvmfs_repo
Browse files Browse the repository at this point in the history
[Automated ingestion] Map S3 buckets to CVMFS repositories
  • Loading branch information
boegel authored Oct 8, 2024
2 parents ca1aa51 + 4158341 commit 86053f0
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 39 deletions.
9 changes: 6 additions & 3 deletions scripts/automated_ingestion/automated_ingestion.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ ingestion_script = /absolute/path/to/ingest-tarball.sh
metadata_file_extension = .meta.txt

[aws]
staging_buckets = eessi-staging, eessi-staging-2023.06
staging_buckets = {
"software.eessi.io-2023.06": "software.eessi.io",
"dev.eessi.io-2024.09": "dev.eessi.io",
"riscv.eessi.io-20240402": "riscv.eessi.io" }

[cvmfs]
ingest_as_root = yes
Expand Down Expand Up @@ -44,7 +47,7 @@ failed_tarball_overview_issue_body = An error occurred while trying to get the c
```
pr_body = A new tarball has been staged for {pr_url}.
Please review the contents of this tarball carefully.
Merging this PR will lead to automatic ingestion of the tarball.
Merging this PR will lead to automatic ingestion of the tarball to the repository {cvmfs_repo}.

<details>
<summary>Metadata of tarball</summary>
Expand All @@ -64,7 +67,7 @@ pr_body = A new tarball has been staged for {pr_url}.

[slack]
ingestion_notification = yes
ingestion_message = Tarball `{tarball}` has been ingested into the CVMFS repository.
ingestion_message = Tarball `{tarball}` has been ingested into the CVMFS repository `{cvmfs_repo}`.

[logging]
level = WARN
Expand Down
7 changes: 4 additions & 3 deletions scripts/automated_ingestion/automated_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import botocore
import configparser
import github
import json
import logging
import os
import pid
Expand Down Expand Up @@ -99,15 +100,15 @@ def main():
aws_secret_access_key=config['secrets']['aws_secret_access_key'],
)

buckets = [x.strip() for x in config['aws']['staging_buckets'].split(',')]
for bucket in buckets:
buckets = json.loads(config['aws']['staging_buckets'])
for bucket, cvmfs_repo in buckets.items():
tarballs = find_tarballs(s3, bucket)
if args.list_only:
for num, tarball in enumerate(tarballs):
print(f'[{bucket}] {num}: {tarball}')
else:
for tarball in tarballs:
tar = EessiTarball(tarball, config, gh_staging_repo, s3, bucket)
tar = EessiTarball(tarball, config, gh_staging_repo, s3, bucket, cvmfs_repo)
tar.run_handler()


Expand Down
11 changes: 7 additions & 4 deletions scripts/automated_ingestion/eessitarball.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ class EessiTarball:
for which it interfaces with the S3 bucket, GitHub, and CVMFS.
"""

def __init__(self, object_name, config, git_staging_repo, s3, bucket):
def __init__(self, object_name, config, git_staging_repo, s3, bucket, cvmfs_repo):
"""Initialize the tarball object."""
self.config = config
self.git_repo = git_staging_repo
self.metadata_file = object_name + config['paths']['metadata_file_extension']
self.object = object_name
self.s3 = s3
self.bucket = bucket
self.cvmfs_repo = cvmfs_repo
self.local_path = os.path.join(config['paths']['download_dir'], os.path.basename(object_name))
self.local_metadata_path = self.local_path + config['paths']['metadata_file_extension']
self.url = f'https://{bucket}.s3.amazonaws.com/{object_name}'
Expand Down Expand Up @@ -177,7 +178,7 @@ def ingest(self):
sudo = ['sudo'] if self.config['cvmfs'].getboolean('ingest_as_root', True) else []
logging.info(f'Running the ingestion script for {self.object}...')
ingest_cmd = subprocess.run(
sudo + [script, self.local_path],
sudo + [script, self.cvmfs_repo, self.local_path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
if ingest_cmd.returncode == 0:
Expand All @@ -186,7 +187,7 @@ def ingest(self):
if self.config.has_section('slack') and self.config['slack'].getboolean('ingestion_notification', False):
send_slack_message(
self.config['secrets']['slack_webhook'],
self.config['slack']['ingestion_message'].format(tarball=os.path.basename(self.object))
self.config['slack']['ingestion_message'].format(tarball=os.path.basename(self.object), cvmfs_repo=self.cvmfs_repo)
)
else:
issue_title = f'Failed to ingest {self.object}'
Expand Down Expand Up @@ -295,11 +296,13 @@ def make_approval_request(self):
try:
tarball_contents = self.get_contents_overview()
pr_body = self.config['github']['pr_body'].format(
cvmfs_repo=self.cvmfs_repo,
pr_url=pr_url,
tar_overview=self.get_contents_overview(),
metadata=metadata,
)
self.git_repo.create_pull(title='Ingest ' + filename, body=pr_body, head=git_branch, base='main')
pr_title = '[%s] Ingest %s' % (self.cvmfs_repo, filename)
self.git_repo.create_pull(title=pr_title, body=pr_body, head=git_branch, base='main')
except Exception as err:
issue_title = f'Failed to get contents of {self.object}'
issue_body = self.config['github']['failed_tarball_overview_issue_body'].format(
Expand Down
55 changes: 30 additions & 25 deletions scripts/ingest-tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

# Only if it passes these checks, the tarball gets ingested to the base dir in the CVMFS repository passed as the first argument.

repo=software.eessi.io
basedir=versions
decompress="gunzip -c"
cvmfs_server="cvmfs_server"
Expand Down Expand Up @@ -42,15 +41,15 @@ function error() {
}

function is_repo_owner() {
if [ -f "/etc/cvmfs/repositories.d/${repo}/server.conf" ]
if [ -f "/etc/cvmfs/repositories.d/${cvmfs_repo}/server.conf" ]
then
. "/etc/cvmfs/repositories.d/${repo}/server.conf"
. "/etc/cvmfs/repositories.d/${cvmfs_repo}/server.conf"
[ x"$(whoami)" = x"$CVMFS_USER" ]
fi
}

function check_repo_vars() {
if [ -z "${repo}" ]
if [ -z "${cvmfs_repo}" ]
then
error "the 'repo' variable has to be set to the name of the CVMFS repository."
fi
Expand All @@ -73,8 +72,8 @@ function check_version() {
fi

# Check if the EESSI version number encoded in the filename
# is valid, i.e. matches the format YYYY.DD
if ! echo "${version}" | egrep -q '^20[0-9][0-9]\.(0[0-9]|1[0-2])$'
# is valid, i.e. matches the format YYYY.MM or YYYYMMDD
if ! echo "${version}" | egrep '(^20[0-9][0-9]\.(0[0-9]|1[0-2])$)|(^20[0-9][0-9][0-9][0-9][0-9][0-9]$)'
then
error "${version} is not a valid EESSI version."
fi
Expand Down Expand Up @@ -113,28 +112,28 @@ function check_contents_type() {
function cvmfs_regenerate_nested_catalogs() {
# Use the .cvmfsdirtab to generate nested catalogs for the ingested tarball
echo "Generating the nested catalogs..."
${cvmfs_server} transaction "${repo}"
${cvmfs_server} publish -m "Generate catalogs after ingesting ${tar_file_basename}" "${repo}"
${cvmfs_server} transaction "${cvmfs_repo}"
${cvmfs_server} publish -m "Generate catalogs after ingesting ${tar_file_basename}" "${cvmfs_repo}"
ec=$?
if [ $ec -eq 0 ]
then
echo_green "Nested catalogs for ${repo} have been created!"
echo_green "Nested catalogs for ${cvmfs_repo} have been created!"
else
echo_red "failure when creating nested catalogs for ${repo}."
echo_red "failure when creating nested catalogs for ${cvmfs_repo}."
fi
}

function cvmfs_ingest_tarball() {
# Do a regular "cvmfs_server ingest" for a given tarball,
# followed by regenerating the nested catalog
echo "Ingesting tarball ${tar_file} to ${repo}..."
${decompress} "${tar_file}" | ${cvmfs_server} ingest -t - -b "${basedir}" "${repo}"
echo "Ingesting tarball ${tar_file} to ${cvmfs_repo}..."
${decompress} "${tar_file}" | ${cvmfs_server} ingest -t - -b "${basedir}" "${cvmfs_repo}"
ec=$?
if [ $ec -eq 0 ]
then
echo_green "${tar_file} has been ingested to ${repo}."
echo_green "${tar_file} has been ingested to ${cvmfs_repo}."
else
error "${tar_file} could not be ingested to ${repo}."
error "${tar_file} could not be ingested to ${cvmfs_repo}."
fi

# "cvmfs_server ingest" doesn't automatically rebuild the nested catalogs,
Expand Down Expand Up @@ -180,9 +179,9 @@ function update_lmod_caches() {
then
error "the script for updating the Lmod caches (${update_caches_script}) does not have execute permissions!"
fi
${cvmfs_server} transaction "${repo}"
${update_caches_script} /cvmfs/${repo}/${basedir}/${version}
${cvmfs_server} publish -m "update Lmod caches after ingesting ${tar_file_basename}" "${repo}"
${cvmfs_server} transaction "${cvmfs_repo}"
${update_caches_script} /cvmfs/${cvmfs_repo}/${basedir}/${version}
${cvmfs_server} publish -m "update Lmod caches after ingesting ${tar_file_basename}" "${cvmfs_repo}"
}

function ingest_init_tarball() {
Expand All @@ -207,25 +206,25 @@ function ingest_compat_tarball() {
# Handle the ingestion of tarballs containing a compatibility layer
check_arch
check_os
compat_layer_path="/cvmfs/${repo}/${basedir}/${version}/compat/${os}/${arch}"
compat_layer_path="/cvmfs/${cvmfs_repo}/${basedir}/${version}/compat/${os}/${arch}"
# Assume that we already had a compat layer in place if there is a startprefix script in the corresponding CVMFS directory
if [ -f "${compat_layer_path}/startprefix" ];
then
echo_yellow "Compatibility layer for version ${version}, OS ${os}, and architecture ${arch} already exists!"
${cvmfs_server} transaction "${repo}"
${cvmfs_server} transaction "${cvmfs_repo}"
last_suffix=$((ls -1d ${compat_layer_path}-* | tail -n 1 | xargs basename | cut -d- -f2) 2> /dev/null)
new_suffix=$(printf '%03d\n' $((${last_suffix:-0} + 1)))
old_layer_suffixed_path="${compat_layer_path}-${new_suffix}"
echo_yellow "Moving the existing compat layer from ${compat_layer_path} to ${old_layer_suffixed_path}..."
mv ${compat_layer_path} ${old_layer_suffixed_path}
tar -C "/cvmfs/${repo}/${basedir}/" -xzf "${tar_file}"
${cvmfs_server} publish -m "updated compat layer for ${version}, ${os}, ${arch}" "${repo}"
tar -C "/cvmfs/${cvmfs_repo}/${basedir}/" -xzf "${tar_file}"
${cvmfs_server} publish -m "updated compat layer for ${version}, ${os}, ${arch}" "${cvmfs_repo}"
ec=$?
if [ $ec -eq 0 ]
then
echo_green "Successfully ingested the new compatibility layer!"
else
${cvmfs_server} abort "${repo}"
${cvmfs_server} abort "${cvmfs_repo}"
error "error while updating the compatibility layer, transaction aborted."
fi
else
Expand All @@ -236,11 +235,17 @@ function ingest_compat_tarball() {


# Check if both a CVMFS repository name and a tarball have been specified
if [ "$#" -ne 1 ]; then
error "usage: $0 <gzipped tarball>"
if [ "$#" -ne 2 ]; then
error "usage: $0 <CVMFS repository name> <gzipped tarball>"
fi

tar_file="$1"
cvmfs_repo="$1"
tar_file="$2"

# Check if the CVMFS repository exists
if ( ! cvmfs_server list | grep -q "${cvmfs_repo}" ); then
error "CVMFS repository ${cvmfs_repo} does not exist!"
fi

# Check if the given tarball exists
if [ ! -f "${tar_file}" ]; then
Expand Down
24 changes: 20 additions & 4 deletions scripts/test-ingest-tarball.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

INGEST_SCRIPT=$(dirname "$(realpath $0)")/ingest-tarball.sh
TEST_OUTPUT=/dev/null # change to /dev/stdout to print test outputs for debugging purposes

# Temporary base dir for the tests
tstdir=$(mktemp -d)
Expand Down Expand Up @@ -30,11 +31,14 @@ function create_tarball() {
# Create a fake cvmfs_server executable, and prepend it to $PATH
cat << EOF > "${tstdir}/cvmfs_server"
#!/bin/bash
if [ \$# -lt 2 ]; then
echo "cvmfs_server expects at least two arguments!"
if [ \$# -lt 1 ]; then
echo "cvmfs_server expects at least one argument!"
exit 1
fi
echo "Calling: cvmfs_server \$@"
if [ \$1 == "list" ]; then
echo "my.repo.tld (stratum0 / local)"
fi
EOF
chmod +x "${tstdir}/cvmfs_server"
export PATH="${tstdir}:$PATH"
Expand Down Expand Up @@ -86,7 +90,7 @@ tarballs_fail=(
# Run the tests that should succeed
for ((i = 0; i < ${#tarballs_success[@]}; i++)); do
t=$(create_tarball ${tarballs_success[$i]})
"${INGEST_SCRIPT}" "$t" >& /dev/null
"${INGEST_SCRIPT}" "my.repo.tld" "$t" >& "${TEST_OUTPUT}"
if [ ! $? -eq 0 ]; then
num_tests_failed=$((num_tests_failed + 1))
else
Expand All @@ -98,7 +102,19 @@ done
# Run the tests that should fail
for ((i = 0; i < ${#tarballs_fail[@]}; i++)); do
t=$(create_tarball ${tarballs_fail[$i]})
"${INGEST_SCRIPT}" "$t" >& /dev/null
"${INGEST_SCRIPT}" "my.repo.tld" "$t" >& "${TEST_OUTPUT}"
if [ ! $? -eq 1 ]; then
num_tests_failed=$((num_tests_failed + 1))
else
num_tests_succeeded=$((num_tests_succeeded + 1))
fi
num_tests=$((num_tests + 1))
done

# Run the tests that should succeed again, but with a non-existing repo; now they should fail
for ((i = 0; i < ${#tarballs_success[@]}; i++)); do
t=$(create_tarball ${tarballs_success[$i]})
"${INGEST_SCRIPT}" "my.nonexistingrepo.tld" "$t" >& "${TEST_OUTPUT}"
if [ ! $? -eq 1 ]; then
num_tests_failed=$((num_tests_failed + 1))
else
Expand Down

0 comments on commit 86053f0

Please sign in to comment.