Skip to content

Commit 86053f0

Browse files
authored
Merge pull request #201 from bedroge/map_bucket_to_cvmfs_repo
[Automated ingestion] Map S3 buckets to CVMFS repositories
2 parents ca1aa51 + 4158341 commit 86053f0

File tree

5 files changed

+67
-39
lines changed

5 files changed

+67
-39
lines changed

scripts/automated_ingestion/automated_ingestion.cfg.example

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ ingestion_script = /absolute/path/to/ingest-tarball.sh
1010
metadata_file_extension = .meta.txt
1111

1212
[aws]
13-
staging_buckets = eessi-staging, eessi-staging-2023.06
13+
staging_buckets = {
14+
"software.eessi.io-2023.06": "software.eessi.io",
15+
"dev.eessi.io-2024.09": "dev.eessi.io",
16+
"riscv.eessi.io-20240402": "riscv.eessi.io" }
1417

1518
[cvmfs]
1619
ingest_as_root = yes
@@ -44,7 +47,7 @@ failed_tarball_overview_issue_body = An error occurred while trying to get the c
4447
```
4548
pr_body = A new tarball has been staged for {pr_url}.
4649
Please review the contents of this tarball carefully.
47-
Merging this PR will lead to automatic ingestion of the tarball.
50+
Merging this PR will lead to automatic ingestion of the tarball to the repository {cvmfs_repo}.
4851

4952
<details>
5053
<summary>Metadata of tarball</summary>
@@ -64,7 +67,7 @@ pr_body = A new tarball has been staged for {pr_url}.
6467

6568
[slack]
6669
ingestion_notification = yes
67-
ingestion_message = Tarball `{tarball}` has been ingested into the CVMFS repository.
70+
ingestion_message = Tarball `{tarball}` has been ingested into the CVMFS repository `{cvmfs_repo}`.
6871

6972
[logging]
7073
level = WARN

scripts/automated_ingestion/automated_ingestion.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import botocore
1010
import configparser
1111
import github
12+
import json
1213
import logging
1314
import os
1415
import pid
@@ -99,15 +100,15 @@ def main():
99100
aws_secret_access_key=config['secrets']['aws_secret_access_key'],
100101
)
101102

102-
buckets = [x.strip() for x in config['aws']['staging_buckets'].split(',')]
103-
for bucket in buckets:
103+
buckets = json.loads(config['aws']['staging_buckets'])
104+
for bucket, cvmfs_repo in buckets.items():
104105
tarballs = find_tarballs(s3, bucket)
105106
if args.list_only:
106107
for num, tarball in enumerate(tarballs):
107108
print(f'[{bucket}] {num}: {tarball}')
108109
else:
109110
for tarball in tarballs:
110-
tar = EessiTarball(tarball, config, gh_staging_repo, s3, bucket)
111+
tar = EessiTarball(tarball, config, gh_staging_repo, s3, bucket, cvmfs_repo)
111112
tar.run_handler()
112113

113114

scripts/automated_ingestion/eessitarball.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ class EessiTarball:
1919
for which it interfaces with the S3 bucket, GitHub, and CVMFS.
2020
"""
2121

22-
def __init__(self, object_name, config, git_staging_repo, s3, bucket):
22+
def __init__(self, object_name, config, git_staging_repo, s3, bucket, cvmfs_repo):
2323
"""Initialize the tarball object."""
2424
self.config = config
2525
self.git_repo = git_staging_repo
2626
self.metadata_file = object_name + config['paths']['metadata_file_extension']
2727
self.object = object_name
2828
self.s3 = s3
2929
self.bucket = bucket
30+
self.cvmfs_repo = cvmfs_repo
3031
self.local_path = os.path.join(config['paths']['download_dir'], os.path.basename(object_name))
3132
self.local_metadata_path = self.local_path + config['paths']['metadata_file_extension']
3233
self.url = f'https://{bucket}.s3.amazonaws.com/{object_name}'
@@ -177,7 +178,7 @@ def ingest(self):
177178
sudo = ['sudo'] if self.config['cvmfs'].getboolean('ingest_as_root', True) else []
178179
logging.info(f'Running the ingestion script for {self.object}...')
179180
ingest_cmd = subprocess.run(
180-
sudo + [script, self.local_path],
181+
sudo + [script, self.cvmfs_repo, self.local_path],
181182
stdout=subprocess.PIPE,
182183
stderr=subprocess.PIPE)
183184
if ingest_cmd.returncode == 0:
@@ -186,7 +187,7 @@ def ingest(self):
186187
if self.config.has_section('slack') and self.config['slack'].getboolean('ingestion_notification', False):
187188
send_slack_message(
188189
self.config['secrets']['slack_webhook'],
189-
self.config['slack']['ingestion_message'].format(tarball=os.path.basename(self.object))
190+
self.config['slack']['ingestion_message'].format(tarball=os.path.basename(self.object), cvmfs_repo=self.cvmfs_repo)
190191
)
191192
else:
192193
issue_title = f'Failed to ingest {self.object}'
@@ -295,11 +296,13 @@ def make_approval_request(self):
295296
try:
296297
tarball_contents = self.get_contents_overview()
297298
pr_body = self.config['github']['pr_body'].format(
299+
cvmfs_repo=self.cvmfs_repo,
298300
pr_url=pr_url,
299301
tar_overview=self.get_contents_overview(),
300302
metadata=metadata,
301303
)
302-
self.git_repo.create_pull(title='Ingest ' + filename, body=pr_body, head=git_branch, base='main')
304+
pr_title = '[%s] Ingest %s' % (self.cvmfs_repo, filename)
305+
self.git_repo.create_pull(title=pr_title, body=pr_body, head=git_branch, base='main')
303306
except Exception as err:
304307
issue_title = f'Failed to get contents of {self.object}'
305308
issue_body = self.config['github']['failed_tarball_overview_issue_body'].format(

scripts/ingest-tarball.sh

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
# Only if it passes these checks, the tarball gets ingested to the base dir in the CVMFS repository given as the first argument.
1414

15-
repo=software.eessi.io
1615
basedir=versions
1716
decompress="gunzip -c"
1817
cvmfs_server="cvmfs_server"
@@ -42,15 +41,15 @@ function error() {
4241
}
4342

4443
function is_repo_owner() {
45-
if [ -f "/etc/cvmfs/repositories.d/${repo}/server.conf" ]
44+
if [ -f "/etc/cvmfs/repositories.d/${cvmfs_repo}/server.conf" ]
4645
then
47-
. "/etc/cvmfs/repositories.d/${repo}/server.conf"
46+
. "/etc/cvmfs/repositories.d/${cvmfs_repo}/server.conf"
4847
[ x"$(whoami)" = x"$CVMFS_USER" ]
4948
fi
5049
}
5150

5251
function check_repo_vars() {
53-
if [ -z "${repo}" ]
52+
if [ -z "${cvmfs_repo}" ]
5453
then
5554
error "the 'repo' variable has to be set to the name of the CVMFS repository."
5655
fi
@@ -73,8 +72,8 @@ function check_version() {
7372
fi
7473

7574
# Check if the EESSI version number encoded in the filename
76-
# is valid, i.e. matches the format YYYY.DD
77-
if ! echo "${version}" | egrep -q '^20[0-9][0-9]\.(0[0-9]|1[0-2])$'
75+
# is valid, i.e. matches the format YYYY.MM or YYYYMMDD
76+
if ! echo "${version}" | egrep '(^20[0-9][0-9]\.(0[0-9]|1[0-2])$)|(^20[0-9][0-9][0-9][0-9][0-9][0-9]$)'
7877
then
7978
error "${version} is not a valid EESSI version."
8079
fi
@@ -113,28 +112,28 @@ function check_contents_type() {
113112
function cvmfs_regenerate_nested_catalogs() {
114113
# Use the .cvmfsdirtab to generate nested catalogs for the ingested tarball
115114
echo "Generating the nested catalogs..."
116-
${cvmfs_server} transaction "${repo}"
117-
${cvmfs_server} publish -m "Generate catalogs after ingesting ${tar_file_basename}" "${repo}"
115+
${cvmfs_server} transaction "${cvmfs_repo}"
116+
${cvmfs_server} publish -m "Generate catalogs after ingesting ${tar_file_basename}" "${cvmfs_repo}"
118117
ec=$?
119118
if [ $ec -eq 0 ]
120119
then
121-
echo_green "Nested catalogs for ${repo} have been created!"
120+
echo_green "Nested catalogs for ${cvmfs_repo} have been created!"
122121
else
123-
echo_red "failure when creating nested catalogs for ${repo}."
122+
echo_red "failure when creating nested catalogs for ${cvmfs_repo}."
124123
fi
125124
}
126125

127126
function cvmfs_ingest_tarball() {
128127
# Do a regular "cvmfs_server ingest" for a given tarball,
129128
# followed by regenerating the nested catalog
130-
echo "Ingesting tarball ${tar_file} to ${repo}..."
131-
${decompress} "${tar_file}" | ${cvmfs_server} ingest -t - -b "${basedir}" "${repo}"
129+
echo "Ingesting tarball ${tar_file} to ${cvmfs_repo}..."
130+
${decompress} "${tar_file}" | ${cvmfs_server} ingest -t - -b "${basedir}" "${cvmfs_repo}"
132131
ec=$?
133132
if [ $ec -eq 0 ]
134133
then
135-
echo_green "${tar_file} has been ingested to ${repo}."
134+
echo_green "${tar_file} has been ingested to ${cvmfs_repo}."
136135
else
137-
error "${tar_file} could not be ingested to ${repo}."
136+
error "${tar_file} could not be ingested to ${cvmfs_repo}."
138137
fi
139138

140139
# "cvmfs_server ingest" doesn't automatically rebuild the nested catalogs,
@@ -180,9 +179,9 @@ function update_lmod_caches() {
180179
then
181180
error "the script for updating the Lmod caches (${update_caches_script}) does not have execute permissions!"
182181
fi
183-
${cvmfs_server} transaction "${repo}"
184-
${update_caches_script} /cvmfs/${repo}/${basedir}/${version}
185-
${cvmfs_server} publish -m "update Lmod caches after ingesting ${tar_file_basename}" "${repo}"
182+
${cvmfs_server} transaction "${cvmfs_repo}"
183+
${update_caches_script} /cvmfs/${cvmfs_repo}/${basedir}/${version}
184+
${cvmfs_server} publish -m "update Lmod caches after ingesting ${tar_file_basename}" "${cvmfs_repo}"
186185
}
187186

188187
function ingest_init_tarball() {
@@ -207,25 +206,25 @@ function ingest_compat_tarball() {
207206
# Handle the ingestion of tarballs containing a compatibility layer
208207
check_arch
209208
check_os
210-
compat_layer_path="/cvmfs/${repo}/${basedir}/${version}/compat/${os}/${arch}"
209+
compat_layer_path="/cvmfs/${cvmfs_repo}/${basedir}/${version}/compat/${os}/${arch}"
211210
# Assume that we already had a compat layer in place if there is a startprefix script in the corresponding CVMFS directory
212211
if [ -f "${compat_layer_path}/startprefix" ];
213212
then
214213
echo_yellow "Compatibility layer for version ${version}, OS ${os}, and architecture ${arch} already exists!"
215-
${cvmfs_server} transaction "${repo}"
214+
${cvmfs_server} transaction "${cvmfs_repo}"
216215
last_suffix=$((ls -1d ${compat_layer_path}-* | tail -n 1 | xargs basename | cut -d- -f2) 2> /dev/null)
217216
new_suffix=$(printf '%03d\n' $((${last_suffix:-0} + 1)))
218217
old_layer_suffixed_path="${compat_layer_path}-${new_suffix}"
219218
echo_yellow "Moving the existing compat layer from ${compat_layer_path} to ${old_layer_suffixed_path}..."
220219
mv ${compat_layer_path} ${old_layer_suffixed_path}
221-
tar -C "/cvmfs/${repo}/${basedir}/" -xzf "${tar_file}"
222-
${cvmfs_server} publish -m "updated compat layer for ${version}, ${os}, ${arch}" "${repo}"
220+
tar -C "/cvmfs/${cvmfs_repo}/${basedir}/" -xzf "${tar_file}"
221+
${cvmfs_server} publish -m "updated compat layer for ${version}, ${os}, ${arch}" "${cvmfs_repo}"
223222
ec=$?
224223
if [ $ec -eq 0 ]
225224
then
226225
echo_green "Successfully ingested the new compatibility layer!"
227226
else
228-
${cvmfs_server} abort "${repo}"
227+
${cvmfs_server} abort "${cvmfs_repo}"
229228
error "error while updating the compatibility layer, transaction aborted."
230229
fi
231230
else
@@ -236,11 +235,17 @@ function ingest_compat_tarball() {
236235

237236

238237
# Check if a tarball has been specified
239-
if [ "$#" -ne 1 ]; then
240-
error "usage: $0 <gzipped tarball>"
238+
if [ "$#" -ne 2 ]; then
239+
error "usage: $0 <CVMFS repository name> <gzipped tarball>"
241240
fi
242241

243-
tar_file="$1"
242+
cvmfs_repo="$1"
243+
tar_file="$2"
244+
245+
# Check if the CVMFS repository exists
246+
if ( ! cvmfs_server list | grep -q "${cvmfs_repo}" ); then
247+
error "CVMFS repository ${cvmfs_repo} does not exist!"
248+
fi
244249

245250
# Check if the given tarball exists
246251
if [ ! -f "${tar_file}" ]; then

scripts/test-ingest-tarball.sh

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/bin/bash
22

33
INGEST_SCRIPT=$(dirname "$(realpath $0)")/ingest-tarball.sh
4+
TEST_OUTPUT=/dev/null # change to /dev/stdout to print test outputs for debugging purposes
45

56
# Temporary base dir for the tests
67
tstdir=$(mktemp -d)
@@ -30,11 +31,14 @@ function create_tarball() {
3031
# Create a fake cvmfs_server executable, and prepend it to $PATH
3132
cat << EOF > "${tstdir}/cvmfs_server"
3233
#!/bin/bash
33-
if [ \$# -lt 2 ]; then
34-
echo "cvmfs_server expects at least two arguments!"
34+
if [ \$# -lt 1 ]; then
35+
echo "cvmfs_server expects at least one argument!"
3536
exit 1
3637
fi
3738
echo "Calling: cvmfs_server \$@"
39+
if [ \$1 == "list" ]; then
40+
echo "my.repo.tld (stratum0 / local)"
41+
fi
3842
EOF
3943
chmod +x "${tstdir}/cvmfs_server"
4044
export PATH="${tstdir}:$PATH"
@@ -86,7 +90,7 @@ tarballs_fail=(
8690
# Run the tests that should succeed
8791
for ((i = 0; i < ${#tarballs_success[@]}; i++)); do
8892
t=$(create_tarball ${tarballs_success[$i]})
89-
"${INGEST_SCRIPT}" "$t" >& /dev/null
93+
"${INGEST_SCRIPT}" "my.repo.tld" "$t" >& "${TEST_OUTPUT}"
9094
if [ ! $? -eq 0 ]; then
9195
num_tests_failed=$((num_tests_failed + 1))
9296
else
@@ -98,7 +102,19 @@ done
98102
# Run the tests that should fail
99103
for ((i = 0; i < ${#tarballs_fail[@]}; i++)); do
100104
t=$(create_tarball ${tarballs_fail[$i]})
101-
"${INGEST_SCRIPT}" "$t" >& /dev/null
105+
"${INGEST_SCRIPT}" "my.repo.tld" "$t" >& "${TEST_OUTPUT}"
106+
if [ ! $? -eq 1 ]; then
107+
num_tests_failed=$((num_tests_failed + 1))
108+
else
109+
num_tests_succeeded=$((num_tests_succeeded + 1))
110+
fi
111+
num_tests=$((num_tests + 1))
112+
done
113+
114+
# Run the tests that should succeed again, but with a nonexistent repo; now they should fail
115+
for ((i = 0; i < ${#tarballs_success[@]}; i++)); do
116+
t=$(create_tarball ${tarballs_success[$i]})
117+
"${INGEST_SCRIPT}" "my.nonexistingrepo.tld" "$t" >& "${TEST_OUTPUT}"
102118
if [ ! $? -eq 1 ]; then
103119
num_tests_failed=$((num_tests_failed + 1))
104120
else

0 commit comments

Comments
 (0)