Skip to content

Commit 5a03ab6

Browse files
committed
WIP: Bintar deps validation script
1 parent 7199ce3 commit 5a03ab6

File tree

11 files changed

+4750
-0
lines changed

11 files changed

+4750
-0
lines changed

runtime_config/bintar_deps/deps_10.11.yaml

Lines changed: 627 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_10.5.yaml

Lines changed: 412 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_10.6.yaml

Lines changed: 602 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.4.yaml

Lines changed: 628 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.6.yaml

Lines changed: 651 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.7.yaml

Lines changed: 651 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.8.yaml

Lines changed: 630 additions & 0 deletions
Large diffs are not rendered by default.

scripts/bintars/__init__.py

Whitespace-only changes.

scripts/bintars/common.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import logging
2+
import tarfile
3+
import subprocess
4+
import shutil
5+
import sys
6+
import re
7+
import os
8+
from typing import Tuple
9+
10+
from pathlib import Path
11+
12+
13+
def setup_logging(level: int):
    """Configure the root logger to emit colorized ``LEVEL: message`` lines.

    Args:
        level: Root logger threshold (e.g. ``logging.INFO``).
    """
    # ANSI escape codes for colors.
    RESET = "\033[0m"
    GREEN = "\033[32m"
    RED = "\033[31m"
    YELLOW = "\033[33m"

    # Custom log formatter to include colors.
    class ColoredFormatter(logging.Formatter):
        """Prefix each formatted message with its colorized level name."""

        _COLORS = {
            logging.INFO: GREEN,
            logging.ERROR: RED,
            logging.WARNING: YELLOW,
        }

        def format(self, record):
            color = self._COLORS.get(record.levelno, RESET)
            # Build the prefix around the formatted output instead of
            # mutating record.msg: mutating the record would prepend the
            # level again each time another handler formats the same
            # record.
            message = super().format(record)
            return f"{color}{record.levelname}{RESET}: {message}"

    # Basic logging configuration.
    logging.basicConfig(
        level=level,
        format="%(message)s",  # No logger name or timestamp.
        handlers=[
            logging.StreamHandler()
        ]
    )

    # Apply the custom formatter to the handler installed above.
    logging.getLogger().handlers[0].setFormatter(ColoredFormatter("%(message)s"))
48+
49+
50+
# Helper functions
51+
def run_command(command):
    """Run *command* through the shell and return its stripped stdout.

    Logs and returns None when the command exits with a non-zero
    status.
    """
    try:
        completed = subprocess.run(
            command, shell=True, check=True, text=True,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as exc:
        logging.error(f"Error running command '{command}': {exc} {exc.stderr.strip()}")
        return None
    return completed.stdout.strip()
60+
61+
62+
def _unpack_archive(tarball_path: str, dst_path: str):
63+
logging.info(f"Extracting archive {tarball_path}")
64+
with tarfile.open(tarball_path, 'r:*') as tar:
65+
tar.extractall(path=dst_path, filter='fully_trusted')
66+
67+
68+
def _parse_archive_path(archive_path: str) -> Tuple[str, str]:
    """Derive (base_name, major.minor version) from a tarball path.

    The archive file name must embed a dotted product version such as
    ``11.6.2`` (major version of two or more digits); otherwise the
    process exits with status 1.
    """
    archive_name = os.path.basename(archive_path)

    # Drop the compression suffix (e.g. ".gz"), then a ".tar" suffix
    # if one remains.
    base_name = Path(archive_name).stem
    if base_name.endswith(".tar"):
        base_name = Path(base_name).stem

    # Extract the product version embedded in the archive name.
    match = re.search('([1-9][0-9]+\\.[0-9]+\\.[0-9]+)', base_name)
    if match is None:
        logging.error(f'Archive name {archive_name} must contain product version')
        sys.exit(1)

    # Only major.minor matter here; the point release is dropped.
    major, minor, _point = match.group(0).split('.')
    major_minor = f'{major}.{minor}'

    logging.info(f'Product version (major.minor) {major_minor}')

    return base_name, major_minor
90+
91+
92+
def prepare_test_directory(archive_path: str, tests_path: str):
    """Extract *archive_path* under *tests_path* for a test run.

    Returns:
        (files_path, major_minor): the directory holding the extracted
        archive contents, and the product's major.minor version string.
    """
    base_name, major_minor = _parse_archive_path(archive_path)

    # The tarball unpacks into a directory named after the archive
    # itself; that inner directory is where all the files live.
    files_path = os.path.join(tests_path, base_name)

    # Wipe leftovers from any previous run, then (re)create the test
    # root directory.
    shutil.rmtree(files_path, ignore_errors=True)
    Path(tests_path).mkdir(parents=True, exist_ok=True)

    _unpack_archive(archive_path, tests_path)

    # Sanity check that the archive has kept the expected layout.
    assert os.path.isdir(files_path)

    return files_path, major_minor

scripts/bintars/deps_test.py

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
import argparse
2+
import os
3+
import re
4+
import sys
5+
from concurrent.futures import ProcessPoolExecutor
6+
from typing import Iterable, Tuple, Generator
7+
import logging
8+
9+
import magic
10+
import yaml
11+
12+
from common import run_command, setup_logging, prepare_test_directory
13+
14+
15+
def check_file_is_elf_binary_callback(file_path: str) -> str | None:
    """Worker callback: return *file_path* if libmagic says it is ELF.

    Relies on the per-process ``mime`` global installed by
    ``start_worker``. Returns None for non-ELF files and (after
    logging) on probe errors.
    """
    global mime
    try:
        description = mime.from_file(file_path)
    except Exception as e:
        logging.error(f"Error checking file {file_path}: {e}")
        return None
    # Identify ELF files by the magic description.
    return file_path if "elf" in description.lower() else None
24+
25+
26+
def start_worker():
    """Process-pool initializer: set up a per-worker libmagic handle.

    Stores the handle in the module-global ``mime`` used by
    ``check_file_is_elf_binary_callback`` — presumably because a
    libmagic handle cannot be shared across processes (TODO confirm).
    """
    global mime
    mime = magic.Magic()
29+
30+
31+
def get_file_paths(path: str) -> Generator[str, None, None]:
    """Lazily yield every file path under *path*, recursively.

    Used as a generator so worker processes can be fed without first
    materializing the whole file list.
    """
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            yield os.path.join(dirpath, name)
36+
37+
38+
def get_executables(path: str):
    """
    Recursively collect ELF executables and shared libraries under
    *path*.

    The libmagic probe for each file is fanned out over a process
    pool, which gives roughly a 10x speed-up over a single-threaded
    scan.

    Args:
        path (str): Root directory to search.

    Returns:
        list: Paths of the ELF executables and libraries found.
    """
    with ProcessPoolExecutor(initializer=start_worker,
                             max_workers=os.cpu_count()) as pool:
        probed = pool.map(check_file_is_elf_binary_callback,
                          get_file_paths(path))
        # The callback returns None for non-ELF files; keep the rest.
        executables = [hit for hit in probed if hit]

    return executables
62+
63+
64+
def get_file_dependencies_callback(file: str) -> Tuple[str, set[str] | bool]:
    """Collect the NEEDED shared libraries of one ELF file.

    Runs ``readelf -d`` on *file* and parses the dynamic section.

    Returns:
        (file, deps): *deps* is the set of required library sonames,
        or ``False`` when readelf failed (callers treat that as "skip
        this file").
    """
    import shlex

    result = set()
    # Quote the path so file names containing spaces or shell
    # metacharacters can neither break nor inject into the shell
    # command run by run_command (shell=True).
    output = run_command(f'readelf -d {shlex.quote(file)}')
    if output is None:
        logging.error(f"Failed to check libraries for {file}.")
        return file, False

    pattern = "Shared library: \\[(\\S*)\\]"
    regex_shared_library = re.compile(pattern)

    for line in output.splitlines():
        # Here is an example line we match:
        # 0x0000000000000001 (NEEDED)  Shared library: [libsystemd.so.0]
        match = regex_shared_library.search(line)
        if not match:
            continue
        result.add(match.group(1))

    return file, result
85+
86+
87+
def get_dependencies_for_files(files: Iterable[str]) -> dict[str, set[str]]:
    """Map each file path to the set of shared libraries it requires.

    Dependency extraction is fanned out over a process pool. Files
    whose dependencies could not be read are skipped (the callback
    returns ``False`` for them).
    """
    # No libmagic initializer here: the dependency callback only
    # shells out to readelf and never touches the `mime` global.
    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        results = executor.map(get_file_dependencies_callback, files)

        deps = {}
        for full_file_path, file_deps in results:
            # TODO(cvicentiu) Perhaps this should be marked as a failure.
            # Unable to read file dependencies, skip the file.
            if file_deps is False:
                continue
            deps[full_file_path] = file_deps

    return deps
102+
103+
104+
def remove_base_path_from_files(dependencies: dict[str, set[str]],
                                base_path: str) -> dict[str, set[str]]:
    """
    For all keys in dependencies, remove the base_path prefix.
    "./tests/mariadb-11.6.2-linux-systemd-x86_64/lib/libgalera_smm.so"
    becomes
    "lib/libgalera_smm.so"
    """
    result = {}
    for full_file_name, deps in dependencies.items():
        # If this assert fails, there is a bug in the testing script.
        assert full_file_name.startswith(base_path)
        # relpath (unlike slicing off len(base_path)+1 characters)
        # also copes with a base_path given with a trailing slash.
        result[os.path.relpath(full_file_name, base_path)] = deps
    return result
119+
120+
121+
def dependencies_to_canonical_repr(
        dependencies: dict[str, set[str]],
        version: str,
        base_path: str
) -> dict[str, dict[str, list[str]]]:
    """Build the canonical, YAML-ready structure for a dependency scan.

    File keys are made archive-relative and each dependency set is
    emitted as a sorted list so the output is deterministic and
    diffable.

    Returns a dict of the form
    ``{'version': <major.minor>, 'files': {<path>: [<lib>, ...]}}``.
    """
    dependencies = remove_base_path_from_files(dependencies, base_path)
    result = {
        'version': version,
        'files': {},
    }

    for file, deps in dependencies.items():
        # sorted() already returns a list; no list() wrapper needed.
        result['files'][file] = sorted(deps)

    return result
137+
138+
139+
def get_standard_dependencies(path: str):
    """Load the reference dependency spec from the YAML file at *path*."""
    with open(path, 'r') as spec_file:
        spec = yaml.safe_load(spec_file)
    return spec
142+
143+
144+
def get_executable_files_dependencies(path: str):
    """Scan *path* for ELF binaries and return their dependency map."""
    return get_dependencies_for_files(get_executables(path))
147+
148+
149+
def compare_versions(archive_deps, standard_deps,
                     allow_cross_version: bool):
    """Compare the archive's version against the spec's version.

    Returns True when the run should fail (versions differ and
    cross-version runs are not allowed). A tolerated mismatch is
    logged as a warning and returns False.
    """
    a_version = archive_deps['version']
    s_version = standard_deps['version']

    if a_version != s_version:
        if allow_cross_version:
            # logging.warn() is deprecated — use warning(). The
            # formatter already prefixes the level name, so the
            # message must not repeat "WARNING:".
            logging.warning(f'version mismatch {a_version} {s_version}')
        else:
            logging.error(f'version mismatch {a_version} {s_version}')
            return True
    return False
161+
162+
163+
def compare_dependencies(archive_deps, standard_deps):
    """Diff the archive's file/dependency map against the spec.

    Logs every discrepancy — extra files, missing files, and per-file
    extra/missing libraries — and returns True if any were found.
    """
    files = archive_deps['files']
    control = standard_deps['files']

    files_set = set(files)
    control_set = set(control)

    error = False

    # Files present in the archive but not in the spec.
    files_extra = files_set - control_set
    if files_extra:
        logging.error(f'We have extra files! {files_extra}')
        error = True

    # Files the spec expects but the archive lacks.
    files_missing = control_set - files_set
    if files_missing:
        logging.error(f'We have missing files from the archive! {files_missing}')
        error = True

    # For files in both, diff the dependency lists in both directions.
    for file in files_set & control_set:
        deps_extra = set(files[file]) - set(control[file])
        deps_missing = set(control[file]) - set(files[file])

        if deps_extra:
            logging.error(f'We have extra deps for {file}! {deps_extra}')
            error = True
        if deps_missing:
            logging.error(f'We have missing deps for {file}! {deps_missing}')
            error = True

    return error
195+
196+
197+
def main(archive_path: str,
         tests_path: str,
         deps_file: str,
         record: bool,
         allow_cross_version: bool):
    """Drive one record or validation run against a bintar archive.

    In record mode the scanned dependencies are written out to
    *deps_file*. In validation mode they are compared against the
    spec in *deps_file* and the process exits with status 1 on any
    failure.
    """
    files_path, major_minor = prepare_test_directory(archive_path, tests_path)

    logging.info("Fetching archive dependencies")
    dependencies = get_executable_files_dependencies(files_path)
    canonical_deps = dependencies_to_canonical_repr(dependencies,
                                                    version=major_minor,
                                                    base_path=files_path)

    if record:
        logging.info(f"Recording new result to {deps_file}")
        with open(deps_file, 'w') as f:
            yaml.dump(canonical_deps, f)
        return

    # Validation mode: diff against the checked-in spec. Both checks
    # always run so every discrepancy gets logged.
    standard = get_standard_dependencies(deps_file)
    failed = compare_versions(canonical_deps, standard, allow_cross_version)
    failed = compare_dependencies(canonical_deps, standard) or failed

    if failed:
        logging.error("Some tests failed")
        sys.exit(1)

    logging.info("All OK")
230+
231+
232+
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser(
        prog='bintar_deps.py',
        description='Checks/Records bintar files and dependencies')
    # Positional arguments.
    arg_parser.add_argument(
        'archive',
        help='Path to the binary tarball archive')
    arg_parser.add_argument(
        'deps_file',
        help='Path to YAML file with a list of dependencies')
    # Optional flags.
    arg_parser.add_argument(
        '--record', action='store_true',
        help='Use the bintar archive to generate a deps file')
    arg_parser.add_argument(
        '--test_directory', type=str, default='./tests/',
        help='Where to extract the archive and run tests.')
    arg_parser.add_argument(
        '--allow_cross_version', action='store_true',
        help='Tests pass even if there is a '
             'version mismatch between the archive and '
             'the deps_file version')
    args = arg_parser.parse_args()

    setup_logging(logging.INFO)
    main(archive_path=args.archive,
         tests_path=args.test_directory,
         deps_file=args.deps_file,
         record=args.record,
         allow_cross_version=args.allow_cross_version)

0 commit comments

Comments
 (0)