From 474b2e4839676c17021ed2446ad9207557a208fe Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 10 Sep 2025 09:14:54 +1000 Subject: [PATCH 1/2] MDBF-143: Add Infer builder This performs static analysis on the MariaDB codebase by maintaining a git source repository as a shared volume. Because static analysis takes time, a lot of time, there is a shared cache volume to store build results from main branches of the codebase so that as much incremental usage as possible can occur. Infer runs in two phases, a capture and an analyze. Infer outputs are in a result-dir, which contains: * report.json - what infer tools use * report.txt - the human readable version of this * capture.db - the sqlite3 representation of captured files and their relation to function definitions. * results.db - the analyze phase outputs Of these, the report.json is desirable as the long term record of vulnerabilities, along with the main_diff containing the difference from the last main X.Y branch commit. --- configuration/builders/sequences/sast.py | 54 ++++ configuration/steps/commands/base.py | 10 +- configuration/steps/commands/scripts/infer.sh | 286 ++++++++++++++++++ configuration/steps/commands/util.py | 9 + constants.py | 1 + master-migration/master.cfg | 36 +++ 6 files changed, 394 insertions(+), 2 deletions(-) create mode 100644 configuration/builders/sequences/sast.py create mode 100755 configuration/steps/commands/scripts/infer.sh diff --git a/configuration/builders/sequences/sast.py b/configuration/builders/sequences/sast.py new file mode 100644 index 000000000..ed9bb4be3 --- /dev/null +++ b/configuration/builders/sequences/sast.py @@ -0,0 +1,54 @@ +import os + +from configuration.builders.infra.runtime import ( + BuildSequence, + DockerConfig, + InContainer, +) +from configuration.steps.base import StepOptions +from configuration.steps.commands.base import URL +from configuration.steps.commands.packages import SavePackages +from configuration.steps.commands.util import InferScript, 
PrintEnvironmentDetails +from configuration.steps.remote import ShellStep + + +def infer(config: DockerConfig): + sequence = BuildSequence() + + sequence.add_step(ShellStep(command=PrintEnvironmentDetails())) + + sequence.add_step( + InContainer( + docker_environment=config, + step=ShellStep( + command=InferScript("%(prop:branch)s"), + options=StepOptions( + description="running infer analysis", + descriptionDone="infer analysis complete", + ), + env_vars=[("JOBS", str("%(prop:jobs)s"))], + ), + ) + ) + + sequence.add_step( + InContainer( + docker_environment=config, + step=ShellStep( + command=SavePackages( + packages=["infer_results"], + destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s", + ), + url=URL( + url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s", + url_text="Infer artifacts/logs", + ), + options=StepOptions( + alwaysRun=True, + description="saving infer analysis results", + descriptionDone="infer analysis results saved", + ), + ), + ) + ) + return sequence diff --git a/configuration/steps/commands/base.py b/configuration/steps/commands/base.py index 18988b81b..7e279a59c 100644 --- a/configuration/steps/commands/base.py +++ b/configuration/steps/commands/base.py @@ -62,8 +62,14 @@ def as_cmd_arg(self) -> list[str]: class BashCommand(Command): - def __init__(self, cmd: str, name: str = "Run command", user: str = "buildbot"): - super().__init__(name=name, workdir=PurePath("."), user=user) + def __init__( + self, + cmd: str, + name: str = "Run command", + user: str = "buildbot", + workdir: PurePath = PurePath("."), + ): + super().__init__(name=name, workdir=workdir, user=user) self.cmd = cmd def as_cmd_arg(self) -> list[str]: diff --git a/configuration/steps/commands/scripts/infer.sh b/configuration/steps/commands/scripts/infer.sh new file mode 100755 index 000000000..c51242c32 --- /dev/null +++ b/configuration/steps/commands/scripts/infer.sh @@ -0,0 +1,286 @@ +#!/bin/bash + +# Infer script for 
performing +# static analysis on the MariaDB codebase + +set -x -e + +infer --version + +if [ $# -lt 1 ]; then + echo insufficient args >&2 + exit 1 +fi + +# Testing this version +branch=$1 + +if [ -z "$branch" ]; then + echo "usage $0 {branch/commit}" >&2 + exit 1 +fi + +: "${JOBS:=4}" + +base=$PWD +result_dir=$PWD/infer_results +infer="/mnt/infer" + +rm -rf "${result_dir}" index.txt report.json + +## Fetch + +pushd /mnt/src +if [ ! -d .git ]; then + git clone https://github.com/MariaDB/server.git . +else + git clean -df +fi +git fetch origin "$branch" +git checkout -f FETCH_HEAD +git submodule update --init --recursive --jobs "${JOBS}" +git clean -df +commit=$(git rev-parse FETCH_HEAD) + +if [ -d "${infer}/$commit" ]; then + echo "Already scanned $commit" + exit 0 +fi + +# Directory to clean +# Target maximum usage (in percent) +max_usage=90 + +# Function to get current disk usage (integer percent) +get_usage() { + df -P "$infer" | awk 'NR==2 {gsub(/%/,""); print $5}' +} + +echo "Checking disk usage on $(df -h "$infer" | tail -n -1)" +usage=$(get_usage) +echo "Current usage: ${usage}%" + +# Find directories sorted by oldest modification time (oldest first) +mapfile -t dirs < <( + find "$infer" -mindepth 1 -maxdepth 1 -type d -printf '%T@ %p\n' \ + | sort -n | awk '{print $2}' +) + +# Loop through and delete until below threshold +for dir in "${dirs[@]}"; do + if (( usage < max_usage )); then + echo "Disk usage is ${usage}%, below ${max_usage}%. Done." + break + fi + + echo "Deleting oldest directory: $dir" + rm -rf -- "$dir" + + usage=$(get_usage) + echo "New usage: ${usage}%" +done + +if (( usage >= max_usage )); then + echo "Warning: disk still above ${max_usage}% after deleting all directories!" +else + echo "Done. Disk usage now ${usage}%." +fi + + +# What can we use as a reference + +populate_differences() +# input $merge_base +{ + # Find something closer - e.g. 
we've appended to a branch + # we've already tested + mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD") + for common_commit in "${commits[@]}"; do + if [ -d "${infer}/$common_commit" ]; then + break; + fi + done + if [ -z "${common_commit:-}" ] || [ ! -d "${infer}/$common_commit" ]; then + return 1 + fi + merge_base=$common_commit + # The files changed since the last results + git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt + + if [ ! -s "$base"/index.txt ]; then + echo "Empty changes - nothing necessary" + rm "$base"/index.txt + exit 0 + fi + + limit=50 + if [ "$(wc -l < "${base}"/index.txt)" -gt $limit ]; then + echo "More than $limit changes, just do a full generation" + rm "$base/index.txt" + return 1 + fi + + # use previous results as a base + cp -a "$infer/$merge_base" "$result_dir" + + # Used as a recently-used marker + # Eventually we can remove/clear based on not being looked at + touch "$infer/$merge_base" + return 0 +} + +# Just assume we diverged from main at some point +# Using $commit because merge-base didn't process +# pull request references. +merge_base=$(git merge-base "$commit" origin/main) + +if ! populate_differences; then + echo "No common commit ancestor with analysis or over depth limit($limit)" >&2 + + echo "This is going to take a while for a full scan" +fi + +# back from /mnt/src +popd + +# Build + +build() +{ + cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -S /mnt/src -B bld + cmake --build bld \ + --target GenError GenServerSource GenUnicodeDataSource GenFixPrivs \ + --parallel "$JOBS" +} + +if [ ! 
-d bld ]; then + mkdir bld + build +fi + +# +capture() +{ + infer capture --compilation-database compile_commands.json --project-root /mnt/src --results-dir "${result_dir}" "$@" +} + +analyze() +{ + infer analyze --project-root /mnt/src --results-dir "${result_dir}" --max-jobs "${JOBS}" "$@" +} +# Capture and analyze the feature of the files changes in index +# +cd bld + +if [ ! -f ../index.txt ]; then + echo "full run, this could take a while" + capture + analyze + cp -a "$result_dir" "$infer/$commit" + cd .. +else + echo "incremental run" + # We've copied over a result dir, so we're continuing + # https://fbinfer.com/docs/infer-workflow/#differential-workflow + # using 'infer capture" instead infer run + capture --reactive + + # some form of incremental + analyze --changed-files-index ../index.txt + + # Preserve result + cp "${result_dir}"/report.json ../report.json + + # just in case these have changed, including generated files + cd .. + build + cd bld + + # Can we use the previous captured $infer/$merge_base + capture --merge-capture "$infer/$merge_base" --reactive --mark-unchanged-procs + + analyze --incremental-analysis --changed-files-index ../index.txt + + # It may be merged next, or a commit pushed on top of it. + infer reportdiff --report-current ../report.json --report-previous "${result_dir}"/report.json --project-root /mnt/src --results-dir "${result_dir}" + cd .. + ## At this point we have infer_results/differential/{fixed,introduced}.json + #!? Change the name as we're going to use differential as a main branch difference + #!!mv "${result_dir}"/differential "${result_dir}"/diff_prev_commit +fi +rm -rf bld index.txt + +# Useful enough to save as $infer/ +# Its unknown if this is on main branch or now, but just save. +# If its merged next, then a commit exists, if a user appends +# a commit, we've got a smaller delta. 
+cp -aT "${result_dir}" "$infer/${commit}" + +# Look at the changes from the main branch +# +# Take the main branch report.json +# remove fixed, add introduced, and then walk +# though other commits, if they exist, and apply the +# same again up until, and including the last commit +source /mnt/src/VERSION +branch=${MYSQL_VERSION_MAJOR}.${MYSQL_VERSION_MINOR} + +pushd /mnt/src +merge_base=$(git merge-base "origin/$branch" "$commit") +mapfile -t commits < <(git rev-list --reverse "${merge_base}..${commit}") +popd + +base=${infer}/$merge_base +last_ref=$base +for common_commit in "${commits[@]}"; do + diff_dir="${infer}/$common_commit"/differential/ + if [ -d "$diff_dir" ]; then + # removed fixed issues and append introduced. + jq --slurpfile to_remove "${diff_dir}"/fixed.json ' + ($to_remove[0] | map(.hash)) as $hashes_to_remove + | map(select(.hash as $h | $hashes_to_remove | index($h) | not))' \ + "${last_ref}"/report.json > filtered.json + jq -s 'add | unique_by(.hash)' filtered.json "${diff_dir}"/introduced.json > report.json + last_ref=$PWD + fi +done + +infer reportdiff --report-current report.json --report-previous "${base}"/report.json --project-root /mnt/src --results-dir "${result_dir}_diff" + +result_dir_main_diff=${result_dir}/main_diff +mv "${result_dir}_diff"/differential/ "${result_dir_main_diff}" +cp -a "${result_dir_main_diff}" "$infer/${commit}" + +# cleanup for smaller CI +rm -rf "${result_dir}"/*.db "${result_dir}"/tmp + +check() +{ + file=$1 + msg=$2 + if [ -f "${file}" ]; then + filesize=$(stat -c%s "$file") + # 2 is the size of an empty json array '[]' + if [ "$filesize" -gt 2 ]; then + echo "$msg" + echo + echo "Here are the changes:" + jq . "${file}" + return 1 + fi + fi + return 0 +} + +check "${result_dir}"/differential/fixed.json "Good human! Thanks for fixing the bad things in the last commit" || true + +check "${result_dir}"/differential/introduced.json "Bad human! 
Don't introduce bad things in the last commit" >&2 || true + +check "${result_dir_main_diff}"/fixed.json "Good human! Thanks for fixing the bad things" || true + +if ! check "${result_dir_main_diff}"/introduced.json "Bad human! Don't introduce bad things" >&2; then + exit 1 +fi diff --git a/configuration/steps/commands/util.py b/configuration/steps/commands/util.py index 2420ba57d..79347db15 100644 --- a/configuration/steps/commands/util.py +++ b/configuration/steps/commands/util.py @@ -173,3 +173,12 @@ def __init__( ): args = [f"{binary}:{','.join(libs)}" for binary, libs in binary_checks.items()] super().__init__(script_name="ldd_check.sh", args=args) + + +class InferScript(BashScriptCommand): + """ + A command to run the Infer analysis on the MariaDB codebase. + """ + + def __init__(self, branch: str): + super().__init__(script_name="infer.sh", args=[branch]) diff --git a/constants.py b/constants.py index 14dbb98f3..dc9663e20 100644 --- a/constants.py +++ b/constants.py @@ -177,6 +177,7 @@ "amd64-debian-12-debug-embedded", "amd64-fedora-41", "amd64-fedora-42", + "amd64-infer-clang-20", "amd64-msan-clang-20-debug", "amd64-opensuse-1506", "amd64-rhel-10", diff --git a/master-migration/master.cfg b/master-migration/master.cfg index ca3760b76..020c56f1e 100644 --- a/master-migration/master.cfg +++ b/master-migration/master.cfg @@ -16,6 +16,7 @@ from configuration.builders.sequences.compile_only import ( from configuration.builders.sequences.debug import openssl_fips from configuration.builders.sequences.release import deb_autobake, rpm_autobake from configuration.builders.sequences.sanitizers import asan_ubsan, msan +from configuration.builders.sequences.sast import infer from configuration.reporters import github_summary from configuration.workers import worker from master_common import base_master_config, IS_CHECKCONFIG @@ -318,6 +319,41 @@ def msan_builder(name: str, debug: bool) -> GenericBuilder: builder = "amd64-msan-clang-20-debug" 
c["builders"].append(msan_builder(name=builder, debug=builder.endswith("debug"))) +## ------------------------------------------------------------------- ## +## STATIC ANALYZERS BUILDERS ## +## ------------------------------------------------------------------- ## + +c["builders"].append( + GenericBuilder( + name="amd64-infer-clang-20", + sequences=[ + infer( + config=DockerConfig( + repository=os.environ["CONTAINER_REGISTRY_URL"], + image_tag="debian13-infer-clang-20", + workdir=PurePath("/home/buildbot"), + bind_mounts=[ + ("/srv/buildbot/src", "/mnt/src"), + ("/srv/buildbot/infer", "/mnt/infer"), + (f'{os.environ["MASTER_PACKAGES_DIR"]}/', "/packages"), + ], + shm_size=shm_size, + env_vars=[ + ("ARTIFACTS_URL", os.environ["ARTIFACTS_URL"]), + ], + memlock_limit=memlock_limit, + ), + ), + ], + ).get_config( + workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + next_build=nextBuild, + can_start_build=canStartBuild, + tags=["clang", "infer", "sast"], + jobs=6, + ) +) + ## ------------------------------------------------------------------- ## ## REPORTERS ## ## ------------------------------------------------------------------- ## From 4f323809833722ffdb428474ea53ad3cd46d54fd Mon Sep 17 00:00:00 2001 From: Razvan-Liviu Varzaru Date: Tue, 28 Oct 2025 14:38:46 +0200 Subject: [PATCH 2/2] MDBF 143 - Configure the builder worker host Run infer on bg-bbw5-x64 --- master-migration/master.cfg | 23 ++++++++++++++--------- master-private.cfg-sample | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/master-migration/master.cfg b/master-migration/master.cfg index 020c56f1e..98907cc24 100644 --- a/master-migration/master.cfg +++ b/master-migration/master.cfg @@ -39,7 +39,12 @@ WORKER_POOL.add( WORKER_POOL.add( arch="amd64", worker=worker.NonLatent(name="hz-bbw9", config=config, total_jobs=110) ) +WORKER_POOL.add( + arch="amd64", worker=worker.NonLatent(name="bg-bbw5-x64", config=config, total_jobs=8) +) +DEFAULT_AMD64_WORKER_POOL = 
WORKER_POOL.get_workers_for_arch(arch="amd64", filter_fn=lambda name: name in ["hz-bbw8", "hz-bbw9"]) +INFER_WORKER_POOL = WORKER_POOL.get_workers_for_arch(arch="amd64", filter_fn=lambda name: name == "bg-bbw5-x64") c["workers"] = WORKER_POOL.get_instances() @@ -101,7 +106,7 @@ c["builders"] = [ ), ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch=arch), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=[], @@ -139,7 +144,7 @@ c["builders"] = [ ), ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch=arch), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=[], @@ -183,7 +188,7 @@ c["builders"].extend( ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=["compile-only", "protected"], @@ -221,7 +226,7 @@ c["builders"].append( ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=[ @@ -265,7 +270,7 @@ def ubasan_builder(name: str, debug: bool) -> GenericBuilder: ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=list(tags_ubasan), @@ -308,7 +313,7 @@ def msan_builder(name: str, debug: bool) -> GenericBuilder: ) ], ).get_config( - workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=DEFAULT_AMD64_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=list(tags_msan), @@ -337,7 +342,7 @@ c["builders"].append( ("/srv/buildbot/infer", "/mnt/infer"), (f'{os.environ["MASTER_PACKAGES_DIR"]}/', "/packages"), ], - shm_size=shm_size, + shm_size="16g", env_vars=[ ("ARTIFACTS_URL", os.environ["ARTIFACTS_URL"]), ], @@ -346,11 +351,11 @@ c["builders"].append( ), ], ).get_config( - 
workers=WORKER_POOL.get_workers_for_arch(arch="amd64"), + workers=INFER_WORKER_POOL, next_build=nextBuild, can_start_build=canStartBuild, tags=["clang", "infer", "sast"], - jobs=6, + jobs=8, ) ) diff --git a/master-private.cfg-sample b/master-private.cfg-sample index 2a48b7c2a..f98b6eeb3 100644 --- a/master-private.cfg-sample +++ b/master-private.cfg-sample @@ -59,6 +59,7 @@ private["worker_pass"]= { "libvirt": "1234", "hz-bbw8": "1234", "hz-bbw9": "1234", + "bg-bbw5-x64": "1234", } private["docker_workers"]= { "amd-bbw1-docker":"tcp://IP_address:port",