Skip to content

Commit ba8e232

Browse files
committed
MDBF-143: Add Infer builder
This performs static analysis on the MariaDB codebase by maintaining a git source repository as a shared volume. Because static analysis takes time, a lot of time, there is a shared cache volume to store build results from main branches of the codebase so that as much incremental reuse as possible can occur. Infer runs in two phases, a capture and an analyze. Infer's output is in a result-dir, which contains: * report.json - what infer tools use * report.txt - the human readable version of this * capture.db - the sqlite3 representation of captured files and their relation to function definitions. * results.db - the analyze phase outputs. Of these, report.json is desirable as the long term record of vulnerabilities.
1 parent 31cbb6e commit ba8e232

File tree

5 files changed

+323
-0
lines changed

5 files changed

+323
-0
lines changed

configuration/builders/sequences/helpers.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from configuration.steps.base import StepOptions
66
from configuration.steps.commands.base import URL
77
from configuration.steps.commands.mtr import MTRReporter, MTRTest
8+
from configuration.steps.commands.packages import SavePackages
89
from configuration.steps.commands.util import (
910
CreateS3Bucket,
1011
DeleteS3Bucket,
@@ -391,3 +392,24 @@ def mtr_junit_reporter(
391392
warn_on_fail=True,
392393
),
393394
)
395+
396+
397+
def save_infer_logs(
    step_wrapping_fn=lambda step: step,
):
    """Build the step that saves the ``infer_results`` package as build logs.

    The step always runs (``alwaysRun=True``) and links to the saved
    location under ``ARTIFACTS_URL``.

    Args:
        step_wrapping_fn: Callable applied to the finished ``ShellStep``
            before it is returned. The default returns the step unchanged.

    Returns:
        Whatever ``step_wrapping_fn`` returns for the constructed step.

    Raises:
        KeyError: If ``ARTIFACTS_URL`` is not set in the environment.
    """
    save_command = SavePackages(
        packages=["infer_results"],
        name="Save Infer artifacts/logs",
        destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
    )
    artifacts_link = URL(
        url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
        url_text="Infer artifacts/logs",
    )
    # alwaysRun: this step is not skipped when an earlier step has failed.
    step_options = StepOptions(
        alwaysRun=True,
    )
    save_step = ShellStep(
        command=save_command,
        url=artifacts_link,
        options=step_options,
    )
    return step_wrapping_fn(save_step)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from pathlib import PurePath
2+
3+
from configuration.builders.infra.runtime import (
4+
BuildSequence,
5+
DockerConfig,
6+
InContainer,
7+
)
8+
from configuration.builders.sequences.helpers import save_infer_logs
9+
from configuration.steps.base import StepOptions
10+
from configuration.steps.commands.base import BashCommand
11+
from configuration.steps.commands.compile import CompileCMakeCommand
12+
from configuration.steps.commands.configure import ConfigureMariaDBCMake
13+
from configuration.steps.commands.download import GitFetch, GitInitFromCommit
14+
from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails
15+
from configuration.steps.generators.cmake.compilers import ClangCompiler
16+
from configuration.steps.generators.cmake.generator import CMakeGenerator
17+
from configuration.steps.generators.cmake.options import (
18+
CMAKE,
19+
WITH,
20+
BuildType,
21+
CMakeOption,
22+
)
23+
from configuration.steps.remote import ShellStep
24+
25+
26+
def infer(
    config: DockerConfig,
    jobs: int,
):
    """Assemble the build sequence that runs the Infer static analysis.

    The sequence prints the environment details, runs ``git clean -df`` in
    ``/mnt/src`` (a failure there does not halt the build), runs the Infer
    script for the branch under test, and finally saves the Infer logs.
    Every step except the first is wrapped in ``InContainer``.

    Args:
        config: Docker environment used for every containerised step.
        jobs: Value passed to the Infer script as the ``JOBS`` environment
            variable.

    Returns:
        The populated ``BuildSequence``.
    """

    def in_container(step):
        # Every containerised step uses the same docker environment.
        return InContainer(docker_environment=config, step=step)

    sequence = BuildSequence()

    sequence.add_step(ShellStep(command=PrintEnvironmentDetails()))
    # infer --version

    clean_step = ShellStep(
        command=BashCommand(cmd="git clean -df", workdir=PurePath("/mnt", "src")),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="git cleaned",
        ),
    )
    sequence.add_step(in_container(clean_step))

    env_vars = [("JOBS", str(jobs))]
    analysis_step = ShellStep(
        command=InferScript("%(prop:branch)s"),
        options=StepOptions(
            descriptionDone="infer analysis complete",
        ),
        env_vars=env_vars,
    )
    sequence.add_step(in_container(analysis_step))

    sequence.add_step(save_infer_logs(step_wrapping_fn=in_container))
    return sequence
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/bin/bash

# Infer script for performing
# static analysis on the MariaDB codebase
#
# Usage: infer.sh {branch/commit}
# Environment: JOBS - parallelism passed to the build and to infer
#              (defaults to 4 when unset or empty)

# -x traces every command, -e aborts on the first failing command
set -xe

if [ "$#" -lt 1 ]; then
  echo "insufficient args" >&2
  exit 1
fi

# Testing this version
branch=$1

if [ -z "$branch" ]; then
  echo "usage $0 {branch/commit}" >&2
  exit 1
fi

JOBS=${JOBS:-4}

# Working directory of the job; results are collected next to it
base=$PWD
result_dir=$base/infer_results

## Fetch

# Stay in /mnt/src until the matching popd further down
pushd /mnt/src
git fetch origin "$branch"
git checkout -f FETCH_HEAD
git submodule update --init --recursive
commit=$(git rev-parse FETCH_HEAD)

# Results are cached per commit under /mnt/infer
if [ -d "/mnt/infer/$commit" ]; then
  echo "Already scanned $commit"
  exit 0
fi
38+
39+
# What can we use as a reference

# populate_differences
#
# Look for earlier analysis results under /mnt/infer that the current
# FETCH_HEAD can build on. Must be called from inside the git checkout.
#
# Input:   $merge_base - commit assumed to be shared with the main branch
#          $base       - directory that receives index.txt
#          $result_dir - directory that receives the copied results
# Output:  $merge_base - replaced by the commit whose results were reused
#          $base/index.txt - files that differ between that commit and
#                            FETCH_HEAD
# Returns: 0 when previous results were copied to $result_dir,
#          1 when no candidate commit has cached results.
# Exits the whole script with status 0 when a cached commit exists but no
# files differ from it.
populate_differences()
{
  local candidate
  local common_commit=""
  local -a commits

  # Find something closer - e.g. we've appended to a branch
  # we've already tested
  mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")

  # rev-list excludes $merge_base itself, so try it last as the fallback.
  # Tracking the match in $common_commit (instead of reusing the loop
  # variable) keeps an empty rev-list from testing the bare /mnt/infer/
  # directory, which always exists.
  for candidate in "${commits[@]}" "$merge_base"; do
    if [ -n "$candidate" ] && [ -d "/mnt/infer/$candidate" ]; then
      common_commit=$candidate
      break
    fi
  done
  if [ -z "$common_commit" ]; then
    return 1
  fi
  merge_base=$common_commit

  # The files changed since the last results
  git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

  if [ ! -s "$base"/index.txt ]; then
    echo "Empty changes - nothing necessary"
    rm "$base"/index.txt
    exit 0
  fi

  # use previous results as a base
  cp -a "/mnt/infer/$merge_base" "$result_dir"

  # Using the mtime as a recently used marker
  touch "/mnt/infer/$merge_base"
  return 0
}
72+
73+
# Just assume we diverged from main at some point
# Using $commit because merge-base didn't process
# pull request references.
merge_base=$(git merge-base "$commit" origin/main)

# populate_differences returns non-zero when no cached analysis could be
# found; only in that case is a full scan coming, so the condition must be
# negated.
if ! populate_differences; then
  echo "No common commit ancestor with analysis" >&2

  echo "This is going to take a while for a full scan"
fi

# back from /mnt/src
popd
86+
87+
# Build

# build
#
# Configure /mnt/src into ./bld with clang and an exported compilation
# database, then build only the GenError, GenServerSource,
# GenUnicodeDataSource and GenFixPrivs targets using $JOBS parallel jobs.
build()
{
  local -a configure_opts=(
    -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    -DCMAKE_C_COMPILER=clang
    -DCMAKE_CXX_COMPILER=clang++
  )
  local -a gen_targets=(
    GenError
    GenServerSource
    GenUnicodeDataSource
    GenFixPrivs
  )

  cmake "${configure_opts[@]}" -S /mnt/src -B bld
  cmake --build bld --target "${gen_targets[@]}" --parallel "$JOBS"
}

# An existing bld directory is reused as-is
if [ ! -d bld ]; then
  mkdir bld
  build
fi
104+
105+
# capture [extra infer options...]
#
# Run the infer capture phase against compile_commands.json in the current
# directory, writing into $result_dir. Extra arguments are passed through.
capture()
{
  local -a capture_opts=(
    --compilation-database compile_commands.json
    --project-root /mnt/src
    --results-dir "${result_dir}"
  )

  infer capture "${capture_opts[@]}" "$@"
}
110+
111+
# analyze [extra infer options...]
#
# Run the infer analyze phase on $result_dir with at most $JOBS jobs.
# Extra arguments are passed through.
analyze()
{
  local -a analyze_opts=(
    --project-root /mnt/src
    --results-dir "${result_dir}"
    --max-jobs "${JOBS}"
  )

  infer analyze "${analyze_opts[@]}" "$@"
}
115+
# Capture and analyze the feature of the files changes in index
#
cd bld

# No index.txt means populate_differences found nothing to build on
if [ ! -f ../index.txt ]; then
  echo "full run, this could take a while"
  capture
  analyze
  # Cache the complete results under the commit that was analysed
  mv "$result_dir" /mnt/infer/"$commit"
  cd ..
  rm -rf bld
  exit
fi

# We've copied over a result dir, so we're continuing
# https://fbinfer.com/docs/infer-workflow/#differential-workflow
# using 'infer capture" instead infer run

# First pass: the version under test
capture --reactive

# some form of incremental
analyze --changed-files-index ../index.txt

# Preserve result
cp "${result_dir}"/report.json ../report.json

cp -a "${result_dir}" "${result_dir}_preserved"

# Second pass: the reference commit
pushd /mnt/src
git checkout "$merge_base"
popd

# TODO
# How can we use the previous captured /mnt/infer/$merge_base

# just in case these have changed, including generated files
cd ..
build
cd bld

capture --reactive --mark-unchanged-procs
analyze --incremental-analysis --changed-files-index ../index.txt

# TODO useful enough to save as /mnt/infer/$commit
# it may be merged next, or a commit pushed on top of it.

# Compare the version under test (current) with the reference (previous)
infer reportdiff \
  --report-current ../report.json \
  --report-previous "${result_dir}"/report.json \
  --project-root /mnt/src \
  --results-dir "${result_dir}"
cd ..
rm -rf bld index.txt
# report.json
163+
164+
# check FILE MSG
#
# Report the findings stored in a differential JSON file.
#
# FILE - path to a JSON report (may be absent)
# MSG  - heading printed before the findings
#
# Returns 1, after printing MSG and the pretty-printed FILE, when FILE
# exists and is larger than an empty JSON array; returns 0 otherwise.
check()
{
  local file=$1
  local msg=$2
  local filesize

  if [ -f "${file}" ]; then
    filesize=$(stat -c%s "$file")
    # 2 is the size of an empty json array '[]'
    if [ "$filesize" -gt 2 ]; then
      echo "$msg"
      echo
      jq . "${file}"
      return 1
    fi
  fi
  return 0
}

# Fixed issues are informational only. check returns 1 when it has
# something to show, so without `|| true` the script would abort here
# under `set -e` precisely when issues were fixed.
check "${result_dir}"/differential/fixed.json "Good human! Thanks for fixing the bad things" || true

# Fail the run only when check found newly introduced issues (status 1).
if ! check "${result_dir}"/differential/introduced.json "Bad human! Don't introduce bad things" >&2; then
  exit 1
fi

configuration/steps/commands/util.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,12 @@ def __init__(
173173
):
174174
args = [f"{binary}:{','.join(libs)}" for binary, libs in binary_checks.items()]
175175
super().__init__(script_name="ldd_check.sh", args=args)
176+
177+
178+
class InferScript(BashScriptCommand):
    """Command that runs the ``infer.sh`` script on the MariaDB codebase.

    Args:
        branch: Passed to the script as its only argument.
    """

    def __init__(self, branch: str):
        script_args = [branch]
        super().__init__(script_name="infer.sh", args=script_args)

master-migration/master.cfg

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ from configuration.builders.sequences.compile_only import (
1616
from configuration.builders.sequences.debug import openssl_fips
1717
from configuration.builders.sequences.release import deb_autobake, rpm_autobake
1818
from configuration.builders.sequences.sanitizers import asan_ubsan
19+
from configuration.builders.sequences.sast import infer
1920
from configuration.reporters import github_summary
2021
from configuration.workers import worker
2122
from master_common import base_master_config, IS_CHECKCONFIG
@@ -277,6 +278,41 @@ builder = "amd64-ubasan-clang-20"
277278
c["builders"].append(ubasan_builder(name=builder, debug=builder.endswith("debug")))
278279

279280

281+
## ------------------------------------------------------------------- ##
282+
## STATIC ANALYZERS BUILDERS ##
283+
## ------------------------------------------------------------------- ##
284+
285+
# Infer builder: runs the `infer` sequence inside the
# debian13-infer-clang-20 container on amd64 workers.
c["builders"].append(
    GenericBuilder(
        name="amd64-infer-clang-20",
        sequences=[
            infer(
                config=DockerConfig(
                    repository=os.environ["CONTAINER_REGISTRY_URL"],
                    image_tag="debian13-infer-clang-20",
                    workdir=PurePath("/home/buildbot"),
                    bind_mounts=[
                        # host path -> path inside the container
                        ("/srv/buildbot/src", "/mnt/src"),
                        ("/srv/buildbot/infer", "/mnt/infer"),
                        (f'{os.environ["MASTER_PACKAGES_DIR"]}/', "/packages"),
                    ],
                    shm_size=shm_size,
                    env_vars=[
                        ("ARTIFACTS_URL", os.environ["ARTIFACTS_URL"]),
                    ],
                    memlock_limit=memlock_limit,
                ),
                jobs=12,
            )
        ],
    ).get_config(
        workers=WORKER_POOL.get_workers_for_arch(arch="amd64"),
        next_build=nextBuild,
        can_start_build=canStartBuild,
        tags=["clang", "infer", "sast"],
        jobs=12,
    )
)
280316

281317
## ------------------------------------------------------------------- ##
282318
## REPORTERS ##

0 commit comments

Comments
 (0)