Skip to content

Commit b6cbeca

Browse files
committed
MDBF-143: Add Infer builder
This performs static analysis on the MariaDB codebase by maintaining a git source repository as a shared volume. Because static analysis takes time, a lot of time, there is a shared cache volume to store build results from the main branches of the codebase so that as much incremental usage as possible can occur. Infer runs in two phases, a capture and an analyze. Infer output is in a result-dir, which contains: * report.json - what infer tools use * report.txt - the human-readable version of this * capture.db - the sqlite3 representation of captured files and their relation to function definitions. * results.db - the analyze phase outputs. Of these, the report.json is desirable as the long-term record of vulnerabilities.
1 parent f7efe87 commit b6cbeca

File tree

5 files changed

+431
-0
lines changed

5 files changed

+431
-0
lines changed

configuration/builders/sequences/helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,3 +391,26 @@ def mtr_junit_reporter(
391391
warn_on_fail=True,
392392
),
393393
)
394+
395+
396+
def save_infer_logs(
    logs_path: PurePath = PurePath("infer_results"),
    step_wrapping_fn=lambda step: step,
):
    """Build the step that archives the Infer result directory.

    Args:
        logs_path: Working directory handed to ``SaveCompressedTar``;
            defaults to ``infer_results``.
        step_wrapping_fn: Callable applied to the finished ``ShellStep``
            before it is returned (identity by default), e.g. to wrap the
            step so that it runs inside a container.

    Returns:
        Whatever ``step_wrapping_fn`` returns for the constructed step.

    Raises:
        KeyError: If ``ARTIFACTS_URL`` is not set in the environment.
    """
    archive_command = SaveCompressedTar(
        name="Save Infer artifacts/logs",
        workdir=logs_path,
        archive_name="logs",
        destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
    )
    artifacts_link = URL(
        url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
        url_text="Infer artifacts/logs",
    )
    # alwaysRun=True: the step is configured to run even when earlier steps
    # did not succeed.
    save_step = ShellStep(
        command=archive_command,
        url=artifacts_link,
        options=StepOptions(alwaysRun=True),
    )
    return step_wrapping_fn(save_step)
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
from pathlib import PurePath
2+
3+
from configuration.builders.infra.runtime import (
4+
BuildSequence,
5+
DockerConfig,
6+
InContainer,
7+
)
8+
from configuration.builders.sequences.helpers import save_infer_logs
9+
from configuration.steps.base import StepOptions
10+
from configuration.steps.command.base import BashCommand
11+
from configuration.steps.commands.compile import CompileCMakeCommand
12+
from configuration.steps.commands.configure import ConfigureMariaDBCMake
13+
from configuration.steps.commands.download import GitFetch, GitInitFromCommit
14+
from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails
15+
from configuration.steps.generators.cmake.compilers import ClangCompiler
16+
from configuration.steps.generators.cmake.generator import CMakeGenerator
17+
from configuration.steps.generators.cmake.options import (
18+
CMAKE,
19+
WITH,
20+
BuildType,
21+
CMakeOption,
22+
)
23+
from configuration.steps.remote import ShellStep
24+
25+
26+
def infer(
    config: DockerConfig,
    jobs: int,
):
    """Assemble the build sequence that runs Infer static analysis.

    Args:
        config: Docker environment used for every containerised step.
        jobs: Parallelism passed to the git checkout and compile commands
            and exported to the Infer script as the ``JOBS`` variable.

    Returns:
        The populated ``BuildSequence``.
    """
    source_dir = PurePath("/mnt", "src")
    build_dir = "bld"

    def in_container(step):
        # All steps but the environment dump run inside ``config``.
        return InContainer(docker_environment=config, step=step)

    sequence = BuildSequence()

    sequence.add_step(ShellStep(command=PrintEnvironmentDetails()))
    # infer --version

    # Remove untracked files from the source tree; a failure here does not
    # halt the build.
    clean_step = ShellStep(
        command=BashCommand(cmd="git clean -df", workdir=source_dir),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="git cleaned",
        ),
    )
    sequence.add_step(in_container(clean_step))

    # Bring the source tree to the revision being built.
    checkout_step = ShellStep(
        command=GitInitFromCommit(
            repo_url="%(prop:repository)s",
            commit="%(prop:revision)s",
            jobs=jobs,
            depth=0,
            workdir=source_dir,
        )
    )
    sequence.add_step(in_container(checkout_step))

    # Record the names of the files that differ from the master branch.
    changed_files_step = ShellStep(
        command=BashCommand(
            cmd="git diff --name-only FETCH_HEAD..%(prop:master_branch)s | tee $OLD_PWD/index.txt",
            workdir=source_dir,
        ),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="names of changed files",
        ),
    )
    sequence.add_step(in_container(changed_files_step))

    flags = [
        # UBSAN is the only prevention of UNINIT_VAR(X) x= x
        # that generated lots of uninit read/write errors.
        CMakeOption(WITH.UBSAN, True),
        CMakeOption(CMAKE.EXPORT_COMPILE_COMMANDS, True),
    ]

    configure_step = ShellStep(
        command=ConfigureMariaDBCMake(
            name="configure",
            cmake_generator=CMakeGenerator(
                use_ccache=True,
                flags=flags,
                source_path="/mnt/src",
                builddir=build_dir,
                compiler=ClangCompiler(),
            ),
        ),
        options=StepOptions(descriptionDone="Configure"),
    )
    sequence.add_step(in_container(configure_step))

    # Some server code is generated, so these need to be generated to test
    generate_step = ShellStep(
        command=CompileCMakeCommand(
            builddir=build_dir,
            jobs=jobs,
            verbose=True,
            targets=[
                "GenError",
                "GenServerSource",
                "GenUnicodeDataSource",
                "GenFixPrivs",
            ],
        ),
        options=StepOptions(descriptionDone="compile"),
    )
    sequence.add_step(in_container(generate_step))

    analysis_step = ShellStep(
        command=InferScript("%(prop:branch)s", "%(prop:master_branch)s"),
        options=StepOptions(
            descriptionDone="infer analysis complete",
        ),
        env_vars=[("JOBS", str(jobs))],
    )
    sequence.add_step(in_container(analysis_step))

    # Archive the Infer results from inside the same container.
    sequence.add_step(save_infer_logs(step_wrapping_fn=in_container))
    return sequence
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/bin/bash
2+
3+
# Infer script for performing
4+
# static analysis on the MariaDB codebase
5+
6+
# Trace every command (-x) and abort on the first failing command (-e).
set -x -e
7+
8+
# Both positional arguments (branch, master_branch) are required.
if [ $# -lt 2 ]; then
9+
echo insufficient args >&2
10+
exit 1
11+
fi
12+
13+
# The branch under test (first argument)
14+
branch=$1
15+
16+
shift
17+
# The master branch it is compared against (second argument, $1 after the shift)
18+
master_branch=$1
19+
20+
# Reject arguments that were supplied but empty.
if [ -z "$branch" ] || [ -z "$master_branch" ]; then
21+
echo "usage $0 branch master_branch" >&2
22+
exit 1
23+
fi
24+
25+
# Default JOBS to 4 when it is unset or empty.
: "${JOBS:=4}"
26+
27+
# Remember the starting directory; Infer results are collected in
# infer_results directly beneath it.
base=$PWD
28+
result_dir=$PWD/infer_results
29+
30+
## Fetch
31+
32+
# Check out the fetched tip of $branch, submodules included, in /mnt/src.
pushd /mnt/src
33+
git fetch origin "$branch"
34+
git checkout -f FETCH_HEAD
35+
git submodule update --init --recursive
36+
commit=$(git rev-parse FETCH_HEAD)
37+
38+
# A directory named after the commit under /mnt/infer means this commit was
# already scanned, so there is nothing to do.
if [ -d "/mnt/infer/$commit" ]; then
39+
echo "Already scanned $commit"
40+
exit 0
41+
fi
42+
43+
# What can we use as a reference
44+
45+
#if [ ! -L /mnt/infer/"$master_branch" ] && [ -L /mnt/infer/main ]; then
46+
# # Attempting to use main to find/create a base $master_branch
47+
# merge_base=$(git merge-base "$master_branch" main)
48+
# if [ -n "$merge_base" ]; then
49+
# if [ -d /mnt/infer/"$merge_base" ]; then
50+
# echo "Creating $master_branch based of $merge_base"
51+
# ln -s "$merge_base" /mnt/infer/"$master_branch"
52+
# else
53+
# echo "Creating $master_branch based of main"
54+
# # could be a bit inaccurate as main has moved on from $master_branch
55+
# ln -s "$(readlink /mnt/infer/main)" /mnt/infer/"$master_branch"
56+
# fi
57+
# fi
58+
#fi
59+
60+
populate_differences()
# input $merge_base
#
# Picks the closest already-analysed commit between FETCH_HEAD and
# $merge_base (inclusive), writes the list of files changed since that commit
# to $base/index.txt and seeds $result_dir with that commit's cached results.
#
# Reads:   merge_base, base, result_dir, branch, master_branch
# Updates: merge_base (set to the commit whose results were reused)
# Exits:   1 when no analysed commit exists, 0 when no files changed.
{
    local -a candidates
    local candidate
    local reference=""

    # Find something closer - e.g. we've appended to a branch
    # we've already tested
    mapfile -t candidates < <(git rev-list "${merge_base}..FETCH_HEAD")
    # "A..B" excludes A itself, so the merge base has to be added explicitly
    # as the last (oldest) candidate; otherwise its cached results could
    # never be selected, and an empty list would leave nothing to test.
    candidates+=("$merge_base")

    for candidate in "${candidates[@]}"; do
        if [ -d "/mnt/infer/$candidate" ]; then
            reference=$candidate
            break
        fi
    done
    if [ -z "$reference" ]; then
        echo "From $branch to master branch $master_branch last analysis $merge_base or later is missing" >&2
        exit 1
    fi
    merge_base=$reference

    # The files changed since the last results
    git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

    if [ ! -s "$base"/index.txt ]; then
        echo "Empty changes - nothing necessary"
        rm "$base"/index.txt
        exit 0
    fi

    # use previous results as a base
    cp -a "/mnt/infer/$merge_base" "$result_dir"
}
88+
89+
# Work out the commit to compare against.
#
# `git merge-base` exits with status 1 when the two revisions share no common
# ancestor. Under `set -e` a bare assignment would abort the script right
# there, making the full-scan fallback below unreachable, so the status is
# captured instead. Any other non-zero status is still treated as fatal.
merge_base_status=0
if [ "$branch" = "$master_branch" ]; then
    # compare against the last record we have for the master_branch
    # as this is a push on the master branch
    #last_master_branch_ref=$(readlink /mnt/infer/"$master_branch")
    #merge_base=$(git merge-base "$branch" "$last_master_branch_ref")

    # Just assume we diverged from main at some point
    merge_base=$(git merge-base "$branch" origin/main) || merge_base_status=$?
else
    merge_base=$(git merge-base "$branch" "$master_branch") || merge_base_status=$?
fi

if [ "$merge_base_status" -gt 1 ]; then
    echo "git merge-base failed with status $merge_base_status" >&2
    exit "$merge_base_status"
fi

if [ -z "$merge_base" ]; then
    echo "No common commit ancestor between $branch and $master_branch" >&2
    # We don't have a master symlink yet
    # lack of index.txt is the key
    echo "This is going to take a while for a full scan"
else
    populate_differences
fi

# back from /mnt/src
popd
113+
# Build
114+
115+
build()
{
    # Configure /mnt/src into ./bld with clang and a compilation database,
    # then build only the targets that produce generated sources.
    local -a configure_opts=(
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
        -DCMAKE_C_COMPILER=clang
        -DCMAKE_CXX_COMPILER=clang++
    )
    local -a generated_targets=(
        GenError
        GenServerSource
        GenUnicodeDataSource
        GenFixPrivs
    )

    cmake "${configure_opts[@]}" -S /mnt/src -B bld
    cmake --build bld --target "${generated_targets[@]}" --parallel "$JOBS"
}

# Only configure and build here when no build directory exists yet.
if [[ ! -d bld ]]; then
    mkdir bld
    build
fi
130+
131+
#
132+
capture()
{
    # Run the Infer capture phase from compile_commands.json in the current
    # directory; any extra arguments are passed through to infer.
    infer capture \
        --compilation-database compile_commands.json \
        --project-root /mnt/src \
        --results-dir "${result_dir}" \
        "$@"
}
136+
137+
analyze()
{
    # Run the Infer analyze phase on $result_dir with up to $JOBS jobs; any
    # extra arguments are passed through to infer.
    infer analyze \
        --project-root /mnt/src \
        --results-dir "${result_dir}" \
        --max-jobs "${JOBS}" \
        "$@"
}
141+
# Capture and analyze the feature of the files changes in index
142+
#
143+
cd bld
144+
145+
if [ ! -f ../index.txt ]; then
146+
echo "full run, this could take a while"
147+
capture
148+
analyze
149+
mv "$result_dir" /mnt/infer/"$commit"
150+
if [ "$branch" = "$master_branch" ];then
151+
ln -fs "$commit" /mnt/infer/"$master_branch"
152+
fi
153+
cd ..
154+
rm -rf bld
155+
exit
156+
fi
157+
158+
# We've copied over a result dir, so we're continuing
159+
# https://fbinfer.com/docs/infer-workflow/#differential-workflow
160+
# using 'infer capture" instead infer run
161+
capture --reactive
162+
163+
# some form of incremental
164+
analyze --changed-files-index ../index.txt
165+
166+
# Preserve result
167+
cp "${result_dir}"/report.json ../report.json
168+
169+
cp -a "${result_dir}" "${result_dir}_preserved"
170+
171+
pushd /mnt/src
172+
git checkout "$merge_base"
173+
popd
174+
175+
# TODO
176+
# How can we use the previous captured /mnt/infer/$merge_base
177+
178+
# just in case these have changed, including generated files
179+
cd ..
180+
build
181+
cd bld
182+
183+
capture --reactive --mark-unchanged-procs
184+
analyze --incremental-analysis --changed-files-index ../index.txt
185+
186+
# TODO useful enough to save as /mnt/infer/$commit
187+
# it may be merged next, or a commit pushed on top of it.
188+
infer reportdiff --report-current ../report.json --report-previous "${result_dir}"/report.json --project-root /mnt/src --results-dir "${result_dir}"
189+
cd ..
190+
rm -rf bld index.txt
191+
# report.json
192+
193+
check()
{
    # Report whether an Infer differential JSON file lists any issues.
    #   $1 - path of the JSON file; a missing file counts as "no issues"
    #   $2 - message printed when the file lists issues
    # Returns 1 after printing the message when the file is larger than an
    # empty JSON array, otherwise 0.
    local file=$1
    local msg=$2
    local filesize

    if [ -f "${file}" ]; then
        filesize=$(stat -c%s "$file")
        # 2 is the size of an empty json array '[]'
        if [ "$filesize" -gt 2 ]; then
            echo "$msg"
            return 1
        fi
    fi
    return 0
}

# Only newly introduced issues may fail the job. Under `set -e` a bare call
# would abort on the first non-zero return, so a non-empty fixed.json would
# fail the run and an introduced issue would hide the "fixed" report. Both
# files are therefore always checked and only "introduced" sets the status.
status=0
check "${result_dir}"/differential/introduced.json "bad human! Don't introduce bad things" || status=1
check "${result_dir}"/differential/fixed.json "good human! Thanks for fixing the bad things" || true
exit "$status"
211+
212+
213+

0 commit comments

Comments
 (0)