Skip to content

Commit d26d94f

Browse files
committed
MDBF-143: Add Infer builder
This performs static analysis on the MariaDB codebase by maintaining a git source repository as a shared volume. Because static analysis takes a lot of time, there is a shared cache volume that stores build results from the main branches of the codebase so that as much incremental analysis as possible can occur. Infer runs in two phases, a capture and an analyze. Infer's output is in a results directory, which contains: * report.json - what infer tools use * report.txt - the human-readable version of this * capture.db - the sqlite3 representation of captured files and their relation to function definitions * results.db - the analyze phase outputs. Of these, report.json is desirable as the long-term record of vulnerabilities.
1 parent 31cbb6e commit d26d94f

File tree

6 files changed

+371
-2
lines changed

6 files changed

+371
-2
lines changed

configuration/builders/sequences/helpers.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from configuration.steps.base import StepOptions
66
from configuration.steps.commands.base import URL
77
from configuration.steps.commands.mtr import MTRReporter, MTRTest
8+
from configuration.steps.commands.packages import SavePackages
89
from configuration.steps.commands.util import (
910
CreateS3Bucket,
1011
DeleteS3Bucket,
@@ -391,3 +392,23 @@ def mtr_junit_reporter(
391392
warn_on_fail=True,
392393
),
393394
)
395+
396+
397+
def save_infer_logs(
    step_wrapping_fn=lambda step: step,
):
    """Create the step that saves the ``infer_results`` package as build logs.

    The step is flagged ``alwaysRun`` and carries a link labelled
    "Infer artifacts/logs" that points below ``ARTIFACTS_URL``.

    Args:
        step_wrapping_fn: Callable applied to the finished ``ShellStep``
            before it is returned; the default returns the step unchanged.

    Returns:
        Whatever ``step_wrapping_fn`` returns for the constructed step.

    Raises:
        KeyError: If the ``ARTIFACTS_URL`` environment variable is not set.
    """
    save_command = SavePackages(
        packages=["infer_results"],
        destination="/packages/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
    )
    artifacts_link = URL(
        url=f"{os.environ['ARTIFACTS_URL']}/%(prop:tarbuildnum)s/logs/%(prop:buildername)s",
        url_text="Infer artifacts/logs",
    )
    save_step = ShellStep(
        command=save_command,
        url=artifacts_link,
        options=StepOptions(alwaysRun=True),
    )
    return step_wrapping_fn(save_step)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from pathlib import PurePath
2+
3+
from configuration.builders.infra.runtime import (
4+
BuildSequence,
5+
DockerConfig,
6+
InContainer,
7+
)
8+
from configuration.builders.sequences.helpers import save_infer_logs
9+
from configuration.steps.base import StepOptions
10+
from configuration.steps.commands.base import BashCommand
11+
from configuration.steps.commands.compile import CompileCMakeCommand
12+
from configuration.steps.commands.configure import ConfigureMariaDBCMake
13+
from configuration.steps.commands.util import InferScript, PrintEnvironmentDetails
14+
from configuration.steps.generators.cmake.compilers import ClangCompiler
15+
from configuration.steps.generators.cmake.generator import CMakeGenerator
16+
from configuration.steps.generators.cmake.options import (
17+
CMAKE,
18+
WITH,
19+
BuildType,
20+
CMakeOption,
21+
)
22+
from configuration.steps.remote import ShellStep
23+
24+
25+
def infer(
    config: DockerConfig,
    jobs: int,
):
    """Assemble the build sequence for an Infer static-analysis run.

    The sequence prints environment details, then (inside the container)
    cleans the checkout at /mnt/src, runs the Infer script for the build's
    branch property, and finally saves the Infer logs.

    Args:
        config: Docker environment every containerised step runs in.
        jobs: Value exported to the Infer script as the ``JOBS`` variable.

    Returns:
        The populated ``BuildSequence``.
    """

    def in_container(step):
        # Every containerised step of this sequence shares one environment.
        return InContainer(docker_environment=config, step=step)

    sequence = BuildSequence()

    sequence.add_step(ShellStep(command=PrintEnvironmentDetails()))
    # infer --version

    # A failed clean does not halt the build (haltOnFailure=False).
    clean_step = ShellStep(
        command=BashCommand(cmd="git clean -df", workdir=PurePath("/mnt", "src")),
        options=StepOptions(
            haltOnFailure=False,
            descriptionDone="git cleaned",
        ),
    )
    sequence.add_step(in_container(clean_step))

    analysis_step = ShellStep(
        command=InferScript("%(prop:branch)s"),
        options=StepOptions(
            descriptionDone="infer analysis complete",
        ),
        env_vars=[("JOBS", str(jobs))],
    )
    sequence.add_step(in_container(analysis_step))

    sequence.add_step(save_infer_logs(step_wrapping_fn=in_container))
    return sequence

configuration/steps/commands/base.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,14 @@ def as_cmd_arg(self) -> list[str]:
6262

6363

6464
class BashCommand(Command):
65-
def __init__(self, cmd: str, name: str = "Run command", user: str = "buildbot"):
66-
super().__init__(name=name, workdir=PurePath("."), user=user)
65+
def __init__(
66+
self,
67+
cmd: str,
68+
name: str = "Run command",
69+
user: str = "buildbot",
70+
workdir: PurePath = PurePath("."),
71+
):
72+
super().__init__(name=name, workdir=workdir, user=user)
6773
self.cmd = cmd
6874

6975
def as_cmd_arg(self) -> list[str]:
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
#!/bin/bash
2+
3+
# Infer script for performing
4+
# static analysis on the MariaDB codebase
5+
6+
set -x -e
7+
8+
if [ $# -lt 1 ]; then
9+
echo insufficient args >&2
10+
exit 1
11+
fi
12+
13+
# Testing this version
14+
branch=$1
15+
16+
if [ -z "$branch" ]; then
17+
echo "usage $0 {branch/commit}" >&2
18+
exit 1
19+
fi
20+
21+
: "${JOBS:=4}"
22+
23+
base=$PWD
24+
result_dir=$PWD/infer_results
25+
26+
rm -rf "${result_dir}" index.txt report.json
27+
28+
## Fetch
29+
30+
pushd /mnt/src
31+
git fetch origin "$branch"
32+
git checkout -f FETCH_HEAD
33+
git submodule update --init --recursive
34+
git clean -df
35+
commit=$(git rev-parse FETCH_HEAD)
36+
37+
if [ -d "/mnt/infer/$commit" ]; then
38+
echo "Already scanned $commit"
39+
exit 0
40+
fi
41+
42+
# What can we use as a reference
43+
44+
populate_differences()
# Select the closest previously analysed commit as the baseline for an
# incremental run.
#
# input:   $merge_base - merge base of the tested commit with origin/main
# output:  $merge_base - overwritten with the commit whose cached results
#                        are used as the baseline
#          $base/index.txt - files that differ from that baseline
#          $result_dir - copy of the baseline's cached results
# returns: 0 when a baseline was prepared, 1 when a full scan is needed.
# exits 0 when the tested commit has no file changes against the baseline.
{
  local common_commit='' candidate limit=50
  local -a commits

  # Find something closer - e.g. we've appended to a branch
  # we've already tested. rev-list lists newest first, so the first hit
  # is the nearest analysed commit.
  mapfile -t commits < <(git rev-list "${merge_base}..FETCH_HEAD")
  # rev-list excludes $merge_base itself; keep it as the final fallback so
  # cached main-branch results can serve as the baseline. This also keeps
  # the loop from running over an empty list.
  commits+=("$merge_base")
  for candidate in "${commits[@]}"; do
    if [ -d "/mnt/infer/$candidate" ]; then
      common_commit=$candidate
      break
    fi
  done
  if [ -z "$common_commit" ]; then
    return 1
  fi
  merge_base=$common_commit
  # The files changed since the last results
  git diff --name-only FETCH_HEAD.."${merge_base}" | tee "$base"/index.txt

  if [ ! -s "$base"/index.txt ]; then
    echo "Empty changes - nothing necessary"
    rm "$base"/index.txt
    exit 0
  fi

  if [ "$(wc -l < "${base}"/index.txt)" -gt "$limit" ]; then
    echo "More than $limit changes, just do a full generation"
    rm "$base/index.txt"
    return 1
  fi

  # use previous results as a base
  cp -a "/mnt/infer/$merge_base" "$result_dir"

  # Used as a recently-used marker
  touch "/mnt/infer/$merge_base"
  return 0
}
82+
83+
# Just assume we diverged from main at some point
84+
# Using $commit because merge-base didn't process
85+
# pull request references.
86+
merge_base=$(git merge-base "$commit" origin/main)
87+
88+
# populate_differences returns non-zero when no previously analysed commit
# can serve as a baseline; only in that case is a full scan coming.
if ! populate_differences; then
  echo "No common commit ancestor with analysis" >&2

  echo "This is going to take a while for a full scan"
fi
93+
94+
# back from /mnt/src
95+
popd
96+
97+
# Build
98+
99+
build()
# Configure /mnt/src into ./bld with clang, exporting the compilation
# database, then build only the listed generator targets.
# input: $JOBS - parallelism for the cmake build
{
  local -a generated_targets=(
    GenError
    GenServerSource
    GenUnicodeDataSource
    GenFixPrivs
  )

  cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
    -DCMAKE_C_COMPILER=clang \
    -DCMAKE_CXX_COMPILER=clang++ \
    -S /mnt/src -B bld

  cmake --build bld \
    --target "${generated_targets[@]}" \
    --parallel "$JOBS"
}
109+
110+
if [ ! -d bld ]; then
111+
mkdir bld
112+
build
113+
fi
114+
115+
#
116+
capture()
# Run the Infer capture phase against the compilation database.
# compile_commands.json is resolved relative to the current directory.
# input: $result_dir; any extra arguments are passed through to infer.
{
  infer capture \
    --compilation-database compile_commands.json \
    --project-root /mnt/src \
    --results-dir "${result_dir}" \
    "$@"
}
120+
121+
analyze()
# Run the Infer analyze phase on what has been captured in $result_dir.
# input: $result_dir, $JOBS; any extra arguments are passed through to infer.
{
  infer analyze \
    --project-root /mnt/src \
    --results-dir "${result_dir}" \
    --max-jobs "${JOBS}" \
    "$@"
}
125+
# Capture and analyze the changes to the files listed in the index
126+
#
127+
cd bld
128+
129+
if [ ! -f ../index.txt ]; then
130+
echo "full run, this could take a while"
131+
capture
132+
analyze
133+
mv "$result_dir" /mnt/infer/"$commit"
134+
cd ..
135+
rm -rf bld
136+
exit
137+
fi
138+
139+
# We've copied over a result dir, so we're continuing
140+
# https://fbinfer.com/docs/infer-workflow/#differential-workflow
141+
# using 'infer capture" instead infer run
142+
capture --reactive
143+
144+
# some form of incremental
145+
analyze --changed-files-index ../index.txt
146+
147+
# Preserve result
148+
cp "${result_dir}"/report.json ../report.json
149+
150+
cp -a "${result_dir}" "${result_dir}_preserved"
151+
152+
pushd /mnt/src
153+
#?git checkout "$merge_base"
154+
popd
155+
156+
# just in case these have changed, including generated files
157+
cd ..
158+
#?build
159+
cd bld
160+
161+
# TODO Can we use the previous captured /mnt/infer/$merge_base
162+
capture --merge-capture "/mnt/infer/$merge_base" --reactive --mark-unchanged-procs
163+
164+
analyze --incremental-analysis --changed-files-index ../index.txt
165+
166+
# It may be merged next, or a commit pushed on top of it.
167+
infer reportdiff --report-current ../report.json --report-previous "${result_dir}"/report.json --project-root /mnt/src --results-dir "${result_dir}"
168+
cd ..
169+
rm -rf bld index.txt
170+
# report.json
171+
172+
## At this point we have infer_results/differential/{fixed,introduced}.json
173+
pushd "${result_dir}"
174+
175+
# To have a useful reference we apply these differences
176+
#TODO jq: error: function compiled to 176940 bytes which is too long
177+
#jq --slurpfile excl differential/fixed.json -f /mnt/infer/"${merge_base}"/report.json > report.json <<'EOF'
178+
# map(select(
179+
# ($excl | any(
180+
# .key == .key and .node_key == .node_key and .hash == .hash
181+
# )) | not
182+
# ))
183+
#EOF
184+
185+
jq -s 'add' report.json differential/introduced.json > report1.json
186+
mv report1.json report.json
187+
188+
infer report -o "${result_dir}" --report-json report.json --report-text report.txt
189+
190+
# Useful enough to save as /mnt/infer/
191+
# It's unknown whether this is on the main branch or not, but just save.
192+
# If its merged next, then a commit exists, if a user appends
193+
# a commit, we've got a smaller delta.
194+
mv "${result_dir}" /mnt/infer/"${commit}"
195+
196+
197+
# TODO, we should walk from the main branch 11.x
198+
# and take the main branch report.json
199+
# remove fixed, add introduced, and then walk
200+
# though other commits, if they exist, and apply the
201+
# same again up until, and including the last commit
202+
# merge_base=$(git merge-base --reverse "$MAIN" "$commit")
203+
204+
result_dir=/mnt/infer/"${commit}"
205+
206+
check()
# Print a JSON report when it has content.
# $1 - path of the JSON report
# $2 - message printed ahead of the report
# returns: 1 when the report exists and is larger than an empty array,
#          0 otherwise (nothing is printed in that case).
{
  file=$1
  msg=$2

  # A missing report means there is nothing to show.
  [ -f "${file}" ] || return 0

  filesize=$(stat -c%s "$file")
  # 2 is the size of an empty json array '[]'
  [ "$filesize" -gt 2 ] || return 0

  echo "$msg"
  echo
  jq . "${file}"
  return 1
}
222+
223+
# check returns non-zero when the report has entries. With `set -e` a bare
# call would abort the script, so the informational "fixed" report is
# explicitly allowed to return non-zero.
check "${result_dir}"/differential/fixed.json "Good human! Thanks for fixing the bad things" || true

# Fail the run only when the change introduced new issues.
if ! check "${result_dir}"/differential/introduced.json "Bad human! Don't introduce bad things" >&2; then
  exit 1
fi

configuration/steps/commands/util.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,12 @@ def __init__(
173173
):
174174
args = [f"{binary}:{','.join(libs)}" for binary, libs in binary_checks.items()]
175175
super().__init__(script_name="ldd_check.sh", args=args)
176+
177+
178+
class InferScript(BashScriptCommand):
    """
    Command that runs the ``infer.sh`` script, the Infer analysis of the
    MariaDB codebase, passing the branch as the script's only argument.
    """

    def __init__(self, branch: str) -> None:
        script_args = [branch]
        super().__init__(script_name="infer.sh", args=script_args)

0 commit comments

Comments
 (0)