diff --git a/Dockerfile.min b/Dockerfile.min new file mode 100644 index 00000000..acceb8de --- /dev/null +++ b/Dockerfile.min @@ -0,0 +1,37 @@ +FROM ubuntu:20.04 +# Multi-stage docker build +# Copying over only needed compiled objects from development docker to get CBAT to run +# https://docs.docker.com/build/building/multi-stage/ + +WORKDIR /root/ +# Bap cli binary +COPY --from=cbat:latest /home/opam/.opam/4.14/bin/bap /home/opam/.opam/4.14/bin/bap +ENV PATH="$PATH:/home/opam/.opam/4.14/bin" + +# Plugins +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/bap/*.plugin /home/opam/.opam/4.14/lib/bap/ +# Removing some unneeded larger plugins mostly related to primus +RUN rm /home/opam/.opam/4.14/lib/bap/primus* \ + /home/opam/.opam/4.14/lib/bap/beagle.plugin \ + /home/opam/.opam/4.14/lib/bap/run.plugin \ + /home/opam/.opam/4.14/lib/bap/constant_tracker.plugin +# Do we need this? /home/opam/.opam/4.14/lib/bap/frontc_parser.plugin \ + +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/bap/primus_lisp.plugin /home/opam/.opam/4.14/lib/bap/ + +# Some primus semantics +COPY --from=cbat:latest /home/opam/.opam/4.14/share/bap/primus/ /home/opam/.opam/4.14/share/bap/primus/ + +# Get Z3 library +COPY --from=cbat:latest /home/opam/.opam/4.14/lib/stublibs/libz3.so /home/opam/.opam/4.14/lib/stublibs/libz3.so + +# Objdump and Objdump dependencies +COPY --from=cbat:latest /usr/bin/objdump /usr/bin/objdump +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libopcodes-2.34-multiarch.so /lib/x86_64-linux-gnu/libopcodes-2.34-multiarch.so +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libbfd-2.34-multiarch.so /lib/x86_64-linux-gnu/libbfd-2.34-multiarch.so +COPY --from=cbat:latest /lib/x86_64-linux-gnu/libctf-multiarch.so.0 /lib/x86_64-linux-gnu/libctf-multiarch.so.0 + +# Removing unneeded bytecode .cma from plugins +COPY --from=cbat:latest /usr/bin/zip /usr/bin/zip +RUN for file in /home/opam/.opam/4.14/lib/bap/*.plugin; do zip --delete $file "*.cma"; done +RUN rm /usr/bin/zip diff --git a/wp/pycbat/.gitignore b/wp/pycbat/.gitignore new file mode 100644 index 00000000..375de919 --- /dev/null +++ b/wp/pycbat/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +.pytest_cache diff --git a/wp/pycbat/README.md b/wp/pycbat/README.md new file mode 100644 index 00000000..35de37b3 --- /dev/null +++ b/wp/pycbat/README.md @@ -0,0 +1,23 @@ +# CBAT Python Bindings + + + + +Installation +``` +docker pull philzook58/cbat_min +python3 -m pip install -e . +``` + +Usage: + +```python +import z3 +import cbat + +rax = z3.BitVec("RAX", 64) +postcond = rax == 4 +# Check that rax is always 4 at end of function "fact" in binary file myfactorial.o +cbat.run_wp("./myfactorial.o", func="fact", postcond=postcond) +``` + diff --git a/wp/pycbat/pyproject.toml b/wp/pycbat/pyproject.toml new file mode 100644 index 00000000..2f7f8feb --- /dev/null +++ b/wp/pycbat/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "cbat" +version = "0.0.1" +authors = [ + { name="Philip Zucker", email="pzucker@draper.com" }, +] +description = "Comparative Binary Analysis Tool" +readme = "README.md" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] + +[project.urls] +"Homepage" = "https://github.com/draperlaboratory/cbat_tools" diff --git a/wp/pycbat/requirements.txt b/wp/pycbat/requirements.txt new file mode 100644 index 00000000..d4930567 --- /dev/null +++ b/wp/pycbat/requirements.txt @@ -0,0 +1,3 @@ +z3 +docker + diff --git a/wp/pycbat/src/cbat/__init__.py b/wp/pycbat/src/cbat/__init__.py new file mode 100644 index 00000000..e749698a --- /dev/null +++ b/wp/pycbat/src/cbat/__init__.py @@ -0,0 +1,54 @@ +import subprocess +import z3 + + +def run_wp(filename, filename2=None, func="main", invariants=[], debug=False, precond=None, postcond=None, docker_image=None, inline=None, **kwargs): + '''Simple wrapper around cbat cli. For more information about the options, see `bap wp --help`''' + cmd = ["bap", "wp", "--no-cache", + "--show", "precond-smtlib", "--func", func] + if precond != None: + cmd.append("--precond") + cmd.append("(assert " + precond.sexpr() + ")") + if postcond != None: + cmd.append("--postcond") + cmd.append("(assert " + postcond.sexpr() + ")") + if inline != None: + cmd.append("--inline") + cmd.append(inline) + # TODO: Fill out invariants + + # forward kwargs. Typo unfriendly + # TODO: fill out other allowed flags + flags = ["check-invalid-derefs", "check-null-derefs"] + assert all(k in flags for k in kwargs.keys()), kwargs + cmd += [f"--{k}" for k, + v in kwargs.items() if v == True and k in flags] + + cmd.append(filename) + if filename2 != None: + cmd.append(filename2) + + if docker_image != None: + flags = ["-v", f"{filename}:{filename}"] + if filename2 != None: + flags += ["-v", f"{filename2}:{filename2}"] + cmd = ["docker", "run"] + flags + [docker_image] + cmd + res = subprocess.run(cmd, check=False, capture_output=True) + if debug: + print(res.stderr) + smt = res.stdout.decode().split("Z3 :") + if len(smt) != 2: + print("SMT formula extraction failed", smt) + print(res.stderr) + assert False + smtlib = smt[1] + s = z3.Solver() + s.from_string(smtlib) + if debug: + print(s) + res = s.check() + if res == z3.unsat: + return (z3.unsat, s) + elif res == z3.sat: + # raise? + return (z3.sat, s) diff --git a/wp/pycbat/src/cbat/helpers.py b/wp/pycbat/src/cbat/helpers.py new file mode 100644 index 00000000..f098543a --- /dev/null +++ b/wp/pycbat/src/cbat/helpers.py @@ -0,0 +1,118 @@ +import z3 +import angr +from . import run_wp + + +class TypedView(): + def __init__(self, value: z3.BitVecRef, typ: angr.sim_type.SimType, le=True, mem=None): + self.mem = mem + self.value = value + self.typ = typ + self.le = True + + def deref(self): + assert self.mem != None + bytes = range(self.typ.size // 8) + if self.le: + bytes = reversed(bytes) + value = z3.Concat([self.mem[self.value + n] for n in bytes]) + return TypedView(value, self.typ.pts_to, le=self.le, mem=self.mem) + + def __getitem__(self, field): + start = self.typ.offsets[field] + end = start + self.typ.fields[field].size + value = z3.Extract(end-1, start, self.value) + return TypedView(value, self.typ.fields[field], mem=self.mem, le=self.le) + + def __add__(self, b): + if isinstance(b, int): + return TypedView(self.value + b, self.typ, mem=self.mem, le=self.le) + elif isinstance(b, TypedView): + assert b.typ == self.typ and self.mem == b.mem and self.le == b.le + return TypedView(self.value + b.value, self.typ, mem=self.mem, le=self.le) + assert False, "Unexpected addition type" + + def __eq__(self, b): + assert self.typ == b.typ + return self.value == b.value + + def __repr__(self): + return f"({repr(self.typ)}){repr(self.value)}" + + +def make_mem(name): + return z3.Array(name, z3.BitVecSort(64), z3.BitVecSort(8)) + + +class PropertyBuilder(): + def __init__(self, binary=None, binary2=None, func=None, headers=None): + self.binary = binary + self.binary2 = binary2 + self.func = func + if binary != None: + self.load_binary(binary) + if headers != None: + self.load_headers(headers) + self.posts = [] + + self.mem = make_mem("mem") + self.init_mem = make_mem("init_mem") + self.mem0 = make_mem("mem0") + self.init_mem0 = make_mem("init_mem0") + + def load_binary(self, filename): + self.proj = angr.Project(filename, load_debug_info=True) + self.cc = self.proj.factory.cc() + + def load_headers(self, header_str): + (defns, types) = angr.types.parse_file(header_str) + angr.types.register_types(types) + self.defns = defns + + def cast(self, value, typ, prefix="", suffix=""): + mem = make_mem(prefix+"mem"+suffix) + le = self.proj.arch.memory_endness == 'Iend_LE' + value = z3.Extract(typ.size - 1, 0, value) + return TypedView(value, typ, le=le, mem=mem) + + def fun_args(self, prefix="", suffix=""): + assert self.func != None + funsig = self.defns[self.func] + funsig = funsig.with_arch(self.proj.arch) + # stack args not supported yet + assert len(funsig.args) <= len(self.cc.ARG_REGS) + return [self.cast(z3.BitVec(prefix + reg.upper() + suffix, 64), typ, prefix=prefix, suffix=suffix) for typ, reg in zip(funsig.args, self.cc.ARG_REGS)] + + def init_fun_args(self): + return self.fun_args(prefix="init_") + + def init_fun_args_mod(self): + return self.fun_args(prefix="init_", suffix="_mod") + + def fun_args_mod(self): + return self.fun_args(suffix="_mod") + + def init_fun_args_orig(self): + return self.fun_args(prefix="init_", suffix="_orig") + + def fun_args_orig(self): + return self.fun_args(suffix="_orig") + + def ret_val(self, suffix=""): + assert self.func != None + funsig = self.defns[self.func] + funsig = funsig.with_arch(self.proj.arch) + reg = self.cc.RETURN_VAL.reg_name + return self.cast(z3.BitVec(reg.upper() + suffix, 64), funsig.returnty) + + def ret_val_orig(self): + return self.ret_val(suffix="_orig") + + def ret_val_mod(self): + return self.ret_val(suffix="_mod") + + def add_post(self, post): + self.posts.append(post) + + def run_wp(self, **kwargs): + return run_wp(self.binary, func=self.func, filename2=self.binary2, postcond=z3.And(self.posts), **kwargs) diff --git a/wp/pycbat/tests/resources/Makefile b/wp/pycbat/tests/resources/Makefile new file mode 100644 index 00000000..7b0fbd18 --- /dev/null +++ b/wp/pycbat/tests/resources/Makefile @@ -0,0 +1,4 @@ + +test6: test6_1.c test6_2.c + gcc -O1 -c test6_1.c -o test6_1.o + gcc -O1 -c test6_2.c -o test6_2.o \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test4.c b/wp/pycbat/tests/resources/test4.c new file mode 100644 index 00000000..5e18a82d --- /dev/null +++ b/wp/pycbat/tests/resources/test4.c @@ -0,0 +1,6 @@ +int foo(int x, char y) +{ + int bar; + bar = x; + return 3 + bar; +} diff --git a/wp/pycbat/tests/resources/test4.o b/wp/pycbat/tests/resources/test4.o new file mode 100644 index 00000000..3b781757 Binary files /dev/null and b/wp/pycbat/tests/resources/test4.o differ diff --git a/wp/pycbat/tests/resources/test5.c b/wp/pycbat/tests/resources/test5.c new file mode 100644 index 00000000..e6753d1d --- /dev/null +++ b/wp/pycbat/tests/resources/test5.c @@ -0,0 +1,12 @@ +typedef struct mystruct +{ + int f1; + char f2; +} mystruct; + +int foo(mystruct *x, char y) +{ + int bar; + bar = x->f1; + return 3 + bar; +} diff --git a/wp/pycbat/tests/resources/test5.o b/wp/pycbat/tests/resources/test5.o new file mode 100644 index 00000000..c97f2e0b Binary files /dev/null and b/wp/pycbat/tests/resources/test5.o differ diff --git a/wp/pycbat/tests/resources/test6_1.c b/wp/pycbat/tests/resources/test6_1.c new file mode 100644 index 00000000..5b1faf89 --- /dev/null +++ b/wp/pycbat/tests/resources/test6_1.c @@ -0,0 +1,4 @@ +int foo(int x) +{ + return x + 3; +} \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test6_1.o b/wp/pycbat/tests/resources/test6_1.o new file mode 100644 index 00000000..1b1d87b9 Binary files /dev/null and b/wp/pycbat/tests/resources/test6_1.o differ diff --git a/wp/pycbat/tests/resources/test6_2.c b/wp/pycbat/tests/resources/test6_2.c new file mode 100644 index 00000000..ac9fc555 --- /dev/null +++ b/wp/pycbat/tests/resources/test6_2.c @@ -0,0 +1,4 @@ +int foo(int x) +{ + return x + 4; +} \ No newline at end of file diff --git a/wp/pycbat/tests/resources/test6_2.o b/wp/pycbat/tests/resources/test6_2.o new file mode 100644 index 00000000..b4a48394 Binary files /dev/null and b/wp/pycbat/tests/resources/test6_2.o differ diff --git a/wp/pycbat/tests/test_cbat.py b/wp/pycbat/tests/test_cbat.py new file mode 100644 index 00000000..56cde0e4 --- /dev/null +++ b/wp/pycbat/tests/test_cbat.py @@ -0,0 +1,132 @@ +from cbat import run_wp +from cbat.helpers import PropertyBuilder +import z3 +import tempfile +import subprocess +import angr +rax = z3.BitVec("RAX", 64) +init_rax = z3.BitVec("init_RAX", 64) +init_rdi = z3.BitVec("init_RDI", 64) + + +def run_code(code, postcond): + + with tempfile.NamedTemporaryFile(suffix=".c") as fp: + with tempfile.TemporaryDirectory() as mydir: + fp.write(code.encode()) + fp.flush() + fp.seek(0) + print(fp.readlines()) + outfile = mydir + "/fact" + print(subprocess.run(["gcc", "-g", "-c", "-O1", + "-o", outfile, fp.name], check=True)) + + print(subprocess.run(["objdump", "-d", outfile], check=True)) + return run_wp(outfile, func="fact", postcond=postcond) + + +def test1(): + code = ''' + int fact(int x){ + return 3; + } + ''' + + postcond = rax == 3 + + assert run_code(code, postcond)[0] == z3.unsat + + +def test2(): + code = ''' + int fact(int x){ + return 3; + } + ''' + postcond = rax == 4 + + assert run_code(code, postcond)[0] == z3.sat + + +def test3(): + code = ''' + int fact(int x){ + return x + 3; + } + ''' + postcond = z3.Extract(31, 0, rax) == z3.Extract(31, 0, init_rdi) + 3 + # postcond = rax == init_rdi + 3 + + assert run_code(code, postcond)[0] == z3.unsat + + +def test4(): + pb = PropertyBuilder(binary="resources/test4.o", func="foo", + headers="int foo(int x, char y);") + x, y = pb.fun_args() + init_x, init_y = pb.init_fun_args() + retval = pb.ret_val() + # This is using bitvector semantic. Not C int semantics. Sigh. Since I'm pretending, this is confusing. + postcond = retval == init_x + 3 + precond = None + (res, model) = run_wp("resources/test4.o", func="foo", + precond=precond, postcond=postcond, docker_image=None) + assert res == z3.unsat + + postcond = retval == init_x + 4 + (res, model) = run_wp("resources/test4.o", func="foo", + precond=precond, postcond=postcond, docker_image=None) + assert res == z3.sat + + +def test5(): + header = ''' + typedef struct mystruct + { + int f1; + char f2; + } mystruct; + + int foo(mystruct *x, char y); + ''' + pb = PropertyBuilder(binary="resources/test5.o", + func="foo", headers=header) + x, y = pb.fun_args() + init_x, init_y = pb.init_fun_args() + retval = pb.ret_val() + postcond = retval.value == init_x.deref()["f1"].value + 3 + (res, model) = run_wp("resources/test5.o", func="foo", + postcond=postcond, docker_image=None) + # debugging + #retval = z3.Extract(31, 0, z3.BitVec("RAX0", 64)) + #init_mem0 = pb.init_mem0 + #init_x0, init_y0 = pb.init_fun_args("foo", prefix="init_", suffix="0") + # x_init = MemView(init_mem0, init_x0, angr.types.parse_type( + # "mystruct").with_arch(pb.proj.arch)) + #postcond = retval == x_init0["f1"].z3() + 3 + # print(postcond) + # print(model) + # print(model.eval(postcond)) + # print(model.eval(x_init["f1"].z3())) + #print(model.eval(x_init["f1"].z3() + 3)) + assert res == z3.unsat + + +def test_compare(): + header = "int foo(int x);" + pb = PropertyBuilder(binary="resources/test6_1.o", + binary2="resources/test6_2.o", func="foo", headers=header) + retorig = pb.ret_val_orig() + retmod = pb.ret_val_mod() + pb.add_post(retmod.value == retorig.value + 1) + (res, s) = pb.run_wp() + assert res == z3.unsat + + +# I dunno. Something weird is going on with pytest and IO. +test_compare() +test5() +test4() +test1() +test2() +test3()