From 0b12ba0ca00f7cdfb50b614fb24b673fb7e4e322 Mon Sep 17 00:00:00 2001 From: Matthew Sotoudeh Date: Fri, 14 Jul 2023 07:25:31 -0700 Subject: initial code --- example/README | 5 ++ example/program.py | 130 ++++++++++++++++++++++++++++++++++++++++++++++ example/symabs.py | 62 ++++++++++++++++++++++ example/symex.py | 23 ++++++++ example/test_files/cycle | 17 ++++++ example/test_files/fib | 15 ++++++ example/test_files/hasheq | 31 +++++++++++ 7 files changed, 283 insertions(+) create mode 100644 example/README create mode 100644 example/program.py create mode 100644 example/symabs.py create mode 100644 example/symex.py create mode 100644 example/test_files/cycle create mode 100644 example/test_files/fib create mode 100644 example/test_files/hasheq (limited to 'example') diff --git a/example/README b/example/README new file mode 100644 index 0000000..07ed675 --- /dev/null +++ b/example/README @@ -0,0 +1,5 @@ +From the parent folder: + $ python3 example/symex.py example/test_files/fib + $ python3 example/symex.py example/test_files/cycle + $ python3 example/symex.py example/test_files/hasheq + etc. diff --git a/example/program.py b/example/program.py new file mode 100644 index 0000000..b3e6afb --- /dev/null +++ b/example/program.py @@ -0,0 +1,130 @@ +from satispi import * + +class Program: + def __init__(self, instructions): + self.instructions = instructions + self.registers = set() + for pc in self.instructions: + if pc[0] == "label": continue + if pc[0] == "ite": + if isinstance(pc[1], str): + self.registers.add(pc[1]) + else: + self.registers.update(a for a in pc[1:] if isinstance(a, str)) + self.registers = sorted(self.registers) + + @staticmethod + def from_file(path): + lines = [l.strip() for l in open(path, "r").readlines()] + lines = [l for l in lines if l and not l.startswith(";")] + instructions = [] + for l in lines: + opcode, *args = l.split() + if opcode != "label": + args = [int(a[1:]) if a[0] == "$" else a for a in args] + instructions.append((opcode, *args)) + return Program(instructions) + + def get_op(self, pcidx): + return self.instructions[pcidx] + + def label_to_pcidx(self, label): + for i, pc in enumerate(self.instructions): + if pc == ("label", label): + return i + raise NotImplementedError + +class State: + def __init__(self, program, pcidx, smtlib, parent): + self.program = program + self.pcidx = pcidx + self.smtlib = smtlib.copy() + self.parent = parent + self.idx = (parent.idx + 1) if parent is not None else 0 + + def step(self): + smtlib = self.smtlib + opcode, *args = self.program.get_op(self.pcidx) + if opcode == "ite": # ite cond tlabel flabel + cond = self.to_term(args[0]) + true_idx = self.program.label_to_pcidx(args[1]) + false_idx = self.program.label_to_pcidx(args[2]) + delta = self.nops({}) + true_delta = Term("and", delta, Term("distinct", cond, Literal("(_ bv0 32)"))) + false_delta = Term("and", delta, Term("=", cond, Literal("(_ bv0 32)"))) + if args[1] == args[2]: + return [State(self.program, true_idx, smtlib + [delta], self)] + return [State(self.program, true_idx, smtlib + [true_delta], self), + State(self.program, false_idx, smtlib + [false_delta], self)] + + new_pcidx = self.pcidx + 1 + if opcode == "label": + delta = self.nops({}) + elif opcode == "end": + delta = self.nops({}) + new_pcidx = self.pcidx + elif opcode == "load": # load dstreg addr + to_reg = self.nto_term(args[0]) + value = Term("select", self.heap(), self.to_term(args[1])) + delta = self.nops({args[0]}) + delta = Term("and", delta, Term("=", to_reg, value)) + elif opcode == "store": # store addr val + addr = self.to_term(args[0]) + value = self.to_term(args[1]) + new_heap = Term("store", self.heap(), addr, value) + delta = self.nops({"@heap"}) + delta = Term("and", delta, Term("=", self.nheap(), new_heap)) + elif opcode in ("add", "sub", "eq", "xor", "mul"): # op reg delta + op = {"add": "bvadd", "sub": "bvsub", "eq": "=", + "xor": "bvxor", "mul": "bvmul"}[opcode] + old = self.to_term(args[0]) + val = self.to_term(args[1]) + new = Term(op, old, val) + if op == "=": + new = Term("ite", new, Literal("(_ bv1 32)"), Literal("(_ bv0 32)")) + delta = self.nops({args[0]}) + delta = Term("and", delta, Term("=", self.nreg(args[0]), new)) + elif opcode == "mov": # mov dst src + val = self.to_term(args[1]) + delta = self.nops({args[0]}) + delta = Term("and", delta, Term("=", self.nreg(args[0]), val)) + else: print(opcode); raise NotImplementedError + return [State(self.program, new_pcidx, self.smtlib + [delta], self)] + + def has_model(self): + return Solver.singleton().model(self.smtlib) is not None + + def heap(self): + return Variable(f"heap_{self.idx}", + "(Array (_ BitVec 32) (_ BitVec 32))") + + def reg(self, reg): + return Variable(f"reg_{reg}_{self.idx}", "(_ BitVec 32)") + + def to_term(self, arg): + if isinstance(arg, int): + return Literal(f"(_ bv{arg} 32)") + return self.reg(arg) + + def nheap(self): + return Variable(f"heap_{self.idx + 1}", + "(Array (_ BitVec 32) (_ BitVec 32))") + + def nreg(self, reg): + return Variable(f"reg_{reg}_{self.idx + 1}", "(_ BitVec 32)") + + def nto_term(self, arg): + if isinstance(arg, int): + return Literal(f"(_ bv{arg} 32)") + return self.nreg(arg) + + def nops(self, except_for=set()): + term = Literal("true") + for reg in self.program.registers: + if reg in except_for: continue + term = Term("and", term, + Term("=", self.nreg(reg), self.reg(reg))) + if "@heap" not in except_for: + term = Term("and", term, + Term("=", self.nheap(), self.heap())) + return term diff --git a/example/symabs.py b/example/symabs.py new file mode 100644 index 0000000..a74db27 --- /dev/null +++ b/example/symabs.py @@ -0,0 +1,62 @@ +import sys +import itertools +from satispi import * + +def symabs(smtlib, exprs, types, cutoffs): + if len(exprs) == 0: return [] + + if Solver.singleton().model(smtlib) is None: + return [frozenset() for _ in exprs] + + smtlib = smtlib.copy() + + vars_ = [Variable(f"____symabs{i}", types[i]) for i in range(len(exprs))] + smtlib += [Term("=", var, expr) for var, expr in zip(vars_, exprs)] + + possible = [frozenset() for _ in exprs] + aim_for = max(len(possible) // 8, 1) + for i in itertools.count(): + distinction = [bvize(Term("distinct", v, *map(wordlit, sorted(p)))) + for v, p in zip(vars_, possible) + if p and p is not True] + + if i > 0 and not distinction: + # everything is Top + return possible + + if len(distinction) == 1: + n_deltas = Term("bvadd", *distinction, wordlit(0)) + else: + n_deltas = Term("bvadd", *distinction) + + # aim_for <= n distincts. + # if aim_for = 1 and it's unsat, then impossible to get any distincts + goal = [] + if distinction: + goal = [Term("bvult", wordlit(aim_for - 1), n_deltas)] + + use_vars = [v for v, p in zip(vars_, possible) if p is not True] + model = Solver.singleton().model(smtlib + goal, use_vars) + + if model is None: + assert aim_for >= 0 + if aim_for > 1: + aim_for = max(aim_for // 2, 1) + continue + return possible + + prior = possible.copy() + for i, (v, c) in enumerate(zip(vars_, cutoffs)): + if possible[i] is True: continue + if model[v.name] is None: continue + possible[i] = possible[i] | frozenset({model[v.name]}) + if c is not None and len(possible[i]) > c: + possible[i] = True + assert possible != prior, \ + "Solver did not return a complete enough model to make progress" + +def wordlit(val): + return Literal(f"(_ bv{val} 32)") + +def bvize(term): + return Term("ite", term, wordlit(1), wordlit(0)) diff --git a/example/symex.py b/example/symex.py new file mode 100644 index 0000000..c5d13be --- /dev/null +++ b/example/symex.py @@ -0,0 +1,23 @@ +import sys +from satispi import * +from example.program import * +from example.symabs import * + +program = Program.from_file(sys.argv[1]) +worklist = [State(program, 0, [], None)] +while worklist: + state = worklist.pop(0) + + # Process state + print("Processing state ...") + exprs = [state.reg(reg) for reg in program.registers] + types = ["(_ BitVec 32)" for _ in program.registers] + cutoffs = [5 for _ in exprs] + possible = symabs(state.smtlib, exprs, types, cutoffs) + for reg, values in zip(program.registers, possible): + print("\t", reg, "\t->\t", values) + + # Execute + for next_state in state.step(): + if next_state.has_model(): + worklist.append(next_state) diff --git a/example/test_files/cycle b/example/test_files/cycle new file mode 100644 index 0000000..e5fc1d3 --- /dev/null +++ b/example/test_files/cycle @@ -0,0 +1,17 @@ +mov tortoise $0 +mov hare $0 +mov cycle_found $0 +label loop + ; tortise = tortoise->next + load tortoise tortoise + ; hare = hare->next->next + load hare hare + load hare hare + ; are_eq = (tortoise == hare) + mov are_eq tortoise + eq are_eq hare + ; cycle found? + ite are_eq cycle loop +label cycle + mov cycle_found $1 +end diff --git a/example/test_files/fib b/example/test_files/fib new file mode 100644 index 0000000..f197e07 --- /dev/null +++ b/example/test_files/fib @@ -0,0 +1,15 @@ +store $0 $0 +store $1 $1 +mov idx $1 +label loop + ; fib = f(idx) + f(idx - 1) + load fib idx + sub idx $1 + load fib2 idx + add idx $1 + add fib fib2 + ; memory[idx + 1] = fib + add idx $1 + store idx fib +ite $1 loop loop +end diff --git a/example/test_files/hasheq b/example/test_files/hasheq new file mode 100644 index 0000000..702c17d --- /dev/null +++ b/example/test_files/hasheq @@ -0,0 +1,31 @@ +; find inputs that hash to the same thing under +; https://doc.riot-os.org/group__sys__hashes__djb2.html +; and +; https://doc.riot-os.org/group__sys__hashes__sdbm.html + +mov hash1 $123456 +mov hash2 $789012 +mov same_found $0 + +mov i $0 +label loop + load next_word i + + ; djb2 iteration + mul hash1 $33 + xor hash1 next_word + + ; sdbm iteration + mul hash2 $65599 + add hash2 next_word + + ; compare + mov are_eq hash1 + eq are_eq hash2 + + add i $1 + ite are_eq hashes_same loop + +label hashes_same + mov same_found $1 +end -- cgit v1.2.3