Semi-Interactive Assembly Verification in Knuckledragger
I’ve been working on an interactive proof experience in python for assembly code.
In previous posts I talked about the fully automatic tool, which still works. https://www.philipzucker.com/asm_verify3/
But for debugging failing specs or for slightly more involved properties, a semi-interactive mode is desirable. I’m using Knuckledragger https://github.com/philzook58/knuckledragger , my z3py based proof assistant as the base system. Step by painful step the pcode based verification system is becoming more usable. This whole post is developed as a Jupyter notebook, which gives a nice interactive exploratory repl for building and debugging specs and proofs.
I use the pypcode package which offers lifters from binary to Ghidra Pcode.
I have a symbolic executor that given a specification will discover the finite trace fragments that make up the CFG. (If you give a bad spec or there isn’t a finite CFG, the symbolic executor won’t terminate.). Every possibly infinite trace through the program is composed of these finite fragments. The executor emits verification conditions that are roughly of the form precondition /\ path_condition => postcondition / assert. A cute formulation here is that invariants are expressed to the system as “cuts” of the CFG. If you cut the CFG enough to make it a DAG, the length and number of trace fragments is finite. Invariants are self-consistent, sufficient summaries of the reachable state at those cut positions.
The code is available here https://github.com/philzook58/knuckledragger/tree/main/src/kdrag/contrib/pcode
Demo
This is a chatgpt produced piece of riscv32 assembly to sum an array of integers.
%%file /tmp/sum_i32.s
.text
.globl sum_i32
.align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
# RISC-V calling convention: on entry a0 = arr, a1 = n; the result is
# returned in a0. t0/t1/t2 are caller-saved temporaries, free to clobber.
sum_i32:
mv t0, a0 # t0 = arr (cursor)
mv t1, a1 # t1 = n (remaining)
li a0, 0 # a0 = sum (return value)
beqz t1, .done # if n == 0 -> return 0
.loop:
lw t2, 0(t0) # t2 = *arr
add a0, a0, t2 # sum += *arr
addi t0, t0, 4 # arr++
addi t1, t1, -1 # n--
bnez t1, .loop # keep going while n != 0
.done:
ret
Overwriting /tmp/sum_i32.s
This is short main program to call the library function
%%file /tmp/test.c
#include <stdint.h>
#include <stdio.h>
/* Assembly routine under test, defined in /tmp/sum_i32.s. */
extern int32_t sum_i32(const int32_t* arr, uint32_t n);
/* Smoke-test driver: sum a small fixed array and print the result. */
int main() {
int32_t arr[] = {1, 2, 3, 4, 5};
uint32_t n = 5;
int32_t result = sum_i32(arr, n);
printf("Sum: %d\n", result); // Expected output: Sum: 15
return 0;
}
Writing /tmp/test.c
We can compile and link these together. nix is actually a decent way to get a working riscv32 toolchain. Go figure.
!nix-shell -p pkgsCross.riscv32.buildPackages.gcc --run "riscv32-unknown-linux-gnu-gcc -O2 /tmp/sum_i32.s /tmp/test.c -o /tmp/sum32"
It works
! qemu-riscv32 /tmp/sum32
Sum: 15
A half functional spec
First I want to write a higher level function that gets the same data to use as a spec to compare against the assembly. I do this in z3py/knuckledragger. It is kind of a low level way of writing such a thing because I directly talk about a memory array. A higher level functional spec might involve a function connecting the memory array to a list or sequence and a function program over that.
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv
# Byte-addressed memory model: 32-bit addresses mapping to 8-bit cells.
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
# Declare the spec function sum_i32(mem, addr, n) -> 32-bit sum.
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
# Recursive definition: the sum of n 32-bit words starting at addr.
# bv.select_concat(mem, addr, 4, le=True) reads 4 consecutive bytes starting
# at addr and concatenates them little-endian into one 32-bit word.
kd.define("sum_i32", [mem, addr, n],
smt.If(n == 0,
0,
sum_i32(
mem,
addr + 4,
n - 1
) + bv.select_concat(mem, addr, 4, le=True)
)
)
sum_i32
Some sanity check evaluation using knuckledragger’s simplifier. Summing a zeroed out memory returns 0
# Memory that is zero at every address; summing any number of words over it
# should simplify to 0.
zeromem = smt.Lambda([addr], smt.BitVecVal(0, 8))
kd.full_simp(sum_i32(zeromem, 0, 7))
0
An array of all ones should sum to n. The goofiness of the array expression is because the array is byte addressed but we are summing 32 bit integers.
kd.full_simp(sum_i32(smt.Lambda([addr], smt.If(addr % 4 == 0, smt.BitVecVal(1,8), 0)), 0, 7))
7
Bounded Checking
It is always a good idea to do bounded model checking first. It is much easier. It is also useful to make the loop invariant an assertion to see if we can find a place they fail. I can assure you that you don’t always have your invariants correct at first and it’s easier to debug the problem here. In unbounded verification, there is the second possible failure mode that the invariants are not strong enough to be self consistent.
# Load the compiled binary and pull out the symbolic state variables
# (registers and RAM) that the specification talks about.
ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t0, t1, a0, a1, ram = (ctx.state_vars[k] for k in ("t0", "t1", "a0", "a1", "ram"))

dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)
N = smt.BitVec("N", 32)
Nlim = 5  # bound on the array length for bounded checking

# In the initial state a0 holds the array address and a1 the (bounded) size N.
dbg.add_entry('sum_i32', smt.And(a0 == addr, a1 == N, N <= Nlim, N >= 0))
# In the final state the return register a0 equals the functional spec.
dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))

dbg.start()
dbg.run()

# Discharge every collected verification condition: unfold the recursive
# spec enough times to cover the bound, expose the pcode helper
# definitions, then hand the rest to z3.
while dbg.vcs:
    print(dbg.vcs[-1])
    goal = dbg.pop_lemma()
    for _ in range(Nlim + 1):
        goal.unfold(sum_i32)
    goal.unfold(*ctx.definitions, keep=True)
    goal.auto(timeout=10000)
    goal.qed()
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Continuing execution at: 0x400612 trace_id [0] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Continuing execution at: 0x400614 trace_id [0] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Continuing execution at: 0x400616 trace_id [0] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Continuing execution at: 0x40061a trace_id [0, 1] num insns 4
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1] num insns 5
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1] num insns 6
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1] num insns 7
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1] num insns 8
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1] num insns 9
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1] num insns 10
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1] num insns 11
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1] num insns 12
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1] num insns 13
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1] num insns 14
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1] num insns 15
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1] num insns 16
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1] num insns 17
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1] num insns 18
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1, 1] num insns 19
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1, 1] num insns 20
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1, 1] num insns 21
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1, 1] num insns 22
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1, 1] num insns 23
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1, 1, 1] num insns 24
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1, 1, 1] num insns 25
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1, 1, 1] num insns 26
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1, 1, 1] num insns 27
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1, 1, 1] num insns 28
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 1, 1] num insns 29
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 1, 0] num insns 24
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 0] num insns 19
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 0] num insns 14
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 0] num insns 9
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 4
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
Unbounded
A more thorough verification involves adding loop invariants.
An interesting thing I realized while doing this is that the form of my spec of sum_i32 is not ideal. Maybe I should have realized this earlier, but I only did once I had the verification conditions cleaned up enough. A very pleasant form is to have one function per block label. The closest form of functional programming to a block based form is to have one function per block, where the functions take in the live variables as arguments. https://www.cs.princeton.edu/~appel/papers/ssafun.pdf Writing your spec in this form will probably lead to you having the easiest most automated time.
While all of these proofs are automated completely by just injecting in the sum_i32 definition to z3, I needed the interactive capabilities to start doing manual rewrite proofs until I found out what was wrong. I had many bugs in my spec / missing invariants etc. I think this capability was essential.
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv

BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)

# Accumulator-passing (tail-recursive) form of the spec, mirroring the loop:
# sum_i32(mem, acc, addr, n) = acc + (sum of n little-endian words at addr).
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n, addr, acc, res = smt.BitVecs('n addr acc res', 32)
sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
    smt.If(n == 0,
        acc,
        sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
    )
)

ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
ram = ctx.state_vars["ram"]

spec = asmspec.AsmSpec(ctx)
N = smt.BitVec("N", 32)
# Entry: a1 holds N, a0 holds the array address, and res names the final
# mathematical answer. N is bounded here (N <= 2) to sidestep overflow concerns.
spec.add_entry('sum_i32', spec.find_label("sum_i32"), smt.And(a1 == N, addr == a0, res == sum_i32(ram, 0, addr, N), N >= 0, N <= 2))
# Loop invariant (a "cut" of the CFG at .loop): the partial sum in a0 plus the
# remaining t1 words at cursor t0 equals res, and the cursor/counter pair
# tracks progress: 4*N + addr == 4*t1 + t0 with 0 < t1 <= N.
# (Removed unused local `end_addr = 4*N + addr`; the cut uses the expression directly.)
spec.add_cut(".loop", spec.find_label(".loop"), smt.And(res == sum_i32(ram, a0, t0, t1), 4*N + addr == 4*t1 + t0, t1 > 0, t1 <= N))
# Exit: the return register a0 equals the named result.
spec.add_exit(".done", spec.find_label(".done"), a0 == res)
pf = asmspec.AsmProofState(spec)
Unknown reloc 3 on RISCV:LE:32:RV32GC
Unknown reloc 1 on RISCV:LE:32:RV32GC
Unknown reloc 5 on RISCV:LE:32:RV32GC
Unknown reloc 10 on RISCV:LE:32:RV32GC
Unknown reloc 58 on RISCV:LE:32:RV32GC
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N))
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N))
Here is interacting with a proof a bit.
We can see this results from the path coming from the entry point to the start of .loop
from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
addr == a0,
res == sum_i32(ram, 0, addr, N),
N >= 0,
N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N)))
The `intros` tactic brings the path condition (which includes the preconditions at entry) into the context. The `split` tactic breaks up the `And` in the context.
# Open the first verification condition; intros moves the path condition into
# the hypothesis context, and split separates the conjunction at position 0.
l = pf.lemma(0)
l.intros()
l.split(at=0)
[select32le(register(state0), &a1) == N,
addr == select32le(register(state0), &a0),
res == sum_i32(ram(state0), 0, addr, N),
0 <= N,
N <= 2,
Not(select32le(register(state0), &a1) == 0)]
?|= And(res ==
sum_i32(ram(state0),
0,
select32le(register(state0), &a0),
select32le(register(state0), &a1)),
4*N + addr ==
4*select32le(register(state0), &a1) +
select32le(register(state0), &a0),
Not(select32le(register(state0), &a1) <= 0),
select32le(register(state0), &a1) <= N)
from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
addr == a0,
res == sum_i32(ram, 0, addr, N),
N >= 0,
N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N)))
I can for example start to manually rewrite the goal using facts in the context to start cleaning it up and maybe see what is good or bad. I could also start using lemmas to reason about select32le etc.
# Rewrite the goal using context hypotheses: hypothesis 0 left-to-right,
# hypotheses 1 and 2 right-to-left, re-expressing the goal in terms of N, addr, res.
l.rw(0)
l.rw(1,rev=True)
l.rw(2, rev=True)
[select32le(register(state0), &a1) == N,
addr == select32le(register(state0), &a0),
res == sum_i32(ram(state0), 0, addr, N),
0 <= N,
N <= 2,
Not(select32le(register(state0), &a1) == 0)]
?|= And(res == res,
4*N + addr == 4*N + addr,
Not(N <= 0),
N <= N)
from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
addr == a0,
res == sum_i32(ram, 0, addr, N),
N >= 0,
N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N)))
A nice thing I added recently is the “egraphs modulo theories” tactic, which will significantly reduce the goal. It’s a congruence closure tactic of sorts that performs simplification by egraph extraction.
l.emt()
[select32le(register(state0), &a1) == N,
addr == select32le(register(state0), &a0),
res == sum_i32(ram(state0), 0, addr, N),
0 <= N,
N <= 2,
Not(select32le(register(state0), &a1) == 0)]
?|= N <= N
from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
addr == a0,
res == sum_i32(ram, 0, addr, N),
N >= 0,
N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N)))
But actually, all of this is unnecessary now that I have the spec set up right. z3 is perfectly capable of discharging this goal automatically. So let’s start over. The call to qed registers the proof object with the overarching AsmProofState
# Restart the first VC: z3 discharges it fully automatically;
# qed registers the resulting proof with the overarching AsmProofState.
l = pf.lemma(0)
l.auto()
l.qed()
⊨Implies(And(select32le(register(state0), &a1) == N, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, N), 0 <= N, N <= 2, Not(select32le(register(state0), &a1) == 0)), And(res == sum_i32(ram(state0), 0, select32le(register(state0), &a0), select32le(register(state0), &a1)), 4N + addr == 4select32le(register(state0), &a1) + select32le(register(state0), &a0), Not(select32le(register(state0), &a1) <= 0), select32le(register(state0), &a1) <= N))
We need to discharge all of the verification conditions. Here is more of the same. It turns out we need unfolding of the definition of sum_i32, but once we make that available, z3 can again discharge this
# VC 1: entry straight to .done (the N == 0 case); needs sum_i32's definition.
l = pf.lemma(1)
l.auto(by=sum_i32.defn)
l.qed()
⊨Implies(And(select32le(register(state0), &a1) == N, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, N), 0 <= N, N <= 2, select32le(register(state0), &a1) == 0), res == 0)
# VC 2: the loop exit on the final iteration (t1 == 1) into .done.
l = pf.lemma(2)
l.auto(by=sum_i32.defn)
l.qed()
⊨Implies(And(res == sum_i32(ram(state0), select32le(register(state0), &a0), select32le(register(state0), &t0), select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(select32le(register(state0), &t1) <= 0), select32le(register(state0), &t1) <= N, select32le(register(state0), &t1) == 1), select32le(register(state0), &a0) + select32le(ram(state0), select32le(register(state0), &t0)) == res)
# VC 3: the loop back edge (t1 != 1), re-establishing the invariant at .loop.
l = pf.lemma(3)
l.auto(by=sum_i32.defn)
l.qed()
⊨Implies(And(res == sum_i32(ram(state0), select32le(register(state0), &a0), select32le(register(state0), &t0), select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(select32le(register(state0), &t1) <= 0), select32le(register(state0), &t1) <= N, Not(select32le(register(state0), &t1) == 1)), And(res == sum_i32(ram(state0), select32le(register(state0), &a0) + select32le(ram(state0), select32le(register(state0), &t0)), 4 + select32le(register(state0), &t0), 4294967295 + select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(4294967295 + select32le(register(state0), &t1) <= 0), 4294967295 + select32le(register(state0), &t1) <= N))
Finally calling qed on the top level object checks that we have gotten proofs on every trace fragment through the assembly program.
pf.qed()
AsmProof(spec=AsmSpec(ctx=<kdrag.contrib.pcode.BinaryContext object at 0x754af03fe210>, addrmap=defaultdict(<class 'list'>, {4195856: [Entry(sum_i32, 0x400610, And(a1 == N,
addr == a0,
res == sum_i32(ram, 0, addr, N),
N >= 0,
N <= 2))], 4195866: [Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
4*N + addr == 4*t1 + t0,
t1 > 0,
t1 <= N))], 4195880: [Exit(.done, 0x400628, a0 == res)]})))
New Features
Folded Definitions
Ghidra’s uniform model is byte addressable memory for registers, ram, and temporaries. I model these using smtlib (Array (_ BitVec 64) (_ BitVec 8)). However, most operations work over 32bit or 64bit slices rather than 8bit slices. This requires every read or write to be expanded into byte-wise reads or writes. Basically the result is not pleasant for a human reader.
Knuckledragger offers a definition mechanism which uses z3’s built in substitute_funs to unfold them. By making auxiliary definitions that select and concatenate, the result is much more readable, and faster to manipulate to boot, since the expression is smaller. Before truly blasting it into z3 at the final step, these definitions need to be unfolded. It sounds easy enough, but finding reasonable places for the folds and unfolds took a good couple days.
Register Names
Registers in pcode are held in a memory space called “register” which is not all that different from how “ram” is represented. This makes things uniform, but also means that the nicer things you might expect of register manipulations aren’t there by default if you treat “register” the same as “ram”. One such case is that each register is associated to a fairly meaningless offset in the “register” memory space. By making defined constants that are human readable that correspond to these offsets improves readability.
Cached Memory
Many symbolic executors don’t fully model memory as a symbolic array. A common pattern is to model memory as concrete addresses with symbolic values at those addresses. Reads and writes are concretized by an smt call if necessary to keep the array representation in this form.
It was important to me, for the soundness / modeling level I aspire to, to do a more complete model of memory than this. This is why I used the smt theory of arrays which allows symbolic addresses. However, this pollutes the resulting expressions and makes them less readable, as some very obvious data flow (obvious moves between temporaries, registers, and fixed addresses in ram) bulks out the expressions with select and store.
I made a “cached memory” object in python that performs the stores more lazily. If the addresses are concrete, the stores are held in a python dict and only flushed into the z3 array when I detect or can’t be sure about aliasing. When and where the cache needs to be flushed is not too bad, but it is a bit finicky so I’m not thrilled on that account. I think down the line it is very possible to do these cache optimizations in a proof producing way, but I’m picking my battles on that for now.
I believe it is a promise of the ghidra model that unique and registers are never addressed in a non concrete / indirect way. This cleans up operations over these significantly.
class CachedArray(NamedTuple):
    """
    CachedArray keeps some writes unapplied to the underlying smt array.

    Writes to varnodes are held in the python-side ``cache`` dict; ``owner``
    tracks, byte by byte, which cached varnode covers each offset. The
    invariant is that anything in the cache owns its byte range, so a cached
    entry may be replaced in place without touching ``data``. Cached entries
    that would alias a new store or a read are flushed into ``data`` first.

    >>> r1 = _TestVarnode("register", 0, 2)
    >>> e1 = _TestVarnode("register", 0, 1)
    >>> mem = CachedArray(smt.Array("mem", BV[64], BV[8]), {}, {}, 64, register=True)
    >>> mem1 = mem.store(r1, smt.BitVecVal(2, 16))
    >>> mem2 = mem1.store(e1, smt.BitVecVal(1,8))
    >>> mem2.read(e1)
    1
    >>> mem2.read(r1)
    select16le(Store(store16le(mem, &Roff0_size2, 2),
               &Roff0_size1,
               1),
           &Roff0_size2)
    >>> mem2.store(r1, smt.BitVecVal(3,16)).read(r1)
    3
    """

    # Backing z3 array holding all flushed bytes.
    data: smt.ArrayRef
    # Unflushed writes, keyed by the varnode they were written through.
    cache: dict[pypcode.Varnode, smt.BitVecRef]
    # Byte offset -> cached varnode currently owning that byte.
    owner: dict[int, pypcode.Varnode]
    # Address width (in bits) of `data`.
    bits: int
    # If True this space is the "register" space, addressed by readable
    # named constants rather than raw offsets. (Was annotated `int`; the
    # values used everywhere are booleans.)
    register: bool = False
    # Byte order used by store_concat/select_concat.
    le: bool = True

    def _loc(self, vnode: pypcode.Varnode):
        """Address used in `data` for `vnode`: a named constant like "&t0"
        for the register space, the raw concrete offset otherwise."""
        if self.register:
            return smt.BitVec("&" + vnode.getRegisterName(), self.bits)
        else:
            return vnode.offset

    def store(self, vnode: pypcode.Varnode, value: smt.BitVecRef) -> "CachedArray":
        """Write `value` at `vnode`, keeping the write in the cache.

        Any previously cached varnodes overlapping the new write are flushed
        into `data` first, then the new write takes ownership of its bytes.
        """
        if vnode in self.cache:
            # Exact same varnode already cached: replace its value in place.
            return self._replace(cache={**self.cache, vnode: value})
        data = self.data
        offset0 = vnode.offset
        cache = self.cache.copy()
        cache[vnode] = value
        owner = self.owner.copy()
        # Flush all cached vnodes overlapping the new write out to data.
        for i in range(vnode.size):
            vnode1 = owner.get(offset0 + i)
            if vnode1 is not None and vnode1 in cache:
                assert vnode1 != vnode
                for j in range(vnode1.size):
                    del owner[vnode1.offset + j]
                # BUG FIX: accumulate into the running `data` (was `self.data`),
                # otherwise flushing a second overlapping vnode in the same
                # store would discard the first flush.
                data = bv.store_concat(data, self._loc(vnode1), cache[vnode1], le=self.le)
                del cache[vnode1]
        # The new write now owns every byte it covers.
        for i in range(vnode.size):
            owner[offset0 + i] = vnode
        assert all(own in cache for own in owner.values())
        return self._replace(data=data, cache=cache, owner=owner)

    def read(self, vnode: pypcode.Varnode) -> smt.BitVecRef:
        """Read `vnode`, folding in any overlapping cached writes.

        `self` is immutable, so the flush here is computed on a local copy
        of `data`; repeated overlapping reads recompute it.
        """
        if vnode in self.cache:
            return self.cache[vnode]
        # Flush out any cached variables that may overlap into a local data copy.
        offset0 = vnode.offset
        data = self.data
        owner = self.owner
        cache = self.cache
        flushed = set()
        for i in range(vnode.size):
            vnode1 = owner.get(offset0 + i)
            if vnode1 is not None and vnode1 not in flushed:
                data = bv.store_concat(data, self._loc(vnode1), cache[vnode1], le=self.le)
                flushed.add(vnode1)
        return bv.select_concat(data, self._loc(vnode), vnode.size, le=self.le)

    def to_expr(self):
        """Fold every cached write into `data`, returning a plain smt array expression."""
        data = self.data
        for vnode, value in self.cache.items():
            data = bv.store_concat(data, self._loc(vnode), value, le=self.le)
        return data
A custom proof environment
VCProofState is a subclass of kd.ProofState so that I can have all the regular tactics, but also interpret countermodels back into terms that make sense and know which trace we are currently working on.
A small symbolic debugger
Debug is a symbolic executor you can run like a debugger, setting breakpoints.
By asking for models, you can know what states are currently possible.
class Debug:
    """
    A small symbolic debugger over a BinaryContext: a symbolic executor you can
    drive like a debugger, setting breakpoints and stepping one instruction at
    a time. Pending symbolic states live in `tracestates` (a worklist);
    verification conditions emitted by crossing spec statements accumulate in
    `vcs`.
    """

    def __init__(
        self,
        ctx: pcode.BinaryContext,
        spec: AsmSpec | None = None,
        verbose=True,
    ):
        self.ctx = ctx
        # Default to an empty spec over the context if none is given.
        self.spec: AsmSpec = spec if spec is not None else AsmSpec(ctx)
        self.tracestates: list[TraceState] = []  # worklist of in-flight symbolic states
        self.vcs: list[VerificationCondition] = []  # emitted proof obligations
        self.breakpoints = set()  # addresses at which run() pauses
        self.verbose = verbose
        self._cur_model = None  # lazily computed model of the current path condition
        self.pfs = []  # (vc, proof) pairs collected by pop_verify

    def spec_file(self, filename: str):
        """Replace the current spec with one parsed from an annotated file."""
        self.spec = AsmSpec.of_file(filename, self.ctx)

    def label(self, label: str | int) -> int:
        """Resolve a symbol name to its rebased address; pass addresses through."""
        assert self.ctx.loader is not None, (
            "BinaryContext must be loaded before disassembling"
        )
        if isinstance(label, str):
            sym = self.ctx.loader.find_symbol(label)
            if sym is None:
                raise Exception(
                    f"Symbol {label} not found in binary {self.ctx.filename}"
                )
            return sym.rebased_addr
        elif isinstance(label, int):
            return label
        else:
            raise Exception(f"Unexpected type {type(label)} for label")

    def add_entry(self, name, precond=smt.BoolVal(True)):
        """Mark `name` as an entry point with precondition `precond`."""
        self.spec.add_entry(name, self.label(name), precond)

    def add_exit(self, name, postcond=smt.BoolVal(True)):
        """Mark `name` as an exit with postcondition `postcond`."""
        self.spec.add_exit(name, self.label(name), postcond)

    def add_cut(self, name, invariant):
        """Attach a cut (loop invariant) at the address of `name`."""
        self.spec.add_cut(name, self.label(name), invariant)

    def add_assert(self, name, assertion):
        """Attach an assertion to be checked at the address of `name`."""
        self.spec.add_assert(name, self.label(name), assertion)

    def add_assign(self, name, var_name, expr):
        """Attach a ghost-variable assignment at the address of `name`."""
        self.spec.add_assign(name, self.label(name), var_name, expr)

    def start(self, mem=None):
        """Initialize the worklist (and initial vcs) from the spec's entry points."""
        if mem is None:
            mem = self.ctx.init_mem()
        tracestates, vcs = init_trace_states(self.ctx, mem, self.spec)
        self.tracestates = tracestates
        self.vcs = vcs

    def breakpoint(self, addr):
        """Register an address at which run() will pause."""
        self.breakpoints.add(addr)

    def step(self, n=1):
        """Pop the most recent trace state and symbolically execute one
        instruction (plus any spec statements at that address), n times.
        Successor states and new vcs are pushed back onto the worklists."""
        self._cur_model = None  # model is stale after any step
        for _ in range(n):
            tracestate = self.pop()
            if self.verbose:
                print(
                    "Continuing execution at:",
                    hex(tracestate.state.pc[0]),
                    "trace_id",
                    tracestate.trace_id,
                    "num insns",
                    len(tracestate.trace),
                )
            new_tracestates, new_vcs = execute_spec_and_insn(
                tracestate, self.spec, self.ctx
            )
            self.vcs.extend(new_vcs)
            self.tracestates.extend(new_tracestates)

    def run(self):
        """Step until the worklist is empty or the current address is a breakpoint.
        Note: the breakpoint check happens before stepping, so calling run()
        while already stopped at a breakpoint returns immediately."""
        while self.tracestates:
            if self.addr() in self.breakpoints:
                break
            self.step()

    def run_vc(self):
        """Step until at least one new verification condition has been emitted."""
        n = len(self.vcs)
        while len(self.vcs) == n:
            self.step()

    def pop_run(self):
        """
        Run until the current trace state is completely done.
        """
        # step() pops one state and may push successors; keep stepping while
        # the worklist has not shrunk below its starting size.
        n = len(self.tracestates)
        while len(self.tracestates) >= n:
            self.step()

    def pop_verify(self, **kwargs):
        """Discharge the most recent vc automatically; record (vc, proof) in pfs."""
        vc = self.vcs.pop()
        self.pfs.append((vc, vc.verify(self.ctx, **kwargs)))

    def verify(self, **kwargs):
        """Discharge all pending vcs. Requires execution to be finished first."""
        if not self.vcs:
            raise Exception("No verification conditions to verify")
        if self.tracestates:
            raise Exception("There are still trace states to execute")
        while self.vcs:
            self.pop_verify(**kwargs)

    def pop_lemma(self) -> kd.tactics.ProofState:
        """Pop the most recent vc and open it as an interactive proof state."""
        vc = self.vcs.pop()
        return VCProofState(vc, self.ctx)  # kd.Lemma(vc.vc(self.ctx))

    def pop(self):
        """Pop and return the most recent trace state from the worklist."""
        return self.tracestates.pop()

    def addr(self):
        """Current program counter of the trace state on top of the worklist."""
        return self.tracestates[-1].state.pc[0]

    def ghost(self, name):
        """Look up ghost variable `name` in the current trace state."""
        return self.tracestates[-1].ghost_env[name]

    def reg(self, name):
        """Simplified expression held in register `name` in the current state."""
        reg = self.ctx.state_vars[name]
        return self.ctx.simplify(
            self.ctx.substitute(self.tracestates[-1].state.memstate, reg)
        )

    def ram(self, addr, size=None):
        """
        Get the expression held at ram location addr.
        `size` defaults to the word size in bytes.
        """
        if size is None:
            size = self.ctx.bits // 8
        return self.ctx.simplify(
            self.tracestates[-1].state.memstate.getvalue_ram(addr, size)
        )

    def insn(self):
        """Disassemble the instruction at the current address."""
        return self.ctx.disassemble(self.addr())

    def model(self):
        """Find a model of the current path condition (asserts satisfiability)
        and cache it for eval()."""
        s = smt.Solver()
        s.add(self.tracestates[-1].state.path_cond)
        assert s.check() == smt.sat
        self._cur_model = s.model()

    def eval(self, expr: smt.ExprRef) -> smt.ExprRef:
        """Evaluate `expr` in the cached model (computing one if needed), after
        substituting the current symbolic state and ghost environment into it."""
        if self._cur_model is None:
            self.model()
        assert self._cur_model is not None
        tracestate = self.tracestates[-1]
        return smt.simplify(
            self._cur_model.eval(
                self.ctx.unfold(
                    substitute_state(
                        expr, self.ctx, tracestate.state.memstate, tracestate.ghost_env
                    )
                )
            )
        )
Bits and Bobbles
- https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/simple.ml s2n-bignum has a HOL-light based system for assembly verification. Sort of what I’m trying
- F* has lots of good stuff. What could I pull from there?
- Using C code via pycparser to make it easier to write specs
- Better integration with a more normal emulator for understanding counterexamples. Icicle? GDB script output?
- It is possible and interesting to connect the two forms of
sum_i32 to each other in regular knuckledragger. I should do that - Actual proof certificates from the symbolic executor.
# Scratch: a recursively defined spec function summing 32-bit words from memory.
# NOTE(review): `mem` and `addr` are not defined in this snippet — they are
# assumed bound by another notebook cell; confirm before running standalone.
mysum = smt.Function("mysum", smt.ArraySort(smt.BitVecSort(64), smt.BitVecSort(8)), smt.BitVecSort(64), smt.BitVecSort(32), smt.BitVecSort(32))
size = smt.Const("size", smt.BitVecSort(32))
# kd.define rebinds `mysum`; the declaration above supplies the symbol used
# for the recursive call inside the body.
mysum = kd.define("mysum", [mem, addr, size],
    smt.If(size == 0,
        0,
        bv.select_concat(mem, addr, 4) + mysum(mem, addr + 4, size - 1)
    ))
retro arch
Pcode out of the box supports 6502, z80, 8085. There is also a gameboy and wasm processor spec available on github. A cool aspect of basing myself on ghidra is getting these for close to free.
6502 avr z80 8085 https://www.reddit.com/r/beneater/comments/141me6d/best_practices_for_assembly_programming/ https://www.nesdev.org/wiki/RTS_Trick http://www.6502.org/tutorials/6502opcodes.html#BIT bit skip trick zero page
bv induct
Bitvector induction is a theorem. Kind of an interesting idea. Bitvectors are too big to brute force. Yea, sat solvers. But still. well orderings on bitvectors. Strong induction?
Use bvinduct(n) to prove bvinduct(n+1) bvinduct(n/2) to prove bvinduct(n) ? forall a == forall a1 a2, a == concat(a1,a2)
P = smt.Array("P", smt.BitVecSort(4), smt.BoolSort())
a = smt.BitVec("a", 4)
kd.prove(smt.ForAll([P], P[0], smt.ForAll([a], smt.Implies(smt.And(a != -1, P[a]), P[a+1])), smt.ForAll([a], P(a))))
def bv_induct(P, x):
    """Axiom schema of induction for bitvectors.

    P is an smt.Array from x's bitvector sort to Bool. If P holds at 0 and is
    preserved by +1 on every value other than -1 (all ones, the wrap-around
    point), then P holds at x.

    BUG FIX: the conclusion used call syntax ``P(x)``; an smt.Array is applied
    with indexing, ``P[x]``, consistent with ``P[0]`` and ``P[y]`` above.
    """
    y = smt.FreshConst("y", x.sort())
    return kd.axiom(
        smt.Implies(
            smt.And(
                P[0],
                smt.ForAll([y], smt.Implies(smt.And(y != -1, P[y]), P[y + 1])),
            ),
            P[x],
        )
    )
from kdrag.all import *
import kdrag.theories.bitvec as bv

mem = smt.Array("mem", smt.BitVecSort(64), smt.BitVecSort(8))
addr = smt.BitVec("addr", 64)
n = smt.Int("n")
# read_seq(mem, addr, n): the sequence of n 32-bit words read from mem at addr.
read_seq = smt.Function("read_seq", smt.ArraySort(smt.BitVecSort(64), smt.BitVecSort(8)), smt.BitVecSort(64), smt.IntSort(), smt.SeqSort(smt.BitVecSort(32)))
read_seq = kd.define("read_seq", [mem, addr, n], smt.If(
    n <= 0,
    smt.Empty(smt.SeqSort(smt.BitVecSort(32))),
    smt.Concat(
        smt.Unit(bv.select_concat(mem, addr, 4)),  # load?
        read_seq(mem, addr + 4, n - 1),
    )))
# Sanity check on a concrete memory lambda.
kd.rewrite.full_simp(read_seq(smt.Lambda([addr], smt.Extract(7,0,addr)), smt.BitVecVal(0,64), 3))

# NOTE(review): seq_sum is declared with range IntSort, but kd.Head(s) below is
# a BitVec(32); adding them is the sort mismatch raised in the traceback that
# follows. Declaring the range as BitVecSort(32) (with BitVecVal(0, 32) for the
# base case) would presumably fix it — confirm against kd.define's expectations.
seq_sum = smt.Function("seq_sum", smt.SeqSort(smt.BitVecSort(32)), smt.IntSort())
s = smt.Const("s", smt.SeqSort(smt.BitVecSort(32)))
seq_sum = kd.define("seq_sum", [s], smt.If(
    smt.Length(s) == 0,
    0,
    kd.Head(s) + seq_sum(kd.Tail(s))))
kd.full_simp(kd.seq(1,2,3))
---------------------------------------------------------------------------
Z3Exception Traceback (most recent call last)
/tmp/ipykernel_4153919/1460975090.py in ?()
19 s = smt.Const("s", smt.SeqSort(smt.BitVecSort(32)))
20 seq_sum = kd.define("seq_sum", [s], smt.If(
21 smt.Length(s) == 0,
22 0,
---> 23 kd.Head(s) + seq_sum(kd.Tail(s))))
24
25
26 kd.full_simp(kd.seq(1,2,3))
~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(self, other)
3624 x + y
3625 >>> (x + y).sort()
3626 BitVec(32)
3627 """
-> 3628 a, b = _coerce_exprs(self, other)
3629 return BitVecRef(Z3_mk_bvadd(self.ctx_ref(), a.as_ast(), b.as_ast()), self.ctx)
~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(a, b, ctx)
1275 return (a, b)
1276
1277 s = None
1278 s = _coerce_expr_merge(s, a)
-> 1279 s = _coerce_expr_merge(s, b)
1280 a = s.cast(a)
1281 b = s.cast(b)
1282 return (a, b)
~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(s, a)
1240 return s
1241 else:
1242 if z3_debug():
1243 _z3_assert(s1.ctx == s.ctx, "context mismatch")
-> 1244 _z3_assert(False, "sort mismatch")
1245 else:
1246 return s
~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(cond, msg)
113 def _z3_assert(cond, msg):
114 if not cond:
--> 115 raise Z3Exception(msg)
Z3Exception: sort mismatch
hex(50462976)
'0x3020100'
instruction count. executable checker of metadata
binary analysis passes? angr interface mocking? Ranges as sets. The intersection should be 0 smt. trace induction
def MayChange(mem1: smt.ArrayRef, mem2: smt.ArrayRef, addrs: list[int | smt.BitVecRef]) -> smt.BoolRef:
    """
    forall a not in addrs, mem1[a] == mem2[a]

    Stated quantifier free by masking out (zero-storing) each listed address in
    both arrays and comparing the results: the arrays are then equal iff they
    agreed everywhere outside `addrs`.

    BUG FIX: the loop body masked an undefined global `addr` instead of the
    loop variable `a`, so the listed addresses were never actually masked.
    """
    for a in addrs:
        mem1 = bv.store_concat(mem1, a, 0)
        mem2 = bv.store_concat(mem2, a, 0)
    return mem1 == mem2
import bisect
# keep vnodes in sorted list
# search backwards and forwards for overlaps. Ugh.
# some kind of tree?
{offset : [vnode1,vnode2]}
# evict on store
class CachedArray(NamedTuple):
    # NOTE(review): abandoned earlier draft of the write cache, kept for
    # reference. It references names that are never defined (`symbolic`,
    # `offsets`), `read` is missing `self`, and `store` returns nothing —
    # it will not run as-is. The working version appears earlier in the post.
    array: smt.ArrayRef  # backing SMT array
    cache: dict  # unflushed concrete-offset writes
    register: bool  # register space vs ram
    bits: int  # address width

    def store(self, vnode, value):
        if symbolic:  # placeholder: "the store offset is symbolic" — `symbolic` undefined
            cache = {}
        else:
            offset0 = vnode.offset
            cache = self.cache.copy()
            cache[vnode] = value
            for i in range(vnode.size):
                offset = offset0 + i
                vnode1 = offsets.get(offset)  # `offsets` never defined in this draft
                if vnode1:
                    del self.cache[vnode1]  # evict the overlapping cached write
                offsets[offset] = vnode

    def indirect_store(self, offset, vnode):
        # Sketch: a symbolic-address store would flush the whole cache.
        self._replace(array=bv.store_concat(), cache={})

    def read(vnode):  # NOTE(review): missing `self`
        if vnode in cache:
            return cache[vnode]
        else:
            return bv.select_concat(self.mem, vnode.offset, vnode.size)
"""
if vnode in self.writes:
return self._replace(data=self.data, writes={**self.writes, vnode: value})
else:
data = self.data
writes = {vnode: value}
for vnode1, v in self.writes.items():
if overlaps(vnode, vnode1):
if self.register:
data = bv.store_concat(
data,
smt.BitVec("&" + vnode.getRegisterName(), self.bits),
vnode.size,
)
else:
data = bv.store_concat(data, vnode1.offset, v)
else:
writes[vnode1] = v
return self._replace(data=data, writes=writes)
"""
"""
if vnode in self.writes:
return self.writes[vnode]
else:
data = self.data
for vnode1, v in self.writes.items():
if overlaps(vnode, vnode1):
if self.register:
data = bv.store_concat(
data,
smt.BitVec("&" + vnode.getRegisterName(), self.bits),
vnode.size,
)
else:
data = bv.store_concat(data, vnode1.offset, v)
"""
"""
data = self.data
for vnode1, v in self.writes.items():
data = bv.store_concat(
data,
smt.BitVec("&" + vnode1.getRegisterName(), self.bits)
if self.register
else vnode1.offset,
v,
)
"""
def overlaps(vnode, vnode1):
    """True when the byte ranges [offset, offset+size) of the two varnodes intersect."""
    start = max(vnode.offset, vnode1.offset)
    end = min(vnode.offset + vnode.size, vnode1.offset + vnode1.size)
    return start < end
# return bv.select_concat(
# self.register,
# smt.BitVec("&" + vnode.getRegisterName(), self.bits),
# vnode.size,
# )
# register=bv.store_concat(
# self.register,
# smt.BitVec("&" + vnode.getRegisterName(), self.bits),
# value,
#
# return self._replace(unique=bv.store_concat(self.unique, offset, value))
s2n bignum
https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/simple.ml
Maychange. Could do this.
%%file /tmp/simple.S
_start:
add %rax,%rbx
sub %rax,%rbx
done:
Writing /tmp/simple.S
! as /tmp/simple.S -o /tmp/simple.o && objdump -d /tmp/simple.o
/tmp/simple.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_start>:
0: 48 01 c3 add %rax,%rbx
3: 48 29 c3 sub %rax,%rbx
import archinfo
ainfo = archinfo.ArchPcode("x86:LE:64:default")
ainfo.ip_offset
ainfo.registers
Unexpected SP conflict
{'rax': (0, 8),
'eax': (0, 4),
'ax': (0, 2),
'al': (0, 1),
'ah': (1, 1),
'rcx': (8, 8),
'ecx': (8, 4),
'cx': (8, 2),
'cl': (8, 1),
'ch': (9, 1), ...
'xmm19_bf': (5829, 1),
'xmm19_wd': (5830, 2),
'xmm19_bg': (5830, 1),
...}
ainfo = archinfo.ArchPcode("RISCV:LE:32:default")
dir(ainfo)
ainfo.registers["pc"]
(4096, 4)
from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
ctx = pcode.BinaryContext("/tmp/simple.o")
dbg = asmspec.Debug(ctx)
pc,a,b = smt.BitVecs("pc a b", 64)
rax = ctx.state_vars[r"%rax"]
rbx = ctx.state_vars[r"%rbx"]
rip = ctx.state_vars[r"%rip"]
dbg.add_entry("_start",
#smt.ForAll([a, b],
smt.And(
rax == a,
rbx == b,
rip == pc
)
#)
)
# Yeaaa, I'm not sure it's worth adding in set rip
dbg.add_exit("done",
smt.And(
rbx == b,
rip == pc+6
))
dbg.start()
dbg.run()
dbg.verify()
Unexpected SP conflict
Executing 0x400000/3: ADD RBX,RAX at (4194304, 0) PCODE IMARK ram[400000:3]
Executing 0x400000/3: ADD RBX,RAX at (4194304, 1) PCODE CF = carry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 2) PCODE OF = scarry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 3) PCODE RBX = RBX + RAX
Executing 0x400000/3: ADD RBX,RAX at (4194304, 4) PCODE SF = RBX s< 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 5) PCODE ZF = RBX == 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 6) PCODE unique[28080:8] = RBX & 0xff
Executing 0x400000/3: ADD RBX,RAX at (4194304, 7) PCODE unique[28100:1] = popcount(unique[28080:8])
Executing 0x400000/3: ADD RBX,RAX at (4194304, 8) PCODE unique[28180:1] = unique[28100:1] & 0x1
Executing 0x400000/3: ADD RBX,RAX at (4194304, 9) PCODE PF = unique[28180:1] == 0x0
Continuing execution at: 0x400003 trace_id [0] num insns 1
Executing 0x400003/3: SUB RBX,RAX at (4194307, 0) PCODE IMARK ram[400003:3]
Executing 0x400003/3: SUB RBX,RAX at (4194307, 1) PCODE CF = RBX < RAX
Executing 0x400003/3: SUB RBX,RAX at (4194307, 2) PCODE OF = sborrow(RBX, RAX)
Executing 0x400003/3: SUB RBX,RAX at (4194307, 3) PCODE RBX = RBX - RAX
Executing 0x400003/3: SUB RBX,RAX at (4194307, 4) PCODE SF = RBX s< 0x0
Executing 0x400003/3: SUB RBX,RAX at (4194307, 5) PCODE ZF = RBX == 0x0
Executing 0x400003/3: SUB RBX,RAX at (4194307, 6) PCODE unique[28080:8] = RBX & 0xff
Executing 0x400003/3: SUB RBX,RAX at (4194307, 7) PCODE unique[28100:1] = popcount(unique[28080:8])
Executing 0x400003/3: SUB RBX,RAX at (4194307, 8) PCODE unique[28180:1] = unique[28100:1] & 0x1
Executing 0x400003/3: SUB RBX,RAX at (4194307, 9) PCODE PF = unique[28180:1] == 0x0
Continuing execution at: 0x400006 trace_id [0] num insns 2
Executing SpecStmt: Exit(done, 0x400006, And(%rbx == b, %rip == pc + 6))
def Maychange(state0, state1, state_vars):
    # NOTE(review): stub for an s2n-bignum style MAYCHANGE frame predicate
    # (state0 and state1 agree outside state_vars). Currently returns None —
    # not implemented yet.
    return
The initialization makes ugly output. That the z3 printer doesn’t use lets hurts. select_r0(st) select(r1, regfile) select_r1(st) select… store_r1
fold pop_count
custom printer one select per line we don’t have to unfold register values then
domain sepcific simplifier. select32. Find simple formn, then discharge via z3
Make a topologically ordered list of every store
# NOTE(review): unfinished sketch of a custom printer that would name each
# store once, in topological order, so register values need not be unfolded.
# It does not parse: there is a bare `def`, `e.decl().startswith` is missing
# `.name()`, and `res.append()` lacks its argument.
def pretty(self):
    cur_ram = smt.Const("CUR_RAM", self.ram.sort())
    cur_register = smt.Const("CUR_REGFILE", self.register.sort())
    seen = {}  # memo: expression -> fresh name already assigned
    res = []  # accumulated (name, store-expression) bindings
    def
    def worker(e):
        if e in seen:
            return seen[e]
        if e.decl().startswith("store"):  # NOTE(review): likely meant e.decl().name().startswith
            x = smt.FreshConst(e.sort())
            res.append()  # missing argument: presumably (x, e)
            return x
    todo = [smt.And(cur_ram == self.ram, cur_register == self.register)]
Wait. Reg is never indirectly accessed. So I could keep it in unpacked form…
from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
ctx = pcode.BinaryContext("/tmp/simple.o")
dbg = asmspec.Debug(ctx)
pc,a,b = smt.BitVecs("pc a b", 64)
rax = ctx.state_vars[r"%rax"]
rbx = ctx.state_vars[r"%rbx"]
rip = ctx.state_vars[r"%rip"]
dbg.add_entry("_start",
#smt.ForAll([a, b],
smt.And(
rax == a,
rbx == b,
rip == pc
)
#)
)
# Yeaaa, I'm not sure it's worth adding in set rip
dbg.add_exit("done",
smt.And(
rbx == b,
rip == pc+6
))
dbg.start()
#print(smt.simplify(dbg.tracestates[0].state.memstate.unique).sexpr())
dbg.tracestates[0].state.memstate
#dbg.tracestates
Unexpected SP conflict
Executing 0x400000/3: ADD RBX,RAX at (4194304, 0) PCODE IMARK ram[400000:3]
Executing 0x400000/3: ADD RBX,RAX at (4194304, 1) PCODE CF = carry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 2) PCODE OF = scarry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 3) PCODE RBX = RBX + RAX
Executing 0x400000/3: ADD RBX,RAX at (4194304, 4) PCODE SF = RBX s< 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 5) PCODE ZF = RBX == 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 6) PCODE unique[2c280:8] = RBX & 0xff
Executing 0x400000/3: ADD RBX,RAX at (4194304, 7) PCODE unique[2c300:1] = popcount(unique[2c280:8])
Executing 0x400000/3: ADD RBX,RAX at (4194304, 8) PCODE unique[2c380:1] = unique[2c300:1] & 0x1
Executing 0x400000/3: ADD RBX,RAX at (4194304, 9) PCODE PF = unique[2c380:1] == 0x0
MemState((let ((a!1 ((_ zero_extend 1)
(select64le (store64le (unique state0) &RIP #x0000000000400000)
&RBX)))
(a!2 ((_ zero_extend 1)
(select64le (store64le (unique state0) &RIP #x0000000000400000)
&RAX))))
(let ((a!3 (not (= ((_ extract 64 64) (bvadd a!1 a!2)) #b0))))
(let ((a!4 (store (store64le (unique state0) &RIP #x0000000000400000)
&CF
(ite a!3 #x01 #x00))))
(let ((a!5 (=> (and (bvslt #x0000000000000000 (select64le a!4 &RBX))
(bvslt #x0000000000000000 (select64le a!4 &RAX)))
(bvslt #x0000000000000000
(bvadd (select64le a!4 &RBX) (select64le a!4 &RAX))))))
(let ((a!6 (select64le (store a!4 &OF (ite (not a!5) #x01 #x00)) &RBX))
(a!7 (select64le (store a!4 &OF (ite (not a!5) #x01 #x00)) &RAX)))
(let ((a!8 (store64le (store a!4 &OF (ite (not a!5) #x01 #x00))
&RBX
(bvadd a!6 a!7))))
(let ((a!9 (store a!8
&SF
(ite (bvsgt #x0000000000000000 (select64le a!8 &RBX))
#x01
#x00))))
(let ((a!10 (store a!9
&ZF
(ite (= #x0000000000000000 (select64le a!9 &RBX)) #x01 #x00))))
(let ((a!11 (select64le (store64le (unique state0)
#x000000000002c280
(bvand (select64le a!10 &RBX)
#x00000000000000ff))
#x000000000002c280)))
(let ((a!12 (store (store64le (unique state0)
#x000000000002c280
(bvand (select64le a!10 &RBX) #x00000000000000ff))
#x000000000002c300
((_ extract 7 0) (popcount a!11)))))
(let ((a!13 (select (store a!12
#x000000000002c380
(bvand (select a!12 #x000000000002c300) #x01))
#x000000000002c380)))
(let ((a!14 (store64le (store a!10 &PF (ite (= #x00 a!13) #x01 #x00))
&RIP
#x0000000000400003)))
(and (= CUR_RAM (ram state0)) (= CUR_REGFILE a!14)))))))))))))))
sequence
https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/sequence.ml
%%file /tmp/sequence.s
_start:
add %rax,%rbx
add %rax,%rcx
mov $0x2,%rdx
imul %rdx,%rbx
done:
Writing /tmp/sequence.s
! as /tmp/sequence.s -o /tmp/sequence.o && objdump -d /tmp/sequence.o
/tmp/sequence.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_start>:
0: 48 01 c3 add %rax,%rbx
3: 48 01 c1 add %rax,%rcx
6: 48 c7 c2 02 00 00 00 mov $0x2,%rdx
d: 48 0f af da imul %rdx,%rbx
from kdrag.all import smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
dbg = asmspec.Debug(pcode.BinaryContext("/tmp/sequence.o"))
pc,a,b,c,d = smt.BitVecs("pc a b c d", 64)
rax = dbg.ctx.state_vars[r"%rax"]
rbx = dbg.ctx.state_vars[r"%rbx"]
rcx = dbg.ctx.state_vars[r"%rcx"]
rip = dbg.ctx.state_vars[r"%rip"]
dbg.add_entry("_start",
smt.And(
rax == a,
rbx == b,
rcx == c,
rip == pc
)
)
dbg.add_cut(dbg.label("_start") + 6,
smt.And(
rbx == a + b,
rip == pc + 6
)
)
dbg.add_exit("done",
smt.And(
rbx == (a + b) * 2,
rip == pc+0x11
))
dbg.start()
dbg.run()
dbg.verify()
Unexpected SP conflict
---------------------------------------------------------------------------
CLEFileNotFoundError Traceback (most recent call last)
Cell In[1], line 5
2 import kdrag.contrib.pcode as pcode
3 import kdrag.contrib.pcode.asmspec as asmspec
----> 5 dbg = asmspec.Debug(pcode.BinaryContext("/tmp/sequence.o"))
6 pc,a,b,c,d = smt.BitVecs("pc a b c d", 64)
8 rax = dbg.ctx.state_vars[r"%rax"]
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:439, in BinaryContext.__init__(self, filename, langid)
430 self.definitions.extend(
431 [
432 bv.store_concats(bits, size, le=le)
(...) 436 ]
437 )
438 if filename is not None:
--> 439 self.load(filename)
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:446, in BinaryContext.load(self, main_binary, **kwargs)
442 """
443 Load a binary file and initialize the context.
444 """
445 self.filename = main_binary
--> 446 self.loader = cle.loader.Loader(main_binary, **kwargs)
447 # self.ctx = pypcode.Context(self.loader.arch.name) # TODO: derive from cle
448 # Note: These decls need to be synchronized with the code in BinaryContext.substitute
449 # The decls are used for parsing textual smtlib
450 # substitute is used to convert these names into memstate expressions
451 decls = {
452 name: smt.BitVec(name, vnode.size * 8)
453 for name, vnode in self.ctx.registers.items()
454 }
File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:185, in Loader.__init__(self, main_binary, auto_load_libs, concrete_target, force_load_libs, skip_libs, main_opts, lib_opts, ld_path, use_system_libs, ignore_import_version_numbers, case_insensitive, rebase_granularity, except_missing_libs, aslr, perform_relocations, load_debug_info, page_size, preload_libs, arch)
183 self._main_opts.update({"arch": arch})
184 self.preload_libs = []
--> 185 self.initial_load_objects = self._internal_load(
186 main_binary, *preload_libs, *force_load_libs, preloading=(main_binary, *preload_libs)
187 )
189 # cache
190 self._last_object = None
File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:795, in Loader._internal_load(self, preloading, *args)
793 log.info("Skipping load request %s - already loaded", main_spec)
794 continue
--> 795 obj = self._load_object_isolated(main_spec)
796 objects.append(obj)
797 objects.extend(obj.child_objects)
File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:967, in Loader._load_object_isolated(self, spec, obj_ident)
965 close = False
966 elif isinstance(spec, bytes | str):
--> 967 binary = self._search_load_path(spec) # this is allowed to cheat and do partial static loading
968 log.debug("... using full path %s", binary)
969 binary_stream = open(binary, "rb")
File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:1126, in Loader._search_load_path(self, spec)
1122 continue
1124 return path
-> 1126 raise CLEFileNotFoundError(f"Could not find file {spec}")
CLEFileNotFoundError: Could not find file /tmp/sequence.o
branch
%%file /tmp/branch.S
.intel_syntax noprefix
.text
_start:
cmp rbx, rcx
ja BB2
mov rax, rcx
done1:
ret
BB2:
mov rax, rbx
done2:
ret
Overwriting /tmp/branch.S
! as /tmp/branch.S -o /tmp/branch.o && objdump -d /tmp/branch.o
/tmp/branch.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_start>:
0: 48 39 cb cmp %rcx,%rbx
3: 77 04 ja 9 <BB2>
5: 48 89 c8 mov %rcx,%rax
0000000000000008 <done1>:
8: c3 ret
0000000000000009 <BB2>:
9: 48 89 d8 mov %rbx,%rax
000000000000000c <done2>:
c: c3 ret
from kdrag.all import smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec

# BUG FIX: this section assembles /tmp/branch.o above, but the original cell
# loaded the previous example's /tmp/sequence.o.
dbg = asmspec.Debug(pcode.BinaryContext("/tmp/branch.o"))
pc, a, b, c, d, stackpointer, returnaddress = smt.BitVecs(
    "pc a b c d stackpointer returnaddress", 64
)
rax = dbg.ctx.state_vars[r"%rax"]
# BUG FIX: the original bound this to `ram32` but referenced `ram64` below
# (NameError). NOTE(review): confirm the right state_vars key for RAM here.
ram64 = dbg.ctx.state_vars[r"%ram32"]
rbx = dbg.ctx.state_vars[r"%rbx"]
rcx = dbg.ctx.state_vars[r"%rcx"]
rip = dbg.ctx.state_vars[r"%rip"]
rsp = dbg.ctx.state_vars[r"%rsp"]

# Entry: symbolic rbx/rcx, symbolic stack pointer with the return address on top.
dbg.add_entry("_start",
    smt.And(
        rbx == b,
        rcx == c,
        rsp == stackpointer,
        ram64[stackpointer] == returnaddress
    )
)
# Postcondition for both exits: popped return address, rip at the caller,
# and rax holding max(rbx, rcx).
# BUG FIX: `ja` is an UNSIGNED compare, so the spec must use UGT, not the
# signed `b > c` the original wrote.
post = smt.And(
    rsp == stackpointer + 8,
    rip == returnaddress,
    rax == smt.If(smt.UGT(b, c), b, c)
)
dbg.add_exit("done1",
    post
)
dbg.add_exit("done2",
    post
)
Hmm. Could I have implemented ghost assignment using assume? Uhhh, it’d be a little different. But maybe that’s fine
Hmm. Implement return. But how to say when it should stop? On Ret, throw a special error, or just stop, or have special signal
statevars should really be functions from state. More readable state expressions with folding/unfolding proof tracking
maychange
dbg = RelDebug(ctx1,ctx2) dbg.step1() dbg.step2()
step1 step2
dbg1 = prog1 dbg2 = prog2
dbg1.step(3) dbg2.step(5)
def state_lam(expr): smt.Lambda([state], smt.substitute()) # replace all references
sum array
%%file /tmp/sum_i32.s
.text
.globl sum_i32
.align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
mv t0, a0 # t0 = arr (cursor)
mv t1, a1 # t1 = n (remaining)
li a0, 0 # a0 = sum (return value)
beqz t1, .done # if n == 0 -> return 0
.loop:
lw t2, 0(t0) # t2 = *arr
add a0, a0, t2 # sum += *arr
addi t0, t0, 4 # arr++
addi t1, t1, -1 # n--
bnez t1, .loop # keep going while n != 0
.done:
ret
Writing /tmp/sum_i32.s
Ok I need
# pip install keystone-engine angr
code = """
.text
.globl sum_i32
.align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
mv t0, a0 # t0 = arr (cursor)
mv t1, a1 # t1 = n (remaining)
li a0, 0 # a0 = sum (return value)
beqz t1, .done # if n == 0 -> return 0
.loop:
lw t2, 0(t0) # t2 = *arr
add a0, a0, t2 # sum += *arr
addi t0, t0, 4 # arr++
addi t1, t1, -1 # n--
bnez t1, .loop # keep going while n != 0
.done:
ret
"""
from keystone import Ks, KS_ARCH_RISCV, KS_MODE_RISCV32
ks = Ks(KS_ARCH_RISCV, KS_MODE_RISCV32)
encoding, count = ks.asm(code)
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[1], line 25
1 # pip install keystone-engine angr
2 code = """
3 .text
4 .globl sum_i32
(...) 22 ret
23 """
---> 25 from keystone import Ks, KS_ARCH_RISCV, KS_MODE_RISCV32
26 ks = Ks(KS_ARCH_RISCV, KS_MODE_RISCV32)
27 encoding, count = ks.asm(code)
ImportError: cannot import name 'KS_ARCH_RISCV' from 'keystone' (/home/philip/philzook58.github.io/.venv/lib/python3.12/site-packages/keystone/__init__.py)
import keystone
keystone.
%%file /tmp/test.c
#include <stdint.h>
#include <stdio.h>
extern int32_t sum_i32(const int32_t* arr, uint32_t n);
int main() {
int32_t arr[] = {1, 2, 3, 4, 5};
uint32_t n = 5;
int32_t result = sum_i32(arr, n);
printf("Sum: %d\n", result); // Expected output: Sum: 15
return 0;
}
Overwriting /tmp/test.c
!nix-shell -p pkgsCross.riscv32.buildPackages.gcc --run "riscv32-unknown-linux-gnu-gcc -O2 /tmp/sum_i32.s /tmp/test.c -o /tmp/sum32"
! qemu-riscv32 /tmp/sum32
Sum: 15
from kdrag.all import *
BV32 = smt.BitVecSort(32)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV32), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, BV32)
n,addr = smt.BitVecs('n addr', 32)
kd.define("sum_i32", [mem, addr, n],
smt.If(n == 0,
0,
sum_i32(
mem,
addr + 4,
n - 1
) + mem[addr]
)
)
sum_i32
C dsl for auxiliary spec functions might be nice.
kd.simp(sum_i32(
MultStore(mem, 0, 1, 2, 3, 4, 5),
))
kd.full_simp(sum_i32(ram, addr, 0))
0
Bounded Version
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv
# Byte-addressed memory this time: ram maps 32-bit addresses to bytes,
# matching the pcode machine model.
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
# Spec function over byte memory: sum of n little-endian 32-bit words at addr.
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
# NOTE(review): the kd.define result is not rebound to sum_i32 here
# (the later cells do rebind it).
kd.define("sum_i32", [mem, addr, n],
smt.If(n == 0,
0,
sum_i32(
mem,
addr + 4,
n - 1
) + bv.select_concat(mem, addr, 4, le=True)
)
)
# Lift the binary and grab the symbolic state variables for the registers
# the proof talks about.
ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)
N = smt.BitVec("N", 32)
# Bounded verification: the symbolic length N is constrained to at most Nlim,
# so the recursion can be discharged by finitely many unfoldings below.
Nlim = 2
dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0, N <= Nlim, N >= 0))
#dbg.add_exit(".done", a0 == 0)
# alignment invariant?
# Cut (loop invariant) at .loop: a0 holds the sum of the words already
# consumed, where (t0 - addr)/4 counts consumed words (t0 is the cursor).
# NOTE(review): the second conjunct is a tautology (x == x) -- presumably a
# leftover placeholder for an alignment fact.
dbg.add_cut(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4),
(t0 - addr) / 4 == (t0 - addr) / 4,
4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N, N <= Nlim, N >= 0))
dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))
dbg.start()
#dbg.run()
print(len(dbg.tracestates[-1].state.path_cond))
dbg.tracestates
dbg.run()
"""
dbg.step()
dbg.step()
dbg.eval(t1)
dbg.tracestates
dbg.step()
dbg.step()
"""
dbg.tracestates
#dbg.verify()
# Discharge every emitted verification condition: unfold the bounded
# recursion Nlim+1 times, unfold the context's definitions, then hand the
# residue to the automatic solver.
while dbg.vcs:
    print(dbg.vcs[-1])
    l = dbg.pop_lemma()
    #for i in range(N+1)
    for i in range(Nlim+1):
        l.unfold(sum_i32)
    l.unfold(*ctx.definitions, keep=True)
    l.auto(timeout=10000)
    l.qed()
#dbg.add_cut(".loop", )
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
1
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Continuing execution at: 0x40061a trace_id [1, 1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0))
Continuing execution at: 0x400628 trace_id [1, 0] num insns 4
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1] num insns 5
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0)))
VC(Entry(sum_i32, 0x400610, And(a1 == N, addr == a0, N <= 2, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a1 == N, addr == a0, N <= 2, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
(t0 - addr)/4 == (t0 - addr)/4,
4*N == 4*t1 + t0 - addr,
t1 > 0,
t1 <= N,
N <= 2,
N >= 0)))
todo: refold path cond and unfold only in symexec. Symexec might want to eject a Proof obligation of some kind for possible pc. or have debug get the ability to inject a lemma into path cond.
num_insn in ghost state.
# RISC-V RV32 source for sum_i32, repeated for reference while reading the
# traces above. Calling convention: a0 = arr pointer, a1 = n on entry; the
# running sum is returned in a0.
.text
.globl sum_i32
.align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
mv t0, a0 # t0 = arr (cursor)
mv t1, a1 # t1 = n (remaining)
li a0, 0 # a0 = sum (return value)
beqz t1, .done # if n == 0 -> return 0
.loop:
lw t2, 0(t0) # t2 = *arr
add a0, a0, t2 # sum += *arr
addi t0, t0, 4 # arr++
addi t1, t1, -1 # n--
bnez t1, .loop # keep going while n != 0
.done:
ret
Unbounded
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
from kdrag.all import *
import kdrag.theories.bitvec as bv
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
# Keep the handle kd.define returns so tactics can unfold the definition.
sum_i32 = kd.define("sum_i32", [mem, addr, n],
smt.If(n == 0,
0,
sum_i32(
mem,
addr + 4,
n - 1
) + bv.select_concat(mem, addr, 4, le=True)
)
)
# Accumulator-form alternative definition (adopted in the next section).
#sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
# smt.If(n == 0,
# acc,
# sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
# )
#)
ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
#ram = ctx.state_vars["ram32"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)
# N pinned to a concrete 2 while debugging the invariant.
N = 2 #smt.BitVec("N", 32)
dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0))
#dbg.add_exit(".done", a0 == 0)
#dbg.add_assert(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), 4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N))
# NOTE(review): this cut uses t1 < N rather than t1 <= N -- the "Lessons"
# paragraph below records that the bounds were wrong at this stage.
dbg.add_cut(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), N == t1 + (t0 - addr)/4, t1 > 0, t1 < N))
dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))
dbg.start()
dbg.run()
"""
ctx.definitions.append(sum_i32)
#dbg.verify(unfold=sum_i32)
while dbg.vcs:
print(dbg.vcs[-1])
l = dbg.pop_lemma()
l.unfold(*ctx.definitions)
#l.intros()
for i in range(N+1):
l.unfold(sum_i32,keep=True)
l.auto()
l.qed()
"""
"""
l = dbg.pop_lemma()
l
print(l.goals[-1].goal.sexpr())
#l.qed()
l.unfold(*ctx.definitions)
l.intros()
l.unfold(sum_i32)
l.auto()
"""
#l.unfold(*ctx.definitions, at=1)
#l.auto(by=[f.defn for f in ctx.definitions])
#l.vc
# Interactive attack on one VC: pop it as a lemma object and poke at it with
# tactics (the triple-quoted blocks preserve abandoned attempts).
l = dbg.pop_lemma()
#for i in range(N+1):
# l.unfold(sum_i32,keep=True)
#l.simp()
"""
l.intros()
l.unfold(sum_i32, keep=True)
l.unfold(*ctx.definitions)
l.auto()
"""
print(l.vc)
l.intros()
l.unfold(sum_i32)
l.simp()
l.split(at=-1)
#l.rw(0)
#l.auto()
#l.unfold(*ctx.definitions)
#l
#l.unfold(*ctx.definitions)
#l.auto()
Store and evict cache pc
Cache read miss for t1
Cache read miss for t0
Cache read miss for a0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Store and evict cache unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Cache read miss for t0
Cache read hit for unique[580:4]
Store and evict cache unique[3080:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Cache read hit for unique[3080:4]
Store and evict cache t2
Store cache pc
Store and evict cache pc
Cache read miss for a1
Cache read miss for a0
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Cache read miss for a0
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Cache read miss for a1
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Store and evict cache unique[c00:4]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Cache read hit for unique[c00:4]
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Cache read hit for t1
Store and evict cache unique[1c00:1]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Cache read hit for unique[1c00:1]
Store cache pc
Continuing execution at: 0x40061a trace_id [1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
t1 + (t0 - addr)/4 == 2,
t1 > 0,
t1 < 2))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Cache read miss for a0
Cache read hit for t2
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Store and evict cache unique[c00:4]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Cache read miss for t0
Cache read hit for unique[c00:4]
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Store cache unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Cache read miss for t1
Cache read hit for unique[c00:4]
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Cache read hit for t1
Store and evict cache unique[1e80:1]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Cache read hit for unique[1e80:1]
Store cache pc
Continuing execution at: 0x400628 trace_id [0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2))
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
t1 + (t0 - addr)/4 == 2,
t1 > 0,
t1 < 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2)))
[select32le(register(state0), &a0) ==
sum_i32(ram(state0),
addr,
bvsdiv_i(select32le(register(state0), &t0) +
4294967295*addr,
4)),
select32le(register(state0), &t1) ==
2 +
4294967295*
bvsdiv_i(select32le(register(state0), &t0) + 4294967295*addr,
4),
Not(select32le(register(state0), &t1) <= 0),
Not(2 <= select32le(register(state0), &t1)),
select32le(register(state0), &t1) == 1]
?|= select32le(register(state0), &a0) +
select32le(ram(state0), select32le(register(state0), &t0)) ==
sum_i32(ram(state0), 4 + addr, 1) +
select32le(ram(state0), addr)
Lessons: Write “spec” program in accumulator form. Really basic blocks. Then induction is easy. Conversion to functional form can happen at the knuckledragger layer.
If I cut at every basic block I’d have ssa block form
Being able to inspect the formula in a somewhat readable form was crucial. Perhaps, to make things more readable, `l` should also print the vc. The vc ordering instability was painful; I should make it more stable. Avoid negation (?) — it makes nasty-looking goals.
I still need to do the arbitrary-N version; maybe I’m missing something. I had the bounds wrong (`< N` vs `<= N`), and I had t0 and t1 swapped as counter vs. address.
Set dbg as parent begg tactic
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
from kdrag.all import *
import kdrag.theories.bitvec as bv
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
# Accumulator-form spec: sum_i32(mem, acc, addr, n) = acc plus the sum of n
# little-endian words at addr. This matches the operational shape of the
# loop, so the invariant needs no re-association and the proofs below go
# through with little manual work (see "Lessons" above).
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr,acc,res = smt.BitVecs('n addr acc res', 32)
sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
smt.If(n == 0,
acc,
sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
)
)
ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
#ram = ctx.state_vars["ram32"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
# N is still a concrete bound; `res` is a ghost variable fixing the final
# answer at entry so the exit condition is just a0 == res.
N = 2 #smt.BitVec("N", 32)
dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0, res == sum_i32(ram, 0, addr, N)))
#dbg.add_exit(".done", a0 == 0)
end_addr = 4*N + addr
#dbg.add_assert(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), 4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N))
# Invariant: finishing the remaining t1 words from cursor t0, starting from
# the partial sum a0, yields res; plus bookkeeping tying t0/t1 to addr/N.
dbg.add_cut(".loop", smt.And(res == sum_i32(ram, a0, t0, t1), 4*N + addr == 4*t1 + t0, t1 > 0, t1 <= N))
dbg.add_exit(".done", a0 == res)
dbg.start()
dbg.run()
# VC 1: loop invariant + exit branch implies the postcondition.
l = dbg.pop_lemma()
print(l.vc)
# loop to done
l.intros()
l.split(at=0)
l.rw(0)
l.unfold(sum_i32)
l.unfold(sum_i32)
l.auto()
l.qed()
# VC 2: the loop body preserves the invariant.
l = dbg.pop_lemma()
print(l.vc)
# loop -> loop
l.intros()
l.split(at=0)
l.auto(by=[sum_i32.defn]) # really i think I want to
l.qed()
# VC 3: the entry condition establishes the invariant at the first cut.
l = dbg.pop_lemma()
# loop -> entry
print(l.vc)
l.intros()
l.split(at=0)
l.auto()
l.qed()
Store and evict cache pc
Cache read miss for t0
Cache read miss for t1
Cache read miss for a0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Store and evict cache unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Cache read miss for t0
Cache read hit for unique[580:4]
Store and evict cache unique[3080:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Cache read hit for unique[3080:4]
Store and evict cache t2
Store cache pc
Store and evict cache pc
Cache read miss for a0
Cache read miss for a1
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Cache read miss for a0
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Cache read miss for a1
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Store and evict cache unique[c00:4]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Cache read hit for unique[c00:4]
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Cache read hit for t1
Store and evict cache unique[1c00:1]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Cache read hit for unique[1c00:1]
Store cache pc
Continuing execution at: 0x40061a trace_id [1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Cache read miss for a0
Cache read hit for t2
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Store and evict cache unique[c00:4]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Cache read miss for t0
Cache read hit for unique[c00:4]
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Store cache unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Cache read miss for t1
Cache read hit for unique[c00:4]
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Cache read hit for t1
Store and evict cache unique[1e80:1]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Cache read hit for unique[1e80:1]
Store cache pc
Store cache pc
Continuing execution at: 0x40061a trace_id [0, 1] num insns 5
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == res))
Cache read hit for t0
Cache read hit for t1
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2)))
Cache read hit for t0
Cache read hit for t1
Cache read hit for a0
VC(Entry(sum_i32, 0x400610, And(a1 == 2, addr == a0, res == sum_i32(ram, 0, addr, 2))),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
8 + addr == 4*t1 + t0,
t1 > 0,
t1 <= 2)))
⊨Implies(And(select32le(register(state0), &a1) == 2, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, 2), Not(select32le(register(state0), &a1) == 0)), And(res == sum_i32(ram(state0), 0, select32le(register(state0), &a0), select32le(register(state0), &a1)), addr == 4294967288 + 4*select32le(register(state0), &a1) + select32le(register(state0), &a0), Not(select32le(register(state0), &a1) <= 0), select32le(register(state0), &a1) <= 2))
#l.rw(0)
#l.unfold(sum_i32)
#l.unfold(sum_i32)
#l.rw(-1)
#l.simp()
#l.simp()
#l.auto()
#l.unfold(*ctx.definitions)
l.intros()
l.unfold(sum_i32)
l.auto()
"""
#l.unfold(*ctx.definitions, at=1)
#l.auto(by=[f.defn for f in ctx.definitions])
#l.vc
l = dbg.pop_lemma()
#for i in range(N+1):
# l.unfold(sum_i32,keep=True)
#l.simp()
"""
l.intros()
l.unfold(sum_i32, keep=True)
l.unfold(*ctx.definitions)
l.auto()
"""
print(l.vc)
l.intros()
l.unfold(sum_i32)
l.simp()
l.split(at=-1)
#l.rw(0)
#l.auto()
#l.unfold(*ctx.definitions)
#l
#l.unfold(*ctx.definitions)
#l.auto()
#l = dbg.pop_lemma()
#print(l.vc)
#l.intros()
#l.split(at=0)
#l.rw(0)
#l.rw(1,rev=True)
#l.have((t0 == 0) == False)
# Manually inspect the last outstanding VC with a raw z3 solver: assert the
# hypothesis side (arg 0 of the implication) together with the negated
# conclusion (arg 1), so a `sat` result would be a concrete countermodel.
vc = dbg.vcs[-1]
print(vc)
s = smt.Solver()
v = vc.vc(ctx)
s.add(v.arg(0))
s.add(smt.Not(v.arg(1)))
#print(smt.Not(v.arg(1)).sexpr())
# Dump the whole implication in s-expression form for readability.
print(vc.vc(ctx).sexpr())
VC(Entry(sum_i32, 0x400610, And(a1 == 2, addr == a0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2)))
Cache read hit for a0
Cache read hit for a0
(let ((a!1 (not (= (select32le (register state0) &a1) #x00000000)))
(a!2 (not (= (select32le (register state0) &a1) #x00000001)))
(a!4 (select32le (ram state0)
(bvadd #x00000004 (select32le (register state0) &a0)))))
(let ((a!3 (and (= (select32le (register state0) &a1) #x00000002)
(= addr (select32le (register state0) &a0))
a!1
a!2))
(a!5 (bvadd (select32le (ram state0) (select32le (register state0) &a0))
a!4)))
(=> a!3 (= a!5 (sum_i32 (ram state0) addr #x00000002)))))
pycparser
cmake python ziglang
make a toy symbolic executor?
https://github.com/lark-parser/lark/blob/master/examples/grammars/verilog.lark
Extract C spec stuff from smtlib, dischage using cbmc?
struct offsets + code that uses it.
vs imperative DSL in lark
import kdrag.printers.c as cprinter
def pre_post_cbmc(self, pre, post, filename):
    # Sketch: print the pre/post conditions as C functions, wrap them around
    # the code under test, and discharge the obligation with cbmc.
    # NOTE(review): unfinished stub -- the cprinter.print calls are missing
    # their body argument and `subprocess` is not imported in this cell.
    # make harness
    cprinter.print("kdrag_pre", [], )
    cprinter.print("kdrag_post", [], )
    subprocess.call(["cbmc", filename])
import pycparser
from pycparser import c_parser, c_ast, parse_file
import tempfile
prog = """
//#include <stdint.h>
//typedef unsigned long uint64_t;
int myadd(int a, int b, int* mem) {
int d;
int c = a + b;
d = a - b;
return c + a / d + mem[0];
}
"""
#ast = c_parser.CParser().parse(prog)
#def binop(ast : c_ast.BinOp):
# assert isinstance(ast, c_ast.BinOp)
import re
from dataclasses import dataclass, field
@dataclass
class Env:
    """Typing and value environment threaded through the C interpreter."""

    bits: int = 32   # machine word width in bits
    le: bool = True  # little-endian memory layout
    # variable name -> smt sort
    tenv: dict[str, smt.SortRef] = field(default_factory=dict)
    # variable name -> smt expression for its current value
    env: dict[str, smt.ExprRef] = field(default_factory=dict)

    def copy(self):
        """Return a shallow copy: fresh dicts, shared smt expressions."""
        return Env(bits=self.bits, le=self.le, tenv=dict(self.tenv), env=dict(self.env))
def of_type(ast, tenv):
match ast:
case c_ast.IdentifierType(names=[t]):
if t == "int":
return smt.BitVecSort(32)
match = re.compile(r"u?int(\d+)_t").fullmatch(t)
if match:
bits = int(match.group(1))
return smt.BitVecSort(bits)
else:
raise NotImplementedError(f"Unknown type {ast}")
case c_ast.ArrayDecl(type=ty) | c_ast.PtrDecl(type=ty):
return smt.ArraySort(smt.BitVecSort(32), of_type(ty, tenv))
case c_ast.TypeDecl(type=ty):
return of_type(ty, tenv)
case _:
raise NotImplementedError(f"Unknown type {ast}")
def interp(prog):
    """
    Symbolically interpret a C program consisting of a single function
    definition, returning the smt expression for its return value.

    AST node reference:
    https://github.com/eliben/pycparser/blob/main/pycparser/c_ast.py
    """
    # pycparser's preprocessor path wants a real file on disk.
    with tempfile.NamedTemporaryFile(suffix=".c") as f:
        f.write(prog.encode())
        f.flush()
        ast = parse_file(f.name, use_cpp=True)
        #ast = c_parser.CParser().parse(prog)
    match ast:
        case c_ast.FileAST(ext=[c_ast.FuncDef(decl=decl, body=body)]):
            match decl:
                case c_ast.Decl(name=name, type=c_ast.FuncDecl(args=c_ast.ParamList(params=params), type=rettype)):
                    # Seed the environment with a fresh symbolic constant
                    # for each parameter.
                    env = {}
                    for param in params:
                        #print(param)
                        match param:
                            case c_ast.Decl(name=pname, type=ty):
                                t = of_type(ty, {})
                                env[pname] = smt.Const(pname, t)
                            case _:
                                raise NotImplementedError(f"Unknown param {param}")
                    print({k: v.sort() for k,v in env.items()})
                    # Execute the body; bmc_stmt stashes the return value
                    # under the reserved key "##retval".
                    bmc_stmt(body, env)
                    return env["##retval"]
                case _:
                    raise NotImplementedError(f"Unknown function decl {decl}")
        case _:
            raise NotImplementedError(f"Unknown top level {ast}")
def bmc_stmt(ast : c_ast.Node, env) -> Optional[smt.ExprRef]:
match ast:
case c_ast.Compound(block_items=stmts):
for stmt in stmts:
res = bmc_stmt(stmt, env)
return
case c_ast.Assignment(op="=", lvalue=lvalue, rvalue=rvalue):
rval = bmc_stmt(rvalue, env)
match lvalue:
case c_ast.ID(name=name):
env[name] = rval
#case c_ast.ArrayRef(name=name, subscript=subscript):
# arr = bmc_stmt(name, env)
# index = bmc_stmt(subscript, env)
# env[name] = smt.store(index, rval)
case _:
raise NotImplementedError(f"Unknown lvalue {lvalue}")
return rval
case c_ast.Return(expr=expr):
res = bmc_stmt(expr, env)
env["##retval"] = res
return
case c_ast.Decl(name=name, type=ty, init=init):
t = of_type(ty.type, env)
if init:
env[name] = bmc_stmt(init, env)
else:
env[name] = smt.Const(name, t)
case c_ast.If(cond=cond, iftrue=iftrue, iffalse=iffalse):
b = bmc_stmt(cond, env)
env1 = env.copy()
t = bmc_stmt(iftrue, env)
e = bmc_stmt(iffalse, env1)
for k,t in env1.items():
assert k in env
if not env[k].eq(t):
env[k] = smt.If(b, t, e)
case c_ast.Constant(type=ty, value=value):
return value
case c_ast.ID(name=name):
return env[name]
case c_ast.BinaryOp(op=op, left=left, right=right):
l = bmc_stmt(left, env)
r = bmc_stmt(right, env)
match op:
case "+":
return l + r
case "-":
return l - r
case "*":
return l * r
case "/":
return l / r
case _:
raise NotImplementedError(f"Unknown binary op {op}")
case c_ast.UnaryOp(op=op, expr=expr):
e = bmc_stmt(expr, env)
match op:
case "-":
return -e
case "~":
return ~e
case _:
raise NotImplementedError(f"Unknown unary op {op}")
case c_ast.TernaryOp(cond=cond, iftrue=iftrue, iffalse=iffalse):
b = bmc_stmt(cond, env)
env1 = env.copy()
t = bmc_stmt(iftrue, env)
e = bmc_stmt(iffalse, env1)
for k,t in env1.items():
assert k in env
if not env[k].eq(t):
env[k] = smt.If(b, t, e)
return smt.If(b, t, e)
case c_ast.ArrayRef(name=name, subscript=subscript):
arr = bmc_stmt(name, env)
index = bmc_stmt(subscript, env)
print(arr, index)
return arr[index]
case c_ast.FuncCall(name=c_ast.ID(name=fname), args=c_ast.ExprList(exprs=args)):
argvals = [bmc_stmt(arg, env) for arg in args]
env[fname](*argvals)
case _:
raise NotImplementedError(f"Unknown expr {ast}")
interp(prog)
{'a': BitVec(32), 'b': BitVec(32), 'mem': Array(BitVec(32), BitVec(32))}
mem 0
a + b + a/(a - b) + mem[0]
import sys

import tree_sitter_c
from tree_sitter import Language, Parser

CLANG = Language(tree_sitter_c.language())
parser = Parser(CLANG)
tree = parser.parse(
    bytes(prog, "utf8"))
# BUGFIX: Tree.print_dot_graph requires a writable file object -- the
# traceback below shows the zero-argument call failing with TypeError.
tree.print_dot_graph(sys.stdout)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[19], line 7
4 parser = Parser(CLANG)
5 tree = parser.parse(
6 bytes(prog, "utf8" ))
----> 7 tree.print_dot_graph()
TypeError: Tree.print_dot_graph() takes exactly one argument (0 given)
riscv formal
%%file /tmp/add.v
// https://github.com/YosysHQ/riscv-formal/blob/main/insns/insn_add.v
// DO NOT EDIT -- auto-generated from riscv-formal/insns/generate.py
// Formal spec for one instruction in the RVFI (riscv-formal) harness: it
// decodes rvfi_insn and drives the spec_* outputs describing what an ADD
// must do, for comparison against the core under test.
module rvfi_insn_add (
input rvfi_valid,
input [`RISCV_FORMAL_ILEN - 1 : 0] rvfi_insn,
input [`RISCV_FORMAL_XLEN - 1 : 0] rvfi_pc_rdata,
input [`RISCV_FORMAL_XLEN - 1 : 0] rvfi_rs1_rdata,
input [`RISCV_FORMAL_XLEN - 1 : 0] rvfi_rs2_rdata,
input [`RISCV_FORMAL_XLEN - 1 : 0] rvfi_mem_rdata,
`ifdef RISCV_FORMAL_CSR_MISA
input [`RISCV_FORMAL_XLEN - 1 : 0] rvfi_csr_misa_rdata,
output [`RISCV_FORMAL_XLEN - 1 : 0] spec_csr_misa_rmask,
`endif
output spec_valid,
output spec_trap,
output [ 4 : 0] spec_rs1_addr,
output [ 4 : 0] spec_rs2_addr,
output [ 4 : 0] spec_rd_addr,
output [`RISCV_FORMAL_XLEN - 1 : 0] spec_rd_wdata,
output [`RISCV_FORMAL_XLEN - 1 : 0] spec_pc_wdata,
output [`RISCV_FORMAL_XLEN - 1 : 0] spec_mem_addr,
output [`RISCV_FORMAL_XLEN/8 - 1 : 0] spec_mem_rmask,
output [`RISCV_FORMAL_XLEN/8 - 1 : 0] spec_mem_wmask,
output [`RISCV_FORMAL_XLEN - 1 : 0] spec_mem_wdata
);
// R-type instruction format
// insn_padding is nonzero when the upper bits beyond 32 are set (i.e. the
// fetched value is not a plain 32-bit encoding).
wire [`RISCV_FORMAL_ILEN-1:0] insn_padding = rvfi_insn >> 16 >> 16;
wire [6:0] insn_funct7 = rvfi_insn[31:25];
wire [4:0] insn_rs2 = rvfi_insn[24:20];
wire [4:0] insn_rs1 = rvfi_insn[19:15];
wire [2:0] insn_funct3 = rvfi_insn[14:12];
wire [4:0] insn_rd = rvfi_insn[11: 7];
wire [6:0] insn_opcode = rvfi_insn[ 6: 0];
`ifdef RISCV_FORMAL_CSR_MISA
wire misa_ok = (rvfi_csr_misa_rdata & `RISCV_FORMAL_XLEN'h 0) == `RISCV_FORMAL_XLEN'h 0;
assign spec_csr_misa_rmask = `RISCV_FORMAL_XLEN'h 0;
`else
wire misa_ok = 1;
`endif
// ADD instruction
wire [`RISCV_FORMAL_XLEN-1:0] result = rvfi_rs1_rdata + rvfi_rs2_rdata;
// Valid iff the encoding matches ADD: funct7=0000000, funct3=000, opcode=0110011.
assign spec_valid = rvfi_valid && !insn_padding && insn_funct7 == 7'b 0000000 && insn_funct3 == 3'b 000 && insn_opcode == 7'b 0110011;
assign spec_rs1_addr = insn_rs1;
assign spec_rs2_addr = insn_rs2;
assign spec_rd_addr = insn_rd;
// Writes to register x0 (rd == 0) are forced to zero.
assign spec_rd_wdata = spec_rd_addr ? result : 0;
assign spec_pc_wdata = rvfi_pc_rdata + 4;
// default assignments
assign spec_trap = !misa_ok;
assign spec_mem_addr = 0;
assign spec_mem_rmask = 0;
assign spec_mem_wmask = 0;
assign spec_mem_wdata = 0;
endmodule
Writing /tmp/add.v
import kdrag.contrib.yosys as yosys
# FIX: the original cell was garbled (unbalanced parenthesis, stray call
# arguments). The help output below documents the unbound method itself --
# from_file(name, wire_names, filepath) -- so `help` was evidently applied
# to the method, not to a call result.
help(yosys.VerilogModuleRel.from_file)
Help on method from_file in module kdrag.contrib.yosys:
from_file(name: str, wire_names: list[str], filepath: str) class method of kdrag.contrib.yosys.VerilogModuleRel
older
https://github.com/cfbolz/vexingz3
Battle Plan:
Done:
- allow easier python spec definition
-
%reg syntax for cool inline asm demos
def add_subst def add_state_var def add_cb
syscalls?
Interrupt preludes It got a lot slower? pi pico More examples from frama-c, s2n-bignum simd reads/writes kd_always. Some interrupt bug?
Change register names to enable true inline assembly.
Can I use unicorn to validate against that easily?
spacer? I’d be shocked if useful.
sizex <= x - y <= -sizey \/ sizey <= y - x <= -sizex
We can say exactly when the or happens if x > y then sizey <= x - y <= -sizex else sizey <= y - x <= -sizex
Inequality and addition/difference don’t play the same on bitvectors as they do on the ints.
https://mcyoung.xyz/2023/11/27/simd-base64/ simd
Would I want to state from particular entry? or even address
kd_reachable from_label here_label kd_reachable here_label kd_unreachable from_label here_label
address expressions. Literal address for label, or <foo+0x7> relative addressing. I guess not necessary if this is mainly for textual assembly we have no control over. In that case just use programmatic bindings
https://github.com/toolCHAINZ/jingle
https://github.com/m4b/goblin An impish, cross-platform binary parsing crate, written in Rust https://github.com/m4b/bingrep https://github.com/m4b/faerie
https://ruor.uottawa.ca/server/api/core/bitstreams/86cc22d7-4517-4744-9cf7-4b1745a6c029/content — race conditions and memory models. Amusing, because that is what Sail is for. Could be fun.
https://news.ycombinator.com/item?id=40185065 https://alastairreid.github.io/riscv-spec-issues/ https://github.com/riscv/riscv-opcodes just the opcodes
https://cs.ru.nl/~mschool/pub/vstte-why3-avr-revised.pdf why3 avr
https://antmicro.com/blog/2025/09/girdl-ghidra-renode-integration/ https://github.com/antmicro/girdl pull in svd files? That’s a good idea. I doubt I can directly use this
kd_prelude "(define-fun nonoverlap ((a (_ BitVec 64)) (sa (_ BitVec 64)) (b (_ BitVec 64)) (sb (_ BitVec 64))))"
kd_prelude "(define-const bvone (_ bv1 64))"
kd_prelude "(define-const bvtwo (_ bv2 64))"
kd_prelude "(define-fun select32 ...)"
from kdrag.all import *
# 64-bit base addresses of the two regions.
x,y = smt.BitVecs("x y", 64)
# NOTE(review): z3's Ints splits names on spaces only, so the first constant is
# literally named "sizex," (trailing comma). Harmless, but "sizex sizey" was
# probably intended -- confirm.
sizex, sizey = smt.Ints("sizex, sizey")
bsizex = smt.Int2BV(sizex, 64)
bsizey = smt.Int2BV(sizey, 64)
maxint = smt.BitVecVal(2**64-1, 64)
# no_overlap(x, sizex, y, sizey): the byte ranges starting at x and y are
# disjoint, phrased with unsigned comparisons to try to account for wraparound.
no_overlap = kd.define("no_overlap", [x,sizex,y,sizey],
smt.If( smt.ULE(x,y),
smt.And( smt.ULT(bsizex , y - x), smt.ULE(bsizey, maxint - (y - x))),
smt.And( smt.ULT(bsizey , x - y), smt.ULE(bsizex, maxint - (x - y)))
))
# Sanity check: five distinct addresses should imply non-overlap of a 2-byte and
# a 3-byte region. This prove FAILS (countermodel below): z3 picks x and y near
# 2**64 so the differences wrap around.
kd.prove(smt.Implies(smt.Distinct(x, x+1, y, y+1, y+2), no_overlap(x,2,y,3)), unfold=1)
---------------------------------------------------------------------------
LemmaError Traceback (most recent call last)
Cell In[18], line 14
7 maxint = smt.BitVecVal(2**64-1, 64)
8 no_overlap = kd.define("no_overlap", [x,sizex,y,sizey],
9 smt.If( smt.ULE(x,y),
10 smt.And( smt.ULT(bsizex , y - x), smt.ULE(bsizey, maxint - (y - x))),
11 smt.And( smt.ULT(bsizey , x - y), smt.ULE(bsizex, maxint - (x - y)))
12 ))
---> 14 kd.prove(smt.Implies(smt.Distinct(x, x+1, y, y+1, y+2), no_overlap(x,2,y,3)), unfold=1)
File ~/Documents/python/knuckledragger/kdrag/tactics.py:286, in prove(thm, by, admit, timeout, dump, solver, instan, unfold)
282 raise Exception(
283 "Worked with quantifier stripped. Something is going awry", pf
284 )
285 else:
--> 286 raise e
287 except Exception as e:
288 raise e
File ~/Documents/python/knuckledragger/kdrag/tactics.py:264, in prove(thm, by, admit, timeout, dump, solver, instan, unfold)
262 by.append(kd.kernel.prove(thm == thm1, by=trace, timeout=timeout)) # type: ignore
263 try:
--> 264 pf = kd.kernel.prove(
265 thm, by, timeout=timeout, dump=dump, solver=solver, admit=admit
266 )
267 kdrag.config.perf_event("prove", thm, time.perf_counter() - start_time)
268 return pf
File ~/Documents/python/knuckledragger/kdrag/kernel.py:176, in prove(thm, by, admit, timeout, dump, solver)
174 if res != smt.unsat:
175 if res == smt.sat:
--> 176 raise LemmaError(thm, "Countermodel", s.model())
177 raise LemmaError("prove", thm, res)
178 else:
LemmaError: (Implies(Distinct(x, x + 1, y, y + 1, y + 2),
no_overlap(x, 2, y, 3)), 'Countermodel', [y = 18446744073709551613,
x = 18446744073709551611,
no_overlap = [else -> False]])
2**64-1
18446744073709551615
Bytecode
%%file /tmp/aexpr.s
.option norvc
.text
.globl _start
# Stack-machine bytecode interpreter for (CONST/ADD/MUL/HALT) programs.
.equ OP_HALT, 0
.equ OP_CONST, 1
.equ OP_ADD, 2
.equ OP_MUL, 3
_start:
la t1, program # ip: points to current 32-bit instruction
la t0, stack # sp: next free word on the VM stack (grows up)
loop:
lw t2, 0(t1) # t2 = opcode (full word)
beqz t2, done # HALT (opcode 0)
addi t1, t1, 4 # advance ip to next word (default)
# dispatch on opcode value
li t3, OP_CONST
beq t2, t3, op_const
li t3, OP_ADD
beq t2, t3, op_add
li t3, OP_MUL
beq t2, t3, op_mul
j loop # unknown opcode -> ignore
# --- CONST (two-word instruction) ---
op_const:
lw t4, 0(t1) # load immediate from *next* word
addi t1, t1, 4 # ip consumes the extra word
sw t4, 0(t0) # push
addi t0, t0, 4
j loop
# --- ADD ---
op_add:
addi t0, t0, -4
lw t4, 0(t0) # b
addi t0, t0, -4
lw t5, 0(t0) # a
add t6, t5, t4 # a+b
sw t6, 0(t0)
addi t0, t0, 4
j loop
# --- MUL (needs 'M' extension; see note below for pure RV32I) ---
op_mul:
addi t0, t0, -4
lw t4, 0(t0) # b
addi t0, t0, -4
lw t5, 0(t0) # a
mul t6, t5, t4 # a*b
sw t6, 0(t0)
addi t0, t0, 4
j loop
done:
# Pop result into 'result' and park
addi t0, t0, -4
lw t4, 0(t0)
la t5, result
sw t4, 0(t5)
hang:
j hang
.section .rodata
# Program: (3 + 4) * 5
# Words: CONST 3, CONST 4, ADD, CONST 5, MUL, HALT
program:
.word 1, 3
.word 1, 4
.word 2
.word 1, 5
.word 3
.word 0
.bss
.align 4
stack:
.space 1024 # 256-word VM stack
.data
.align 4
result:
.word 0
Writing /tmp/aexpr.s
! /home/philip/Downloads/riscv-toolchain-15-x86_64-lin/riscv32-unknown-elf/bin/as /tmp/aexpr.s -o aexpr.o && /home/philip/Downloads/riscv-toolchain-15-x86_64-lin/riscv32-unknown-elf/bin/objdump -d aexpr.o
aexpr.o: file format elf32-littleriscv
Disassembly of section .text:
00000000 <_start>:
0: 00000317 auipc t1,0x0
4: 00030313 mv t1,t1
8: 00000297 auipc t0,0x0
c: 00028293 mv t0,t0
00000010 <loop>:
10: 00032383 lw t2,0(t1) # 0 <_start>
14: 06038c63 beqz t2,8c <done>
18: 00430313 addi t1,t1,4
1c: 00100e13 li t3,1
20: 01c38c63 beq t2,t3,38 <op_const>
24: 00200e13 li t3,2
28: 03c38263 beq t2,t3,4c <op_add>
2c: 00300e13 li t3,3
30: 03c38e63 beq t2,t3,6c <op_mul>
34: fddff06f j 10 <loop>
00000038 <op_const>:
38: 00032e83 lw t4,0(t1)
3c: 00430313 addi t1,t1,4
40: 01d2a023 sw t4,0(t0) # 8 <_start+0x8>
44: 00428293 addi t0,t0,4
48: fc9ff06f j 10 <loop>
0000004c <op_add>:
4c: ffc28293 addi t0,t0,-4
50: 0002ae83 lw t4,0(t0)
54: ffc28293 addi t0,t0,-4
58: 0002af03 lw t5,0(t0)
5c: 01df0fb3 add t6,t5,t4
60: 01f2a023 sw t6,0(t0)
64: 00428293 addi t0,t0,4
68: fa9ff06f j 10 <loop>
0000006c <op_mul>:
6c: ffc28293 addi t0,t0,-4
70: 0002ae83 lw t4,0(t0)
74: ffc28293 addi t0,t0,-4
78: 0002af03 lw t5,0(t0)
7c: 03df0fb3 mul t6,t5,t4
80: 01f2a023 sw t6,0(t0)
84: 00428293 addi t0,t0,4
88: f89ff06f j 10 <loop>
0000008c <done>:
8c: ffc28293 addi t0,t0,-4
90: 0002ae83 lw t4,0(t0)
94: 00000f17 auipc t5,0x0
98: 000f0f13 mv t5,t5
9c: 01df2023 sw t4,0(t5) # 94 <done+0x8>
000000a0 <hang>:
a0: 0000006f j a0 <hang>
Run regular gdb
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
# Sketch of an interactive, gdb-like driver over the symbolic executor.
# Everything beyond __init__ is parked in a string literal for now.
class Debug():
def __init__(self, ctx):
#self.watches = []
self.ctx = ctx
"""
def start(self, spec):
if mem is None:
mem = ctx.init_mem() # pcode.MemState.Const("mem", bits=ctx.bits)
todo, vcs = init_trace_states(ctx, mem, spec, verbose=verbose)
self.todo = todo
self.vcs = vcs
def next(self):
pass
def step(self):
ntraces = len(self.todo)
while len(self.todo) == ntraces:
self.next()
def pop_state(self):
pass
def focus(self, state_id):
def watch(self, expr):
self.watches.append(expr)
def concrete(self):
pass
def insn(self):
pass
"""
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
import pprint
# Build a spec for "myasm.o" and symbolically execute all paths through it.
spec = asmspec.AsmSpec()
ctx = pcode.BinaryContext("myasm.o", langid="RISCV:LE:32:default")  # was `ctx = ctx = ...` (typo)
startaddr = ctx.loader.find_symbol("myentry").rebased_addr
exitaddr = ctx.loader.find_symbol("myexit").rebased_addr
# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
t0 = ctx._subst_decls['t0']
a0 = ctx._subst_decls['a0']
a1 = ctx._subst_decls['a1']
a2 = ctx._subst_decls['a2']
sp = ctx._subst_decls['sp']
stride = 4
addr = 76
size = 0
# Entry precondition: a0 points at a little command structure in ram.
spec.add_entry("mymemset", startaddr, smt.And(
    a0 == 1000,
    sp == 1000000,
    ram32[a0] == 0,          # type
    ram32[a0 + 4] == addr,   # addr
    ram32[a0 + 8] == stride, # stride,
    ram32[a0 + 12] == size,  # size
    #ram32[a0 + 12] == 1, # size
    ram32[a0 + 16] == 42,    # data
    ram32[a0 + 20] == 2,     # end
    a1 == a1,  # no-op placeholders, kept so a1/a2 appear in the formula
    a2 == a2
))
# Mid-loop sanity assertion, 8 bytes past the "lit" label.
spec.add_assert("loopsanity", ctx.loader.find_symbol("lit").rebased_addr + 8, smt.And(
    ram32[a0 + 0] == addr,   # addr
    ram32[a0 + 4] == stride, # stride,
    #ram32[a0 + 8] == 1, # size)
))
#spec.add_assign("mymemset", startaddr, "dst", ram32[a0])
#spec.add_assign("mymemset", startaddr, "value", ram32[a0 + 4])
#dst, value = smt.BitVecs('dst value', 32)
spec.add_exit("myexit", exitaddr, smt.BoolVal(True))
vcs = asmspec.run_all_paths(ctx, spec, max_insns=250)
print(len(vcs), "paths found")
for vc in vcs:
    pprint.pp(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth pursuing?
    try:
        vc.verify(ctx)
        print("success")
    except Exception as e:
        print("fail")
        if not isinstance(vc.assertion, asmspec.OutOfGas):
            # e.args == (thm, "Countermodel", model) for prover failures.
            # (moved inside the branch: an out-of-gas exception need not carry args[2])
            countermodel = e.args[2]
            pprint.pp({k: smt.simplify(v) for k, v in vc.countermodel(ctx, countermodel).items()})
        else:
            print("out of gas")
%%file /tmp/aexpr.c
int exec( , *):
from kdrag.all import *
import kdrag.theories.seq as seq
# AST of arithmetic expressions: 32-bit constants and addition.
Aexpr = kd.Inductive("Aexpr")
Aexpr.declare("Const", ("val", smt.BitVecSort(32)))
Aexpr.declare("Add", ("x0", Aexpr), ("x1", Aexpr))
Aexpr = Aexpr.create()
kd.notation.add.register(Aexpr, Aexpr.Add)
e, e1, e2 = smt.Consts("e e1 e2", Aexpr)
# Stack-machine bytecode: push-constant and add-top-two.
Code = kd.Inductive("Code")
Code.declare("Const", ("val", smt.BitVecSort(32)))
Code.declare("Add")
Code = Code.create()
# NOTE: `eval`, `compile`, `exec` shadow Python builtins; fine inside one cell.
eval = smt.Function("eval", Aexpr, smt.BitVecSort(32))
eval = kd.define("eval", [e],
smt.If(
e.is_Const, e.val,
eval(e.x0) + eval(e.x1)
)
)
# compile: post-order flattening, Add emitted after both operand sequences.
compile = smt.Function("compile", Aexpr, seq.Seq(Code))
compile = kd.define("compile", [e],
smt.If(e.is_Const,
smt.Unit(Code.Const(e.val)),
smt.Concat( compile(e.x0), compile(e.x1) , smt.Unit(Code.Add)))
)
c = smt.Const("c", Code)
s = smt.Const("st", seq.Seq(smt.BitVecSort(32)))
# exec1: one instruction against the stack. NOTE(review): Const pushes at the
# *back* (Concat(s, Unit ...)) while Add pops via Head/Tail from the *front* --
# these orientations look inconsistent; confirm intended stack direction.
exec1 = kd.define("exec1", [c, s],
smt.If(c.is_Const,
smt.Concat(s, smt.Unit(c.val)),
smt.Concat(seq.Tail(seq.Tail(s)), smt.Unit(seq.Head(s) + seq.Head(seq.Tail(s))))
)
)
cs = smt.Const("cs", seq.Seq(Code))
# exec: fold exec1 over the instruction sequence (recursive definition).
exec = smt.Function("exec", seq.Seq(Code), seq.Seq(smt.BitVecSort(32)), seq.Seq(smt.BitVecSort(32)))
exec = kd.define("exec",
[cs,s],
smt.If(
smt.Length(cs) == 0,
s,
exec(seq.Tail(cs), exec1(seq.Head(cs), s))))
one = Aexpr.Const(1)
start = seq.Nil(smt.BitVecSort(32))
assert kd.simp(eval(one + one + one)).eq(smt.BitVecVal(3,32))
# This raises Z3Exception (traceback below): a symbolic Seq cannot be coerced to
# a Python bool. Presumably an `.eq(...)` against the expected stack was meant.
assert kd.simp(exec(compile(one + one + one), start))
---------------------------------------------------------------------------
Z3Exception Traceback (most recent call last)
/tmp/ipykernel_1045074/625003890.py in ?()
53
54 start = seq.Nil(smt.BitVecSort(32))
55
56 assert kd.simp(eval(one + one + one)).eq(smt.BitVecVal(3,32))
---> 57 assert kd.simp(exec(compile(one + one + one), start))
58
59
60
~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(self)
387 return False
388 elif is_eq(self) and self.num_args() == 2:
389 return self.arg(0).eq(self.arg(1))
390 else:
--> 391 raise Z3Exception("Symbolic expressions cannot be cast to concrete Boolean values.")
Z3Exception: Symbolic expressions cannot be cast to concrete Boolean values.
Debugger
use angr to get views?
sy brand
actually make a cli?
gdb or unicorn?
class Debug():
    """
    Sketch of an interactive, gdb-like driver over the symbolic executor.
    Holds the in-flight trace states (`todo`) and the verification
    conditions discovered so far (`vcs`).
    """
    def __init__(self, ctx):
        self.watches = []  # expressions to display at every stop
        self.ctx = ctx
    def start(self, spec, mem=None, verbose=False):
        """Initialize symbolic execution from the spec's entry points.
        (was: referenced undefined locals `mem`/`ctx`/`verbose`)"""
        if mem is None:
            mem = self.ctx.init_mem()  # pcode.MemState.Const("mem", bits=ctx.bits)
        # NOTE(review): init_trace_states is assumed importable from the
        # surrounding module -- confirm where it lives.
        todo, vcs = init_trace_states(self.ctx, mem, spec, verbose=verbose)
        self.todo = todo
        self.vcs = vcs
    def next(self):
        pass
    def step(self):
        """
        Step to next branch point
        """
        ntraces = len(self.todo)
        while len(self.todo) == ntraces:
            self.next()
    def pop_state(self):
        pass
    def focus(self, state_id):
        # TODO: select which pending trace state subsequent commands act on.
        pass  # (was: missing body -- a syntax error)
    def watch(self, expr):
        self.watches.append(expr)
    def concrete(self):
        pass
    def insn(self):
        pass
from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
class Debug():
    """Sketch: driver state for semi-interactive exploration of trace fragments."""
    def __init__(self, ctx):
        self.watches = []
        self.ctx = ctx
    def start(self, spec, mem=None, verbose=False):
        """Kick off symbolic execution for `spec`.
        (was: used undefined locals `mem`/`ctx`/`verbose`)"""
        if mem is None:
            mem = self.ctx.init_mem()  # pcode.MemState.Const("mem", bits=ctx.bits)
        # NOTE(review): assuming init_trace_states lives in asmspec -- confirm.
        todo, vcs = asmspec.init_trace_states(self.ctx, mem, spec, verbose=verbose)
        self.todo = todo
        self.vcs = vcs
TraceFragmentBundle
It’s interesting to be able to run code to use in specs
list[TraceBundleFragment] could be the return type of run_all_paths. Collected up by common entry or
@dataclass
class TraceFragmentBundle:
    """
    A bundle is a set of pathways (trace fragments) sharing a common start
    address. Not necessarily all ending at the same exit address.
    """
    start_addr: int   # all fragments start here
    ctx: pcode.BinaryContext
    args: list        # was a bare `args :` annotation (syntax error); or is args a mapping from initial ghost state? ghost vars?
    vcs: list         # list[VerificationCondition]
    rets: list        # ghost vars?
    @staticmethod
    def from_entry_exit(ctx: pcode.BinaryContext, entry_addr: int, exit_addr: int):
        spec = AsmSpec()
        spec.add_entry("entry", entry_addr, smt.BoolVal(True))
        spec.add_exit("exit", exit_addr, smt.BoolVal(True))
        # was: TraceFragmentBundle(ctx, run_all_paths(...)) -- arity did not
        # match the dataclass fields.
        return TraceFragmentBundle(entry_addr, ctx, [], run_all_paths(ctx, spec), [])
    def __call__(self, memstate: pcode.MemState) -> smt.BoolRef:
        """
        The bundle considered as a Function: join the per-path final states
        into an If-tree guarded by each path condition.
        """
        # But a function from ghost to ghost or from memstate to memstate? or memstate to ghost?
        acc = memstate.Const("undef", bits=self.ctx.bits)  # was: bare `ctx`
        for vc in self.vcs:  # was: bare `vcs`
            acc = smt.If(vc.path_cond, vc.memstate, acc)  # was: result discarded
        return acc
compile aexpr
type aexpr = I64 i64 | Add aexpr aexpr eval(compile(e)) ==
Or even a model of some kind of hack
virtual machine
I kind of did the concrete version
But the bytecode interpreter version is interesting.
memset
interpreted memset
Switch on type is also interesting. Interpreter
Is this overwrought compared to what we have to do though?
| type MemLang = SetList list (addr *value) | SetMany int addr value (* iterate the construction *) |
do finite unroll
Compare to my high level versions? Output of compiler oct 7 demo
get riscv32 toolchain in ci
get small snippets
%%file /tmp/mymemset.s
# kd_entry mymemset "true"
# a0 points at a struct of three words: {dst, value, length}.
.globl mymemset
mymemset:
lw t1, 0(a0) # load dst ptr
lw t2, 4(a0) # load value
lw t3, 8(a0) # load length
sw t2, 0(t1) # store value (single store; length in t3 is loaded but unused)
ret
Overwriting /tmp/mymemset.s
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.gcc --run "riscv32-none-elf-as /tmp/mymemset.s -o /tmp/mymemset.o && riscv32-none-elf-objdump -d /tmp/mymemset.o"
/tmp/mymemset.o: file format elf32-littleriscv
Disassembly of section .text:
00000000 <mymemset>:
0: 00052303 lw t1,0(a0)
4: 00452383 lw t2,4(a0)
8: 00852e03 lw t3,8(a0)
c: 00732023 sw t2,0(t1)
10: 00008067 ret
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
# Spec: after the single store, ram32[dst] holds value (dst/value captured as
# ghost variables at entry).
spec = asmspec.AsmSpec()
ctx = pcode.BinaryContext("/tmp/mymemset.o", langid="RISCV:LE:32:default")  # was `ctx = ctx = ...` (typo)
startaddr = ctx.loader.find_symbol("mymemset").rebased_addr
# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
t0 = ctx._subst_decls['t0']
a0 = ctx._subst_decls['a0']
spec.add_entry("mymemset", startaddr, smt.BoolVal(True))
# Ghost-assign dst/value from the struct a0 points at, at entry time.
spec.add_assign("mymemset", startaddr, "dst", ram32[a0])
spec.add_assign("mymemset", startaddr, "value", ram32[a0 + 4])
dst, value = smt.BitVecs('dst value', 32)
# Exit at startaddr+16 (the ret): the store landed at *dst.
spec.add_exit("myexit", startaddr + 16, ram32[dst] == value)
vcs = asmspec.run_all_paths(ctx, spec)
for vc in vcs:
    #print(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth pursuing?
    vc.verify(ctx)
conmsts
don
Executing SpecStmt: Assign(label='mymemset', addr=4194304, name='dst', expr=ram32[a0])
conmsts
don
Executing SpecStmt: Assign(label='mymemset', addr=4194304, name='value', expr=ram32[a0 + 4])
conmsts
don
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 0) PCODE IMARK ram[400000:4]
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 1) PCODE unique[580:4] = 0x0
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 3) PCODE t1 = *[ram]unique[3080:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 0) PCODE IMARK ram[400004:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 1) PCODE unique[580:4] = 0x4
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 0) PCODE IMARK ram[400008:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 1) PCODE unique[580:4] = 0x8
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 3) PCODE t3 = *[ram]unique[3080:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 0) PCODE IMARK ram[40000c:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 1) PCODE unique[600:4] = 0x0
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 2) PCODE unique[4180:4] = t1 + unique[600:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 3) PCODE *[ram]unique[4180:4] = t2
Executing SpecStmt: Exit(myexit, 0x400010, ram32[dst] == value)
conmsts
don
%%file /tmp/mymemcpy.c
#include <stddef.h>
/* Copy n ints from src to dst. At -O1 (objdump below) the loop compiles to a
 * pointer walk bounded by a pointer-equality test (bne a5,a1). */
void mymemcpy(int *dst, const int *src, size_t n) {
for (size_t i = 0; i < n; i++) {
dst[i] = src[i];
}
return;
}
Writing /tmp/mymemcpy.c
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.gcc --run "riscv32-none-elf-gcc -O1 -c /tmp/mymemcpy.c -o /tmp/mymemcpy.o && riscv32-none-elf-objdump -d /tmp/mymemcpy.o"
/tmp/mymemcpy.o: file format elf32-littleriscv
Disassembly of section .text:
00000000 <mymemcpy>:
0: ca11 beqz a2,14 <.L1>
2: 87ae mv a5,a1
4: 060a slli a2,a2,0x2
6: 95b2 add a1,a1,a2
00000008 <.L3>:
8: 4398 lw a4,0(a5)
a: c118 sw a4,0(a0)
c: 0791 addi a5,a5,4
e: 0511 addi a0,a0,4
10: feb79ce3 bne a5,a1,8 <.L3>
00000014 <.L1>:
14: 8082 ret
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
spec = asmspec.AsmSpec()
ctx = pcode.BinaryContext("/tmp/mymemcpy.o", langid="RISCV:LE:32:default")  # was `ctx = ctx = ...` (typo)
startaddr = ctx.loader.find_symbol("mymemcpy").rebased_addr
# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
dstptr = ctx._subst_decls['a0']
srcptr = ctx._subst_decls['a1']
n = ctx._subst_decls['a2']
# "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)))"
# Restrict to the n == 0 path for now (the early-exit beqz branch).
spec.add_entry("mymemcpy", startaddr, smt.And(n == 0))
# kd_cut mycut "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)) (bvult RAX RDX)
# (forall ((offset (_ BitVec 64))) (=> (and (bvuge offset (_ bv0 64)) (bvult offset RAX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64))))
# (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))))"
#spec.add_cut("loopstart", startaddr + 8, smt.And(True))
# kd_exit myexit "(=> (and (bvuge offset (_ bv0 64)) (bvult offset RDX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64))))
# (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))"
# The ret (.L1) is at byte offset 0x14; was `startaddr + 14` (decimal), which
# matches no instruction, so execution fell through to the unimplemented RETURN
# op (see the traceback below). Label was also "mymemset_exit" (copy-paste).
spec.add_exit("mymemcpy_exit", startaddr + 0x14, smt.And(True))
vcs = asmspec.run_all_paths(ctx, spec)
for vc in vcs:
    #print(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth pursuing?
    vc.verify(ctx)
conmsts
don
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 0) PCODE IMARK ram[400000:2]
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 1) PCODE unique[10800:1] = a2 == 0x0
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 2) PCODE if (unique[10800:1]) goto ram[400014:4]
Executing 0x400014/2: ret at (4194324, 0) PCODE IMARK ram[400014:2]
Executing 0x400014/2: ret at (4194324, 1) PCODE return ra
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Cell In[9], line 27
16 # kd_cut mycut "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)) (bvult RAX RDX)
17 # (forall ((offset (_ BitVec 64))) (=> (and (bvuge offset (_ bv0 64)) (bvult offset RAX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64))))
18 # (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))))"
(...) 23 # kd_exit myexit "(=> (and (bvuge offset (_ bv0 64)) (bvult offset RDX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64))))
24 # (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))"
25 spec.add_exit("mymemset_exit", startaddr + 14, smt.And(True))
---> 27 vcs = asmspec.run_all_paths(ctx, spec)
28 for vc in vcs:
29 #print(vc)
30 # ok ctx.substitute is very slow because gathering constants is slow. Worth perusing?
31 vc.verify(ctx)
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/asmspec.py:458, in run_all_paths(ctx, spec, mem, verbose)
456 vcs.extend(new_vcs)
457 if tracestate is not None:
--> 458 todo.extend(execute_insn(tracestate, ctx, verbose=verbose))
459 return vcs
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/asmspec.py:411, in execute_insn(tracestate, ctx, verbose)
402 if pc != 0:
403 raise Exception(f"Unexpected program counter {pc} at address {hex(addr)}")
404 return [
405 TraceState(
406 start=tracestate.start,
407 trace=tracestate.trace + [state0.pc[0]],
408 state=state,
409 ghost_env=tracestate.ghost_env,
410 )
--> 411 for state in ctx.sym_execute(
412 state0.memstate,
413 addr,
414 path_cond=state0.path_cond,
415 max_insns=1,
416 verbose=verbose,
417 )
418 ]
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:516, in BinaryContext.sym_execute(self, memstate, addr, path_cond, max_insns, breakpoints, verbose)
512 if verbose:
513 print(
514 f"Executing {pretty_insn(self.disassemble(pc[0]))} at {pc} PCODE {pretty_op(self.translate(pc[0])[pc[1]])}"
515 )
--> 516 memstate1, pc1 = self.execute1(memstate, pc)
517 if isinstance(
518 pc1[0], smt.ExprRef
519 ): # PC has become symbolic, requiring branching
520 assert isinstance(pc1[1], smt.ExprRef)
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:425, in BinaryContext.execute1(self, memstate, pc)
423 addr = pc[0]
424 op = self.translate(addr)[pc[1]]
--> 425 return self.executeCurrentOp(op, memstate, pc)
File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:390, in BinaryContext.executeCurrentOp(self, op, memstate, pc)
388 return executeBinary(op, memstate), self.fallthruOp(pc)
389 else:
--> 390 raise NotImplementedError("Opcode not implemented: ", op.opcode)
NotImplementedError: ('Opcode not implemented: ', OpCode.RETURN)
Track better which trace fragment we’re on. Trace bundles
ctx._subst_decls['t0']
t0
%%file /tmp/tag.c
enum Op {Inc1, Inc2, Inc3};

/* Apply operation `op` to `x`: Inc1/Inc2/Inc3 add 1/2/3 respectively.
 * Any other tag value yields -1. */
int doop(enum Op op, int x) {
    if (op == Inc1)
        return x + 1;
    if (op == Inc2)
        return x + 2;
    if (op == Inc3)
        return x + 3;
    return -1;
}
Overwriting /tmp/tag.c
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.{gcc,binutils} --run "riscv32-none-elf-gcc -O2 -S /tmp/tag.c -o /tmp/tag.s && cat /tmp/tag.s"
[35;1mwarning:[0m [31;1merror:[0m unable to [35;1mdownload[0m '[35;1mhttps://cache.nixos.org/nix-cache-info[0m': [35;1mTimeout was reached[0m ([35;1m28[0m) [35;1mResolving timed out after 15000 milliseconds[0m; retrying in 309 ms
[35;1mwarning:[0m [31;1merror:[0m unable to [35;1mdownload[0m '[35;1mhttps://cache.nixos.org/nix-cache-info[0m': [35;1mTimeout was reached[0m ([35;1m28[0m) [35;1mResolving timed out after 15000 milliseconds[0m; retrying in 685 ms
[35;1mwarning:[0m [31;1merror:[0m unable to [35;1mdownload[0m '[35;1mhttps://cache.nixos.org/nix-cache-info[0m': [35;1mTimeout was reached[0m ([35;1m28[0m) [35;1mResolving timed out after 15000 milliseconds[0m; retrying in 1074 ms
[31;1merror:[0m interrupted by the user
^C
! ! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-gcc -W -c -S -o /tmp/tag.s -O1 /tmp/tag.c
! cat /tmp/tag.s
.file "tag.c"
.option nopic
.attribute arch, "rv32i2p0_m2p0_c2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl doop
.type doop, @function
doop:
mv a5,a0
li a4,1
beq a0,a4,.L2
li a4,2
addi a0,a1,3
beq a5,a4,.L1
li a0,-1
beq a5,zero,.L6
.L1:
ret
.L6:
addi a0,a1,1
ret
.L2:
addi a0,a1,2
j .L1
.size doop, .-doop
.ident "GCC: (crosstool-NG 1.26.0_rc1) 10.2.0"
%%file /tmp/memset.c
#include <stddef.h>
/* Color flags indexed by config.ind: 1, 2, 4. */
const int myflags[] = {1 << 0, 1 << 1, 1 << 2};
typedef struct {
size_t ind; /* index into myflags -- no bounds check below */
int *dst; /* destination buffer */
size_t n; /* number of ints to write */
} config;
/* Fill cfg->dst[0..cfg->n) with myflags[cfg->ind]. */
void mymemset(config *cfg) {
int color = myflags[cfg->ind];
int *dst = cfg->dst;
for(size_t i = 0; i < cfg->n; i++)
dst[i] = color;
}
Overwriting /tmp/memset.c
! gcc -W -c -S -o /tmp/memset.s -O1 /tmp/memset.c
! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-gcc -W -c -S -o /tmp/memset.s -O1 /tmp/memset.c
! cat /tmp/memset.s
.file "memset.c"
.option nopic
.attribute arch, "rv32i2p0_m2p0_c2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl mymemset
.type mymemset, @function
mymemset:
lw a5,0(a0)
slli a4,a5,2
lui a5,%hi(.LANCHOR0)
addi a5,a5,%lo(.LANCHOR0)
add a5,a5,a4
lw a2,0(a5)
lw a5,4(a0)
lw a4,8(a0)
beq a4,zero,.L1
li a4,0
.L3:
sw a2,0(a5)
addi a4,a4,1
addi a5,a5,4
lw a3,8(a0)
bgtu a3,a4,.L3
.L1:
ret
.size mymemset, .-mymemset
.globl myflags
.section .rodata
.align 2
.set .LANCHOR0,. + 0
.type myflags, @object
.size myflags, 12
myflags:
.word 1
.word 2
.word 4
.ident "GCC: (crosstool-NG 1.26.0_rc1) 10.2.0"
%%file /tmp/mov42.s
.include "/tmp/knuckle.s"
.text
.globl myfunc
# Entry spec: trivially-true precondition.
kd_entry myfunc, "(= true true)"
addi sp, sp, -4 # make room on the stack
li t0, 42 # t0 ← 42
sw t0, 0(sp) # [sp] = 42
# Exit spec: the word at sp is 42.
kd_exit myfunc_end, "(= (select ram32 sp) (_ bv42 32))"
ret
Overwriting /tmp/mov42.s
! python3 -m kdrag.contrib.pcode --langid="RISCV:LE:32:default" --asm /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as /tmp/mov42.s
Processing /tmp/mov42.s with language ID RISCV:LE:32:default using assembler /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as
Constructing Trace Fragments...
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 0) PCODE IMARK ram[400000:4]
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 1) PCODE unique[580:4] = 0xfffffffc
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 2) PCODE sp = sp + unique[580:4]
Executing 0x400004/4: li t0,0x2a at (4194308, 0) PCODE IMARK ram[400004:4]
Executing 0x400004/4: li t0,0x2a at (4194308, 1) PCODE unique[580:4] = 0x2a
Executing 0x400004/4: li t0,0x2a at (4194308, 2) PCODE t0 = unique[580:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 0) PCODE IMARK ram[400008:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 1) PCODE unique[600:4] = 0x0
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 2) PCODE unique[4180:4] = sp + unique[600:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 3) PCODE *[ram]unique[4180:4] = t0
Executing SpecStmt: Exit(myfunc_end, 0x40000c, ram32[sp] == 42)
Checking verification conditions...
[✅] Exit(myfunc_end, 0x40000c, ram32[sp] == 42)
✅✅✅✅ All 1 verification conditions passed! ✅✅✅✅
ghost call
Figuring out how to access members of structs, offsets etc. Shouldn’t change state, can read it. Changed real state is thrown away (stored as var?). Changed ghost state is not. How to set up the frame? More like a subroutine, since we don’t set up a stack (?). In ghost code, read from named coroutine params. Or give fresh ones. Know the ABI somehow?
struct biz
get param(&biz) {
return biz->a->b->c;
}
My dishonesty about ram32 ram64 rax bites now?
(RAX reg) (RDI reg)
%rdi == (select32 reg RDI)
(store32 reg RDI 0x42)
kd_reify "memstate_name" # freeze current state into ghost.
kd_assign "memstate" "(store RAX )"
kd_ghost_call “res_memstate” “myfun” “ “
kd_assign “foo” “res_memstate”
read write
I should unpack the state datatype. No reason to bring datatypes into it. Could use store_view instead of explicit write field? Jankier
read and write state variables.
x = {"foo" : 3}
{**x, "foo": 4}
{'foo': 4}
%%file /tmp/write_simple.s
.option norvc
.text
.globl _start
# Store the constant 42 to absolute address 1000, then spin forever.
_start:
li t0, 42 # load immediate 42 into t0
li t1, 1000 # load address 1000 into t1
sw t0, 0(t1) # store t0 at memory address
j _start # infinite loop
Overwriting /tmp/write_simple.s
! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as -o /tmp/write_simple.o /tmp/write_simple.s && objdump -d /tmp/write_simple.o
/tmp/write_simple.o: file format elf32-little
objdump: can't disassemble for architecture UNKNOWN!
import kdrag.smt as smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
spec = asmspec.AsmSpec()
# was: `ctx = ctx = pcode.BinaryContext("/tmp/write_simple.s", ...)` -- double
# assignment typo, and every other cell passes the assembled object file, not
# the .s source (NOTE(review): confirm BinaryContext cannot ingest raw asm).
ctx = pcode.BinaryContext("/tmp/write_simple.o", langid="RISCV:LE:32:default")
startaddr = ctx.loader.find_symbol("_start").rebased_addr
exitaddr = startaddr + 16
verilog / sail
Pcode is not that trustworthy. Vet against other semantics. (And why not just use those semantics instead ?)
https://docs.pydrofoil.org/en/latest/scripting-api.html
rpython https://mssun.github.io/rpython-by-example/ So rpython itself is a bit like cython? But not exporting python libs. Using rpython on its own like this is perhaps odd
smtlib interpreter in rpython? Lean expr interpreter?
! nix build github:rpypkgs/rpypkgs#pydrofoil-riscv
[K34;1m1[0m/[32;1m18[0m/19 built, [32;1m159[0m copied ([32;1m1102.1[0m/1102.7 MiB), [32;1m270.7[0m MiB DL] building [1mpydrofoil[0m[Km[K[0m[K
! nix run github:rpypkgs/rpypkgs#pydrofoil-riscv -- --version
[Kpydrofoil-riscv 0.0.1-alpha0 (Sail model version: 0.5-273-g87f8bb34d6)
%%file /tmp/hello.py
# RPython uses Python 2 syntax, hence the print *statement* below.
def entry_point(argv):
print "Hello, World!"
return 0
# The target function is the main function of a RPython program. It takes
# command line arguments as inputs and returns an entry point function.
def target(*args):
return entry_point
python properties
Inline asm Reg names
Also memory locations?
import z3
# %rdi is a perfectly legal SMT-LIB symbol, so register-style names can be used
# verbatim in specs -- no renaming layer needed.
z3.parse_smt2_string(r"""
(declare-const %rdi (_ BitVec 64))
(assert (= %rdi (_ bv0 64)))
""")
[%rdi = 0]
%%file /tmp/hello.c
#include <stddef.h>
/* Inline-asm spec annotations: emitted verbatim into the .s for the verifier. */
__asm__(".include \"knuckle.S\"");
__asm__("kd_prelude \"(declare-const x (_ BitVec 64))\"");
int foo(int x){
__asm__("kd_entry myentry \"true\"");
__asm__("kd_assign x %0" : : "r"(x));
int y = x + 42;
/* was: "(assert (= %0 (bvadd x (_ bv42 64)))" -- unbalanced parens, and the
 * other kd_exit specs in this file use a bare formula without (assert ...).
 * NOTE(review): label reuses "myentry"; a distinct exit label seems intended. */
__asm__("kd_exit myentry \"(= %0 (bvadd x (_ bv42 64)))\"" : : "r"(y));
return y;
}
Overwriting /tmp/hello.c
! gcc -O1 -S -c /tmp/hello.c -o /tmp/hello.S && cat /tmp/hello.S
.file "hello.c"
.text
.globl foo
.type foo, @function
foo:
.LFB0:
.cfi_startproc
endbr64
#APP
# 3 "/tmp/hello.c" 1
kd_prelude "(assert (= %edi 42))"
# 0 "" 2
#NO_APP
ret
.cfi_endproc
.LFE0:
.size foo, .-foo
.ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:
CN
https://github.com/rems-project/cn https://rems-project.github.io/cn-tutorial/getting-started/tutorials/welcome/ https://www.galois.com/articles/escaping-isla-nublar-coming-around-to-llms-for-formal-methods
cfg reconstruction
Takes VCs
Any unreached specstmt, warn reachable_from
Join specstmts
Print CFG as ascii? Output it?
reachable = set().union(*[vc.trace for vc in vcs])
Hyperproperties / Relational
Synchro points?
entry exit labels pairs. kd_sync
kd_rel entry exit property kd_rel entry_exit property
kd_channels monotonic channels?
Finely interleaved product programs
A synchronization / scheduling program? ram_2
Multiram executor? Channels
Same program twice is easy to synchronize. High low variables
havoc statement
kd_havoc_ghost ghost_var
kd_havoc_reg reg
kd_havoc_mem memexpr
interrupt havocs touched stuff every cycle. So we do want to enable havor
https://docs.certora.com/en/latest/docs/cvl/statements.html
kd_join collect up all trace fragments with common start and ends. Maybe I should anyhow?
Useful for the using assembly as spec idea.
trace fragment bundle. Yikes
thread_id as a way of knowing which program you’re in?
Unicorn
Running stuff in pcode2c was clutch. What about just a nice running environment that ingests asmspec. Unicorn, gdb, qemu. Something.
chat missing vex optimizations via z3
p4-=
Concerns - user to
New Sleigh
https://github.com/nneonneo/ghidra-wasm-plugin/tree/master
https://spinsel.dev/2020/06/17/ghidra-brainfuck-processor-1.html
3 roads to go down: https://github.com/rbran/sleigh-rs/tree/main try to reuse this sleigh parser of nebulous quality, try to hack onto ghidra’s sleigh parser https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Features/Decompiler/src/decompile/cpp/slghparse.y , or try to rebuild the grammar in a custom parser (edited) Ok actually a fourth is to consume the compiled .sla files. I don’t even really know how their format works Why? Well, because using ghidra off the shelf only lets you turn concrete bits into pcode (edited) But the Sleigh format is kind of more like the analog of sail. You could use it to compile or assemble forward (which ghidra does use it to generate assemblers but I haven’t seen that rewrapped to be usable anywhere except in the gui) https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java
https://github.com/CTSRD-CHERI/ghidra cheri modelled in ghidra? https://github.com/Cherified/cheriot-abstract-spec https://github.com/Cherified/cheriot-program-logic https://github.com/CTSRD-CHERI/ghidra/blob/morello/main/Ghidra/Processors/AARCH64/data/languages/AARCH64_MorelloInstructions.sinc
Hmm generate sleigh from yosys akin to how C backend works maybe?
! mkdir /tmp/Hack || mkdir /tmp/Hack/data
mkdir: cannot create directory ‘/tmp/Hack’: File exists
mkdir: cannot create directory ‘/tmp/Hack/data’: File exists
%%file /tmp/Hack/Module.manifest
<module>
<name>Hack</name>
<description>nand2tetris Hack CPU</description>
<version>1.0</version>
</module>
Overwriting /tmp/Hack/Module.manifest
%%file /tmp/Hack/data/hack.pspec
<?xml version="1.0" encoding="UTF-8"?>
<processor_spec>
<programcounter register="PC"/>
<default_symbols>
<function name="entry" entry="true" address="0x0000"/>
</default_symbols>
</processor_spec>
Writing /tmp/Hack/data/hack.pspec
%%file /tmp/Hack/data/hack.ldefs
<?xml version="1.0" encoding="UTF-8"?>
<language_definitions>
<language processor="Hack" endian="big" size="16" variant="default"
version="1" slafile="hack.sla" processorspec="hack.pspec"
description="Hack (nand2tetris) 16-bit"/>
<compiler name="default" spec="hack.cspec" id="default"/>
</language_definitions>
Writing /tmp/Hack/data/hack.ldefs
%%file /tmp/Hack/data/hack.slaspec
# Minimal SLEIGH model of the nand2tetris Hack CPU (16-bit, big-endian).
define endian=big;
define alignment=2;
define space ram type=ram_space size=2 default;
define space register type=register_space size=1;
define register offset=0x00 size=2 [PC D A];
# One 16-bit instruction token; bit 15 selects A- vs C-instruction.
define token instr(16)
b15 = (15,15) # 0 = A-instr, 1 = C-instr
cxx = (13,14) # the conventional "11" in Hack C-instr
a = (12,12)
c1 = (11,11)
c2 = (10,10)
c3 = (9,9)
c4 = (8,8)
c5 = (7,7)
c6 = (6,6)
d1 = (5,5) # A dest
d2 = (4,4) # D dest
d3 = (3,3) # M dest
j1 = (2,2)
j2 = (1,1)
j3 = (0,0)
aimm = (0,14) # 15-bit literal for A-instruction
;
#OP1: "#"aimm is b15=0; aimm { tmp:2 = aimm; export tmp; }
# A-instruction: load the 15-bit immediate into A and advance PC one word.
:a_inst aimm is b15=0 & aimm {
A = aimm; # zero-extend 15-bit literal
PC = PC + 2;
}
Overwriting /tmp/Hack/data/hack.slaspec
! /home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/support/sleigh /tmp/Hack/data/hack.slaspec
openjdk version "21.0.8" 2025-07-15
OpenJDK Runtime Environment (build 21.0.8+9-Ubuntu-0ubuntu124.04.1)
OpenJDK 64-Bit Server VM (build 21.0.8+9-Ubuntu-0ubuntu124.04.1, mixed mode)
INFO Using log config file: jar:file:/home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)
INFO Using log file: /home/philip/.config/ghidra/ghidra_11.4.2_PUBLIC/application.log (LoggingInitialization)
# Load the freshly compiled Hack language via pypcode and lift one instruction.
import kdrag.contrib.pcode as pcode
import pypcode
lang = pypcode.Arch("Hack", "/tmp/Hack/data/hack.ldefs").languages[0]  # single "default" variant
ctx = pypcode.Context(lang)
# Only one byte (0x1f) is passed but the decoder reports 0x1f00 below --
# presumably it zero-pads to the 2-byte instruction width; verify.
tx = ctx.disassemble(bytes.fromhex("1f"))
print(tx.instructions[0].mnem)
print(pcode.pretty_insn(tx.instructions[0]))
# translate() produces the pcode ops rather than just the disassembly text
tx = ctx.translate(bytes.fromhex("1f"))
print(tx.ops)
pcode.pretty_op(tx.ops[1])
tx.ops[1].inputs[0].size  # byte width of the first input varnode
a_inst
0x0/2: a_inst 0x1f00
[IMARK ram[0:2], A = 0x1f00, PC = PC + 0x2]
2
wasm
https://github.com/nneonneo/ghidra-wasm-plugin/releases/tag/v2.3.2
! /home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/support/sleigh -n /home/philip/Downloads/ghidra_11.4.1_PUBLIC_20250808_ghidra-wasm-plugin/ghidra-wasm-plugin/data/languages/WebAssembly.slaspec
openjdk version "21.0.8" 2025-07-15
OpenJDK Runtime Environment (build 21.0.8+9-Ubuntu-0ubuntu124.04.1)
OpenJDK 64-Bit Server VM (build 21.0.8+9-Ubuntu-0ubuntu124.04.1, mixed mode)
INFO Using log config file: jar:file:/home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)
INFO Using log file: /home/philip/.config/ghidra/ghidra_11.4.2_PUBLIC/application.log (LoggingInitialization)
WARN NOP detected at Leb128_opc2_1b.sinc:1 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:2 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:3 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:4 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:5 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:1 (SleighCompile)
WARN NOP detected at Leb128_opc2_1b.sinc:2 (SleighCompile) ...
WARN Leb128_opc2_2b.sinc:1: Unreferenced table: 'opc2_238' (SleighCompile)
# Point pypcode at the WebAssembly sleigh shipped with the ghidra-wasm-plugin release.
folder = "/home/philip/Downloads/ghidra_11.4.1_PUBLIC_20250808_ghidra-wasm-plugin/ghidra-wasm-plugin/data/languages"
import kdrag.contrib.pcode as pcode
import pypcode
lang = pypcode.Arch("WebAssembly", folder + "/WebAssembly.ldefs").languages[0]
ctx = pypcode.Context(lang)
tx = ctx.disassemble(bytes.fromhex("6a"))  # 0x6a is the one-byte i32.add opcode
print(tx.instructions[0].mnem)
print(pcode.pretty_insn(tx.instructions[0]))
tx = ctx.translate(bytes.fromhex("6a"))
# NOTE(review): the high register offsets in the output (0x4fffffe0 etc.)
# presumably model the wasm operand stack as register locations -- confirm.
print(tx.ops)
pcode.pretty_op(tx.ops[1])
tx.ops[1].inputs[0].size  # byte width of the first input varnode
i32.add
0x0/1: i32.add
[IMARK ram[0:1], register[4fffffe0:4] = register[4fffffe0:4] + register[4ffffff0:4]]
4
pypcode.Arch("WebAssembly", folder + "/WebAssembly.ldefs").languages
[<pypcode.ArchLanguage at 0x7f075820a600>]
# Build a tiny wasm module in memory and try to disassemble it.
# Feeding the whole module (header + sections) to the instruction-level
# disassembler fails, as the traceback below shows.
from wasmtime import wat2wasm
prog = wat2wasm("""
(module
(func (export "add") (param i32 i32) (result i32)
local.get 0
local.get 1
i32.add))
""")
tx = ctx.disassemble(bytes(prog))
tx.instructions
# Commented-out brute-force scan for an offset where decoding succeeds:
"""
for i in range(0x28):
try:
print(hex(i), ctx.disassemble(bytes(prog[i:])).instructions)
except Exception as e:
pass
"""
ctx.disassemble(bytes(prog[0x23:]))
---------------------------------------------------------------------------
BadDataError Traceback (most recent call last)
Cell In[2], line 19
11 tx.instructions
12 """
13 for i in range(0x28):
14 try:
(...) 17 pass
18 """
---> 19 ctx.disassemble(bytes(prog[0x23:]))
BadDataError: r0x00000000: Unable to resolve constructor
ctx.translate(bytes.fromhex("2000")).ops
---------------------------------------------------------------------------
BadDataError Traceback (most recent call last)
Cell In[9], line 1
----> 1 ctx.translate(bytes.fromhex("2000")).ops
BadDataError: r0x00000000: Unable to resolve constructor
%%file /tmp/sum_i32.wat
(module
(func (export "add") (param i32 i32) (result i32)
local.get 0
local.get 1
i32.add))
Writing /tmp/sum_i32.wat
! wat2wasm /tmp/sum_i32.wat -o /tmp/sum32.wasm
! wasm-objdump -d /tmp/sum32.wasm
sum32.wasm: file format wasm 0x1
Code Disassembly:
000022 func[0] <add>:
000023: 20 00 | local.get 0
000025: 20 01 | local.get 1
000027: 6a | i32.add
000028: 0b | end
sail
Give sail another shake. Comparing and contrasting it to sleigh is kind of interesting.
vec
https://en.algorithmica.org/hpc/simd/
%%file /tmp/sum.c
//#pragma GCC target("avx2")
// Benchmark from en.algorithmica.org: repeatedly sum a 100k-int array so the
// compiler's vectorization of the inner loop dominates the runtime.
// Note: built with g++, which compiles this .c file as C++ (a const int array
// bound at file scope is not valid C).
const int n = 1e5;
// zero-initialized globals (see the .bss/.zero directives in the asm below)
int a[n], s = 0;
int main() {
for (int t = 0; t < 100000; t++)
for (int i = 0; i < n; i++)
s += a[i];
return 0;
}
Overwriting /tmp/sum.c
! g++ -O3 /tmp/sum.c -o /tmp/sum && time /tmp/sum
real 0m0.526s
user 0m0.522s
sys 0m0.001s
! g++ -march=native -O3 /tmp/sum.c -o /tmp/sum && time /tmp/sum
real 0m0.137s
user 0m0.136s
sys 0m0.000s
! python3 -m pypcode
usage: pypcode [-h] [-l] [-o OFFSET] [-s LENGTH] [-i MAX_INSTRUCTIONS] [-b]
langid binary [base]
pypcode: error: the following arguments are required: langid, binary
! g++ -O3 /tmp/sum.c -o /tmp/sum.S -S -masm=intel && head -40 /tmp/sum.S
.file "sum.c"
.intel_syntax noprefix
.text
.section .text.startup,"ax",@progbits
.p2align 4
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
endbr64
lea rdx, a[rip]
movd xmm0, DWORD PTR s[rip]
lea rcx, 400000[rdx]
.L2:
movdqa xmm1, XMMWORD PTR [rdx]
mov eax, 100000
.p2align 4,,10
.p2align 3
.L3:
paddd xmm0, xmm1
paddd xmm0, xmm1
sub eax, 2
jne .L3
add rdx, 16
cmp rcx, rdx
jne .L2
movdqa xmm1, xmm0
xor eax, eax
psrldq xmm1, 8
paddd xmm0, xmm1
movdqa xmm1, xmm0
psrldq xmm1, 4
paddd xmm0, xmm1
movd DWORD PTR s[rip], xmm0
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl s
! g++ -march=native -O3 /tmp/sum.c -o /tmp/sum.S -S -masm=intel && head -60 /tmp/sum.S
.file "sum.c"
.intel_syntax noprefix
.text
.section .text.startup,"ax",@progbits
.p2align 4
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
endbr64
lea rdx, a[rip]
vmovd xmm0, DWORD PTR s[rip]
lea rcx, 400000[rdx]
.L2:
vmovdqa32 zmm1, ZMMWORD PTR [rdx]
mov eax, 100000
.p2align 4
.p2align 3
.L3:
vpaddd zmm0, zmm0, zmm1
sub eax, 2
vpaddd zmm0, zmm0, zmm1
jne .L3
add rdx, 64
cmp rcx, rdx
jne .L2
vmovdqa ymm1, ymm0
vextracti32x8 ymm0, zmm0, 0x1
xor eax, eax
vpaddd ymm1, ymm1, ymm0
vmovdqa xmm0, xmm1
vextracti128 xmm1, ymm1, 0x1
vpaddd xmm0, xmm0, xmm1
vpsrldq xmm1, xmm0, 8
vpaddd xmm0, xmm0, xmm1
vpsrldq xmm1, xmm0, 4
vpaddd xmm0, xmm0, xmm1
vmovd DWORD PTR s[rip], xmm0
vzeroupper
ret
.cfi_endproc
.LFE0:
.size main, .-main
.globl s
.bss
.align 4
.type s, @object
.size s, 4
s:
.zero 4
.globl a
.align 64
.type a, @object
.size a, 400000
a:
.zero 400000
.ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
%%file /tmp/support.cpp
#include <iostream>

// Report which x86 SIMD extensions the running CPU supports, one value per
// line. A nonzero value means the feature is present (the builtin returns a
// priority/bitmask value, e.g. 8, 16, 512... in the run recorded below).
// __builtin_cpu_supports requires a string literal argument, so each feature
// check is spelled out rather than looped over.
int main() {
    std::cout << __builtin_cpu_supports("sse") << std::endl;
    std::cout << __builtin_cpu_supports("sse2") << std::endl;
    std::cout << __builtin_cpu_supports("avx") << std::endl;
    std::cout << __builtin_cpu_supports("avx2") << std::endl;
    std::cout << __builtin_cpu_supports("avx512f") << std::endl;
    return 0;
}
Writing /tmp/support.cpp
! g++ -O3 /tmp/support.cpp -o /tmp/support && /tmp/support
8
16
512
1024
32768
! cat /proc/cpuinfo