I’ve been working on an interactive proof experience in python for assembly code.

In previous posts I talked about the fully automatic tool, which still works. https://www.philipzucker.com/asm_verify3/

But for debugging failing specs or for slightly more involved properties, a semi-interactive mode is desirable. I’m using Knuckledragger https://github.com/philzook58/knuckledragger , my z3py based proof assistant as the base system. Step by painful step the pcode based verification system is becoming more usable. This whole post is developed as a Jupyter notebook, which gives a nice interactive exploratory repl for building and debugging specs and proofs.

I use the pypcode package which offers lifters from binary to Ghidra Pcode. I have a symbolic executor that given a specification will discover the finite trace fragments that make up the CFG. (If you give a bad spec or there isn’t a finite CFG, the symbolic executor won’t terminate.). Every possibly infinite trace through the program is composed of these finite fragments. The executor emits verification conditions that are roughly of the form precondition /\ path_condition => postcondition / assert. A cute formulation here is that invariants are expressed to the system as “cuts” of the CFG. If you cut the CFG enough to make it a DAG, the length and number of trace fragments is finite. Invariants are a self consistent sufficient summaries of the reachable state at those cut positions.

The code is available here https://github.com/philzook58/knuckledragger/tree/main/src/kdrag/contrib/pcode

Demo

This is a chatgpt produced piece of riscv32 assembly to sum an array of integers.

%%file /tmp/sum_i32.s
    .text
    .globl sum_i32
    .align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
    mv      t0, a0          # t0 = arr (cursor)
    mv      t1, a1          # t1 = n   (remaining)
    li      a0, 0           # a0 = sum (return value)

    beqz    t1, .done       # if n == 0 -> return 0

.loop:
    lw      t2, 0(t0)       # t2 = *arr
    add     a0, a0, t2      # sum += *arr
    addi    t0, t0, 4       # arr++
    addi    t1, t1, -1      # n--
    bnez    t1, .loop       # keep going while n != 0

.done:
    ret
Overwriting /tmp/sum_i32.s

This is short main program to call the library function

%%file /tmp/test.c
#include <stdint.h>
#include <stdio.h>
extern int32_t sum_i32(const int32_t* arr, uint32_t n);
int main() {
    int32_t arr[] = {1, 2, 3, 4, 5};
    uint32_t n = 5;
    int32_t result = sum_i32(arr, n);
    printf("Sum: %d\n", result); // Expected output: Sum: 15
    return 0;
}
Writing /tmp/test.c

We can compile and link these together. nix is actually a decent way to get a working riscv32 toolchain. Go figure.

!nix-shell -p pkgsCross.riscv32.buildPackages.gcc --run "riscv32-unknown-linux-gnu-gcc -O2 /tmp/sum_i32.s /tmp/test.c  -o /tmp/sum32"

It works

! qemu-riscv32 /tmp/sum32
Sum: 15

A half functional spec

First I want to write a higher level function that gets the same data to use as a spec to compare against the assembly. I do this in z3py/knuckledragger. It is kind of a low level way of writing such a thing because I directly talk about a memory array. A higher level functional spec might involve a function connecting the memory array to a list or sequence and a function program over that.

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
kd.define("sum_i32", [mem, addr, n], 
    smt.If(n == 0,
        0,
        sum_i32(
            mem,
            addr + 4,
            n - 1
        ) + bv.select_concat(mem, addr, 4, le=True)
    )
)

sum_i32

Some sanity check evaluation using knuckledragger’s simplifier. Summing a zeroed out memory returns 0

zeromem = smt.Lambda([addr], smt.BitVecVal(0, 8))
kd.full_simp(sum_i32(zeromem, 0, 7))

0

An array of all ones should sum to n. The goofiness of the array expression is because the array is byte addressed but we are summing 32 bit integers.

kd.full_simp(sum_i32(smt.Lambda([addr], smt.If(addr % 4 == 0, smt.BitVecVal(1,8), 0)), 0, 7))

7

Bounded Checking

It is always a good idea to do bounded model checking first. It is much easier. It is also useful to make the loop invariant an assertion to see if we can find a place they fail. I can assure you that you don’t always have your invariants correct at first and it’s easier to debug the problem here. In unbounded verification, there is the second possible failure mode that the invariants are not strong enough to be self consistent.

ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)

N = smt.BitVec("N", 32)
Nlim = 5

# In initial state a0 = address, a1 = N size
dbg.add_entry('sum_i32', smt.And(a0 == addr, a1 == N, N <= Nlim, N >= 0))

# In final state return value in a0 equals sum_i32(ram, addr, N)
dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))

dbg.start()
dbg.run()
while dbg.vcs:
    print(dbg.vcs[-1])
    l = dbg.pop_lemma()
    for i in range(Nlim+1):
        l.unfold(sum_i32)
    l.unfold(*ctx.definitions, keep=True)
    l.auto(timeout=10000)
    l.qed()

Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Continuing execution at: 0x400612 trace_id [0] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Continuing execution at: 0x400614 trace_id [0] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Continuing execution at: 0x400616 trace_id [0] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Continuing execution at: 0x40061a trace_id [0, 1] num insns 4
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1] num insns 5
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1] num insns 6
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1] num insns 7
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1] num insns 8
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1] num insns 9
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1] num insns 10
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1] num insns 11
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1] num insns 12
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1] num insns 13
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1] num insns 14
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1] num insns 15
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1] num insns 16
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1] num insns 17
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1] num insns 18
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1, 1] num insns 19
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1, 1] num insns 20
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1, 1] num insns 21
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1, 1] num insns 22
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1, 1] num insns 23
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1, 1, 1, 1, 1] num insns 24
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Continuing execution at: 0x40061e trace_id [0, 1, 1, 1, 1, 1] num insns 25
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0, 1, 1, 1, 1, 1] num insns 26
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0, 1, 1, 1, 1, 1] num insns 27
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0, 1, 1, 1, 1, 1] num insns 28
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 1, 1] num insns 29
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 1, 0] num insns 24
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 1, 0] num insns 19
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 1, 0] num insns 14
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 1, 0] num insns 9
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 4
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a0 == addr, a1 == N, N <= 5, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))

Unbounded

A more thorough verification involves adding loop invariants.

An interesting thing I realized while doing this is that the form of my spec of sum_i32 is not ideal. Maybe I should have realized this earlier, but I only did once I had the verification conditions cleaned up enough. A very pleasant form is to have one function per block label. The closest form of functional programming to a block based form is to have one function per block, where the functions take in the live variables as arguments. https://www.cs.princeton.edu/~appel/papers/ssafun.pdf Writing your spec in this form will probably lead to you having the easiest most automated time.

While all of these proofs are automated completely by just injecting in the sum_i32 definition to z3, I needed the interactive capabilities to start doing manual rewrite proofs until I found out what was wrong. I had many bugs in my spec / missing invariants etc. I think this capability was essential.

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv

BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr,acc,res = smt.BitVecs('n addr acc res', 32)

sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
    smt.If(n == 0,
        acc,
        sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
        )
)

ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
ram = ctx.state_vars["ram"]
spec = asmspec.AsmSpec(ctx)
N = smt.BitVec("N", 32) #2

spec.add_entry('sum_i32', spec.find_label("sum_i32"), smt.And(a1 == N, addr == a0, res == sum_i32(ram, 0, addr, N), N >= 0, N <= 2))
end_addr = 4*N + addr
spec.add_cut(".loop", spec.find_label(".loop"), smt.And(res == sum_i32(ram, a0, t0, t1), 4*N + addr == 4*t1 + t0, t1 > 0, t1 <= N))

spec.add_exit(".done", spec.find_label(".done"), a0 == res)

pf = asmspec.AsmProofState(spec)

Unknown reloc 3 on RISCV:LE:32:RV32GC
Unknown reloc 1 on RISCV:LE:32:RV32GC
Unknown reloc 5 on RISCV:LE:32:RV32GC
Unknown reloc 10 on RISCV:LE:32:RV32GC
Unknown reloc 58 on RISCV:LE:32:RV32GC


Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N))
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N))

Here is interacting with a proof a bit.

We can see this results from the path coming from the entry point to the start of .loop

from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
    addr == a0,
    res == sum_i32(ram, 0, addr, N),
    N >= 0,
    N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N)))
  • The intros tactic brings the path condition (which includes the preconditions at entry) into the context.
  • split breaks up the And in the context
l = pf.lemma(0)
l.intros()
l.split(at=0)
[select32le(register(state0), &a1) == N,
 addr == select32le(register(state0), &a0),
 res == sum_i32(ram(state0), 0, addr, N),
 0 <= N,
 N <= 2,
 Not(select32le(register(state0), &a1) == 0)]
?|= And(res ==
    sum_i32(ram(state0),
            0,
            select32le(register(state0), &a0),
            select32le(register(state0), &a1)),
    4*N + addr ==
    4*select32le(register(state0), &a1) +
    select32le(register(state0), &a0),
    Not(select32le(register(state0), &a1) <= 0),
    select32le(register(state0), &a1) <= N)

from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
    addr == a0,
    res == sum_i32(ram, 0, addr, N),
    N >= 0,
    N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N)))

I can for example start to manually rewrite the goal using facts in the context to start cleaning it up anbd maybe see what is good or bad. I could also start using lemmas to reason about select32le etc.

l.rw(0)
l.rw(1,rev=True)
l.rw(2, rev=True)
[select32le(register(state0), &a1) == N,
 addr == select32le(register(state0), &a0),
 res == sum_i32(ram(state0), 0, addr, N),
 0 <= N,
 N <= 2,
 Not(select32le(register(state0), &a1) == 0)]
?|= And(res == res,
    4*N + addr == 4*N + addr,
    Not(N <= 0),
    N <= N)

from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
    addr == a0,
    res == sum_i32(ram, 0, addr, N),
    N >= 0,
    N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N)))

A nice thing I added recently is the “egraphs modulo theories” tactic, which will significantly reduce the goal. It’s a congruence closure tactic of sorts that performs simplification by egraph extraction.

l.emt()
[select32le(register(state0), &a1) == N,
 addr == select32le(register(state0), &a0),
 res == sum_i32(ram(state0), 0, addr, N),
 0 <= N,
 N <= 2,
 Not(select32le(register(state0), &a1) == 0)]
?|= N <= N

from VC: VC(Entry(sum_i32, 0x400610, And(a1 == N,
    addr == a0,
    res == sum_i32(ram, 0, addr, N),
    N >= 0,
    N <= 2)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N)))

But actually, all of this is unneccessary now that I have the spec set up right. z3 is perfectly capable of discharging this goal automatically. So let’s start over. The call to qed registers the proof object with the overarching AsmProofState

l = pf.lemma(0)
l.auto()
l.qed()

⊨Implies(And(select32le(register(state0), &a1) == N, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, N), 0 <= N, N <= 2, Not(select32le(register(state0), &a1) == 0)), And(res == sum_i32(ram(state0), 0, select32le(register(state0), &a0), select32le(register(state0), &a1)), 4N + addr == 4select32le(register(state0), &a1) + select32le(register(state0), &a0), Not(select32le(register(state0), &a1) <= 0), select32le(register(state0), &a1) <= N))

We need to discharge all of the verification conditions. Here is more of the same. It turns out we need unfolding of the definition of sum_i32, but once we make that avaialable, z3 can again discharge this

l = pf.lemma(1)
l.auto(by=sum_i32.defn)
l.qed()

⊨Implies(And(select32le(register(state0), &a1) == N, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, N), 0 <= N, N <= 2, select32le(register(state0), &a1) == 0), res == 0)

l = pf.lemma(2)
l.auto(by=sum_i32.defn)
l.qed()

⊨Implies(And(res == sum_i32(ram(state0), select32le(register(state0), &a0), select32le(register(state0), &t0), select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(select32le(register(state0), &t1) <= 0), select32le(register(state0), &t1) <= N, select32le(register(state0), &t1) == 1), select32le(register(state0), &a0) + select32le(ram(state0), select32le(register(state0), &t0)) == res)

l = pf.lemma(3)
l.auto(by=sum_i32.defn)
l.qed()

⊨Implies(And(res == sum_i32(ram(state0), select32le(register(state0), &a0), select32le(register(state0), &t0), select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(select32le(register(state0), &t1) <= 0), select32le(register(state0), &t1) <= N, Not(select32le(register(state0), &t1) == 1)), And(res == sum_i32(ram(state0), select32le(register(state0), &a0) + select32le(ram(state0), select32le(register(state0), &t0)), 4 + select32le(register(state0), &t0), 4294967295 + select32le(register(state0), &t1)), 4N + addr == 4select32le(register(state0), &t1) + select32le(register(state0), &t0), Not(4294967295 + select32le(register(state0), &t1) <= 0), 4294967295 + select32le(register(state0), &t1) <= N))

Finally calling qed on the top level object checks that we have gotten proofs on every trace fragment through the assembly program.

pf.qed()
AsmProof(spec=AsmSpec(ctx=<kdrag.contrib.pcode.BinaryContext object at 0x754af03fe210>, addrmap=defaultdict(<class 'list'>, {4195856: [Entry(sum_i32, 0x400610, And(a1 == N,
    addr == a0,
    res == sum_i32(ram, 0, addr, N),
    N >= 0,
    N <= 2))], 4195866: [Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    4*N + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= N))], 4195880: [Exit(.done, 0x400628, a0 == res)]})))

New Features

Folded Definitions

Ghidra’s uniform model is byte addressable memory for registers, ram, and temporaries. I model these using smtlib (Array (_ BitVec 64) (_ BitVec 8)). However, most operations work over 32bit or 64bit slices rather than 8bit slices. This requires every read or write to be expanded into byte-wise reads or writes. Basically the result is not pleasant for a human reader.

Knuckledragger offers a definition mechanism which uses z3’s built in substitute_funs to unfold them. By making auxiliarly definitions that select and concatenate, the result is much more readable, and faster to manipulate to boot, since the expression is smaller. Before truly blasting it into z3 at the final step, these definitions need to be unfolded. It sounds easy enough, but finding reasonable places for the folds and unfolds took a good couple days.

Register Names

Registers in pcode are held in a memory space called “register” which is not all that different from how “ram” is represented. This makes things uniform, but also means that the nicer things you might expect of register manipulations aren’t there by default if you treat “register” the same as “ram”. One such case is that each register is associated to a fairly meaningless offset in the “register” memory space. By making defined constants that are human readable that correspond to these offsets improves readability.

Cached Memory

Many symbolic executors don’t fully model memory as a symbolic array. A common pattern is to model memory as concrete addresses with symbolic values at those addresses. Reads and writes are concretized by an smt call if necessary to keep the array representation in this form.

It was important to me for the soundness / modeling level I aspire to to do a more complete model of memory that this. This is why I used the smt theory of arrays which allows symbolic addresses. However, this pollutes the resulting expressions and makes them less readable, as some very obvious data flow (obvious moves between temporary, registers, and fixed addresses in ram) bulks out the expressions with select and store.

I made a “cached memory” object in python that performs the stores more lazily. If the addresses are concrete, the stores are held in a python dict and only flushed into the z3 array when I detect or can’t be sure about aliasing. When and where the cache needs to be flushed is not too bad, but it is a bit finicky so I’m not thrilled on that account. I think down the line is is very possible to do these cache optimizations in a proof producing way, but I’m picking my battles on that for now.

I believe it is a promise of the ghidra model that unique and registers are never addressed in a non concrete / indirect way. This cleans up operations over these significantly.

https://github.com/philzook58/knuckledragger/blob/42fa6f125518f710a8e45ec34f3bacb97f2cc2cd/src/kdrag/contrib/pcode/__init__.py#L126


class CachedArray(NamedTuple):
    """
    CachedArray keeps some writes unapplied to the array.

    The invariant is that anything in the cache owns that data.
    You may replace something in the cache

    >>> r1 = _TestVarnode("register", 0, 2)
    >>> e1 = _TestVarnode("register", 0, 1)
    >>> mem = CachedArray(smt.Array("mem", BV[64], BV[8]), {}, {}, 64, register=True)
    >>> mem1 = mem.store(r1, smt.BitVecVal(2, 16))
    >>> mem2 = mem1.store(e1, smt.BitVecVal(1,8))
    >>> mem2.read(e1)
    1
    >>> mem2.read(r1)
    select16le(Store(store16le(mem, &Roff0_size2, 2),
                    &Roff0_size1,
                    1),
            &Roff0_size2)
    >>> mem2.store(r1, smt.BitVecVal(3,16)).read(r1)
    3
    """

    data: smt.ArrayRef
    cache: dict[pypcode.Varnode, smt.BitVecRef]
    owner: dict[int, pypcode.Varnode]
    bits: int
    register: int = False
    le: bool = True

    def store(self, vnode: pypcode.Varnode, value: smt.BitVecRef) -> "CachedArray":
        if vnode in self.cache:
            return self._replace(cache={**self.cache, vnode: value})
        else:
            data = self.data
            offset0 = vnode.offset
            cache = self.cache.copy()
            cache[vnode] = value
            owner = self.owner.copy()
            # flush out all overlapping vnodes to data
            for i in range(vnode.size):
                vnode1 = owner.get(offset0 + i)
                if vnode1 is not None and vnode1 in cache:
                    assert vnode1 != vnode
                    for j in range(vnode1.size):
                        del owner[vnode1.offset + j]
                    if self.register:
                        offset = smt.BitVec("&" + vnode1.getRegisterName(), self.bits)
                    else:
                        offset = vnode1.offset
                    data = bv.store_concat(self.data, offset, cache[vnode1], le=self.le)
                    del cache[vnode1]
            for i in range(vnode.size):
                owner[offset0 + i] = vnode
            assert all(own in cache for own in owner.values())
            return self._replace(data=data, cache=cache, owner=owner)

    def read(self, vnode: pypcode.Varnode) -> smt.BitVecRef:
        if vnode in self.cache:
            return self.cache[vnode]
        else:
            # flush out any cached variables that may overlap to data
            offset0 = vnode.offset
            data = self.data
            owner = self.owner
            cache = self.cache
            flushed = set()
            for i in range(vnode.size):
                vnode1 = owner.get(offset0 + i)
                if vnode1 is not None and vnode1 not in flushed:
                    if self.register:
                        offset = smt.BitVec("&" + vnode1.getRegisterName(), self.bits)
                    else:
                        offset = vnode1.offset
                    data = bv.store_concat(data, offset, cache[vnode1], le=self.le)
                    flushed.add(vnode1)
            if self.register:
                offset = smt.BitVec("&" + vnode.getRegisterName(), self.bits)
            else:
                offset = vnode.offset
            return bv.select_concat(data, offset, vnode.size, le=self.le)

    def to_expr(self):
        data = self.data
        for vnode, value in self.cache.items():
            if self.register:
                offset = smt.BitVec("&" + vnode.getRegisterName(), self.bits)
            else:
                offset = vnode.offset
            data = bv.store_concat(data, offset, value, le=self.le)
        return data

A custom proof environment

VCProofState is a subclass of kd.ProofState so that I can have all the regular tactics, but also interpret countermodels back into terms that make sense and known which trace we are currently working on.

A small symbolic debugger

Debug is a symbolic executor you can run like a debugger, setting breakpoints. By asking for models, you can know what states are currently possible.


class Debug:
    def __init__(
        self,
        ctx: pcode.BinaryContext,
        spec: AsmSpec = None,
        verbose=True,
    ):
        self.ctx = ctx
        self.spec: AsmSpec = spec if spec is not None else AsmSpec(ctx)
        self.tracestates: list[TraceState] = []
        self.vcs: list[VerificationCondition] = []
        self.breakpoints = set()
        self.verbose = verbose
        self._cur_model = None
        self.pfs = []

    def spec_file(self, filename: str):
        self.spec = AsmSpec.of_file(filename, self.ctx)

    def label(self, label: str | int) -> int:
        assert self.ctx.loader is not None, (
            "BinaryContext must be loaded before disassembling"
        )
        if isinstance(label, str):
            sym = self.ctx.loader.find_symbol(label)
            if sym is None:
                raise Exception(
                    f"Symbol {label} not found in binary {self.ctx.filename}"
                )
            return sym.rebased_addr
        elif isinstance(label, int):
            return label
        else:
            raise Exception(f"Unexpected type {type(label)} for label")

    def add_entry(self, name, precond=smt.BoolVal(True)):
        self.spec.add_entry(name, self.label(name), precond)

    def add_exit(self, name, postcond=smt.BoolVal(True)):
        self.spec.add_exit(name, self.label(name), postcond)

    def add_cut(self, name, invariant):
        self.spec.add_cut(name, self.label(name), invariant)

    def add_assert(self, name, assertion):
        self.spec.add_assert(name, self.label(name), assertion)

    def add_assign(self, name, var_name, expr):
        self.spec.add_assign(name, self.label(name), var_name, expr)

    def start(self, mem=None):
        if mem is None:
            mem = self.ctx.init_mem()
        tracestates, vcs = init_trace_states(self.ctx, mem, self.spec)
        self.tracestates = tracestates
        self.vcs = vcs

    def breakpoint(self, addr):
        self.breakpoints.add(addr)

    def step(self, n=1):
        self._cur_model = None
        for _ in range(n):
            tracestate = self.pop()
            if self.verbose:
                print(
                    "Continuing execution at:",
                    hex(tracestate.state.pc[0]),
                    "trace_id",
                    tracestate.trace_id,
                    "num insns",
                    len(tracestate.trace),
                )
            new_tracestates, new_vcs = execute_spec_and_insn(
                tracestate, self.spec, self.ctx
            )
            self.vcs.extend(new_vcs)
            self.tracestates.extend(new_tracestates)

    def run(self):
        while self.tracestates:
            if self.addr() in self.breakpoints:
                break
            self.step()

    def run_vc(self):
        n = len(self.vcs)
        while len(self.vcs) == n:
            self.step()

    def pop_run(self):
        """
        Run until the current trace state is completely done.
        """
        n = len(self.tracestates)
        while len(self.tracestates) >= n:
            self.step()

    def pop_verify(self, **kwargs):
        vc = self.vcs.pop()
        self.pfs.append((vc, vc.verify(self.ctx, **kwargs)))

    def verify(self, **kwargs):
        if not self.vcs:
            raise Exception("No verification conditions to verify")
        if self.tracestates:
            raise Exception("There are still trace states to execute")
        while self.vcs:
            self.pop_verify(**kwargs)

    def pop_lemma(self) -> kd.tactics.ProofState:
        vc = self.vcs.pop()
        return VCProofState(vc, self.ctx)  # kd.Lemma(vc.vc(self.ctx))

    def pop(self):
        return self.tracestates.pop()

    def addr(self):
        return self.tracestates[-1].state.pc[0]

    def ghost(self, name):
        return self.tracestates[-1].ghost_env[name]

    def reg(self, name):
        reg = self.ctx.state_vars[name]
        return self.ctx.simplify(
            self.ctx.substitute(self.tracestates[-1].state.memstate, reg)
        )

    def ram(self, addr, size=None):
        """
        Get the expression held at at ram location addr
        """
        if size is None:
            size = self.ctx.bits // 8
        return self.ctx.simplify(
            self.tracestates[-1].state.memstate.getvalue_ram(addr, size)
        )

    def insn(self):
        return self.ctx.disassemble(self.addr())

    def model(self):
        s = smt.Solver()
        s.add(self.tracestates[-1].state.path_cond)
        assert s.check() == smt.sat
        self._cur_model = s.model()

    def eval(self, expr: smt.ExprRef) -> smt.ExprRef:
        if self._cur_model is None:
            self.model()
        assert self._cur_model is not None
        tracestate = self.tracestates[-1]
        return smt.simplify(
            self._cur_model.eval(
                self.ctx.unfold(
                    substitute_state(
                        expr, self.ctx, tracestate.state.memstate, tracestate.ghost_env
                    )
                )
            )
        )

Bits and Bobbles

  • https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/simple.ml s2n-bignum has a HOL-light based system for assembly verification. Sort of what I’m trying
  • F* has lots of good stuff. What could I pull from there?
  • Using C code via pycparser to make it easier to write specs
  • Better integration with a more normal emulator for understanding counterexamples. Icicle? GDB script output?
  • It is possible and interesting to connect the two forms of sum_i32 to each other in regular knuckledragger. I should do that
  • Actual proof certificates from the symbolic executor.
mysum = smt.Function("mysum", smt.ArraySort(smt.BitVecSort(64), smt.BitVecSort(8)), smt.BitVecSort(64), smt.BitVecSort(32), smt.BitVecSort(32))
size = smt.Const("size", smt.BitVecSort(32))
mysum = kd.define("mysum", [mem, addr, size], 
smt.If(size == 0,
     0,
     bv.select_concat(mem, addr, 4) + mysum(mem, addr + 4, size-1)
))


retro arch

Pcode out of the box supports 6502, z80, 8085. There is also a gameboy and wasm processor spec available on github. A cool aspect of basing myself on ghidra is getting these for close to free.

6502 avr z80 8085 https://www.reddit.com/r/beneater/comments/141me6d/best_practices_for_assembly_programming/ https://www.nesdev.org/wiki/RTS_Trick http://www.6502.org/tutorials/6502opcodes.html#BIT bit skip trick zero page

bv induct

Bitvector induction is a theorem. Kind of an interesting idea. Bitvectors are too big to brute force. Yea, sat solvers. But still. well orderings on bitvectors. Strong induction?

Use bvinduct(n) to prove bvinduct(n+1) bvinduct(n/2) to prove bvinduct(n) ? forall a == forall a1 a2, a == concat(a1,a2)

P = smt.Array("P", smt.BitVecSort(4), smt.BoolSort())
a = smt.BitVec("a", 4)
kd.prove(smt.ForAll([P], P[0], smt.ForAll([a], smt.Implies(smt.And(a != -1, P[a]), P[a+1])), smt.ForAll([a], P(a))))

def bv_induct(P, x):
    y = smt.FreshConst("y", x.sort())
    return kd.axiom(
        smt.Implies(smt.And(P[0], smt.ForAll([y], smt.Implies(smt.And(y != -1, P[y]), P[y+1]))),
        P(x)
    ))


from kdrag.all import *
import kdrag.theories.bitvec as bv
mem = smt.Array("mem", smt.BitVecSort(64), smt.BitVecSort(8))
addr = smt.BitVec("addr", 64)
n = smt.Int("n")
read_seq = smt.Function("read_seq", smt.ArraySort(smt.BitVecSort(64), smt.BitVecSort(8)), smt.BitVecSort(64), smt.IntSort(), smt.SeqSort(smt.BitVecSort(32)))
read_seq = kd.define("read_seq", [mem, addr, n], smt.If(
    n <= 0,
    smt.Empty(smt.SeqSort(smt.BitVecSort(32))),
    smt.Concat(
        smt.Unit(bv.select_concat(mem, addr, 4)), # load?
        read_seq(mem, addr + 4, n - 1),
)))

kd.rewrite.full_simp(read_seq(smt.Lambda([addr], smt.Extract(7,0,addr)), smt.BitVecVal(0,64), 3))


seq_sum = smt.Function("seq_sum", smt.SeqSort(smt.BitVecSort(32)), smt.IntSort())
s = smt.Const("s", smt.SeqSort(smt.BitVecSort(32)))
seq_sum = kd.define("seq_sum", [s], smt.If(
    smt.Length(s) == 0,
    0,
    kd.Head(s) + seq_sum(kd.Tail(s))))


kd.full_simp(kd.seq(1,2,3))



---------------------------------------------------------------------------

Z3Exception                               Traceback (most recent call last)

/tmp/ipykernel_4153919/1460975090.py in ?()
     19 s = smt.Const("s", smt.SeqSort(smt.BitVecSort(32)))
     20 seq_sum = kd.define("seq_sum", [s], smt.If(
     21     smt.Length(s) == 0,
     22     0,
---> 23     kd.Head(s) + seq_sum(kd.Tail(s))))
     24 
     25 
     26 kd.full_simp(kd.seq(1,2,3))


~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(self, other)
   3624         x + y
   3625         >>> (x + y).sort()
   3626         BitVec(32)
   3627         """
-> 3628         a, b = _coerce_exprs(self, other)
   3629         return BitVecRef(Z3_mk_bvadd(self.ctx_ref(), a.as_ast(), b.as_ast()), self.ctx)


~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(a, b, ctx)
   1275         return (a, b)
   1276 
   1277     s = None
   1278     s = _coerce_expr_merge(s, a)
-> 1279     s = _coerce_expr_merge(s, b)
   1280     a = s.cast(a)
   1281     b = s.cast(b)
   1282     return (a, b)


~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(s, a)
   1240             return s
   1241         else:
   1242             if z3_debug():
   1243                 _z3_assert(s1.ctx == s.ctx, "context mismatch")
-> 1244                 _z3_assert(False, "sort mismatch")
   1245     else:
   1246         return s


~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(cond, msg)
    113 def _z3_assert(cond, msg):
    114     if not cond:
--> 115         raise Z3Exception(msg)


Z3Exception: sort mismatch
hex(50462976)
'0x3020100'

instruction count. executable checker of metadtaa

binary analysis passes? angr interface mocking? Ranges as sets. The intersection should be 0 smt. trace induction

def MayChange(mem1 : smt.ArrayRef,mem2 : smt.ArrayRef, addrs : list[int | smt.BitVecRef]) -> smt.BoolRef:
    """
    forall addr not in addr, mem1[addr] == mem2[addr]
    Can instead be done by masking out the addresses into a quantifier free statement.
    """
    for a in addrs:
        mem1 = bv.store_concat(mem1, addr, 0)
        mem2 = bv.store_concat(mem2, addr, 0)
    return mem1 == mem2
import bisect
# keep vnodes in sorted list
# search backwards and forwards for overlaps. Ugh.
# some kind of tree?

{offset : [vnode1,vnode2]}

# evict on store
class CachedArray(NamedTuple):
    array : smt.ArrayRef
    cache : dict
    register : bool
    bits : int
    def store(self, vnode, value):
        if symbolic:
            cache = {}
        else:
            offset0 = vnode.offset
            cache = self.cache.copy()
            cache[vnode] = value
            for i in range(vnode.size):
                offset = offset0 + i
                vnode1 = offsets.get(offset)
                if vnode1:
                    del self.cache[vnode1]    
                offsets[offset] = vnode
    def indirect_store(self, offset, vnode):
        self._replace(array=bv.store_concat(), cache={})
    def read(vnode):
        if vnode in cache:
            return cache[vnode]
        else:
            return bv.select_concat(self.mem, vnode.offset, vnode.size)


        """
        if vnode in self.writes:
            return self._replace(data=self.data, writes={**self.writes, vnode: value})
        else:
            data = self.data
            writes = {vnode: value}
            for vnode1, v in self.writes.items():
                if overlaps(vnode, vnode1):
                    if self.register:
                        data = bv.store_concat(
                            data,
                            smt.BitVec("&" + vnode.getRegisterName(), self.bits),
                            vnode.size,
                        )
                    else:
                        data = bv.store_concat(data, vnode1.offset, v)
                else:
                    writes[vnode1] = v
            return self._replace(data=data, writes=writes)
        """

        """
        if vnode in self.writes:
            return self.writes[vnode]
        else:
            data = self.data
            for vnode1, v in self.writes.items():
                if overlaps(vnode, vnode1):
                    if self.register:
                        data = bv.store_concat(
                            data,
                            smt.BitVec("&" + vnode.getRegisterName(), self.bits),
                            vnode.size,
                        )
                    else:
                        data = bv.store_concat(data, vnode1.offset, v)
        """

                """
        data = self.data
        for vnode1, v in self.writes.items():
            data = bv.store_concat(
                data,
                smt.BitVec("&" + vnode1.getRegisterName(), self.bits)
                if self.register
                else vnode1.offset,
                v,
            )
        """

def overlaps(vnode, vnode1):
    return (
        max(vnode.offset, vnode1.offset)
        <= min(vnode.offset + vnode.size, vnode1.offset + vnode1.size) - 1
    )
                # return bv.select_concat(
                #    self.register,
                #    smt.BitVec("&" + vnode.getRegisterName(), self.bits),
                #    vnode.size,
                # )

                                # register=bv.store_concat(
                #    self.register,
                #    smt.BitVec("&" + vnode.getRegisterName(), self.bits),
                #    value,
                # 
                            # return self._replace(unique=bv.store_concat(self.unique, offset, value))

s2n bignum

https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/simple.ml

Maychange. Could do this.

%%file /tmp/simple.S

_start:
add    %rax,%rbx
sub    %rax,%rbx
done:
Writing /tmp/simple.S
! as /tmp/simple.S -o /tmp/simple.o && objdump -d /tmp/simple.o
/tmp/simple.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_start>:
   0: 48 01 c3              add    %rax,%rbx
   3: 48 29 c3              sub    %rax,%rbx
import archinfo
ainfo = archinfo.ArchPcode("x86:LE:64:default")
ainfo.ip_offset
ainfo.registers
Unexpected SP conflict





{'rax': (0, 8),
 'eax': (0, 4),
 'ax': (0, 2),
 'al': (0, 1),
 'ah': (1, 1),
 'rcx': (8, 8),
 'ecx': (8, 4),
 'cx': (8, 2),
 'cl': (8, 1),
 'ch': (9, 1),  ...
 'xmm19_bf': (5829, 1),
 'xmm19_wd': (5830, 2),
 'xmm19_bg': (5830, 1),
 ...}
ainfo = archinfo.ArchPcode("RISCV:LE:32:default")
dir(ainfo)
ainfo.registers["pc"]
(4096, 4)
from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec

ctx = pcode.BinaryContext("/tmp/simple.o")

dbg = asmspec.Debug(ctx)
pc,a,b = smt.BitVecs("pc a b", 64)
rax = ctx.state_vars[r"%rax"]
rbx = ctx.state_vars[r"%rbx"]
rip = ctx.state_vars[r"%rip"]
dbg.add_entry("_start", 
    #smt.ForAll([a, b],
    smt.And(
        rax == a,
        rbx == b,
        rip == pc
    )
    #)
)

# Yeaaa, I'm not sure it's worth adding in set rip
dbg.add_exit("done",
smt.And(
    rbx == b,
    rip == pc+6
))
dbg.start()
dbg.run()
dbg.verify()



Unexpected SP conflict


Executing 0x400000/3: ADD RBX,RAX at (4194304, 0) PCODE IMARK ram[400000:3]
Executing 0x400000/3: ADD RBX,RAX at (4194304, 1) PCODE CF = carry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 2) PCODE OF = scarry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 3) PCODE RBX = RBX + RAX
Executing 0x400000/3: ADD RBX,RAX at (4194304, 4) PCODE SF = RBX s< 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 5) PCODE ZF = RBX == 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 6) PCODE unique[28080:8] = RBX & 0xff
Executing 0x400000/3: ADD RBX,RAX at (4194304, 7) PCODE unique[28100:1] = popcount(unique[28080:8])
Executing 0x400000/3: ADD RBX,RAX at (4194304, 8) PCODE unique[28180:1] = unique[28100:1] & 0x1
Executing 0x400000/3: ADD RBX,RAX at (4194304, 9) PCODE PF = unique[28180:1] == 0x0
Continuing execution at: 0x400003 trace_id [0] num insns 1
Executing 0x400003/3: SUB RBX,RAX at (4194307, 0) PCODE IMARK ram[400003:3]
Executing 0x400003/3: SUB RBX,RAX at (4194307, 1) PCODE CF = RBX < RAX
Executing 0x400003/3: SUB RBX,RAX at (4194307, 2) PCODE OF = sborrow(RBX, RAX)
Executing 0x400003/3: SUB RBX,RAX at (4194307, 3) PCODE RBX = RBX - RAX
Executing 0x400003/3: SUB RBX,RAX at (4194307, 4) PCODE SF = RBX s< 0x0
Executing 0x400003/3: SUB RBX,RAX at (4194307, 5) PCODE ZF = RBX == 0x0
Executing 0x400003/3: SUB RBX,RAX at (4194307, 6) PCODE unique[28080:8] = RBX & 0xff
Executing 0x400003/3: SUB RBX,RAX at (4194307, 7) PCODE unique[28100:1] = popcount(unique[28080:8])
Executing 0x400003/3: SUB RBX,RAX at (4194307, 8) PCODE unique[28180:1] = unique[28100:1] & 0x1
Executing 0x400003/3: SUB RBX,RAX at (4194307, 9) PCODE PF = unique[28180:1] == 0x0
Continuing execution at: 0x400006 trace_id [0] num insns 2
Executing SpecStmt: Exit(done, 0x400006, And(%rbx == b, %rip == pc + 6))

def Maychange(state0, state1, state_vars):
    return 

The initialization makes ugly That z3 printer doesn’t use lets hurts. select_r0(st) select(r1, regfile) select_r1(st) select… store_r1

fold pop_count

custom printer one select per line we don’t have to unfold register values then

domain sepcific simplifier. select32. Find simple formn, then discharge via z3

Make a topologically ordered list of every store

    def pretty(self):
        cur_ram = smt.Const("CUR_RAM", self.ram.sort())
        cur_register = smt.Const("CUR_REGFILE", self.register.sort())
        seen = {}
        res = []
        def 
        def worker(e):
            if e in seen:
                return seen[e]
            if e.decl().startswith("store"):
                x = smt.FreshConst(e.sort())
                res.append()
                return x


        todo = [smt.And(cur_ram == self.ram, cur_register == self.register)]

Wait. Reg is never indirectly accessed. So I could keep it in unpacked form…

from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec

ctx = pcode.BinaryContext("/tmp/simple.o")

dbg = asmspec.Debug(ctx)
pc,a,b = smt.BitVecs("pc a b", 64)
rax = ctx.state_vars[r"%rax"]
rbx = ctx.state_vars[r"%rbx"]
rip = ctx.state_vars[r"%rip"]
dbg.add_entry("_start", 
    #smt.ForAll([a, b],
    smt.And(
        rax == a,
        rbx == b,
        rip == pc
    )
    #)
)

# Yeaaa, I'm not sure it's worth adding in set rip
dbg.add_exit("done",
smt.And(
    rbx == b,
    rip == pc+6
))
dbg.start()
#print(smt.simplify(dbg.tracestates[0].state.memstate.unique).sexpr())
dbg.tracestates[0].state.memstate
#dbg.tracestates
Unexpected SP conflict


Executing 0x400000/3: ADD RBX,RAX at (4194304, 0) PCODE IMARK ram[400000:3]
Executing 0x400000/3: ADD RBX,RAX at (4194304, 1) PCODE CF = carry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 2) PCODE OF = scarry(RBX, RAX)
Executing 0x400000/3: ADD RBX,RAX at (4194304, 3) PCODE RBX = RBX + RAX
Executing 0x400000/3: ADD RBX,RAX at (4194304, 4) PCODE SF = RBX s< 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 5) PCODE ZF = RBX == 0x0
Executing 0x400000/3: ADD RBX,RAX at (4194304, 6) PCODE unique[2c280:8] = RBX & 0xff
Executing 0x400000/3: ADD RBX,RAX at (4194304, 7) PCODE unique[2c300:1] = popcount(unique[2c280:8])
Executing 0x400000/3: ADD RBX,RAX at (4194304, 8) PCODE unique[2c380:1] = unique[2c300:1] & 0x1
Executing 0x400000/3: ADD RBX,RAX at (4194304, 9) PCODE PF = unique[2c380:1] == 0x0





MemState((let ((a!1 ((_ zero_extend 1)
             (select64le (store64le (unique state0) &RIP #x0000000000400000)
                         &RBX)))
      (a!2 ((_ zero_extend 1)
             (select64le (store64le (unique state0) &RIP #x0000000000400000)
                         &RAX))))
(let ((a!3 (not (= ((_ extract 64 64) (bvadd a!1 a!2)) #b0))))
(let ((a!4 (store (store64le (unique state0) &RIP #x0000000000400000)
                  &CF
                  (ite a!3 #x01 #x00))))
(let ((a!5 (=> (and (bvslt #x0000000000000000 (select64le a!4 &RBX))
                    (bvslt #x0000000000000000 (select64le a!4 &RAX)))
               (bvslt #x0000000000000000
                      (bvadd (select64le a!4 &RBX) (select64le a!4 &RAX))))))
(let ((a!6 (select64le (store a!4 &OF (ite (not a!5) #x01 #x00)) &RBX))
      (a!7 (select64le (store a!4 &OF (ite (not a!5) #x01 #x00)) &RAX)))
(let ((a!8 (store64le (store a!4 &OF (ite (not a!5) #x01 #x00))
                      &RBX
                      (bvadd a!6 a!7))))
(let ((a!9 (store a!8
                  &SF
                  (ite (bvsgt #x0000000000000000 (select64le a!8 &RBX))
                       #x01
                       #x00))))
(let ((a!10 (store a!9
                   &ZF
                   (ite (= #x0000000000000000 (select64le a!9 &RBX)) #x01 #x00))))
(let ((a!11 (select64le (store64le (unique state0)
                                   #x000000000002c280
                                   (bvand (select64le a!10 &RBX)
                                          #x00000000000000ff))
                        #x000000000002c280)))
(let ((a!12 (store (store64le (unique state0)
                              #x000000000002c280
                              (bvand (select64le a!10 &RBX) #x00000000000000ff))
                   #x000000000002c300
                   ((_ extract 7 0) (popcount a!11)))))
(let ((a!13 (select (store a!12
                           #x000000000002c380
                           (bvand (select a!12 #x000000000002c300) #x01))
                    #x000000000002c380)))
(let ((a!14 (store64le (store a!10 &PF (ite (= #x00 a!13) #x01 #x00))
                       &RIP
                       #x0000000000400003)))
  (and (= CUR_RAM (ram state0)) (= CUR_REGFILE a!14)))))))))))))))

sequence

https://github.com/awslabs/s2n-bignum/blob/main/x86/tutorial/sequence.ml

%%file /tmp/sequence.s

_start:
add    %rax,%rbx
add    %rax,%rcx
mov    $0x2,%rdx
imul   %rdx,%rbx
done:
Writing /tmp/sequence.s
! as /tmp/sequence.s -o /tmp/sequence.o && objdump -d /tmp/sequence.o
/tmp/sequence.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_start>:
   0: 48 01 c3              add    %rax,%rbx
   3: 48 01 c1              add    %rax,%rcx
   6: 48 c7 c2 02 00 00 00  mov    $0x2,%rdx
   d: 48 0f af da           imul   %rdx,%rbx
from kdrag.all import smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec

dbg = asmspec.Debug(pcode.BinaryContext("/tmp/sequence.o"))
pc,a,b,c,d = smt.BitVecs("pc a b c d", 64)

rax = dbg.ctx.state_vars[r"%rax"]
rbx = dbg.ctx.state_vars[r"%rbx"]
rcx = dbg.ctx.state_vars[r"%rcx"]
rip = dbg.ctx.state_vars[r"%rip"]

dbg.add_entry("_start",
    smt.And(
        rax == a,
        rbx == b,
        rcx == c,
        rip == pc
    )
)
dbg.add_cut(dbg.label("_start") + 6, 
    smt.And(
        rbx == a + b,
        rip == pc + 6
    )
)

dbg.add_exit("done",
smt.And(
    rbx == (a + b) * 2,
    rip == pc+0x11
))

dbg.start()
dbg.run()
dbg.verify()


Unexpected SP conflict



---------------------------------------------------------------------------

CLEFileNotFoundError                      Traceback (most recent call last)

Cell In[1], line 5
      2 import kdrag.contrib.pcode as pcode
      3 import kdrag.contrib.pcode.asmspec as asmspec
----> 5 dbg = asmspec.Debug(pcode.BinaryContext("/tmp/sequence.o"))
      6 pc,a,b,c,d = smt.BitVecs("pc a b c d", 64)
      8 rax = dbg.ctx.state_vars[r"%rax"]


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:439, in BinaryContext.__init__(self, filename, langid)
    430 self.definitions.extend(
    431     [
    432         bv.store_concats(bits, size, le=le)
   (...)    436     ]
    437 )
    438 if filename is not None:
--> 439     self.load(filename)


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:446, in BinaryContext.load(self, main_binary, **kwargs)
    442 """
    443 Load a binary file and initialize the context.
    444 """
    445 self.filename = main_binary
--> 446 self.loader = cle.loader.Loader(main_binary, **kwargs)
    447 # self.ctx = pypcode.Context(self.loader.arch.name) # TODO: derive from cle
    448 # Note: These decls need to be synchronized with the code in BinaryContext.substitute
    449 # The decls are used for parsing textual smtlib
    450 # substitute is used to convert these names into memstate expressions
    451 decls = {
    452     name: smt.BitVec(name, vnode.size * 8)
    453     for name, vnode in self.ctx.registers.items()
    454 }


File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:185, in Loader.__init__(self, main_binary, auto_load_libs, concrete_target, force_load_libs, skip_libs, main_opts, lib_opts, ld_path, use_system_libs, ignore_import_version_numbers, case_insensitive, rebase_granularity, except_missing_libs, aslr, perform_relocations, load_debug_info, page_size, preload_libs, arch)
    183     self._main_opts.update({"arch": arch})
    184 self.preload_libs = []
--> 185 self.initial_load_objects = self._internal_load(
    186     main_binary, *preload_libs, *force_load_libs, preloading=(main_binary, *preload_libs)
    187 )
    189 # cache
    190 self._last_object = None


File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:795, in Loader._internal_load(self, preloading, *args)
    793     log.info("Skipping load request %s - already loaded", main_spec)
    794     continue
--> 795 obj = self._load_object_isolated(main_spec)
    796 objects.append(obj)
    797 objects.extend(obj.child_objects)


File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:967, in Loader._load_object_isolated(self, spec, obj_ident)
    965     close = False
    966 elif isinstance(spec, bytes | str):
--> 967     binary = self._search_load_path(spec)  # this is allowed to cheat and do partial static loading
    968     log.debug("... using full path %s", binary)
    969     binary_stream = open(binary, "rb")


File ~/philzook58.github.io/.venv/lib/python3.12/site-packages/cle/loader.py:1126, in Loader._search_load_path(self, spec)
   1122             continue
   1124     return path
-> 1126 raise CLEFileNotFoundError(f"Could not find file {spec}")


CLEFileNotFoundError: Could not find file /tmp/sequence.o

branch

%%file /tmp/branch.S
  .intel_syntax noprefix
  .text
_start:
  cmp rbx, rcx
  ja BB2
  mov rax, rcx
done1:
  ret
BB2:
  mov rax, rbx
done2:
  ret
Overwriting /tmp/branch.S
! as /tmp/branch.S -o /tmp/branch.o && objdump -d /tmp/branch.o
/tmp/branch.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <_start>:
   0: 48 39 cb              cmp    %rcx,%rbx
   3: 77 04                 ja     9 <BB2>
   5: 48 89 c8              mov    %rcx,%rax

0000000000000008 <done1>:
   8: c3                    ret

0000000000000009 <BB2>:
   9: 48 89 d8              mov    %rbx,%rax

000000000000000c <done2>:
   c: c3                    ret
from kdrag.all import smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec

dbg = asmspec.Debug(pcode.BinaryContext("/tmp/sequence.o"))
pc,a,b,c,d,stackpointer, returnaddress = smt.BitVecs("pc a b c d stackpointer returnaddress", 64)

rax = dbg.ctx.state_vars[r"%rax"]
ram32 = dbg.ctx.state_vars[r"%ram32"]
rbx = dbg.ctx.state_vars[r"%rbx"]
rcx = dbg.ctx.state_vars[r"%rcx"]
rip = dbg.ctx.state_vars[r"%rip"]
rsp = dbg.ctx.state_vars[r"%rsp"]

dbg.add_entry("_start",
    smt.And(
        rbx == b,
        rcx == c,
        rsp == stackpointer,
        ram64[stackpointer] == returnaddress
    )
)

post = smt.And(
    rsp == stackpointer + 8,
    rip == returnaddress,
    rax == smt.If(b > c, b, c)
)

dbg.add_exit("done1",
post
)

dbg.add_exit("done2",
post
)

Hmm. Could I have implemented ghost assignment using assume? Uhhh, it’d be a little different. But maybe that’s fine

Hmm. Implement return. But how to say when it should stop? On Ret, throw a special error, or just stop, or have special signal

statevars should really be functions from state more radable state expressions with folding/unfolding proof tracking

maychange

dbg = RelDebug(ctx1,ctx2) dbg.step1() dbg.step2()

step1 step2

dbg1 = prog1 dbg2 = prog2

dbg1.step(3) dbg2.step(5)

def state_lam(expr): smt.Lambda([state], smt.substitute()) # replace all references

sum array

%%file /tmp/sum_i32.s
    .text
    .globl sum_i32
    .align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
    mv      t0, a0          # t0 = arr (cursor)
    mv      t1, a1          # t1 = n   (remaining)
    li      a0, 0           # a0 = sum (return value)

    beqz    t1, .done       # if n == 0 -> return 0

.loop:
    lw      t2, 0(t0)       # t2 = *arr
    add     a0, a0, t2      # sum += *arr
    addi    t0, t0, 4       # arr++
    addi    t1, t1, -1      # n--
    bnez    t1, .loop       # keep going while n != 0

.done:
    ret

Writing /tmp/sum_i32.s

Ok I need

# pip install keystone-engine angr
code = """
    .text
    .globl sum_i32
    .align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
    mv      t0, a0          # t0 = arr (cursor)
    mv      t1, a1          # t1 = n   (remaining)
    li      a0, 0           # a0 = sum (return value)

    beqz    t1, .done       # if n == 0 -> return 0

.loop:
    lw      t2, 0(t0)       # t2 = *arr
    add     a0, a0, t2      # sum += *arr
    addi    t0, t0, 4       # arr++
    addi    t1, t1, -1      # n--
    bnez    t1, .loop       # keep going while n != 0

.done:
    ret
"""

from keystone import Ks, KS_ARCH_RISCV, KS_MODE_RISCV32
ks = Ks(KS_ARCH_RISCV, KS_MODE_RISCV32)
encoding, count = ks.asm(code)
---------------------------------------------------------------------------

ImportError                               Traceback (most recent call last)

Cell In[1], line 25
      1 # pip install keystone-engine angr
      2 code = """
      3     .text
      4     .globl sum_i32
   (...)     22     ret
     23 """
---> 25 from keystone import Ks, KS_ARCH_RISCV, KS_MODE_RISCV32
     26 ks = Ks(KS_ARCH_RISCV, KS_MODE_RISCV32)
     27 encoding, count = ks.asm(code)


ImportError: cannot import name 'KS_ARCH_RISCV' from 'keystone' (/home/philip/philzook58.github.io/.venv/lib/python3.12/site-packages/keystone/__init__.py)
import keystone
keystone.
%%file /tmp/test.c
#include <stdint.h>
#include <stdio.h>
extern int32_t sum_i32(const int32_t* arr, uint32_t n);
int main() {
    int32_t arr[] = {1, 2, 3, 4, 5};
    uint32_t n = 5;
    int32_t result = sum_i32(arr, n);
    printf("Sum: %d\n", result); // Expected output: Sum: 15
    return 0;
}
Overwriting /tmp/test.c
!nix-shell -p pkgsCross.riscv32.buildPackages.gcc --run "riscv32-unknown-linux-gnu-gcc -O2 /tmp/sum_i32.s /tmp/test.c  -o /tmp/sum32"
! qemu-riscv32 /tmp/sum32
Sum: 15
from kdrag.all import *

BV32 = smt.BitVecSort(32)

sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV32), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, BV32)
n,addr = smt.BitVecs('n addr', 32)
kd.define("sum_i32", [mem, addr, n], 
    smt.If(n == 0,
        0,
        sum_i32(
            mem,
            addr + 4,
            n - 1
        ) + mem[addr]
    )
)

sum_i32

C dsl for auxiliary spec functions might be nice.


kd.simp(sum_i32(
    MultStore(mem, 0, 1, 2, 3, 4, 5),


))
kd.full_simp(sum_i32(ram, addr, 0))

0

Bounded Version

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *
import kdrag.theories.bitvec as bv
BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
kd.define("sum_i32", [mem, addr, n], 
    smt.If(n == 0,
        0,
        sum_i32(
            mem,
            addr + 4,
            n - 1
        ) + bv.select_concat(mem, addr, 4, le=True)
    )
)

ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)

N = smt.BitVec("N", 32)
Nlim = 2

dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0, N <= Nlim, N >= 0))
#dbg.add_exit(".done", a0 == 0)

# alignment invariant?
dbg.add_cut(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), 
    (t0 - addr) / 4 == (t0 - addr) / 4,
      4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N,  N <= Nlim, N >= 0))
dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))

dbg.start()
#dbg.run()
print(len(dbg.tracestates[-1].state.path_cond))
dbg.tracestates
dbg.run()

"""
dbg.step()
dbg.step()
dbg.eval(t1)
dbg.tracestates
dbg.step()

dbg.step()
"""
dbg.tracestates
#dbg.verify()
while dbg.vcs:
    print(dbg.vcs[-1])
    l = dbg.pop_lemma()
    #for i in range(N+1)
    for i in range(Nlim+1):
        l.unfold(sum_i32)
    l.unfold(*ctx.definitions, keep=True)
    l.auto(timeout=10000)
    l.qed()

#dbg.add_cut(".loop", )

Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
1
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Continuing execution at: 0x40061a trace_id [1, 1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0))
Continuing execution at: 0x400628 trace_id [1, 0] num insns 4
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Continuing execution at: 0x40061a trace_id [0, 1] num insns 5
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N))
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0)))
VC(Entry(sum_i32, 0x400610, And(a1 == N, addr == a0, N <= 2, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, N)))
VC(Entry(sum_i32, 0x400610, And(a1 == N, addr == a0, N <= 2, N >= 0)),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    (t0 - addr)/4 == (t0 - addr)/4,
    4*N == 4*t1 + t0 - addr,
    t1 > 0,
    t1 <= N,
    N <= 2,
    N >= 0)))

todo: refold path cond and unfold only in symexec. Symexec might want to eject a Proof obligation of some kind for possible pc. or have debug get the ability to inject a lemma into path cond.

num_insn in ghost state.

    .text
    .globl sum_i32
    .align 2
# int32_t sum_i32(const int32_t* arr, uint32_t n)
sum_i32:
    mv      t0, a0          # t0 = arr (cursor)
    mv      t1, a1          # t1 = n   (remaining)
    li      a0, 0           # a0 = sum (return value)

    beqz    t1, .done       # if n == 0 -> return 0

.loop:
    lw      t2, 0(t0)       # t2 = *arr
    add     a0, a0, t2      # sum += *arr
    addi    t0, t0, 4       # arr++
    addi    t1, t1, -1      # n--
    bnez    t1, .loop       # keep going while n != 0

.done:
    ret

Unbounded

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *

from kdrag.all import *
import kdrag.theories.bitvec as bv

BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr = smt.BitVecs('n addr', 32)
sum_i32 = kd.define("sum_i32", [mem, addr, n], 
    smt.If(n == 0,
        0,
        sum_i32(
            mem,
            addr + 4,
            n - 1
        ) + bv.select_concat(mem, addr, 4, le=True)
    )
)

#sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
#    smt.If(n == 0,
#        acc,
#        sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
#        )
#)



ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
#ram = ctx.state_vars["ram32"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)
addr = smt.BitVec("addr", 32)

N = 2 #smt.BitVec("N", 32)

dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0))
#dbg.add_exit(".done", a0 == 0)

#dbg.add_assert(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), 4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N))
dbg.add_cut(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), N == t1 + (t0 - addr)/4, t1 > 0, t1 < N))

dbg.add_exit(".done", a0 == sum_i32(ram, addr, N))

dbg.start()
dbg.run()
"""
ctx.definitions.append(sum_i32)
#dbg.verify(unfold=sum_i32)

while dbg.vcs:
    print(dbg.vcs[-1])
    l = dbg.pop_lemma()
    l.unfold(*ctx.definitions)
    #l.intros()
    for i in range(N+1):
        l.unfold(sum_i32,keep=True)
    l.auto()
    l.qed()
"""
"""
l = dbg.pop_lemma()

l
print(l.goals[-1].goal.sexpr())
#l.qed()

l.unfold(*ctx.definitions)
l.intros()
l.unfold(sum_i32)
l.auto()
"""
#l.unfold(*ctx.definitions, at=1)
#l.auto(by=[f.defn for f in ctx.definitions])
#l.vc
l = dbg.pop_lemma()
#for i in range(N+1):
#    l.unfold(sum_i32,keep=True)
#l.simp()
"""
l.intros()
l.unfold(sum_i32, keep=True)
l.unfold(*ctx.definitions)
l.auto()
"""
print(l.vc)
l.intros()
l.unfold(sum_i32)
l.simp()
l.split(at=-1)
#l.rw(0)
#l.auto()
#l.unfold(*ctx.definitions)
#l
#l.unfold(*ctx.definitions)
#l.auto()
Store and evict cache pc
Cache read miss for t1
Cache read miss for t0
Cache read miss for a0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Store and evict cache unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Cache read miss for t0
Cache read hit for unique[580:4]
Store and evict cache unique[3080:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Cache read hit for unique[3080:4]
Store and evict cache t2
Store cache pc
Store and evict cache pc
Cache read miss for a1
Cache read miss for a0
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Cache read miss for a0
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Cache read miss for a1
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Store and evict cache unique[c00:4]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Cache read hit for unique[c00:4]
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Cache read hit for t1
Store and evict cache unique[1c00:1]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Cache read hit for unique[1c00:1]
Store cache pc
Continuing execution at: 0x40061a trace_id [1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    t1 + (t0 - addr)/4 == 2,
    t1 > 0,
    t1 < 2))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Cache read miss for a0
Cache read hit for t2
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Store and evict cache unique[c00:4]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Cache read miss for t0
Cache read hit for unique[c00:4]
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Store cache unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Cache read miss for t1
Cache read hit for unique[c00:4]
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Cache read hit for t1
Store and evict cache unique[1e80:1]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Cache read hit for unique[1e80:1]
Store cache pc
Continuing execution at: 0x400628 trace_id [0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2))
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(a0 == sum_i32(ram, addr, (t0 - addr)/4),
    t1 + (t0 - addr)/4 == 2,
    t1 > 0,
    t1 < 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2)))





[select32le(register(state0), &a0) ==
sum_i32(ram(state0),
        addr,
        bvsdiv_i(select32le(register(state0), &t0) +
                 4294967295*addr,
                 4)),
 select32le(register(state0), &t1) ==
2 +
4294967295*
bvsdiv_i(select32le(register(state0), &t0) + 4294967295*addr,
         4),
 Not(select32le(register(state0), &t1) <= 0),
 Not(2 <= select32le(register(state0), &t1)),
 select32le(register(state0), &t1) == 1]
?|= select32le(register(state0), &a0) +
select32le(ram(state0), select32le(register(state0), &t0)) ==
sum_i32(ram(state0), 4 + addr, 1) +
select32le(ram(state0), addr)

Lessons: Write “spec” program in accumulator form. Really basic blocks. Then induction is easy. Conversion to functional form can happen at the knuckledragger layer.

If I cut at every basic block I’d have ssa block form

being able to inspect formula in somewhat readalbe form was crucial. Perhaps make more readable l should also print vc. vc ordering instability sucked. Should make more stable. Avoid negation (?) It makes nasty looking goals

Need to do arbitrary N version. Maybe I’m still missing something. I had the bounds wrong < N vs <= N I had t0 and t1 swapped as counter vs addr

Set dbg as parent begg tactic

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
from kdrag.all import *

from kdrag.all import *
import kdrag.theories.bitvec as bv

BV32 = smt.BitVecSort(32)
BV8 = smt.BitVecSort(8)
sum_i32 = smt.Function('sum_i32', smt.ArraySort(BV32, BV8), BV32, BV32, BV32, BV32)
mem = smt.Array('mem', BV32, smt.BitVecSort(8))
n,addr,acc,res = smt.BitVecs('n addr acc res', 32)

sum_i32 = kd.define("sum_i32", [mem, acc, addr, n],
    smt.If(n == 0,
        acc,
        sum_i32(mem, acc + bv.select_concat(mem, addr, 4, le=True), addr + 4, n - 1)
        )
)



ctx = pcode.BinaryContext("/tmp/sum32", langid="RISCV:LE:32:default")
t1 = ctx.state_vars["t1"]
t0 = ctx.state_vars["t0"]
a0 = ctx.state_vars["a0"]
a1 = ctx.state_vars["a1"]
#ram = ctx.state_vars["ram32"]
ram = ctx.state_vars["ram"]
dbg = asmspec.Debug(ctx)

N = 2 #smt.BitVec("N", 32)

dbg.add_entry('sum_i32', smt.And(a1 == N, addr == a0, res == sum_i32(ram, 0, addr, N)))
#dbg.add_exit(".done", a0 == 0)
end_addr = 4*N + addr
#dbg.add_assert(".loop", smt.And(a0 == sum_i32(ram, addr, (t0 - addr) / 4), 4*N == 4*t1 + t0 - addr, t1 > 0, t1 <= N))
dbg.add_cut(".loop", smt.And(res == sum_i32(ram, a0, t0, t1), 4*N + addr == 4*t1 + t0, t1 > 0, t1 <= N))

dbg.add_exit(".done", a0 == res)

dbg.start()
dbg.run()
l = dbg.pop_lemma()
print(l.vc)
# loop to done
l.intros()
l.split(at=0)
l.rw(0)
l.unfold(sum_i32)
l.unfold(sum_i32)
l.auto()
l.qed()

l = dbg.pop_lemma()
print(l.vc)
# loop -> loop
l.intros()
l.split(at=0)
l.auto(by=[sum_i32.defn]) # really i think I want to
l.qed()

l = dbg.pop_lemma()
# loop -> entry
print(l.vc)
l.intros()
l.split(at=0)
l.auto()
l.qed()
Store and evict cache pc
Cache read miss for t0
Cache read miss for t1
Cache read miss for a0
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 0) PCODE IMARK ram[40061a:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 1) PCODE unique[580:4] = 0x0
Store and evict cache unique[580:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 2) PCODE unique[3080:4] = t0 + unique[580:4]
Cache read miss for t0
Cache read hit for unique[580:4]
Store and evict cache unique[3080:4]
Executing 0x40061a/4: lw t2,0x0(t0) at (4195866, 3) PCODE t2 = *[ram]unique[3080:4]
Cache read hit for unique[3080:4]
Store and evict cache t2
Store cache pc
Store and evict cache pc
Cache read miss for a0
Cache read miss for a1
Executing 0x400610/2: c.mv t0,a0 at (4195856, 0) PCODE IMARK ram[400610:2]
Executing 0x400610/2: c.mv t0,a0 at (4195856, 1) PCODE t0 = a0
Cache read miss for a0
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400612 trace_id [1] num insns 1
Executing 0x400612/2: c.mv t1,a1 at (4195858, 0) PCODE IMARK ram[400612:2]
Executing 0x400612/2: c.mv t1,a1 at (4195858, 1) PCODE t1 = a1
Cache read miss for a1
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400614 trace_id [1] num insns 2
Executing 0x400614/2: c.li a0,0x0 at (4195860, 0) PCODE IMARK ram[400614:2]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 1) PCODE unique[c00:4] = 0x0
Store and evict cache unique[c00:4]
Executing 0x400614/2: c.li a0,0x0 at (4195860, 2) PCODE a0 = unique[c00:4]
Cache read hit for unique[c00:4]
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400616 trace_id [1] num insns 3
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 0) PCODE IMARK ram[400616:4]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 1) PCODE unique[1c00:1] = t1 == 0x0
Cache read hit for t1
Store and evict cache unique[1c00:1]
Executing 0x400616/4: beq t1,zero,0x400628 at (4195862, 2) PCODE if (unique[1c00:1]) goto ram[400628:4]
Cache read hit for unique[1c00:1]
Store cache pc
Continuing execution at: 0x40061a trace_id [1] num insns 4
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2))
Continuing execution at: 0x40061e trace_id [0] num insns 1
Executing 0x40061e/2: c.add a0,t2 at (4195870, 0) PCODE IMARK ram[40061e:2]
Executing 0x40061e/2: c.add a0,t2 at (4195870, 1) PCODE a0 = a0 + t2
Cache read miss for a0
Cache read hit for t2
Store and evict cache a0
Store cache pc
Continuing execution at: 0x400620 trace_id [0] num insns 2
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 0) PCODE IMARK ram[400620:2]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 1) PCODE unique[c00:4] = 0x4
Store and evict cache unique[c00:4]
Executing 0x400620/2: c.addi t0,0x4 at (4195872, 2) PCODE t0 = t0 + unique[c00:4]
Cache read miss for t0
Cache read hit for unique[c00:4]
Store and evict cache t0
Store cache pc
Continuing execution at: 0x400622 trace_id [0] num insns 3
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 0) PCODE IMARK ram[400622:2]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 1) PCODE unique[c00:4] = 0xffffffff
Store cache unique[c00:4]
Executing 0x400622/2: c.addi t1,-0x1 at (4195874, 2) PCODE t1 = t1 + unique[c00:4]
Cache read miss for t1
Cache read hit for unique[c00:4]
Store and evict cache t1
Store cache pc
Continuing execution at: 0x400624 trace_id [0] num insns 4
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 0) PCODE IMARK ram[400624:4]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 1) PCODE unique[1e80:1] = t1 != 0x0
Cache read hit for t1
Store and evict cache unique[1e80:1]
Executing 0x400624/4: bne t1,zero,0x40061a at (4195876, 2) PCODE if (unique[1e80:1]) goto ram[40061a:4]
Cache read hit for unique[1e80:1]
Store cache pc
Store cache pc
Continuing execution at: 0x40061a trace_id [0, 1] num insns 5
Executing SpecStmt: Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2))
Continuing execution at: 0x400628 trace_id [0, 0] num insns 5
Executing SpecStmt: Exit(.done, 0x400628, a0 == res)
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == res))
Cache read hit for t0
Cache read hit for t1
Cache read hit for a0
VC(Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2)),
['0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2)))
Cache read hit for t0
Cache read hit for t1
Cache read hit for a0
VC(Entry(sum_i32, 0x400610, And(a1 == 2, addr == a0, res == sum_i32(ram, 0, addr, 2))),
['0x400610', '0x400612', '0x400614', '0x400616'],
Cut(.loop, 0x40061a, And(res == sum_i32(ram, a0, t0, t1),
    8 + addr == 4*t1 + t0,
    t1 > 0,
    t1 <= 2)))

⊨Implies(And(select32le(register(state0), &a1) == 2, addr == select32le(register(state0), &a0), res == sum_i32(ram(state0), 0, addr, 2), Not(select32le(register(state0), &a1) == 0)), And(res == sum_i32(ram(state0), 0, select32le(register(state0), &a0), select32le(register(state0), &a1)), addr == 4294967288 + 4*select32le(register(state0), &a1) + select32le(register(state0), &a0), Not(select32le(register(state0), &a1) <= 0), select32le(register(state0), &a1) <= 2))




#l.rw(0)
#l.unfold(sum_i32)
#l.unfold(sum_i32)
#l.rw(-1)
#l.simp()
#l.simp()
#l.auto()
x.definitions)
l.intros()
l.unfold(sum_i32)
l.auto()
"""
#l.unfold(*ctx.definitions, at=1)
#l.auto(by=[f.defn for f in ctx.definitions])
#l.vc
l = dbg.pop_lemma()
#for i in range(N+1):
#    l.unfold(sum_i32,keep=True)
#l.simp()
"""
l.intros()
l.unfold(sum_i32, keep=True)
l.unfold(*ctx.definitions)
l.auto()
"""
print(l.vc)
l.intros()
l.unfold(sum_i32)
l.simp()
l.split(at=-1)
#l.rw(0)
#l.auto()
#l.unfold(*ctx.definitions)
#l
#l.unfold(*ctx.definitions)
#l.auto()
#l = dbg.pop_lemma()
#print(l.vc)
#l.intros()
#l.split(at=0)
#l.rw(0)
#l.rw(1,rev=True)
#l.have((t0 == 0) == False)
vc = dbg.vcs[-1]
print(vc)
s = smt.Solver()
v = vc.vc(ctx)
s.add(v.arg(0))
s.add(smt.Not(v.arg(1)))
#print(smt.Not(v.arg(1)).sexpr())
print(vc.vc(ctx).sexpr())
VC(Entry(sum_i32, 0x400610, And(a1 == 2, addr == a0)),
['0x400610', '0x400612', '0x400614', '0x400616', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624', '0x40061a', '0x40061e', '0x400620', '0x400622', '0x400624'],
Exit(.done, 0x400628, a0 == sum_i32(ram, addr, 2)))
Cache read hit for a0
Cache read hit for a0
(let ((a!1 (not (= (select32le (register state0) &a1) #x00000000)))
      (a!2 (not (= (select32le (register state0) &a1) #x00000001)))
      (a!4 (select32le (ram state0)
                       (bvadd #x00000004 (select32le (register state0) &a0)))))
(let ((a!3 (and (= (select32le (register state0) &a1) #x00000002)
                (= addr (select32le (register state0) &a0))
                a!1
                a!2))
      (a!5 (bvadd (select32le (ram state0) (select32le (register state0) &a0))
                  a!4)))
  (=> a!3 (= a!5 (sum_i32 (ram state0) addr #x00000002)))))

pycparser

cmake python ziglang

make a toy symbolic executor?

https://github.com/lark-parser/lark/blob/master/examples/grammars/verilog.lark

Extract C spec stuff from smtlib, dischage using cbmc?

struct offsets + code that uses it.

vs imperative DSL in lark

import kdrag.printers.c as cprinter
def pre_post_cbmc(self, pre, post, filename):
    # make harness
    cprinter.print("kdrag_pre", [], )
    cprinter.print("kdrag_post", [], )
    subprocess.call(["cbmc", filename])
    


    
import pycparser

from pycparser import c_parser, c_ast, parse_file
import tempfile
prog = """
//#include <stdint.h>
//typedef unsigned long uint64_t;
int myadd(int a, int b, int* mem) {
    int d;
    int c = a + b;
    d = a - b;
    return c + a / d + mem[0];
}
"""
#ast = c_parser.CParser().parse(prog)

#def binop(ast : c_ast.BinOp):
#    assert isinstance(ast, c_ast.BinOp)
import re
from dataclasses import dataclass, field
@dataclass
class Env():
    bits : int = 32
    le : bool = True
    tenv : dict[str, smt.SortRef] = field(default_factory=dict)
    env : dict[str, smt.ExprRef] = field(default_factory=dict)
    def copy(self):
        return Env(
            bits=self.bits,
            le=self.le,
            tenv=self.tenv.copy(),
            env=self.env.copy()
        )

def of_type(ast, tenv):
    match ast:
        case c_ast.IdentifierType(names=[t]):
            if t == "int":
                return smt.BitVecSort(32)
            match = re.compile(r"u?int(\d+)_t").fullmatch(t)
            if match:
                bits = int(match.group(1))
                return smt.BitVecSort(bits)
            else:
                raise NotImplementedError(f"Unknown type {ast}")
        case c_ast.ArrayDecl(type=ty) | c_ast.PtrDecl(type=ty):
            return smt.ArraySort(smt.BitVecSort(32), of_type(ty, tenv))
        case c_ast.TypeDecl(type=ty):
            return of_type(ty, tenv)
        case _:
            raise NotImplementedError(f"Unknown type {ast}")

def interp(prog):
    """
    https://github.com/eliben/pycparser/blob/main/pycparser/c_ast.py
    """
    with tempfile.NamedTemporaryFile(suffix=".c") as f:
        f.write(prog.encode())
        f.flush()
        ast = parse_file(f.name, use_cpp=True)
    #ast = c_parser.CParser().parse(prog)
    match ast:
        case c_ast.FileAST(ext=[c_ast.FuncDef(decl=decl, body=body)]):
            match decl:
                case c_ast.Decl(name=name, type=c_ast.FuncDecl(args=c_ast.ParamList(params=params), type=rettype)):
                    env = {}
                    for param in params:
                        #print(param)
                        match param:
                            case c_ast.Decl(name=pname, type=ty):
                                t = of_type(ty, {})
                                env[pname] = smt.Const(pname, t)
                            case _:
                                raise NotImplementedError(f"Unknown param {param}")
                    print({k: v.sort() for k,v in env.items()})
                    bmc_stmt(body, env)
                    return env["##retval"]
                case _:
                    raise NotImplementedError(f"Unknown function decl {decl}")
        case _:
            raise NotImplementedError(f"Unknown top level {ast}")

def bmc_stmt(ast : c_ast.Node, env) -> Optional[smt.ExprRef]:
    match ast:
        case c_ast.Compound(block_items=stmts):
            for stmt in stmts:
                res = bmc_stmt(stmt, env)
            return
        case c_ast.Assignment(op="=", lvalue=lvalue, rvalue=rvalue):
            rval = bmc_stmt(rvalue, env)
            match lvalue:
                case c_ast.ID(name=name):
                    env[name] = rval
                #case c_ast.ArrayRef(name=name, subscript=subscript):
                #    arr = bmc_stmt(name, env)
                #    index = bmc_stmt(subscript, env)
                #    env[name] = smt.store(index, rval)
                case _:
                    raise NotImplementedError(f"Unknown lvalue {lvalue}")
            return rval
        case c_ast.Return(expr=expr):
            res = bmc_stmt(expr, env)
            env["##retval"] = res
            return
        case c_ast.Decl(name=name, type=ty, init=init):
            t = of_type(ty.type, env)
            if init:
                env[name] = bmc_stmt(init, env)
            else:
                env[name] = smt.Const(name, t)
        case c_ast.If(cond=cond, iftrue=iftrue, iffalse=iffalse):
            b = bmc_stmt(cond, env)
            env1 = env.copy()
            t = bmc_stmt(iftrue, env)
            e = bmc_stmt(iffalse, env1)
            for k,t in env1.items():
                assert k in env
                if not env[k].eq(t):
                    env[k] = smt.If(b, t, e)
        case c_ast.Constant(type=ty, value=value):
            return value
        case c_ast.ID(name=name):
            return env[name]
        case c_ast.BinaryOp(op=op, left=left, right=right):
            l = bmc_stmt(left, env)
            r = bmc_stmt(right, env)
            match op:
                case "+":
                    return l + r
                case "-":
                    return l - r
                case "*":
                    return l * r
                case "/":
                    return l / r
                case _:
                    raise NotImplementedError(f"Unknown binary op {op}")
        case c_ast.UnaryOp(op=op, expr=expr):
            e = bmc_stmt(expr, env)
            match op:
                case "-":
                    return -e
                case "~":
                    return ~e
                case _:
                    raise NotImplementedError(f"Unknown unary op {op}")
        case c_ast.TernaryOp(cond=cond, iftrue=iftrue, iffalse=iffalse):
            b = bmc_stmt(cond, env)
            env1 = env.copy()
            t = bmc_stmt(iftrue, env)
            e = bmc_stmt(iffalse, env1)
            for k,t in env1.items():
                assert k in env
                if not env[k].eq(t):
                    env[k] = smt.If(b, t, e)
            return smt.If(b, t, e)
        case c_ast.ArrayRef(name=name, subscript=subscript):
            arr = bmc_stmt(name, env)
            index = bmc_stmt(subscript, env)
            print(arr, index)
            return arr[index]
        case c_ast.FuncCall(name=c_ast.ID(name=fname), args=c_ast.ExprList(exprs=args)):
            argvals = [bmc_stmt(arg, env) for arg in args]
            env[fname](*argvals)
        case _:
            raise NotImplementedError(f"Unknown expr {ast}")

interp(prog)

{'a': BitVec(32), 'b': BitVec(32), 'mem': Array(BitVec(32), BitVec(32))}
mem 0

a + b + a/(a - b) + mem[0]

import tree_sitter_c
from tree_sitter import Language, Parser
CLANG = Language(tree_sitter_c.language())
parser = Parser(CLANG)
tree = parser.parse(
    bytes(prog, "utf8" ))
tree.print_dot_graph()
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

Cell In[19], line 7
      4 parser = Parser(CLANG)
      5 tree = parser.parse(
      6     bytes(prog, "utf8" ))
----> 7 tree.print_dot_graph()


TypeError: Tree.print_dot_graph() takes exactly one argument (0 given)

riscv formal

%%file /tmp/add.v
// https://github.com/YosysHQ/riscv-formal/blob/main/insns/insn_add.v
// DO NOT EDIT -- auto-generated from riscv-formal/insns/generate.py

module rvfi_insn_add (
  input                                 rvfi_valid,
  input  [`RISCV_FORMAL_ILEN   - 1 : 0] rvfi_insn,
  input  [`RISCV_FORMAL_XLEN   - 1 : 0] rvfi_pc_rdata,
  input  [`RISCV_FORMAL_XLEN   - 1 : 0] rvfi_rs1_rdata,
  input  [`RISCV_FORMAL_XLEN   - 1 : 0] rvfi_rs2_rdata,
  input  [`RISCV_FORMAL_XLEN   - 1 : 0] rvfi_mem_rdata,
`ifdef RISCV_FORMAL_CSR_MISA
  input  [`RISCV_FORMAL_XLEN   - 1 : 0] rvfi_csr_misa_rdata,
  output [`RISCV_FORMAL_XLEN   - 1 : 0] spec_csr_misa_rmask,
`endif

  output                                spec_valid,
  output                                spec_trap,
  output [                       4 : 0] spec_rs1_addr,
  output [                       4 : 0] spec_rs2_addr,
  output [                       4 : 0] spec_rd_addr,
  output [`RISCV_FORMAL_XLEN   - 1 : 0] spec_rd_wdata,
  output [`RISCV_FORMAL_XLEN   - 1 : 0] spec_pc_wdata,
  output [`RISCV_FORMAL_XLEN   - 1 : 0] spec_mem_addr,
  output [`RISCV_FORMAL_XLEN/8 - 1 : 0] spec_mem_rmask,
  output [`RISCV_FORMAL_XLEN/8 - 1 : 0] spec_mem_wmask,
  output [`RISCV_FORMAL_XLEN   - 1 : 0] spec_mem_wdata
);

  // R-type instruction format
  wire [`RISCV_FORMAL_ILEN-1:0] insn_padding = rvfi_insn >> 16 >> 16;
  wire [6:0] insn_funct7 = rvfi_insn[31:25];
  wire [4:0] insn_rs2    = rvfi_insn[24:20];
  wire [4:0] insn_rs1    = rvfi_insn[19:15];
  wire [2:0] insn_funct3 = rvfi_insn[14:12];
  wire [4:0] insn_rd     = rvfi_insn[11: 7];
  wire [6:0] insn_opcode = rvfi_insn[ 6: 0];

`ifdef RISCV_FORMAL_CSR_MISA
  wire misa_ok = (rvfi_csr_misa_rdata & `RISCV_FORMAL_XLEN'h 0) == `RISCV_FORMAL_XLEN'h 0;
  assign spec_csr_misa_rmask = `RISCV_FORMAL_XLEN'h 0;
`else
  wire misa_ok = 1;
`endif

  // ADD instruction
  wire [`RISCV_FORMAL_XLEN-1:0] result = rvfi_rs1_rdata + rvfi_rs2_rdata;
  assign spec_valid = rvfi_valid && !insn_padding && insn_funct7 == 7'b 0000000 && insn_funct3 == 3'b 000 && insn_opcode == 7'b 0110011;
  assign spec_rs1_addr = insn_rs1;
  assign spec_rs2_addr = insn_rs2;
  assign spec_rd_addr = insn_rd;
  assign spec_rd_wdata = spec_rd_addr ? result : 0;
  assign spec_pc_wdata = rvfi_pc_rdata + 4;

  // default assignments
  assign spec_trap = !misa_ok;
  assign spec_mem_addr = 0;
  assign spec_mem_rmask = 0;
  assign spec_mem_wmask = 0;
  assign spec_mem_wdata = 0;
endmodule
Writing /tmp/add.v
import kdrag.contrib.yosys as yosys
help(yosys.VerilogModuleRel.from_file("rvfi_insn_add")
Help on method from_file in module kdrag.contrib.yosys:

from_file(name: str, wire_names: list[str], filepath: str) class method of kdrag.contrib.yosys.VerilogModuleRel

older

https://github.com/cfbolz/vexingz3

Battle Plan:

Done:

  • allow easier python specdefinition
  • %reg syntax for cool inline asm demos

def add_subst def add_state_var def add_cb

syscalls?

Interrupt preludes It got a lot slower? pi pico More examples from frama-c, s2n-bignum simd reads/writes kd_always. Some interrupt bug?

Change register names to enable true inline assembly.

Can I use unicorn to validate against that easily?

spacer? I’d be shocked if useful.

sizex <= x - y <= -sizey \/ sizey <= y - x <= -sizex

We can say exactly when the or happens if x > y then sizey <= x - y <= -sizex else sizey <= y - x <= -sizex

inequality and addition / diff son’t play the same as the ints.

https://mcyoung.xyz/2023/11/27/simd-base64/ simd

Would I want to state from particular entry? or even address

kd_reachable from_label here_label kd_reachable here_label kd_unreachable from_label here_label

address expressions. Literal address for label, or <foo+0x7> relative addressing. I guess not necessary if this is mainly for textual assembly we have no control over. In that case just use programmatic bindings

https://github.com/toolCHAINZ/jingle

https://github.com/m4b/goblin An impish, cross-platform binary parsing crate, written in Rust https://github.com/m4b/bingrep https://github.com/m4b/faerie

https://ruor.uottawa.ca/server/api/core/bitstreams/86cc22d7-4517-4744-9cf7-4b1745a6c029/content race conditions and memory models. AMusing because that is what sail is for. Could be fun.

https://news.ycombinator.com/item?id=40185065 https://alastairreid.github.io/riscv-spec-issues/ https://github.com/riscv/riscv-opcodes just the opcodes

https://cs.ru.nl/~mschool/pub/vstte-why3-avr-revised.pdf why3 avr

https://antmicro.com/blog/2025/09/girdl-ghidra-renode-integration/ https://github.com/antmicro/girdl pull in svd files? That’s a good idea. I doubt I can directly use this

kd_prelude "(define-fun nonoverlap ((a (_ BitVec 64)) (sa (_ BitVec 64)) (b (_ BitVec 64)) (sb (_ BitVec 64))))"
kd_prelude "(define-const bvone (_ bv1 64))"
kd_prelude "(define-const bvtwo (_ bv2 64))"
kd_prelude "(define-fun select32 ...)"
from kdrag.all import *

x,y = smt.BitVecs("x y", 64)
sizex, sizey = smt.Ints("sizex, sizey")
bsizex = smt.Int2BV(sizex, 64)
bsizey = smt.Int2BV(sizey, 64)
maxint = smt.BitVecVal(2**64-1, 64)
no_overlap = kd.define("no_overlap", [x,sizex,y,sizey], 
    smt.If( smt.ULE(x,y),
       smt.And( smt.ULT(bsizex , y - x), smt.ULE(bsizey, maxint - (y - x))),
       smt.And( smt.ULT(bsizey , x - y), smt.ULE(bsizex, maxint - (x - y)))
  ))

kd.prove(smt.Implies(smt.Distinct(x, x+1, y, y+1, y+2), no_overlap(x,2,y,3)), unfold=1)


---------------------------------------------------------------------------

LemmaError                                Traceback (most recent call last)

Cell In[18], line 14
      7 maxint = smt.BitVecVal(2**64-1, 64)
      8 no_overlap = kd.define("no_overlap", [x,sizex,y,sizey], 
      9     smt.If( smt.ULE(x,y),
     10        smt.And( smt.ULT(bsizex , y - x), smt.ULE(bsizey, maxint - (y - x))),
     11        smt.And( smt.ULT(bsizey , x - y), smt.ULE(bsizex, maxint - (x - y)))
     12   ))
---> 14 kd.prove(smt.Implies(smt.Distinct(x, x+1, y, y+1, y+2), no_overlap(x,2,y,3)), unfold=1)


File ~/Documents/python/knuckledragger/kdrag/tactics.py:286, in prove(thm, by, admit, timeout, dump, solver, instan, unfold)
    282         raise Exception(
    283             "Worked with quantifier stripped. Something is going awry", pf
    284         )
    285     else:
--> 286         raise e
    287 except Exception as e:
    288     raise e


File ~/Documents/python/knuckledragger/kdrag/tactics.py:264, in prove(thm, by, admit, timeout, dump, solver, instan, unfold)
    262         by.append(kd.kernel.prove(thm == thm1, by=trace, timeout=timeout))  # type: ignore
    263 try:
--> 264     pf = kd.kernel.prove(
    265         thm, by, timeout=timeout, dump=dump, solver=solver, admit=admit
    266     )
    267     kdrag.config.perf_event("prove", thm, time.perf_counter() - start_time)
    268     return pf


File ~/Documents/python/knuckledragger/kdrag/kernel.py:176, in prove(thm, by, admit, timeout, dump, solver)
    174 if res != smt.unsat:
    175     if res == smt.sat:
--> 176         raise LemmaError(thm, "Countermodel", s.model())
    177     raise LemmaError("prove", thm, res)
    178 else:


LemmaError: (Implies(Distinct(x, x + 1, y, y + 1, y + 2),
        no_overlap(x, 2, y, 3)), 'Countermodel', [y = 18446744073709551613,
 x = 18446744073709551611,
 no_overlap = [else -> False]])
2**64-1
18446744073709551615

Bytecode

%%file /tmp/aexpr.s
    .option norvc
    .text
    .globl _start

    .equ OP_HALT, 0
    .equ OP_CONST, 1
    .equ OP_ADD,  2
    .equ OP_MUL,  3

_start:
    la   t1, program     # ip: points to current 32-bit instruction
    la   t0, stack       # sp: next free word on the VM stack (grows up)

loop:
    lw   t2, 0(t1)       # t2 = opcode (full word)
    beqz t2, done        # HALT (opcode 0)
    addi t1, t1, 4       # advance ip to next word (default)

    li   t3, OP_CONST
    beq  t2, t3, op_const
    li   t3, OP_ADD
    beq  t2, t3, op_add
    li   t3, OP_MUL
    beq  t2, t3, op_mul

    j    loop            # unknown opcode -> ignore

# --- CONST (two-word instruction) ---
op_const:
    lw   t4, 0(t1)       # load immediate from *next* word
    addi t1, t1, 4       # ip consumes the extra word
    sw   t4, 0(t0)       # push
    addi t0, t0, 4
    j    loop

# --- ADD ---
op_add:
    addi t0, t0, -4
    lw   t4, 0(t0)       # b
    addi t0, t0, -4
    lw   t5, 0(t0)       # a
    add  t6, t5, t4      # a+b
    sw   t6, 0(t0)
    addi t0, t0, 4
    j    loop

# --- MUL (needs 'M' extension; see note below for pure RV32I) ---
op_mul:
    addi t0, t0, -4
    lw   t4, 0(t0)       # b
    addi t0, t0, -4
    lw   t5, 0(t0)       # a
    mul  t6, t5, t4      # a*b
    sw   t6, 0(t0)
    addi t0, t0, 4
    j    loop

done:
    # Pop result into 'result' and park
    addi t0, t0, -4
    lw   t4, 0(t0)
    la   t5, result
    sw   t4, 0(t5)

hang:
    j    hang

    .section .rodata
# Program: (3 + 4) * 5
# Words: CONST 3, CONST 4, ADD, CONST 5, MUL, HALT
program:
    .word 1, 3
    .word 1, 4
    .word 2
    .word 1, 5
    .word 3
    .word 0

    .bss
    .align 4
stack:
    .space 1024           # 256-word VM stack

    .data
    .align 4
result:
    .word 0


Writing /tmp/aexpr.s
! /home/philip/Downloads/riscv-toolchain-15-x86_64-lin/riscv32-unknown-elf/bin/as /tmp/aexpr.s -o aexpr.o && /home/philip/Downloads/riscv-toolchain-15-x86_64-lin/riscv32-unknown-elf/bin/objdump -d aexpr.o
aexpr.o:     file format elf32-littleriscv


Disassembly of section .text:

00000000 <_start>:
   0: 00000317           auipc t1,0x0
   4: 00030313           mv t1,t1
   8: 00000297           auipc t0,0x0
   c: 00028293           mv t0,t0

00000010 <loop>:
  10: 00032383           lw t2,0(t1) # 0 <_start>
  14: 06038c63           beqz t2,8c <done>
  18: 00430313           addi t1,t1,4
  1c: 00100e13           li t3,1
  20: 01c38c63           beq t2,t3,38 <op_const>
  24: 00200e13           li t3,2
  28: 03c38263           beq t2,t3,4c <op_add>
  2c: 00300e13           li t3,3
  30: 03c38e63           beq t2,t3,6c <op_mul>
  34: fddff06f           j 10 <loop>

00000038 <op_const>:
  38: 00032e83           lw t4,0(t1)
  3c: 00430313           addi t1,t1,4
  40: 01d2a023           sw t4,0(t0) # 8 <_start+0x8>
  44: 00428293           addi t0,t0,4
  48: fc9ff06f           j 10 <loop>

0000004c <op_add>:
  4c: ffc28293           addi t0,t0,-4
  50: 0002ae83           lw t4,0(t0)
  54: ffc28293           addi t0,t0,-4
  58: 0002af03           lw t5,0(t0)
  5c: 01df0fb3           add t6,t5,t4
  60: 01f2a023           sw t6,0(t0)
  64: 00428293           addi t0,t0,4
  68: fa9ff06f           j 10 <loop>

0000006c <op_mul>:
  6c: ffc28293           addi t0,t0,-4
  70: 0002ae83           lw t4,0(t0)
  74: ffc28293           addi t0,t0,-4
  78: 0002af03           lw t5,0(t0)
  7c: 03df0fb3           mul t6,t5,t4
  80: 01f2a023           sw t6,0(t0)
  84: 00428293           addi t0,t0,4
  88: f89ff06f           j 10 <loop>

0000008c <done>:
  8c: ffc28293           addi t0,t0,-4
  90: 0002ae83           lw t4,0(t0)
  94: 00000f17           auipc t5,0x0
  98: 000f0f13           mv t5,t5
  9c: 01df2023           sw t4,0(t5) # 94 <done+0x8>

000000a0 <hang>:
  a0: 0000006f           j a0 <hang>

Run regular gdb

import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
class Debug():
    def __init__(self, ctx):
        #self.watches = []
        self.ctx = ctx
    """
    def start(self, spec):
        if mem is None:
            mem = ctx.init_mem()  # pcode.MemState.Const("mem", bits=ctx.bits)
        todo, vcs = init_trace_states(ctx, mem, spec, verbose=verbose)
        self.todo = todo
        self.vcs = vcs
    def next(self):
        pass
    def step(self):
        ntraces = len(self.todo)
        while len(self.todo) == ntraces:
            self.next()
    def pop_state(self):
        pass
    def focus(self, state_id):
        
    def watch(self, expr):
        self.watches.append(expr)
    def concrete(self):
        pass
    def insn(self):
        pass
"""
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
import pprint
spec = asmspec.AsmSpec()
ctx = ctx = pcode.BinaryContext("myasm.o", langid="RISCV:LE:32:default")
startaddr = ctx.loader.find_symbol("myentry").rebased_addr
exitaddr = ctx.loader.find_symbol("myexit").rebased_addr

# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
t0 = ctx._subst_decls['t0']
a0 = ctx._subst_decls['a0']
a1 = ctx._subst_decls['a1']
a2 = ctx._subst_decls['a2']
sp = ctx._subst_decls['sp']


stride = 4
addr = 76
size = 0
spec.add_entry("mymemset",  startaddr , smt.And(
    a0 == 1000,
    sp == 1000000,
    ram32[a0] == 0, # type
    ram32[a0 + 4] == addr, # addr
    ram32[a0 + 8] == stride, # stride,
    ram32[a0 + 12] == size, # size
    #ram32[a0 + 12] == 1, # size
    ram32[a0 + 16] == 42, # data
    ram32[a0 + 20] == 2, # end
    a1 == a1,
    a2 == a2
))


spec.add_assert("loopsanity", ctx.loader.find_symbol("lit").rebased_addr + 8,  smt.And(   
    ram32[a0 + 0] == addr, # addr
    ram32[a0 + 4] == stride, # stride,
    #ram32[a0 + 8] == 1, # size)
))

#spec.add_assign("mymemset", startaddr, "dst", ram32[a0])
#spec.add_assign("mymemset", startaddr, "value", ram32[a0 + 4])
#dst, value = smt.BitVecs('dst value', 32)
spec.add_exit("myexit", exitaddr, smt.BoolVal(True))


vcs = asmspec.run_all_paths(ctx, spec, max_insns=250)
print(len(vcs), "paths found")
for vc in vcs:
    pprint.pp(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth persuing?
    try:
        vc.verify(ctx)
        print("success")
    except Exception as e:
        print("fail")
        countermodel = e.args[2]
        if not isinstance(vc.assertion, asmspec.OutOfGas):
            pprint.pp({k : smt.simplify(v) for k,v in vc.countermodel(ctx, countermodel).items()})
        else:
            print("out of gas")
%%file /tmp/aexpr.c

int exec( , *):


from kdrag.all import *
import kdrag.theories.seq as seq
Aexpr = kd.Inductive("Aexpr")
Aexpr.declare("Const", ("val", smt.BitVecSort(32)))
Aexpr.declare("Add", ("x0", Aexpr), ("x1", Aexpr))
Aexpr = Aexpr.create()

kd.notation.add.register(Aexpr, Aexpr.Add)

e, e1, e2 = smt.Consts("e e1 e2", Aexpr)

Code = kd.Inductive("Code")
Code.declare("Const", ("val", smt.BitVecSort(32)))
Code.declare("Add")
Code = Code.create()

eval = smt.Function("eval", Aexpr, smt.BitVecSort(32))
eval = kd.define("eval", [e],
    smt.If(
        e.is_Const, e.val,
        eval(e.x0) + eval(e.x1)
    )
)

compile = smt.Function("compile", Aexpr, seq.Seq(Code))
compile = kd.define("compile", [e],
    smt.If(e.is_Const,
    smt.Unit(Code.Const(e.val)),
    smt.Concat( compile(e.x0), compile(e.x1) , smt.Unit(Code.Add)))
    )

c = smt.Const("c", Code)
s = smt.Const("st", seq.Seq(smt.BitVecSort(32)))
exec1 = kd.define("exec1", [c, s], 
    smt.If(c.is_Const,
    smt.Concat(s, smt.Unit(c.val)),
    smt.Concat(seq.Tail(seq.Tail(s)), smt.Unit(seq.Head(s) + seq.Head(seq.Tail(s))))
    )
)

cs = smt.Const("cs", seq.Seq(Code))
exec = smt.Function("exec", seq.Seq(Code), seq.Seq(smt.BitVecSort(32)), seq.Seq(smt.BitVecSort(32)))
exec = kd.define("exec",
[cs,s],
smt.If(
    smt.Length(cs) == 0,
    s,
    exec(seq.Tail(cs), exec1(seq.Head(cs), s))))



one = Aexpr.Const(1)

start = seq.Nil(smt.BitVecSort(32))

assert kd.simp(eval(one + one + one)).eq(smt.BitVecVal(3,32))
assert kd.simp(exec(compile(one + one + one), start))





---------------------------------------------------------------------------

Z3Exception                               Traceback (most recent call last)

/tmp/ipykernel_1045074/625003890.py in ?()
     53 
     54 start = seq.Nil(smt.BitVecSort(32))
     55 
     56 assert kd.simp(eval(one + one + one)).eq(smt.BitVecVal(3,32))
---> 57 assert kd.simp(exec(compile(one + one + one), start))
     58 
     59 
     60 


~/philzook58.github.io/.venv/lib/python3.12/site-packages/z3/z3.py in ?(self)
    387             return False
    388         elif is_eq(self) and self.num_args() == 2:
    389             return self.arg(0).eq(self.arg(1))
    390         else:
--> 391             raise Z3Exception("Symbolic expressions cannot be cast to concrete Boolean values.")


Z3Exception: Symbolic expressions cannot be cast to concrete Boolean values.

Debugger

use angr to get views?

sy brand

actually make a cli?

gdb or unicorn?





class Debug():
    def __init__(self, ctx):
        self.watches = []
        self.ctx = ctx
    def start(self, spec):
        if mem is None:
            mem = ctx.init_mem()  # pcode.MemState.Const("mem", bits=ctx.bits)
        todo, vcs = init_trace_states(ctx, mem, spec, verbose=verbose)
        self.todo = todo
        self.vcs = vcs
    def next(self):
        pass
    def step(self):
        """
        Step to next branch point
        """
        ntraces = len(self.todo)
        while len(self.todo) == ntraces:
            self.next()
    def pop_state(self):
        pass
    def focus(self, state_id):
        
    def watch(self, expr):
        self.watches.append(expr)
    def concrete(self):
        pass
    def insn(self):
        pass
from kdrag.all import *
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
class Debug():
    def __init__(self, ctx):
        self.watches = []
        self.ctx = ctx
    def start(self, spec):
        if mem is None:
            mem = ctx.init_mem()  # pcode.MemState.Const("mem", bits=ctx.bits)
        todo, vcs = init_trace_states(ctx, mem, spec, verbose=verbose)
        self.todo = todo
        self.vcs = vcs

TraceFragmentBundle

It’s interesting to be able to run code to use in specs

list[TraceBundleFragment] could be the return type of run_all_paths. Collected up by common entry or

@dataclass
class TraceFragmentBundle:
    """
    A bundle is a set of pathways
    """
    start_addr : int # all at same start addr? Not necesarily all end at same exit addr
    ctx : pcode.BinaryContext
    args : # or is args a mapping from initial ghost state? #list[smt.ExprRef] # ghost vars?
    vcs: list[VerificationCondition]
    rets : list[smt.ExprRef] # ghost vars?

    @staticmethod
    def from_entry_exit(ctx: pcode.BinaryContext, entry_addr: int, exit_addr: int):
        spec = AsmSpec()
        spec.add_entry("entry", entry_addr, smt.BoolVal(True))
        spec.add_exit("exit", exit_addr, smt.BoolVal(True))
        return TraceFragmentBundle(ctx, run_all_paths(ctx, spec))

    def __call__(self, memstate : pcode.MemState) -> smt.BoolRef:
        """
        The bundle considered as a Function.
        Joined together.

        """
        # But a function from ghost to ghost or from memstate to memstate? or memstate to ghost?
        acc = memstate.Const("undef", bits=ctx.bits)
        for vc in vcs:
            smt.If(vc.path_cond, vc.memstate, acc)
        return acc

compile aexpr

type aexpr = I64 i64 | Add aexpr aexpr eval(compile(e)) ==

Or even a model of some kind of hack

virtual machine

I kind of did the conrete version

But the btecode interpreter version is interesting.

memset

interpreted memset

Swtich on type is also intereting. Interpreter

Is this overwrought compared to what we have to do though?

type MemLang = SetList list (addr *value) SetMany int addr value (* iterate the construction *)

do finite unroll

Compare to my high level versions? Output of ocmpiler oct 7 demo

get riscv32 toolchain in ci

get small snippets

%%file /tmp/mymemset.s

# kd_entry mymemset "true"

.globl mymemset
mymemset:
    lw t1, 0(a0) # load dst ptr
    lw t2, 4(a0) # load value
    lw t3, 8(a0) # load length
    sw t2, 0(t1) # store value
    ret
Overwriting /tmp/mymemset.s
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.gcc --run "riscv32-none-elf-as /tmp/mymemset.s -o /tmp/mymemset.o && riscv32-none-elf-objdump -d /tmp/mymemset.o"
/tmp/mymemset.o:     file format elf32-littleriscv


Disassembly of section .text:

00000000 <mymemset>:
   0: 00052303           lw t1,0(a0)
   4: 00452383           lw t2,4(a0)
   8: 00852e03           lw t3,8(a0)
   c: 00732023           sw t2,0(t1)
  10: 00008067           ret
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
spec = asmspec.AsmSpec()
ctx = ctx = pcode.BinaryContext("/tmp/mymemset.o", langid="RISCV:LE:32:default")
startaddr = ctx.loader.find_symbol("mymemset").rebased_addr

# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
t0 = ctx._subst_decls['t0']
a0 = ctx._subst_decls['a0']


spec.add_entry("mymemset",  startaddr , smt.BoolVal(True))
spec.add_assign("mymemset", startaddr, "dst", ram32[a0])
spec.add_assign("mymemset", startaddr, "value", ram32[a0 + 4])
dst, value = smt.BitVecs('dst value', 32)
spec.add_exit("myexit", startaddr + 16, ram32[dst] == value)

vcs = asmspec.run_all_paths(ctx, spec)
for vc in vcs:
    #print(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth persuing?
    vc.verify(ctx)

conmsts
don
Executing SpecStmt: Assign(label='mymemset', addr=4194304, name='dst', expr=ram32[a0])
conmsts
don
Executing SpecStmt: Assign(label='mymemset', addr=4194304, name='value', expr=ram32[a0 + 4])
conmsts
don
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 0) PCODE IMARK ram[400000:4]
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 1) PCODE unique[580:4] = 0x0
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400000/4: lw t1,0x0(a0) at (4194304, 3) PCODE t1 = *[ram]unique[3080:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 0) PCODE IMARK ram[400004:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 1) PCODE unique[580:4] = 0x4
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400004/4: lw t2,0x4(a0) at (4194308, 3) PCODE t2 = *[ram]unique[3080:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 0) PCODE IMARK ram[400008:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 1) PCODE unique[580:4] = 0x8
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 2) PCODE unique[3080:4] = a0 + unique[580:4]
Executing 0x400008/4: lw t3,0x8(a0) at (4194312, 3) PCODE t3 = *[ram]unique[3080:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 0) PCODE IMARK ram[40000c:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 1) PCODE unique[600:4] = 0x0
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 2) PCODE unique[4180:4] = t1 + unique[600:4]
Executing 0x40000c/4: sw t2,0x0(t1) at (4194316, 3) PCODE *[ram]unique[4180:4] = t2
Executing SpecStmt: Exit(myexit, 0x400010, ram32[dst] == value)
conmsts
don
%%file /tmp/mymemcpy.c
#include <stddef.h>
void mymemcpy(int *dst, const int *src, size_t n) {
    for (size_t i = 0; i < n; i++) {
        dst[i] = src[i];
    }
    return;
}
Writing /tmp/mymemcpy.c
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.gcc --run "riscv32-none-elf-gcc -O1 -c /tmp/mymemcpy.c -o /tmp/mymemcpy.o && riscv32-none-elf-objdump -d /tmp/mymemcpy.o"
/tmp/mymemcpy.o:     file format elf32-littleriscv


Disassembly of section .text:

00000000 <mymemcpy>:
   0: ca11                 beqz a2,14 <.L1>
   2: 87ae                 mv a5,a1
   4: 060a                 slli a2,a2,0x2
   6: 95b2                 add a1,a1,a2

00000008 <.L3>:
   8: 4398                 lw a4,0(a5)
   a: c118                 sw a4,0(a0)
   c: 0791                 addi a5,a5,4
   e: 0511                 addi a0,a0,4
  10: feb79ce3           bne a5,a1,8 <.L3>

00000014 <.L1>:
  14: 8082                 ret
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
import kdrag.smt as smt
spec = asmspec.AsmSpec()
ctx = ctx = pcode.BinaryContext("/tmp/mymemcpy.o", langid="RISCV:LE:32:default")
startaddr = ctx.loader.find_symbol("mymemcpy").rebased_addr

# TODO: actually this is something that should be exposed.
ram32 = ctx._subst_decls['ram32']
dstptr = ctx._subst_decls['a0']
srcptr = ctx._subst_decls['a1']
n = ctx._subst_decls['a2']

#  "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)))"
spec.add_entry("mymemcpy",  startaddr , smt.And(n == 0))
# kd_cut mycut "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)) (bvult RAX RDX) 
# (forall ((offset (_ BitVec 64))) (=> (and (bvuge offset (_ bv0 64)) (bvult offset RAX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64)))) 
# (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))))"

#spec.add_cut("loopstart", startaddr + 8, smt.And(True))


# kd_exit myexit "(=> (and (bvuge offset (_ bv0 64)) (bvult offset RDX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64)))) 
# (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))"
spec.add_exit("mymemset_exit", startaddr + 14, smt.And(True))

vcs = asmspec.run_all_paths(ctx, spec)
for vc in vcs:
    #print(vc)
    # ok ctx.substitute is very slow because gathering constants is slow. Worth perusing?
    vc.verify(ctx)

conmsts
don
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 0) PCODE IMARK ram[400000:2]
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 1) PCODE unique[10800:1] = a2 == 0x0
Executing 0x400000/2: c.beqz a2,0x400014 at (4194304, 2) PCODE if (unique[10800:1]) goto ram[400014:4]
Executing 0x400014/2: ret  at (4194324, 0) PCODE IMARK ram[400014:2]
Executing 0x400014/2: ret  at (4194324, 1) PCODE return ra



---------------------------------------------------------------------------

NotImplementedError                       Traceback (most recent call last)

Cell In[9], line 27
     16 # kd_cut mycut "(and (= RDI (_ bv0 64)) (= RSI (_ bv1024 64)) (bvuge RDX (_ bv0 64)) (bvult RDX (_ bv32 64)) (bvult RAX RDX) 
     17 # (forall ((offset (_ BitVec 64))) (=> (and (bvuge offset (_ bv0 64)) (bvult offset RAX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64)))) 
     18 # (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))))"
   (...)     23 # kd_exit myexit "(=> (and (bvuge offset (_ bv0 64)) (bvult offset RDX)) (= (select ram32 (bvadd RDI (bvshl offset (_ bv2 64)))) 
     24 # (select ram32 (bvadd RSI (bvshl offset (_ bv2 64))))))"
     25 spec.add_exit("mymemset_exit", startaddr + 14, smt.And(True))
---> 27 vcs = asmspec.run_all_paths(ctx, spec)
     28 for vc in vcs:
     29     #print(vc)
     30     # ok ctx.substitute is very slow because gathering constants is slow. Worth perusing?
     31     vc.verify(ctx)


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/asmspec.py:458, in run_all_paths(ctx, spec, mem, verbose)
    456     vcs.extend(new_vcs)
    457     if tracestate is not None:
--> 458         todo.extend(execute_insn(tracestate, ctx, verbose=verbose))
    459 return vcs


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/asmspec.py:411, in execute_insn(tracestate, ctx, verbose)
    402 if pc != 0:
    403     raise Exception(f"Unexpected program counter {pc} at address {hex(addr)}")
    404 return [
    405     TraceState(
    406         start=tracestate.start,
    407         trace=tracestate.trace + [state0.pc[0]],
    408         state=state,
    409         ghost_env=tracestate.ghost_env,
    410     )
--> 411     for state in ctx.sym_execute(
    412         state0.memstate,
    413         addr,
    414         path_cond=state0.path_cond,
    415         max_insns=1,
    416         verbose=verbose,
    417     )
    418 ]


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:516, in BinaryContext.sym_execute(self, memstate, addr, path_cond, max_insns, breakpoints, verbose)
    512 if verbose:
    513     print(
    514         f"Executing {pretty_insn(self.disassemble(pc[0]))} at {pc} PCODE {pretty_op(self.translate(pc[0])[pc[1]])}"
    515     )
--> 516 memstate1, pc1 = self.execute1(memstate, pc)
    517 if isinstance(
    518     pc1[0], smt.ExprRef
    519 ):  # PC has become symbolic, requiring branching
    520     assert isinstance(pc1[1], smt.ExprRef)


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:425, in BinaryContext.execute1(self, memstate, pc)
    423 addr = pc[0]
    424 op = self.translate(addr)[pc[1]]
--> 425 return self.executeCurrentOp(op, memstate, pc)


File ~/Documents/python/knuckledragger/src/kdrag/contrib/pcode/__init__.py:390, in BinaryContext.executeCurrentOp(self, op, memstate, pc)
    388     return executeBinary(op, memstate), self.fallthruOp(pc)
    389 else:
--> 390     raise NotImplementedError("Opcode not implemented: ", op.opcode)


NotImplementedError: ('Opcode not implemented: ', OpCode.RETURN)

Track better which trace fragment we’re on. Trace bundles

ctx._subst_decls['t0']

t0

%%file /tmp/tag.c

enum Op {Inc1, Inc2, Inc3};

int doop(enum Op op, int x) {
    switch(op) {
        case Inc1: return x + 1;
        case Inc2: return x + 2;
        case Inc3: return x + 3;
    }
    return -1;
}



Overwriting /tmp/tag.c
! nix-shell -p pkgsCross.riscv32-embedded.buildPackages.{gcc,binutils} --run "riscv32-none-elf-gcc -O2 -S /tmp/tag.c -o /tmp/tag.s && cat /tmp/tag.s"
warning: error: unable to download 'https://cache.nixos.org/nix-cache-info': Timeout was reached (28) Resolving timed out after 15000 milliseconds; retrying in 309 ms
warning: error: unable to download 'https://cache.nixos.org/nix-cache-info': Timeout was reached (28) Resolving timed out after 15000 milliseconds; retrying in 685 ms
warning: error: unable to download 'https://cache.nixos.org/nix-cache-info': Timeout was reached (28) Resolving timed out after 15000 milliseconds; retrying in 1074 ms
error: interrupted by the user
^C
! ! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-gcc -W -c -S -o /tmp/tag.s -O1   /tmp/tag.c
! cat /tmp/tag.s 
 .file "tag.c"
 .option nopic
 .attribute arch, "rv32i2p0_m2p0_c2p0"
 .attribute unaligned_access, 0
 .attribute stack_align, 16
 .text
 .align 1
 .globl doop
 .type doop, @function
doop:
 mv a5,a0
 li a4,1
 beq a0,a4,.L2
 li a4,2
 addi a0,a1,3
 beq a5,a4,.L1
 li a0,-1
 beq a5,zero,.L6
.L1:
 ret
.L6:
 addi a0,a1,1
 ret
.L2:
 addi a0,a1,2
 j .L1
 .size doop, .-doop
 .ident "GCC: (crosstool-NG 1.26.0_rc1) 10.2.0"
%%file /tmp/memset.c
#include <stddef.h>
const int myflags[] = {1 << 0, 1 << 1, 1 << 2};

typedef struct {
    size_t ind;
    int *dst;
    size_t n;
} config;

void mymemset(config *cfg) {
    int color = myflags[cfg->ind];
    int *dst = cfg->dst;
    for(size_t i = 0; i < cfg->n; i++)
        dst[i] = color;
}


Overwriting /tmp/memset.c
! gcc -W -c -S -o /tmp/memset.s -O1   /tmp/memset.c
! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-gcc -W -c -S -o /tmp/memset.s -O1   /tmp/memset.c
! cat /tmp/memset.s
 .file "memset.c"
 .option nopic
 .attribute arch, "rv32i2p0_m2p0_c2p0"
 .attribute unaligned_access, 0
 .attribute stack_align, 16
 .text
 .align 1
 .globl mymemset
 .type mymemset, @function
mymemset:
 lw a5,0(a0)
 slli a4,a5,2
 lui a5,%hi(.LANCHOR0)
 addi a5,a5,%lo(.LANCHOR0)
 add a5,a5,a4
 lw a2,0(a5)
 lw a5,4(a0)
 lw a4,8(a0)
 beq a4,zero,.L1
 li a4,0
.L3:
 sw a2,0(a5)
 addi a4,a4,1
 addi a5,a5,4
 lw a3,8(a0)
 bgtu a3,a4,.L3
.L1:
 ret
 .size mymemset, .-mymemset
 .globl myflags
 .section .rodata
 .align 2
 .set .LANCHOR0,. + 0
 .type myflags, @object
 .size myflags, 12
myflags:
 .word 1
 .word 2
 .word 4
 .ident "GCC: (crosstool-NG 1.26.0_rc1) 10.2.0"
%%file /tmp/mov42.s
.include "/tmp/knuckle.s"
    .text
    .globl  myfunc
kd_entry myfunc, "(= true true)"
    addi    sp, sp, -4       # make room on the stack
    li      t0, 42           # t0 ← 42
    sw      t0, 0(sp)        # [sp] = 42
kd_exit myfunc_end,  "(= (select ram32 sp) (_ bv42 32))"
    ret
Overwriting /tmp/mov42.s
! python3 -m kdrag.contrib.pcode --langid="RISCV:LE:32:default" --asm /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as /tmp/mov42.s
Processing /tmp/mov42.s with language ID RISCV:LE:32:default using assembler /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as
Constructing Trace Fragments...
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 0) PCODE IMARK ram[400000:4]
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 1) PCODE unique[580:4] = 0xfffffffc
Executing 0x400000/4: addi sp,sp,-0x4 at (4194304, 2) PCODE sp = sp + unique[580:4]
Executing 0x400004/4: li t0,0x2a at (4194308, 0) PCODE IMARK ram[400004:4]
Executing 0x400004/4: li t0,0x2a at (4194308, 1) PCODE unique[580:4] = 0x2a
Executing 0x400004/4: li t0,0x2a at (4194308, 2) PCODE t0 = unique[580:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 0) PCODE IMARK ram[400008:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 1) PCODE unique[600:4] = 0x0
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 2) PCODE unique[4180:4] = sp + unique[600:4]
Executing 0x400008/4: sw t0,0x0(sp) at (4194312, 3) PCODE *[ram]unique[4180:4] = t0
Executing SpecStmt: Exit(myfunc_end, 0x40000c, ram32[sp] == 42)
Checking verification conditions...
[✅] Exit(myfunc_end, 0x40000c, ram32[sp] == 42)
✅✅✅✅ All 1 verification conditions passed! ✅✅✅✅

ghost call

Figuring out how to access members of structs, offsets etc. Shouldn’t change state, can read it. Changed real state is thrown away (stored as var?). Changed ghost state is not. How to setup frame? More like a subroutine, since we odn’t setup a stack (?). In ghost code, read from named coroutine params. Or give fresh Know the abi somehow?

struct biz
get param(&biz) {
    return biz->a->b->c;
}

My dishonesty about ram32 ram64 rax bites now? (RAX reg) (RDI reg) %rdi == (select32 reg RDI) (store32 reg RDI 0x42) kd_reify "memstate_name" # freeze current state into ghost. kd_assign "memstate" "(store RAX )" kd_ghost_call “res_memstate” “myfun” “ “ kd_assign “foo” “res_memstate”

read write

I should unpack the state datatype. No reason to bring datatypes into it. Could use store_view instead of explciit write field? Jankier

read and write state variables.

x = {"foo" : 3}
{**x, "foo": 4}
{'foo': 4}
%%file /tmp/write_simple.s
    .option norvc
    .text
    .globl _start
_start:
    li   t0, 42          # load immediate 42 into t0
    li   t1, 1000        # load address 1000 into t1
    sw   t0, 0(t1)       # store t0 at memory address
    j    _start          # infinite loop

Overwriting /tmp/write_simple.s
! /home/philip/Downloads/lowrisc-toolchain-gcc-rv32imcb-x86_64-20250710-1/bin/riscv32-unknown-elf-as -o /tmp/write_simple.o   /tmp/write_simple.s && objdump -d /tmp/write_simple.o
/tmp/write_simple.o:     file format elf32-little

objdump: can't disassemble for architecture UNKNOWN!
import kdrag.smt as smt
import kdrag.contrib.pcode as pcode
import kdrag.contrib.pcode.asmspec as asmspec
spec = asmspec.AsmSpec()
ctx = ctx = pcode.BinaryContext("/tmp/write_simple.s", langid="RISCV:LE:32:default")
startaddr = ctx.loader.find_symbol("_start").rebased_addr
exitaddr = startaddr + 16

verilog / sail

Pcode is not that trustworthy. Vet against other semantics. (And why not just use those semantics instead ?)

https://docs.pydrofoil.org/en/latest/scripting-api.html

rpython https://mssun.github.io/rpython-by-example/ So rpython itself is a bit like cython? But not exporting python libs. Using rpython on it’s own like this is perhaps odd

smtlib interpreter in rpython? Lean expr interpreter?

! nix build github:rpypkgs/rpypkgs#pydrofoil-riscv
34;1m1/18/19 built, 159 copied (1102.1/1102.7 MiB), 270.7 MiB DL] building pydrofoilm
! nix run github:rpypkgs/rpypkgs#pydrofoil-riscv -- --version
pydrofoil-riscv 0.0.1-alpha0 (Sail model version: 0.5-273-g87f8bb34d6)
%%file /tmp/hello.py

def entry_point(argv):
    print "Hello, World!"
    return 0

# The target function is the main function of a RPython program. It takes
# command line arguments as inputs and return an entry point function.
def target(*args):
    return entry_point

python properties

Inline asm Reg names

Also memory locations?

import z3
z3.parse_smt2_string(r"""
(declare-const %rdi (_ BitVec 64))
(assert (= %rdi (_ bv0 64)))
""")

[%rdi = 0]

%%file /tmp/hello.c
#include <stddef.h>
__asm__(".include \"knuckle.S\"");
__asm__("kd_prelude \"(declare-const x (_ BitVec 64))\"");
int foo(int x){
    __asm__("kd_entry myentry \"true\"");
    __asm__("kd_assign x %0" : : "r"(x));

    int y = x + 42;
    __asm__("kd_exit myentry \"(assert (= %0 (bvadd x (_ bv42 64)))\"" : : "r"(y));
    return y;
}

Overwriting /tmp/hello.c
! gcc -O1 -S -c /tmp/hello.c -o /tmp/hello.S && cat /tmp/hello.S
 .file "hello.c"
 .text
 .globl foo
 .type foo, @function
foo:
.LFB0:
 .cfi_startproc
 endbr64
#APP
# 3 "/tmp/hello.c" 1
 kd_prelude "(assert (= %edi 42))"
# 0 "" 2
#NO_APP
 ret
 .cfi_endproc
.LFE0:
 .size foo, .-foo
 .ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
 .section .note.GNU-stack,"",@progbits
 .section .note.gnu.property,"a"
 .align 8
 .long 1f - 0f
 .long 4f - 1f
 .long 5
0:
 .string "GNU"
1:
 .align 8
 .long 0xc0000002
 .long 3f - 2f
2:
 .long 0x3
3:
 .align 8
4:

CN

https://github.com/rems-project/cn https://rems-project.github.io/cn-tutorial/getting-started/tutorials/welcome/ https://www.galois.com/articles/escaping-isla-nublar-coming-around-to-llms-for-formal-methods



cfg reconstruction

Takes VCs

Any unreached specstmt, warn reachable_from

Join specstmts

Print CFG as ascii? Output it?


reachable = set().union(*[vc.trace for vc in vcs])

Hyperproperties / Relational

Synchro points?

entry exit labels pairs. kd_sync

kd_rel entry exit property kd_rel entry_exit property

kd_channels monotonic channels?

Finely interleaved product programs

A syncrhonzination / scheudling program? ram_2

Multiram executor? Channels

Same program twice is easy to synchrnoize. High low variables

havoc statement kd_havoc_ghost ghost_var kd_havoc_reg reg kd_havoc_mem memexpr
interrupt havocs touched stuff every cycle. So we do want to enable havor

https://docs.certora.com/en/latest/docs/cvl/statements.html

kd_join collect up all trace fragements with common start and ends. Maybe I should anyhow?

Useful for the using assembly as spec idea.

trace fragment bundle. Yikes

thread_id as a way of knowing which program you’re in?

Unicorn

Running stuff in pcode2c was clutch. What about just a nice running environemnt that ingests asmspec. Unicorn, gdb, qemu. Something.

chat missing vex optimziationd via z3

p4-=

Concerns - user to

New Sleigh

https://github.com/nneonneo/ghidra-wasm-plugin/tree/master

https://spinsel.dev/2020/06/17/ghidra-brainfuck-processor-1.html

3 roads to go down: https://github.com/rbran/sleigh-rs/tree/main try to reuse this sleigh parser of nebulous quality, try to hack onto ghidra’s sleigh parser https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Features/Decompiler/src/decompile/cpp/slghparse.y , or try to rebuild the grammar in a custom parser (edited) Ok actually a fourth is to consume the compiled .sla files. I don’t even really know how their format works Why? Well, because using ghidra off the shelf only lets you turn concrete bits into pcode (edited) But the Sleigh format is kind of more like the analog of sail. You could use it to compile or assemble forward (which ghidra does use it to generate assemblers but I haven’t seen that rewrapped to be usable anywhere except in the gui) https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java

https://github.com/CTSRD-CHERI/ghidra cheri modelled in ghidra? https://github.com/Cherified/cheriot-abstract-spec https://github.com/Cherified/cheriot-program-logic https://github.com/CTSRD-CHERI/ghidra/blob/morello/main/Ghidra/Processors/AARCH64/data/languages/AARCH64_MorelloInstructions.sinc

Hmm generate sleigh from yosys akin to how C backend works maybe?

! mkdir /tmp/Hack || mkdir /tmp/Hack/data
mkdir: cannot create directory ‘/tmp/Hack’: File exists
mkdir: cannot create directory ‘/tmp/Hack/data’: File exists
%%file /tmp/Hack/Module.manifest
<module>
  <name>Hack</name>
  <description>nand2tetris Hack CPU</description>
  <version>1.0</version>
</module>

Overwriting /tmp/Hack/Module.manifest
%%file /tmp/Hack/data/hack.pspec
<?xml version="1.0" encoding="UTF-8"?>
<processor_spec>
  <programcounter register="PC"/>
  <default_symbols>
    <function name="entry" entry="true" address="0x0000"/>
  </default_symbols>
</processor_spec>
Writing /tmp/Hack/data/hack.pspec
%%file /tmp/Hack/data/hack.ldefs
<?xml version="1.0" encoding="UTF-8"?>
<language_definitions>
  <language processor="Hack" endian="big" size="16" variant="default"
            version="1" slafile="hack.sla" processorspec="hack.pspec"
            description="Hack (nand2tetris) 16-bit"/>
  <compiler name="default" spec="hack.cspec" id="default"/>
</language_definitions>
Writing /tmp/Hack/data/hack.ldefs

%%file /tmp/Hack/data/hack.slaspec
define endian=big;
define alignment=2;

define space ram     type=ram_space   size=2  default;
define space register type=register_space size=1;

define register offset=0x00 size=2  [PC D A];
define token instr(16)
  b15 = (15,15)             # 0 = A-instr, 1 = C-instr
  cxx = (13,14)             # the conventional "11" in Hack C-instr
  a   = (12,12)
  c1  = (11,11)
  c2  = (10,10)
  c3  = (9,9)
  c4  = (8,8)
  c5  = (7,7)
  c6  = (6,6)
  d1  = (5,5)               # A dest
  d2  = (4,4)               # D dest
  d3  = (3,3)               # M dest
  j1  = (2,2)
  j2  = (1,1)
  j3  = (0,0)
  aimm = (0,14)             # 15-bit literal for A-instruction
;
#OP1: "#"aimm    is b15=0; aimm   { tmp:2 = aimm; export tmp; }
:a_inst aimm is b15=0 & aimm {
  A = aimm;          # zero-extend 15-bit literal
  PC = PC + 2;
}
Overwriting /tmp/Hack/data/hack.slaspec
! /home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/support/sleigh /tmp/Hack/data/hack.slaspec
openjdk version "21.0.8" 2025-07-15
OpenJDK Runtime Environment (build 21.0.8+9-Ubuntu-0ubuntu124.04.1)
OpenJDK 64-Bit Server VM (build 21.0.8+9-Ubuntu-0ubuntu124.04.1, mixed mode)
INFO  Using log config file: jar:file:/home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/philip/.config/ghidra/ghidra_11.4.2_PUBLIC/application.log (LoggingInitialization)  
import kdrag.contrib.pcode as pcode
import pypcode
lang = pypcode.Arch("Hack", "/tmp/Hack/data/hack.ldefs").languages[0]
ctx = pypcode.Context(lang)
tx = ctx.disassemble(bytes.fromhex("1f"))
print(tx.instructions[0].mnem)
print(pcode.pretty_insn(tx.instructions[0]))
tx = ctx.translate(bytes.fromhex("1f"))
print(tx.ops)
pcode.pretty_op(tx.ops[1])
tx.ops[1].inputs[0].size
a_inst
0x0/2: a_inst 0x1f00
[IMARK ram[0:2], A = 0x1f00, PC = PC + 0x2]





2

wasm

https://github.com/nneonneo/ghidra-wasm-plugin/releases/tag/v2.3.2

! /home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/support/sleigh -n /home/philip/Downloads/ghidra_11.4.1_PUBLIC_20250808_ghidra-wasm-plugin/ghidra-wasm-plugin/data/languages/WebAssembly.slaspec
openjdk version "21.0.8" 2025-07-15
OpenJDK Runtime Environment (build 21.0.8+9-Ubuntu-0ubuntu124.04.1)
OpenJDK 64-Bit Server VM (build 21.0.8+9-Ubuntu-0ubuntu124.04.1, mixed mode)
INFO  Using log config file: jar:file:/home/philip/Downloads/ghidra_11.4.2_PUBLIC_20250826/ghidra_11.4.2_PUBLIC/Ghidra/Framework/Generic/lib/Generic.jar!/generic.log4j.xml (LoggingInitialization)  
INFO  Using log file: /home/philip/.config/ghidra/ghidra_11.4.2_PUBLIC/application.log (LoggingInitialization)  
WARN  NOP detected at Leb128_opc2_1b.sinc:1 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:2 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:3 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:4 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:5 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:1 (SleighCompile)  
WARN  NOP detected at Leb128_opc2_1b.sinc:2 (SleighCompile)   ...
WARN  Leb128_opc2_2b.sinc:1: Unreferenced table: 'opc2_238' (SleighCompile)  

folder = "/home/philip/Downloads/ghidra_11.4.1_PUBLIC_20250808_ghidra-wasm-plugin/ghidra-wasm-plugin/data/languages"
import kdrag.contrib.pcode as pcode
import pypcode
lang = pypcode.Arch("WebAssembly", folder + "/WebAssembly.ldefs").languages[0]
ctx = pypcode.Context(lang)
tx = ctx.disassemble(bytes.fromhex("6a"))
print(tx.instructions[0].mnem)
print(pcode.pretty_insn(tx.instructions[0]))
tx = ctx.translate(bytes.fromhex("6a"))
print(tx.ops)
pcode.pretty_op(tx.ops[1])
tx.ops[1].inputs[0].size
i32.add
0x0/1: i32.add 
[IMARK ram[0:1], register[4fffffe0:4] = register[4fffffe0:4] + register[4ffffff0:4]]





4
pypcode.Arch("WebAssembly", folder + "/WebAssembly.ldefs").languages
[<pypcode.ArchLanguage at 0x7f075820a600>]
from wasmtime import wat2wasm

prog = wat2wasm("""
(module
  (func (export "add") (param i32 i32) (result i32)
    local.get 0
    local.get 1
    i32.add))
""")
tx = ctx.disassemble(bytes(prog))
tx.instructions
"""
for i in range(0x28):
  try:
    print(hex(i), ctx.disassemble(bytes(prog[i:])).instructions)
  except Exception as e:
    pass
"""
ctx.disassemble(bytes(prog[0x23:]))

---------------------------------------------------------------------------

BadDataError                              Traceback (most recent call last)

Cell In[2], line 19
     11 tx.instructions
     12 """
     13 for i in range(0x28):
     14   try:
   (...)     17     pass
     18 """
---> 19 ctx.disassemble(bytes(prog[0x23:]))


BadDataError: r0x00000000: Unable to resolve constructor
ctx.translate(bytes.fromhex("2000")).ops
---------------------------------------------------------------------------

BadDataError                              Traceback (most recent call last)

Cell In[9], line 1
----> 1 ctx.translate(bytes.fromhex("2000")).ops


BadDataError: r0x00000000: Unable to resolve constructor
%%file /tmp/sum_i32.wat
(module
  (func (export "add") (param i32 i32) (result i32)
    local.get 0
    local.get 1
    i32.add))

Writing /tmp/sum_i32.wat
! wat2wasm /tmp/sum_i32.wat -o /tmp/sum32.wasm
! wasm-objdump -d /tmp/sum32.wasm
sum32.wasm: file format wasm 0x1

Code Disassembly:

000022 func[0] <add>:
 000023: 20 00                      | local.get 0
 000025: 20 01                      | local.get 1
 000027: 6a                         | i32.add
 000028: 0b                         | end

sail

Give sail another shake. Comparing and constrasting to sleigh is kind of interesting.


vec

https://en.algorithmica.org/hpc/simd/

%%file /tmp/sum.c
//#pragma GCC target("avx2")
const int n = 1e5;
int a[n], s = 0;

int main() {
    for (int t = 0; t < 100000; t++)
        for (int i = 0; i < n; i++)
            s += a[i];

    return 0;
}
Overwriting /tmp/sum.c
! g++ -O3 /tmp/sum.c -o /tmp/sum && time /tmp/sum
real 0m0.526s
user 0m0.522s
sys 0m0.001s
! g++ -march=native -O3 /tmp/sum.c -o /tmp/sum && time /tmp/sum
real 0m0.137s
user 0m0.136s
sys 0m0.000s
! python3 -m pypcode  
usage: pypcode [-h] [-l] [-o OFFSET] [-s LENGTH] [-i MAX_INSTRUCTIONS] [-b]
               langid binary [base]
pypcode: error: the following arguments are required: langid, binary
! g++ -O3 /tmp/sum.c -o /tmp/sum.S -S -masm=intel && head -40 /tmp/sum.S
 .file "sum.c"
 .intel_syntax noprefix
 .text
 .section .text.startup,"ax",@progbits
 .p2align 4
 .globl main
 .type main, @function
main:
.LFB0:
 .cfi_startproc
 endbr64
 lea rdx, a[rip]
 movd xmm0, DWORD PTR s[rip]
 lea rcx, 400000[rdx]
.L2:
 movdqa xmm1, XMMWORD PTR [rdx]
 mov eax, 100000
 .p2align 4,,10
 .p2align 3
.L3:
 paddd xmm0, xmm1
 paddd xmm0, xmm1
 sub eax, 2
 jne .L3
 add rdx, 16
 cmp rcx, rdx
 jne .L2
 movdqa xmm1, xmm0
 xor eax, eax
 psrldq xmm1, 8
 paddd xmm0, xmm1
 movdqa xmm1, xmm0
 psrldq xmm1, 4
 paddd xmm0, xmm1
 movd DWORD PTR s[rip], xmm0
 ret
 .cfi_endproc
.LFE0:
 .size main, .-main
 .globl s
! g++ -march=native -O3 /tmp/sum.c -o /tmp/sum.S -S -masm=intel && head -60 /tmp/sum.S
 .file "sum.c"
 .intel_syntax noprefix
 .text
 .section .text.startup,"ax",@progbits
 .p2align 4
 .globl main
 .type main, @function
main:
.LFB0:
 .cfi_startproc
 endbr64
 lea rdx, a[rip]
 vmovd xmm0, DWORD PTR s[rip]
 lea rcx, 400000[rdx]
.L2:
 vmovdqa32 zmm1, ZMMWORD PTR [rdx]
 mov eax, 100000
 .p2align 4
 .p2align 3
.L3:
 vpaddd zmm0, zmm0, zmm1
 sub eax, 2
 vpaddd zmm0, zmm0, zmm1
 jne .L3
 add rdx, 64
 cmp rcx, rdx
 jne .L2
 vmovdqa ymm1, ymm0
 vextracti32x8 ymm0, zmm0, 0x1
 xor eax, eax
 vpaddd ymm1, ymm1, ymm0
 vmovdqa xmm0, xmm1
 vextracti128 xmm1, ymm1, 0x1
 vpaddd xmm0, xmm0, xmm1
 vpsrldq xmm1, xmm0, 8
 vpaddd xmm0, xmm0, xmm1
 vpsrldq xmm1, xmm0, 4
 vpaddd xmm0, xmm0, xmm1
 vmovd DWORD PTR s[rip], xmm0
 vzeroupper
 ret
 .cfi_endproc
.LFE0:
 .size main, .-main
 .globl s
 .bss
 .align 4
 .type s, @object
 .size s, 4
s:
 .zero 4
 .globl a
 .align 64
 .type a, @object
 .size a, 400000
a:
 .zero 400000
 .ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
 .section .note.GNU-stack,"",@progbits
 .section .note.gnu.property,"a"
%%file /tmp/support.cpp
#include <iostream>
using namespace std;

int main() {
    cout << __builtin_cpu_supports("sse") << endl;
    cout << __builtin_cpu_supports("sse2") << endl;
    cout << __builtin_cpu_supports("avx") << endl;
    cout << __builtin_cpu_supports("avx2") << endl;
    cout << __builtin_cpu_supports("avx512f") << endl;

    return 0;
}
Writing /tmp/support.cpp
! g++ -O3 /tmp/support.cpp -o /tmp/support && /tmp/support
8
16
512
1024
32768
! cat /proc/cpuinfo