-
Notifications
You must be signed in to change notification settings - Fork 6
/
basefind2.py
executable file
·98 lines (79 loc) · 3.18 KB
/
basefind2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import re, struct, argparse, array
def get_pointers(file):
    """Return the sorted, de-duplicated 32-bit words found in *file*.

    The buffer is read as little-endian unsigned 32-bit values at every
    offset that is a multiple of 4. Trailing bytes that do not form a
    complete word are ignored.

    Args:
        file: bytes-like object holding the binary image.

    Returns:
        A list of distinct integers in ascending order.
    """
    read_word = struct.Struct("<L").unpack_from
    # len(file) - 3 is the first offset at which a full 4-byte word no longer
    # fits, so the last aligned word of the buffer is included.
    words = {read_word(file, offset)[0] for offset in range(0, len(file) - 3, 4)}
    return sorted(words)
def get_strings(file, min_length):
    """Return the start offsets of printable runs in *file*.

    A run is at least *min_length* consecutive bytes drawn from the
    characters space through tilde, plus tab, carriage return and newline.
    Offsets are returned in the order the runs occur in the buffer.
    """
    printable_run = re.compile(b"[ -~\\t\\r\\n]{%d,}" % min_length)
    return [match.start() for match in printable_run.finditer(file)]
def get_differences(ptrs):
    """Return the gaps between consecutive values of *ptrs* as an array("L").

    The first gap is measured from 0, so the result has one entry per input
    value. The array holds unsigned values, so the input is expected to be
    in ascending order.
    """
    def gaps():
        previous = 0
        for value in ptrs:
            yield value - previous
            previous = value

    return array.array("L", gaps())
# strs and ptrs are ordered, so we can make ordered search
# only counts every samplerate elem, adjusts at return
def count_str(ptrs, strs, offset, samplerate):
    """Estimate how many string offsets are referenced by a pointer.

    Every *samplerate*-th entry of *strs* is rebased by *offset*, encoded as
    a 4-byte little-endian word and searched for in *ptrs*. Because both
    inputs are expected to be in ascending order, each search resumes from
    the position of the previous hit.

    Args:
        ptrs: bytes-like object searched with ``find``; assumed to be a
            packed sequence of 4-byte words.
        strs: sequence of integer string offsets, ascending.
        offset: candidate base address added to each string offset.
        samplerate: step used when walking *strs*.

    Returns:
        The number of sampled hits multiplied by *samplerate*, i.e. an
        extrapolated hit count rather than an exact one.
    """
    c = 0
    lastptr = 0
    for si in range(0, len(strs), samplerate):
        target = strs[si] + offset
        # A rebased address outside the 32-bit range cannot be stored as a
        # pointer, and struct.pack("<L", ...) would raise on it.
        if not 0 <= target <= 0xFFFFFFFF:
            continue
        needle = struct.pack("<L", target)
        ptr = ptrs.find(needle, lastptr)
        # bytes.find() may hit a position that straddles two words; only a
        # match starting on a 4-byte boundary is a real pointer value.
        while ptr != -1 and ptr % 4:
            ptr = ptrs.find(needle, ptr + 1)
        if ptr == -1:
            continue
        lastptr = ptr
        c += 1
    return c * samplerate
def _find_aligned(haystack, needle, itemsize):
    """Return the item index of the first item-aligned match, or -1.

    bytes.find() works on raw bytes and can report a hit that begins in the
    middle of an array item. Such a hit is not a real run of equal items, so
    the search continues until a hit on an item boundary is found.
    """
    pos = haystack.find(needle)
    while pos != -1 and pos % itemsize:
        pos = haystack.find(needle, pos + 1)
    return pos if pos == -1 else pos // itemsize


parser = argparse.ArgumentParser(description=
"""Scans a flat 32-bit binary and attempt to determine the base address.
Finds DIFFLENGTH part of the subsequent string differences inside the subsequent pointer differences to get base candidates.
It doesn't need to brute-force all of the base addresses, so it's much faster.
Based on the excellent basefind.py by mncoppola and the excellent rbasefind.""")
parser.add_argument("-sl", metavar="STRLENGTH", type=int, help="minimum length of the strings (default = 10)", default=10)
parser.add_argument("-dl", metavar="DIFFLENGTH", type=int, help="length of the differences (default = 10)", default=10)
parser.add_argument("-s", metavar="SAMPLERATE", type=int, help="samplerate for the validation (default = 20)", default=20)
parser.add_argument("file", help="file to scan")
args = parser.parse_args()

str_len = args.sl
diff_len = args.dl
samplerate = args.s
# Non-positive values make the string regex, the difference window or the
# sampling step meaningless (a step of 0 would crash inside range()).
for flag, value in (("-sl", str_len), ("-dl", diff_len), ("-s", samplerate)):
    if value < 1:
        parser.error(f"{flag} must be a positive integer")

with open(args.file, "rb") as f:
    file = f.read()

print(f"scanning binary for strings len>={str_len}...")
strs = get_strings(file, str_len)
print(f"total strings found: {len(strs)}")

print("scanning binary for pointers...")
ptrs = get_pointers(file)
print(f"total pointers found: {len(ptrs)}")

str_diffs = get_differences(strs)
ptr_diffs = get_differences(ptrs)

# convert to bytes to use the python stringlib's find (mix of boyer-moore and horspool)
# https://github.com/python/cpython/blob/main/Objects/stringlib/fastsearch.h
# count_str() looks for 4-byte little-endian words, so the pointers are packed
# exactly that way; array("L") items are native-endian and 8 bytes wide on
# LP64 platforms, which would not line up with those 4-byte patterns.
ptrs_b = struct.pack(f"<{len(ptrs)}L", *ptrs)
ptr_diffs_b = ptr_diffs.tobytes()

found = set()
print(f"finding differences of length: {diff_len}")
# + 1 so that the final full window of differences is examined as well
for si in range(len(str_diffs) - diff_len + 1):
    print(si, end = "\r")
    str_b = str_diffs[si : si + diff_len].tobytes()
    # str_diffs and ptr_diffs share the same array layout, so an item-aligned
    # byte match is a run of diff_len identical consecutive differences.
    pi = _find_aligned(ptr_diffs_b, str_b, ptr_diffs.itemsize)
    if pi == -1:
        continue
    offset = ptrs[pi] - strs[si]
    if offset < 0 or offset in found:
        continue
    print(f"possible offset 0x{offset:x} ...", end = "\r")
    percent = count_str(ptrs_b, strs, offset, samplerate) / len(strs) * 100
    print(f"possible offset 0x{offset:x} {percent:05.2f}%")
    found.add(offset)