/
gen.py
72 lines (65 loc) · 2.17 KB
/
gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from collections import Counter
import json, re, sys
LERFU = r"[aeioubcdfg'klmnprstvxyz.]"
VALSI = rf"{LERFU}+"
JUFRA = rf"(\s*{VALSI})+"
DF = re.compile(rf"df-({VALSI})(-{VALSI})*")
with open("gen-valsi-class.json") as handle: vc = json.load(handle)
with open("mm/jbobau.mm") as handle: db = handle.read().split()
clss = Counter(vc.values())
def apos(s): return s.replace("h", "'")
dfs = {}
fs = {}
i = 0
while i < len(db):
if db[i].startswith("df-") and db[i + 1] == "$a":
k = apos(db[i].split("-")[1])
i += 3
j = i
while not db[i].startswith("$"): i += 1
dfs[k] = db[j:i]
elif db[i] == "$f":
fs[apos(db[i + 2])] = db[i + 1]
i += 2
i += 1
def crack(ts):
s = 0
for j, t in enumerate(ts):
if t in ("ga", "ganai", "ge", "go", "gonai"): s += 1
elif t in ("gi", "ginai"): s -= 1
if s == 0:
# Not actually cracked!
if j == 0: return ts, ts
else: return ts[1:j], ts[j + 1:]
raise ValueError("unbalanced GA/GI brackets")
dfc = Counter(vc[k] for k in dfs)
dff = Counter(vc[k] for k in fs)
count = sum(dfc.values()) + sum(dff.values())
cmd = sys.argv[-1]
if cmd == "coverage":
print("Grammatical class | Metamath class | # of formal definitions")
print("---|---|---")
lines = [f"{cls} | metavariable | {dff[cls]} / {clss[cls]}" for cls in dff]
lines.extend(f"{cls} | constant | {dfc[cls]} / {clss[cls]}" for cls in dfc)
for line in sorted(lines): print(line)
total = len(vc)
print("total", "| - |", count, "/", total,
"(%0.2f%%)" % (count * 100 / total))
elif cmd == "metavars":
print("cmavo | Metamath type")
print("---|---")
for k, v in sorted(fs.items()): print(f"{v} | {k}")
elif cmd == "definitions":
d = {k: " ".join(crack(v)[1]) for k, v in dfs.items()}
json.dump(d, sys.stdout)
elif cmd == "dependencies":
mvs = set(fs)
print("digraph {")
for v1, v in dfs.items():
for v2 in set(crack(v)[1]) - mvs:
if v2.startswith("."): v2 = v2[1:]
if v2 in ("ganai", "gi", v1): continue
print(f'"{v1}" -> "{v2}";')
print("}")
else:
raise ValueError("unknown subcommand")