/
autoindex.py
58 lines (50 loc) · 1.61 KB
/
autoindex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
import glob
import re
lgs=open("locallanguages.txt").read().split('\n')
terms=open("localsubjectterms.txt").read().split('\n')[::-1]#reverse to avoid double indexing
print("found %i language names for autoindexing" % len(lgs))
print("found %i subject terms for autoindexing" % len(terms))
files = glob.glob('chapters/*tex')
SUBJECTP = re.compile
for f in files:
print("indexing %s" % f)
#strip preamble of edited volume chapters to avoid indexing there
a = open(f).read().split(r"\begin{document}")
content = a[-1]
preamble = ''
joiner = ''
if len(a) == 2:
preamble = a[0]
joiner = r"\begin{document}"
lines = content.split('\n')
excluders = ("section","caption","chapter","addplot")
newlines = []
for line in lines:
included = True
for excluder in excluders:
if "%s{"%excluder in line:
included = False
print line
if included:
for lg in lgs:
lg = lg.strip()
if lg == '':
continue
line = re.sub('(?<!ili{)%s(?![\w}])'%lg, '\ili{%s}'%lg, line)
for term in terms:
term = term.strip()
if term == '':
continue
line = re.sub('(?<!isi{|...[A-Za-z])%s(?![-_\w}])'%term, '\isi{%s}'%term, line)
newlines.append(line)
content = "\n".join(newlines)
nlg = len(re.findall('\\ili{',content))
nt = len(re.findall('\\isi{',content))
outfile = open(f.replace('chapters','indexed'), 'w')
outfile.write(preamble)
outfile.write(joiner)
outfile.write(content)
outfile.close()
print(" %s now contains %i indexed languages and %i indexed subject terms"%(f.split('/')[-1],nlg,nt))
print("indexed files are in the folder 'indexed'")