-
Notifications
You must be signed in to change notification settings - Fork 0
/
autoindex.py
124 lines (112 loc) · 4.22 KB
/
autoindex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Find occurrences of the terms listed in locallanguages.txt and localsubjectterms.txt
in the chapters/*tex files and write copies with indexing markup added to indexed/.
"""
import glob
import re
import os
# No indexing takes place in lines that contain one of these keywords directly
# followed by "{". Matching is by substring, so "section{" also covers
# "subsection{".
EXCLUDERS = ("section", "caption", "chapter", "addplot")

# Marker that separates the preamble of an edited-volume chapter from its body.
DOCUMENT_START = r"\begin{document}"

# (regex template, markup template) pairs; "%s" is filled with one list entry.
# Language names: skip occurrences already wrapped in \ili{...} and occurrences
# followed by a word character or "}".
LANGUAGE_RULE = (r'(?<!ili\{)%s(?![\w}])', r'\ili{%s}')
# Subject terms: skip occurrences already wrapped in \isi{...}, occurrences
# directly preceded by an ASCII letter, and occurrences followed by "-", "_",
# a word character or "}". The two conditions on the left are separate
# look-behinds because Python look-behinds must be fixed-width.
SUBJECT_RULE = (r'(?<!isi\{)(?<![A-Za-z])%s(?![-_\w}])', r'\isi{%s}')


def read_entries(path):
    """Return the stripped, non-blank lines of the term list at *path*."""
    with open(path) as handle:
        stripped = (entry.strip() for entry in handle.read().split('\n'))
        return [entry for entry in stripped if entry]


def build_rules(entries, rule):
    """Compile one (pattern, markup) pair per non-blank entry.

    *entries* is an iterable of strings; surrounding whitespace is stripped
    and blank entries are skipped. *rule* is a (regex template, markup
    template) pair such as LANGUAGE_RULE. Entries are matched literally:
    they are passed through re.escape before being placed in the pattern.
    """
    template, markup = rule
    rules = []
    for entry in entries:
        entry = entry.strip()
        if entry == '':
            continue
        rules.append((re.compile(template % re.escape(entry)), markup % entry))
    return rules


def apply_rules(line, rules):
    """Return *line* with every rule in *rules* applied in order."""
    for pattern, markup in rules:
        # A callable replacement inserts the markup verbatim; a plain string
        # would be parsed as a template, where "\i" is an invalid escape.
        line = pattern.sub(lambda _match, markup=markup: markup, line)
    return line


def index_line(line, language_rules, subject_rules, excluders=EXCLUDERS):
    """Return *line* with language and subject index markup added.

    Lines containing any of *excluders* followed by "{" are reported on
    stdout and returned unchanged. Language rules are applied before
    subject rules.
    """
    hits = [excluder for excluder in excluders if "%s{" % excluder in line]
    if hits:
        for excluder in hits:
            print("Found excluder keyword %s:%s" % (excluder, line))
        return line
    return apply_rules(apply_rules(line, language_rules), subject_rules)


def index_document(text, language_rules, subject_rules, excluders=EXCLUDERS):
    """Index the body of a chapter and return (text, languages, terms).

    Everything up to and including the first DOCUMENT_START marker is kept
    untouched so that no indexing happens in the preamble; a text without
    the marker is indexed as a whole. The two counts are the numbers of
    language and subject index commands found in the indexed body.
    """
    preamble, joiner, body = text.partition(DOCUMENT_START)
    if not joiner:
        # No marker: partition put the whole text in the first slot.
        preamble, body = '', text
    body = "\n".join(
        index_line(line, language_rules, subject_rules, excluders)
        for line in body.split('\n')
    )
    return preamble + joiner + body, body.count(r'\ili{'), body.count(r'\isi{')


def ensure_directory(path):
    """Create directory *path* unless it already exists."""
    try:
        os.mkdir(path)
    except OSError:
        # Only an already-existing directory is acceptable; anything else
        # (permissions, a plain file of that name) must surface.
        if not os.path.isdir(path):
            raise


def main():
    """Index every chapters/*tex file and write the result to indexed/."""
    lgs = read_entries("locallanguages.txt")
    # The subject terms are processed in reverse file order; the original
    # script notes this is done "to avoid double indexing".
    terms = read_entries("localsubjectterms.txt")[::-1]
    print("found %i language names for autoindexing" % len(lgs))
    print("found %i subject terms for autoindexing" % len(terms))
    # Compile once up front instead of rebuilding every pattern for each line.
    language_rules = build_rules(lgs, LANGUAGE_RULE)
    subject_rules = build_rules(terms, SUBJECT_RULE)
    ensure_directory('indexed')
    for f in glob.glob('chapters/*tex'):
        print("indexing %s" % f)
        with open(f) as infile:
            text = infile.read()
        indexed, numberoflanguages, numberofterms = index_document(
            text, language_rules, subject_rules)
        # Swap only the directory; a file name containing "chapters" is kept.
        name = os.path.basename(f)
        with open(os.path.join('indexed', name), 'w') as outfile:
            outfile.write(indexed)
        print(" %s now contains %i indexed languages and %i indexed subject terms"
              % (name, numberoflanguages, numberofterms))
    print("indexed files are in the folder 'indexed/'")


if __name__ == "__main__":
    main()