# autoindex.py

"""
Find occurrences of terms listed in *txt files and add indexing markup in corresponding *tex files
"""

import glob
import re
import os

# No indexing takes place in lines that contain one of these keywords directly
# followed by "{". Matching is by substring, so "section" also matches
# "subsection".
EXCLUDERS = ("section", "caption", "chapter", "addplot")

# Marker separating a chapter's preamble from the body that gets indexed.
DOCUMENT_START = r"\begin{document}"


def read_term_list(path):
    """Return the stripped, non-empty lines of the term list at *path*."""
    with open(path) as handle:
        stripped = [entry.strip() for entry in handle.read().split('\n')]
    return [entry for entry in stripped if entry]


def compile_rules(lgs, terms):
    """Build the ordered (compiled pattern, markup) pairs used for indexing.

    Language rules come first, followed by subject-term rules, each in the
    order given. Entries are stripped and blank entries are skipped. Every
    entry is passed through re.escape so that regex metacharacters in a name
    (e.g. "C++") are matched literally.
    """
    rules = []
    for lg in lgs:
        lg = lg.strip()
        if not lg:
            continue
        # substitute "lg" with "\ili{lg}", unless it is already wrapped in
        # \ili{...} or is followed by a word character or "}"
        pattern = re.compile(r'(?<!ili\{)%s(?![\w}])' % re.escape(lg))
        rules.append((pattern, '\\ili{%s}' % lg))
    for term in terms:
        term = term.strip()
        if not term:
            continue
        # substitute "term" with "\isi{term}", unless it is already wrapped in
        # \isi{...}, is preceded by an ASCII letter, or is followed by "-",
        # "_", a word character or "}". Two separate look-behinds are used so
        # the letter check also applies near the start of a line.
        pattern = re.compile(
            r'(?<!isi\{)(?<![A-Za-z])%s(?![-_\w}])' % re.escape(term))
        rules.append((pattern, '\\isi{%s}' % term))
    return rules


def index_line(line, rules, excluders=EXCLUDERS):
    """Return *line* with indexing markup added according to *rules*.

    A line containing any "<excluder>{" substring is reported on stdout and
    returned unchanged.
    """
    hits = [keyword for keyword in excluders if "%s{" % keyword in line]
    for keyword in hits:
        print("Found excluder keyword %s:%s" % (keyword, line))
    if hits:
        return line
    for pattern, markup in rules:
        # A callable replacement is inserted verbatim. A plain string would be
        # parsed as a replacement template, where "\i" is an invalid escape.
        line = pattern.sub(lambda _match: markup, line)
    return line


def index_text(text, rules, excluders=EXCLUDERS):
    """Index the body of a chapter and return (new_text, n_languages, n_terms).

    Only the text after the last DOCUMENT_START marker is indexed; everything
    up to and including that marker is carried over untouched. If the marker
    is absent the whole text is indexed. The two counts are the numbers of
    "\\ili{" and "\\isi{" occurrences in the indexed body.
    """
    # rpartition yields ('', '', text) when the marker is missing
    preamble, joiner, body = text.rpartition(DOCUMENT_START)
    body = "\n".join(index_line(line, rules, excluders)
                     for line in body.split('\n'))
    return preamble + joiner + body, body.count('\\ili{'), body.count('\\isi{')


def index_file(path, rules, outdir='indexed'):
    """Index the file at *path* and write the result into *outdir*.

    The output keeps the input's base name. Returns the
    (n_languages, n_terms) counts reported by index_text.
    """
    with open(path) as infile:
        text = infile.read()
    indexed, numberoflanguages, numberofterms = index_text(text, rules)
    with open(os.path.join(outdir, os.path.basename(path)), 'w') as outfile:
        outfile.write(indexed)
    return numberoflanguages, numberofterms


def main():
    """Index every chapters/*tex file into the folder indexed/."""
    lgs = read_term_list("locallanguages.txt")
    # reverse to avoid double indexing
    terms = read_term_list("localsubjectterms.txt")[::-1]
    print("found %i language names for autoindexing" % len(lgs))
    print("found %i subject terms for autoindexing" % len(terms))

    rules = compile_rules(lgs, terms)

    # make sure directory indexed/ exists
    if not os.path.isdir('indexed'):
        os.mkdir('indexed')

    for f in glob.glob('chapters/*tex'):
        print("indexing %s" % f)
        numberoflanguages, numberofterms = index_file(f, rules)
        print(" %s now contains %i indexed languages and %i indexed subject terms"
              % (os.path.basename(f), numberoflanguages, numberofterms))
    print("indexed files are in the folder 'indexed/'")


if __name__ == "__main__":
    main()