# textParsingCode.py
#written by Viktor Zenkov in 2018
from pyparsing import *
import numpy as np
import pprint
import re
import time
import os
# Only spaces and tabs are skipped as whitespace between tokens. Newlines are
# left out so that the grammar below can match line ends explicitly.
# This has to run before the grammar elements are constructed so that they
# pick up the setting.
blanks = ' \t'
ParserElement.setDefaultWhitespaceChars(blanks)
#version with simplified line code giving text
# Text collected by addLine: one entry per parsed line that had hex commands.
mainStrings = []


def addLine(tokens):
    """Parse action: keep the text part of a line that has hex commands.

    tokens: the parse results for one line. Its ``hexcommands`` attribute is
        compared against the empty string to decide whether the line carried
        any hex commands (pyparsing yields '' for a named result that did not
        match), and its ``textcommands`` attribute supplies the text to keep.

    When hex commands are present, the text followed by a newline is appended
    to the module-level ``mainStrings`` list. Lines without hex commands are
    ignored. Returns None.
    """
    if tokens.hexcommands != '':
        mainStrings.append(tokens.textcommands + '\n')
# Each input line is a run of hexadecimal codes followed by the text of the
# command. That text sometimes begins with a lowercase word that looks like a
# hexadecimal number (such as "db"), so hexnumscap contains only capital
# letters. '?' is included too, so pairs containing question marks also count.
hexnumscap = '0123456789ABCDEF?'
# A hex pair is exactly two characters from hexnumscap, optionally followed by
# "+" (the forms "xx" and "xx+"), and it must end at a word boundary (WordEnd).
# Combine merges the pieces into a single token; the original author notes
# that the grammar does not work without it.
hexapair = Combine(Word(hexnumscap, exact=2) + Optional(Literal('+')) + WordEnd())
# One or more hex pairs, available in the parse results as "hexcommands".
hexcommands = OneOrMore(hexapair)('hexcommands')
# Any text up to the end of the line or a ";", available as "textcommands".
textcommands = CharsNotIn(';\n')('textcommands')
# A ";" starts a comment: the ";" itself is dropped and the rest of the line
# is available as "comment".
comment = Suppress(';') + restOfLine('comment')
# Earlier variant that also expected a leading lineheader element (lineheader
# is not defined in the code shown here); kept for reference:
#line = (lineheader + Optional(hexcommands) + Optional(textcommands) + Optional(comment) + LineEnd()).setParseAction(addLine)
# Every part of a line is optional; addLine runs as the parse action for each
# line that is matched.
line = (Optional(hexcommands) + Optional(textcommands) + Optional(comment) + LineEnd()).setParseAction(addLine)
# A file is any number of leading newlines followed by one or more lines.
entirefile = Optional(OneOrMore(Literal('\n'))) + OneOrMore(line)
#parse
# For every .asm file in f_path: feed the '.text' lines through the grammar
# (which fills mainStrings via addLine), normalise the collected text and
# write it to '<f_path>Output/<name>Text.txt'.
f_path = 'asmfiles'
out_dir = f_path + 'Output'
allFileNames = sorted(os.listdir(f_path + '/'))
asmFileNames = [i for i in allFileNames if i.endswith('.asm')]
# Compiled once for all files: matches any run of whitespace.
whitespace_run = re.compile(r'\s+')
# Make sure the output directory exists, so a missing directory cannot make
# the write fail after all the parsing work has been done.
os.makedirs(out_dir, exist_ok=True)
for f_name in asmFileNames:
    base_name = f_name[:-4]  # file name without the '.asm' suffix
    print(base_name)
    # addLine appends to the module-level mainStrings; start each file empty.
    mainStrings = []
    # Lines waiting to be parsed. Parsing in batches keeps each parseString
    # call bounded instead of parsing the whole file at once.
    chunk = []
    with open(f_path + '/' + f_name, encoding='latin-1') as f:
        time1 = time.time()
        # raw_line (not "line") so the grammar element named line is kept.
        for raw_line in f:
            if not raw_line.startswith('.text'):
                continue
            # Drop the first 14 characters of the line -- presumably the
            # '.text:XXXXXXXX' section/address prefix; confirm against the
            # input format.
            chunk.append(raw_line[14:])
            if len(chunk) > 1000:
                entirefile.parseString(''.join(chunk))
                chunk = []
    # Parse whatever is left over from the last, partial batch.
    if chunk:
        entirefile.parseString(''.join(chunk))
    time2 = time.time()
    # Collapse every whitespace run to one space. Each entry ends with a
    # newline, which becomes a trailing space here; [:-1] removes it before
    # the newline is put back.
    mainStrings = [whitespace_run.sub(' ', s)[:-1] + '\n' for s in mainStrings]
    # Entries that are nothing but a newline carry no text and are dropped.
    mainString = ''.join(s for s in mainStrings if s != '\n')
    # NOTE(review): the output is written with the platform default encoding
    # while the input is read as latin-1 -- confirm this is intended.
    with open(out_dir + '/' + base_name + 'Text.txt', 'w') as g:
        g.write(mainString)
    print (time2-time1)