-
Notifications
You must be signed in to change notification settings - Fork 0
/
readlines.py
47 lines (36 loc) · 1.09 KB
/
readlines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
# Rebuilds plain-text sentences from a one-token-per-line file and writes
# one sentence per output line.
#
# Input format, as consumed by this script: the token is the first
# whitespace-separated field of each line (any further columns are ignored),
# and a blank line separates sentences. Presumably this is a CoNLL-style
# corpus file -- confirm against the data.


def _drop_pending_space(sentence):
    """Return *sentence* without its single trailing space, if it has one.

    Ordinary words are stored with a trailing "pending" space. Punctuation
    that attaches to the left must remove that space, but must never remove
    a real character (e.g. the "." in 'hello.' before a closing quote).
    """
    return sentence[:-1] if sentence.endswith(" ") else sentence


def _restore_space(sentence):
    """Return *sentence* with a separating space re-added after a period.

    A period token is stored without a trailing space so that a sentence
    ending in "." has no trailing blank. If more text follows in the same
    sentence, the separator has to be put back.
    """
    return sentence + " " if sentence.endswith(".") else sentence


def detokenize(lines):
    """Join one-token-per-line input into a list of sentence strings.

    Args:
        lines: iterable of text lines (trailing newlines allowed). The token
            is the first whitespace-separated field of each line.

    Returns:
        A list of sentence strings. It always starts with one (possibly
        empty) sentence, and every blank or whitespace-only input line
        starts a new one, so consecutive blank lines yield empty strings.

    Spacing rules:
        * a token made only of periods attaches to the previous token and
          leaves no trailing space;
        * "," and ")" attach to the previous token and are followed by a
          space;
        * "(" and an opening quote attach to the following token;
        * a closing quote attaches to the previous token and is followed by
          a space;
        * every other token is followed by a space.
    """
    sentences = [""]
    open_quote = False

    for raw_line in lines:
        fields = raw_line.split()
        if not fields:
            # Blank or whitespace-only line: sentence boundary. Resetting the
            # quote state keeps one unbalanced quote from inverting the
            # open/close handling of every following sentence.
            sentences.append("")
            open_quote = False
            continue

        word = fields[0]
        current = sentences[-1]

        if not word.strip("."):
            # Pure period token ("." or "..."). Tokens that merely contain a
            # period (abbreviations, numbers) are handled as ordinary words.
            current = _drop_pending_space(current) + word
        elif word in (",", ")"):
            current = _drop_pending_space(current) + word + " "
        elif word == "(":
            current = _restore_space(current) + word
        elif word in ("'", '"'):
            if open_quote:
                # Closing quote: hugs the text on its left.
                open_quote = False
                current = _drop_pending_space(current) + word + " "
            else:
                # Opening quote: hugs the text on its right.
                open_quote = True
                current = _restore_space(current) + word
        else:
            current = _restore_space(current) + word + " "

        sentences[-1] = current

    return sentences


def main(input_path="ned.testa", output_path="lines_output.txt"):
    """Read the token file at *input_path* and write sentences to *output_path*.

    The input is decoded as latin-1. Each reconstructed sentence is written
    on its own line.
    """
    with open(input_path, "r", encoding="latin1") as trainfile:
        sentences = detokenize(trainfile)

    # NOTE(review): the output file uses the platform default encoding while
    # the input is latin-1; consider passing an explicit encoding here once
    # the consumers of the output file are known.
    with open(output_path, "w") as outputfile:
        outputfile.writelines(sentence + "\n" for sentence in sentences)


if __name__ == "__main__":
    main()