This repository has been archived by the owner on Mar 25, 2021. It is now read-only.
/
playground.py
142 lines (121 loc) · 5.04 KB
/
playground.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from random import random
from os import getcwd
from sys import path as sys_path
from time import time
sys_path.append(getcwd())
from spell_checker import *
def sample_file(path, new_path, percentage=.20):
    """Randomly selects lines from files.

    Every line of the source file is kept independently when a fresh
    ``random()`` draw is below ``percentage``, so the number of sampled
    lines varies from run to run. The kept lines are written in the same
    relative order in which they appear in the source file.

    Parameters
    ----------
    path : str (path)
        The path of the file to be sampled. It is read as UTF-8.
    new_path : str (path)
        The path of the file to be created with the chosen lines. It is
        written as UTF-8 and opened in "w" mode.
    percentage : float (default = .20)
        The percentage of lines to be chosen, expressed as a fraction
        (``.20`` keeps roughly 20% of the lines).
    """
    # The source is read to the end before the target is opened, so
    # ``path`` and ``new_path`` may safely point to the same file.
    with open(path, "r", encoding="utf-8") as source:
        sample = [line for line in source if random() < percentage]
    with open(new_path, "w", encoding="utf-8") as target:
        target.writelines(sample)
def shuffle_file(path, new_path):
    """Randomly reorders the lines of a file and writes the result into another file.

    Both files are handled as UTF-8, like the other helpers in this module.
    If the last line of the source has no trailing newline, one is added
    before shuffling; otherwise that line would be glued to whichever line
    ends up after it in the output.

    Parameters
    ----------
    path : str (path)
        The path to the file whose lines are to be randomized.
    new_path : str (path)
        The path in which to write the file with the already-randomized lines.
    """
    # Local import: the module itself only imports ``random()`` from random.
    from random import shuffle

    with open(path, "r", encoding="utf-8") as source:
        lines = source.readlines()
    # Only the final line can lack its terminator.
    if lines and not lines[-1].endswith("\n"):
        lines[-1] += "\n"
    shuffle(lines)
    with open(new_path, "w", encoding="utf-8") as target:
        target.writelines(lines)
def dict_to_list(path):
    """Converts a dictionary file (one word per line) into a Python list object.

    The file is read as UTF-8. Only the line terminator is removed from each
    line; any other whitespace is kept as part of the word. Blank lines are
    skipped so that the result never contains empty strings.

    Parameters
    ----------
    path : str (path)
        The path to the dictionary file to be transformed into a Python list object.

    Returns
    -------
    list
        A list where each item is a single word, in file order.
    """
    with open(path, "r", encoding="utf-8") as file:
        # Iterate the file lazily instead of materializing it with readlines().
        words = (line.rstrip("\n") for line in file)
        return [word for word in words if word]
#a = CharacterTree("abacate","mamão","maniçoba","queijo")
#print("CharacterTree criada com as palavras \"abacate\", \"mamão\",\"maniçoba\" e \"queijo\" nela.")
#print("Há maniçoba nela? Resposta: {}.".format("maniçoba" in a))
#print("Há abacate nela? Resposta: {}.".format("abacate" in a))
#print("E aba? Resposta: {}.".format("aba" in a))
#print("Então, adicionemos aba.")
#a.insert("aba")
#print("A palavra aba adicionada.")
#print("Há aba na árvore? Resposta: {}.".format("aba" in a))
#print("Perfeito.")
#
#print(", ".join([character.character for character in a.next_characters]))
#
#print("\n\n")
#b = RadixTree("abacate","mamão","maniçoba","queijo")
#b = RadixTree("aba", "abacate", "abacateiro", "abacateirozeiro")
#print("RadixTree criada com as palavras \"abacate\", \"mamão\",\"maniçoba\" e \"queijo\" nela.")
#print("Há maniçoba nela? Resposta: {}.".format("maniçoba" in b))
#print("Há abacate nela? Resposta: {}.".format("abacate" in b))
#print("E aba? Resposta: {}.".format("aba" in b))
#print("Então, adicionemos aba.")
#b.insert("aba")
#print("A palavra aba adicionada.")
#print("Há aba na árvore? Resposta: {}.".format("aba" in b))
#print("Perfeito.")
#print(", ".join([character.character for character in a.next_characters]))
# b.insert("menino")
# Playground run: load the main dictionary through from_dict() with the
# "RADIX" option (presumably yielding a RadixTree -- confirm in spell_checker),
# then remove every word listed in the second dictionary file from it.
rt = from_dict("./dictionaries/palavras.txt", "RADIX")
words_to_remove = dict_to_list("./dictionaries/palavras2.txt")

# Progress marker: both dictionary files were loaded.
print("OK")

for word_to_remove in words_to_remove:
    # Echo the word before removing it, so the last printed word identifies
    # the removal that was in progress if rt.remove() fails.
    print(word_to_remove)
    rt.remove(word_to_remove)
#print("Comparing execution times: ",end="")
#start_time = time()
#ct = from_dict("D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras.txt", "CHARACTER") # 320140 palavras in CharacterTree.
#rt = from_dict("D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras.txt", "RADIX") # 320140 palavras in RadixTree.
#elapsed = time() - start_time
#print("{}".format(str(elapsed)))
#start_time = time()
#constituicao = from_txt("D:/igor/OneDrive/Documentos/GitHub/spell-checker/texts/constituicao.txt")
#elapsed = time() - start_time
#print("{}".format(str(elapsed)))
#sample_file("D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras.txt",
# "D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras_sample1.txt")
#wrong_words = ct.check("./texts/A-Caravana-de-Veneza.txt")
#ct_wrong_words = ct.check("D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras_sample.txt")
#rt_wrong_words = rt.check("D:\igor\OneDrive\Documentos\GitHub\spell-checker\dictionaries\palavras_sample.txt")
#print("low_memory=True:",end="")
#start_time = time()
#ct = from_csv("D:\igor\OneDrive\Documentos\GitHub\spell-checker\palavras_sample.txt")
#low_memory_time = time() - start_time
#print(" {}".format(str(low_memory_time)))
#
#print("low_memory=False:",end="")
#start_time = time()
#ct = from_csv("D:\igor\OneDrive\Documentos\GitHub\spell-checker\palavras_sample.txt",False)
#no_low_memory_time = time() - start_time
#print(" {}".format(str(no_low_memory_time)))
#print("low_memory=True:",end="")
#start_time = time()
#for i in range(5):
# print(i)
# ct = from_csv("D:\igor\OneDrive\Documentos\GitHub\spell-checker\palavras_sample.txt")
#low_memory_time = (time() - start_time)/50
#print("low_memory=False:",end="")
#start_time = time()
#for i in range(5):
# print(i)
# ct = from_csv("D:\igor\OneDrive\Documentos\GitHub\spell-checker\palavras_sample.txt",False)
#no_low_memory_time = (time() - start_time)/50