/
sample.py
50 lines (37 loc) · 1.44 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import random
def get_text(filename='random.txt'):
    '''Read a text file and return its contents as a list of words.

    Args:
        filename: Path of the file to read. Defaults to 'random.txt'
            (resolved against the current working directory), which is
            the path this function previously had hard-coded.

    Returns:
        A list of the whitespace-separated tokens of the file, in order
        of appearance. An empty or whitespace-only file yields [].

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    '''
    with open(filename, 'r') as f:
        # split() without arguments splits on any run of whitespace,
        # newlines included, so the whole file becomes one flat word list.
        return f.read().split()
def histogram(get_text):
    '''Build a word-count histogram from an iterable of words.

    Note that the parameter `get_text` is the collection of words itself
    (it is iterated directly), not a callable.

    Returns a dict that maps each distinct word to the number of times it
    occurs in the input; an empty input gives an empty dict.
    '''
    counts = {}
    for token in get_text:
        # A word seen for the first time starts from 0, then is bumped to 1.
        counts[token] = counts.get(token, 0) + 1
    return counts
def sample_frequency(histogram):
    '''Return one word picked at random, weighted by its count.

    Each word's chance of being returned is its count divided by the sum
    of all counts in the histogram.

    Args:
        histogram: A dict mapping words to non-negative integer counts.

    Returns:
        A single key of `histogram`, or None when the histogram is empty
        (or all of its counts are zero), since there is nothing to pick.
    '''
    # The sampling range must cover every token in the histogram; a fixed
    # upper bound would make later words unreachable for large inputs and
    # could fall past the last word for small ones.
    total = sum(histogram.values())
    if total <= 0:
        return None

    # Pick a position in [0, total) and find the word whose cumulative
    # count range contains it.
    target = random.randrange(total)
    upper_bound = 0
    for word, count in histogram.items():
        upper_bound += count
        if target < upper_bound:
            return word

    # Only reachable if the counts are not non-negative integers.
    return None
if __name__ == '__main__':
    # Build the word histogram from the source text, draw ten words from it
    # with sample_frequency, and print them as one list.
    word_counts = histogram(get_text())
    samples = [sample_frequency(word_counts) for _ in range(10)]
    print(samples)