# markov.py -- Markov-chain text generator that can post its output as tweets.
import sys
import random
import tweet
class SimpleMarkovGenerator(object):
    """Build n-gram Markov chains from text files and generate random text.

    Attributes:
        char_count: Maximum number of characters kept from the generated
            text before it is trimmed back to a sentence boundary.
            Expected to be non-negative.
    """

    char_count = 500

    # Characters treated as the end of a sentence when trimming output.
    _SENTENCE_ENDINGS = ".!?"

    def read_files(self, filenames):
        """Given a list of files, make text (the corpus) from them.

        Args:
            filenames: Iterable of paths to readable text files.

        Returns:
            One string holding the contents of every file in order, each
            preceded by a single space; "" when no filenames are given.

        Raises:
            IOError / OSError: If a file cannot be opened or read.
        """
        pieces = []
        for file_name in filenames:
            # The context manager closes each handle, even if read() fails.
            with open(file_name) as file_handle:
                pieces.append(" " + file_handle.read())
        return "".join(pieces)

    def make_chains(self, corpus, n):
        """Take input text as a string; return a dictionary of markov chains.

        Args:
            corpus: Text to learn from; it is split on whitespace.
            n: Number of consecutive words that make up each key.

        Returns:
            A dict mapping each n-word tuple found in the corpus to the list
            of words that followed it. A follower appears once per
            occurrence, so repeated followers are proportionally more likely
            to be picked when generating text.
        """
        words = corpus.split()
        chains = {}
        # Stop n words before the end so every key has a following word.
        for index in range(len(words) - n):
            ngram = tuple(words[index + offset] for offset in range(n))
            chains.setdefault(ngram, []).append(words[index + n])
        return chains

    def make_text(self, chains):
        """Take a dictionary of markov chains; return random text.

        The text starts at a randomly chosen key whose first word begins with
        an upper-case letter. If no key qualifies, any key may be used rather
        than searching forever. Words are appended by repeatedly picking a
        random follower of the most recent n-gram. The result is cut to
        ``char_count`` characters and then trimmed to a sentence ending.

        Args:
            chains: Mapping of word tuples to lists of follower words, as
                produced by ``make_chains``.

        Returns:
            The generated string.

        Raises:
            IndexError: If ``chains`` is empty.
        """
        keys = list(chains)
        capitalized = [key for key in keys if key[0][:1].isupper()]
        start_key = random.choice(capitalized or keys)

        words = list(start_key)
        length = len(" ".join(words))
        key = start_key
        # Only the first char_count characters are kept, so stop generating
        # once that many exist. This also guarantees termination when the
        # chain is cyclic (the corpus's last n-gram also occurs earlier).
        while key in chains and length < self.char_count:
            next_word = random.choice(chains[key])
            words.append(next_word)
            length += len(next_word) + 1  # +1 for the joining space
            # Slide the window: drop the oldest word, add the newest.
            key = key[1:] + (next_word,)

        text = " ".join(words)[:self.char_count]
        return self._trim_to_sentence(text)

    def _trim_to_sentence(self, text):
        """Cut text right after its first sentence-ending mark past index 0.

        Text with no such mark is returned unchanged.
        """
        # NOTE(review): the original scanned backwards from the end without
        # a break, which nets out to keeping only the first sentence. That
        # result is preserved here; confirm whether trimming at the *last*
        # sentence ending was intended.
        for position in range(1, len(text)):
            if text[position] in self._SENTENCE_ENDINGS:
                return text[:position + 1]
        return text
class TwitterMarkovGenerator(SimpleMarkovGenerator):
    """Markov generator intended for producing tweet-sized text.

    Only ``char_count`` is overridden; all behavior is inherited from
    SimpleMarkovGenerator.
    """

    # NOTE(review): the original docstring described a 140-character tweet,
    # but the limit set here is 300 -- confirm which value is intended.
    char_count = 300
if __name__ == "__main__":
    # Every command-line argument after the script name is a corpus file.
    filenames = sys.argv[1:]

    # Build two-word (n=2) chains once from the combined corpus.
    markov = TwitterMarkovGenerator()
    text = markov.read_files(filenames)
    markov_chains = markov.make_chains(text, 2)

    # Post a freshly generated tweet on each bare [enter]; stop once the
    # user types 'q'. Any other input simply prompts again.
    while True:
        command = raw_input("Press [enter] to tweet again or 'q' to quit: ")
        if command == "q":
            break
        if command == "":
            tweet.tweet_markov(markov.make_text(markov_chains))