-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from MikeCroall/dev
Pull Dev into Master for intial (markov - not tensorflow) release
- Loading branch information
Showing
25 changed files
with
4,655 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/Android |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
web: python server.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
click==6.6 | ||
Flask==0.11.1 | ||
Flask-Cors==3.0.2 | ||
h5py==2.6.0 | ||
itsdangerous==0.24 | ||
Jinja2==2.8 | ||
MarkupSafe==0.23 | ||
numpy==1.11.2 | ||
six==1.10.0 | ||
Werkzeug==0.11.11 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
python-3.5.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#!/usr/bin/python | ||
|
||
import re | ||
import random | ||
import sys | ||
|
||
|
||
class SentenceGenerator(object): | ||
|
||
def __init__(self): | ||
self.tempMapping = {} | ||
self.mapping = {} | ||
self.starts = [] | ||
|
||
def fixCaps(self, word): | ||
if word.isupper() and word != "I": | ||
word = word.lower() | ||
elif word[0].isupper(): | ||
word = word.lower().capitalize() | ||
else: | ||
word = word.lower() | ||
return word | ||
|
||
def toHashKey(self, lst): | ||
return tuple(lst) | ||
|
||
def wordlist(self, filename): | ||
f = open(filename, 'r') | ||
wordlist = [self.fixCaps(w) | ||
for w in re.findall(r"#?[\w']+|[.,!?;]", f.read())] | ||
f.close() | ||
return wordlist | ||
|
||
def addItemToTempMapping(self, history, word): | ||
while len(history) > 0: | ||
first = self.toHashKey(history) | ||
if first in self.tempMapping: | ||
if word in self.tempMapping[first]: | ||
self.tempMapping[first][word] += 1.0 | ||
else: | ||
self.tempMapping[first][word] = 1.0 | ||
else: | ||
self.tempMapping[first] = {} | ||
self.tempMapping[first][word] = 1.0 | ||
history = history[1:] | ||
|
||
# Building and normalizing the mapping. | ||
def buildMapping(self, wordlist, markovLength): | ||
self.starts.append(wordlist[0]) | ||
for i in range(1, len(wordlist) - 1): | ||
if i <= markovLength: | ||
history = wordlist[: i + 1] | ||
else: | ||
history = wordlist[i - markovLength + 1: i + 1] | ||
follow = wordlist[i + 1] | ||
# if the last elt was a period, add the next word to the start list | ||
if history[-1] == "." and follow not in ".,!?;": | ||
self.starts.append(follow) | ||
self.addItemToTempMapping(history, follow) | ||
# Normalize the values in tempMapping, put them into mapping | ||
for first, followset in self.tempMapping.items(): | ||
total = sum(followset.values()) | ||
# Normalizing here: | ||
self.mapping[first] = dict([(k, v / total) | ||
for k, v in followset.items()]) | ||
|
||
# Returns the next word in the sentence (chosen randomly), | ||
# given the previous ones. | ||
def next(self, prevList): | ||
sum = 0.0 | ||
retval = "" | ||
index = random.random() | ||
# Shorten prevList until it's in mapping | ||
while self.toHashKey(prevList) not in self.mapping and len(prevList) != 0: | ||
prevList.pop(0) | ||
# Get a random word from the mapping, given prevLis | ||
if(len(prevList) == 0): | ||
prevList = ['wall'] | ||
for k, v in self.mapping[self.toHashKey(prevList)].items(): | ||
sum += v | ||
if sum >= index and retval == "": | ||
retval = k | ||
return retval | ||
|
||
def genSentence(self, markovLength, startWord=None): | ||
|
||
if(startWord is None): | ||
startWord = "wall" | ||
# Start with a random "starting word" | ||
curr = startWord | ||
sent = curr.capitalize() | ||
prevList = [curr] | ||
if(len(self.next([curr])) == 0): | ||
curr = "wall" | ||
sent = "WALL" | ||
prevList = [curr] | ||
# Keep adding words until we hit a period | ||
while (curr not in "."): | ||
curr = self.next(prevList) | ||
prevList.append(curr) | ||
# if the prevList has gotten too long, trim it | ||
if len(prevList) > markovLength: | ||
prevList.pop(0) | ||
if (curr not in ".,!?;"): | ||
sent += " " # Add spaces between words (but not punctuation) | ||
sent += curr | ||
return sent | ||
|
||
|
||
# These mappings can get fairly large -- they're stored globally to | ||
# save copying time. | ||
|
||
# (tuple of words) -> {dict: word -> number of times the word appears following the tuple} | ||
# Example entry: | ||
# ('eyes', 'turned') => {'to': 2.0, 'from': 1.0} | ||
# Used briefly while first constructing the normalized mapping | ||
# (tuple of words) -> {dict: word -> *normalized* number of times the word appears following the tuple} | ||
# Example entry: | ||
# ('eyes', 'turned') => {'to': 0.66666666, 'from': 0.33333333} | ||
|
||
# Contains the set of words that can start sentences | ||
|
||
# We want to be able to compare words independent of their capitalization. | ||
# affect processing time too negatively. | ||
# Returns the contents of the file, split into a list of words and | ||
# (some) punctuation. | ||
# Self-explanatory -- adds "word" to the "tempMapping" dict under "history". | ||
# tempMapping (and mapping) both match each word to a list of possible next | ||
# words. | ||
# Given history = ["the", "rain", "in"] and word = "Spain", we add "Spain" to | ||
# the entries for ["the", "rain", "in"], ["rain", "in"], and ["in"]. |
Oops, something went wrong.