Commit

load function
Emekaborisama committed Mar 24, 2021
1 parent a87c5ab commit 925c75c
Showing 6 changed files with 137 additions and 62 deletions.
11 changes: 10 additions & 1 deletion README.md
@@ -43,7 +43,7 @@ text = ttg.loaddata(data)
```



### Build our Model Architecture
```python
pipeline = ttg.tentext(text)
seq_text = pipeline.sequence(padding_method = 'pre')
@@ -76,6 +76,15 @@ Tune your model to find the best optimizer and activation method to use.
pipeline.hyper_params(epochs = 500)
```

### Save your model
```python
pipeline.saveModel('model')
```
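
`saveModel('model')` appends the suffix `textgen.h5` to the name you pass, so the call above writes `modeltextgen.h5`; pass that path to `load_model_predict` as `modelname` when you want to reuse the model.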

### Use a saved model for prediction
```python
# the corpus is the training text file
ttg.load_model_predict(corpus = corpus, padding_method = 'pre', modelname = '../input/model2/model2textgen.h5', sample_text = 'yo yo', word_length = 100)
```
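
Here `corpus` is the training text loaded the same way as at the top of this README. A minimal sketch, assuming your training file is named `train.txt` (the filename is illustrative):

```python
# hypothetical file name; loaddata is the same loader used to fit the pipeline
corpus = ttg.loaddata('train.txt')
```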
<h1 align="center">
<span> Give us a star :star: </span> 🐉
</h1>
54 changes: 47 additions & 7 deletions build/lib/text_gen/ten_textgen.py
@@ -3,6 +3,7 @@
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer() # module-level tokenizer, shared by load_model_predict below
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dropout
@@ -127,14 +128,14 @@ def fit(self, loss, optimizer, batch, metrics, epochs, verbose, patience):
        self.history = history
        return self.history

-    def predict(self, sample_text, word_length, segment): #A text seed is provided
+    def predict(self, sample_text, word_length, segment, verbose): #A text seed is provided

        '''Predicts the next text sequences'''
-        for wordLength in range(50): #Generates a text with a range of word length
+        for wordLength in range(word_length): #Generates a text with a range of word length
            tokenList = self.tokenizer.texts_to_sequences([sample_text])[0] #Turns the seed into sequences
            tokenList = pad_sequences([tokenList], maxlen=self.maxSequenceLen - 1, padding=self.padding_method)
-            predicted = self.model.predict_classes(tokenList, verbose=self.verbose) #Predicts the next sequence (generated text)
+            predicted = self.model.predict_classes(tokenList, verbose=verbose) #Predicts the next sequence (generated text)
            outputWord = " "
            for word, index in self.tokenizer.word_index.items():
                if index == predicted:
@@ -156,9 +157,7 @@ def predict(self, sample_text, word_length, segment):
    def saveModel(self, modelname):
        self.modelsaved = self.model.save(modelname+'textgen.h5') #model.save writes the file and returns None
        return self.modelsaved
-    def loadmodel(self, modelname):
-        self.loadmodel = keras.models.load_model(modelname)
-        return self.loadmodel



    def plot_loss_accuracy(self):
@@ -223,9 +222,50 @@ def hyper_param(self, epochs):
        print(study.get_best_result())





def load_model_predict(corpus, padding_method, modelname, word_length, sample_text):
    '''Loads a saved model and generates word_length words after sample_text'''
    model = keras.models.load_model(modelname, compile=False)
    #Refit the tokenizer on the training corpus so the word index matches training
    tokenizer.fit_on_texts(corpus)
    totalWords = len(tokenizer.word_index) + 1
    #Turn every line of the corpus into n-gram sequences
    sequences = []
    for line in corpus:
        tokenList = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(tokenList)):
            ngramSequence = tokenList[:i+1]
            sequences.append(ngramSequence)

    #Gives the sequences a uniform length by padding them
    maxSequenceLen = max([len(seq) for seq in sequences])
    _sequences = np.array(pad_sequences(sequences, maxlen=maxSequenceLen, padding=padding_method))

    #The training splits below are rebuilt here but never used for prediction
    predictors, label = _sequences[:, :-1], _sequences[:, -1]
    _label = to_categorical(label, num_classes=totalWords)

    def generateText(seed):
        for wordLength in range(word_length): #Generates a text with a range of word length
            tokenList = tokenizer.texts_to_sequences([seed])[0] #Turns the seed into sequences
            tokenList = pad_sequences([tokenList], maxlen=maxSequenceLen - 1, padding=padding_method)
            predicted = model.predict_classes(tokenList, verbose=0) #Predicts the next word index
            outputWord = " "
            for word, index in tokenizer.word_index.items():
                if index == predicted:
                    outputWord = word
                    break
            seed += " " + outputWord #Appends the generated word to the seed
        return seed

    return generateText(seed=sample_text)
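
# Compatibility note: predict_classes was removed from Keras models in
# TensorFlow 2.6. On newer versions the equivalent of the call above is:
#     probabilities = model.predict(tokenList, verbose=0)
#     predicted = np.argmax(probabilities, axis=-1)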




