This repository has been archived by the owner on Jan 3, 2023. It is now read-only.

Simple recurrent network training example #364

Open · wants to merge 8 commits into master
examples/impulse_rnn.py (178 additions, 0 deletions)
@@ -0,0 +1,178 @@
#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2017, Alexey Reshetnyak.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------
"""
A simple example that demonstrates recurrent neural network training.
The network has two inputs and one output. The inputs of the network
synchronously receive impulses at random times. The amplitude of the impulse at
the first input is equal to one. The amplitude of the impulses on the second
input is a random variable that is uniformly distributed from zero to one. The
desired network output is a impulse which starts at the same time as the input
impulses. The desired impulse has an amplitude equal to one, but its duration
is proportional to the amplitude of second input.

Usage:

python examples/impulse_rnn.py

"""

from neon.backends import gen_backend
from neon.initializers import Uniform
from neon.layers import GeneralizedCost, Affine, Recurrent
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Logistic
from neon.callbacks.callbacks import Callbacks
from neon.transforms.cost import MeanSquared
from neon import NervanaObject
from neon import logger as neon_logger
import numpy as np

do_plots = True
try:
    import matplotlib.pyplot as plt
except ImportError:
    neon_logger.display('matplotlib needs to be installed manually to generate plots needed '
                        'for this example. Skipping plot generation')
    do_plots = False

np.random.seed()

class ImpulseData(object):
    """
    x - network inputs
    y - desired output
    """
    def __init__(self, data_len):
        self.x = np.zeros((data_len, 2), np.float32)
        self.y = np.zeros((data_len, 1), np.float32)
        period = 11
        i = 0
        while i < data_len:
            a = np.random.randint(2)
            self.x[i, 0] = a
            if a == 1:
                # second input: random amplitude in [0, 1)
                b = np.random.rand()
                self.x[i, 1] = b
                # target pulse duration is proportional to that amplitude
                pulse_len = int(b * 10)
                if i + pulse_len < data_len:
                    self.y[i:i + pulse_len, 0] = 1
                else:
                    # pulse would run past the end of the data; clip it
                    self.y[i:, 0] = 1
                i += period
            i += 1
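# Optional sanity check of the generator above (an illustrative addition for
# this write-up, not part of the original example; the helper names are ours):
# the target pulse at a start index should last roughly int(amplitude * 10)
# steps.
_demo = ImpulseData(100)
_starts = np.nonzero(_demo.x[:, 0])[0]
if _starts.size > 0:
    i0 = int(_starts[0])
    neon_logger.display('pulse amplitude %.2f -> target lasts %d steps'
                        % (_demo.x[i0, 1], int(_demo.y[i0:i0 + 11, 0].sum())))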

class ImpulseDataIterator(NervanaObject):
    def __init__(self, X, y, time_steps):
        self.seq_length = time_steps
        self.batch_index = 0
        self.nfeatures = X.shape[1]
        self.nsamples = X.shape[0]
        self.ndata = X.shape[0]
        self.shape = (self.nfeatures, time_steps)

        # drop trailing samples that do not fill a whole unrolled batch
        extra_examples = self.nsamples % (self.be.bsz * time_steps)
        if extra_examples:
            X = X[:-extra_examples]
            y = y[:-extra_examples]

        self.nbatches = self.nsamples // (self.be.bsz * time_steps)
        self.X_dev = self.be.iobuf((self.nfeatures, time_steps))
        self.y_dev = self.be.iobuf((y.shape[1], time_steps))

        self.X = X.reshape(self.be.bsz, self.nbatches,
                           time_steps, self.nfeatures)
        self.y = y.reshape(self.be.bsz, self.nbatches,
                           time_steps, y.shape[1])

    def reset(self):
        self.batch_index = 0

    def __iter__(self):
        self.batch_index = 0
        while self.batch_index < self.nbatches:
            X_batch = self.X[:, self.batch_index].T.reshape(
                self.X_dev.shape).copy()
            y_batch = self.y[:, self.batch_index].T.reshape(
                self.y_dev.shape).copy()

            # copy this batch into the backend tensors
            self.X_dev.set(X_batch)
            self.y_dev.set(y_batch)

            self.batch_index += 1

            yield self.X_dev, self.y_dev
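# For reference (our reading of the iterator above, not stated in the PR):
# be.iobuf((nfeatures, time_steps)) allocates a device buffer of shape
# (nfeatures * time_steps, batch_size), the flattened (feature, time) layout
# neon's recurrent layers consume. Illustrative usage, once a backend exists:
#
#   it = ImpulseDataIterator(train_data.x, train_data.y, time_steps=10)
#   for X_dev, y_dev in it:
#       pass  # one contiguous 10-step window per minibatch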

train_data = ImpulseData(40000)
test_data = ImpulseData(200)

batch_size = 1
epochs = 2
hidden_size = 16
backprop_depth = 10

# set up backend
be = gen_backend(backend='cpu', batch_size=batch_size)

train_set = ImpulseDataIterator(train_data.x, train_data.y, backprop_depth)
valid_set = ImpulseDataIterator(test_data.x, test_data.y, backprop_depth)
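# With these settings each iterator unrolls the network over
# backprop_depth = 10 steps, so one training epoch visits
# 40000 // (1 * 10) = 4000 windows. Because the windows are contiguous and
# the Recurrent layer below uses reset_cells=False, the hidden state carries
# across window boundaries and the model effectively sees one long sequence.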

# weight initialization
uni = Uniform(low=-0.5, high=0.5)

layers = [
    Recurrent(hidden_size, uni, activation=Logistic(), reset_cells=False),
    Affine(1, uni, bias=uni, activation=Logistic())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=MeanSquared())
optimizer = GradientDescentMomentum(0.08, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, serialize=1)

# train model
model.fit(train_set, optimizer=optimizer,
          num_epochs=epochs, cost=cost, callbacks=callbacks)
valid_output = model.get_outputs(valid_set).reshape(-1, 1)
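# Note added for this write-up (our reading, not stated in the PR):
# get_outputs runs the trained model over the whole validation set and
# returns one prediction per time step; with a single output unit,
# reshape(-1, 1) flattens those predictions into one column that lines up
# with test_data.y for the plots below.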

if do_plots:
    fig = plt.figure()

    ax1 = fig.add_subplot(411)
    ax1.title.set_text('First input')
    ax1.plot(test_data.x[:, 0])

    ax2 = fig.add_subplot(412)
    ax2.title.set_text('Second input')
    ax2.plot(test_data.x[:, 1])

    ax3 = fig.add_subplot(413)
    ax3.title.set_text('Desired output')
    ax3.plot(test_data.y[:, 0])

    ax4 = fig.add_subplot(414)
    ax4.title.set_text('Actual output')
    ax4.plot(valid_output)

    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)

    plt.show()