# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""LSTM projection cell with cell clip and projection clip."""
__all__ = ['LSTMPCellWithClip']

from mxnet.gluon.contrib.rnn import LSTMPCell


class LSTMPCellWithClip(LSTMPCell):
r"""Long-Short Term Memory Projected (LSTMP) network cell with cell clip and projection clip.
Each call computes the following function:
.. math::
\DeclareMathOperator{\sigmoid}{sigmoid}
\begin{array}{ll}
i_t = \sigmoid(W_{ii} x_t + b_{ii} + W_{ri} r_{(t-1)} + b_{ri}) \\
f_t = \sigmoid(W_{if} x_t + b_{if} + W_{rf} r_{(t-1)} + b_{rf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{rc} r_{(t-1)} + b_{rg}) \\
o_t = \sigmoid(W_{io} x_t + b_{io} + W_{ro} r_{(t-1)} + b_{ro}) \\
c_t = c_{\text{clip}}(f_t * c_{(t-1)} + i_t * g_t) \\
h_t = o_t * \tanh(c_t) \\
r_t = p_{\text{clip}}(W_{hr} h_t)
\end{array}
where :math:`c_{\text{clip}}` is the cell clip applied on the next cell;
:math:`r_t` is the projected recurrent activation at time `t`,
:math:`p_{\text{clip}}` means apply projection clip on he projected output.
math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
cell state at time `t`, :math:`x_t` is the input at time `t`, and :math:`i_t`,
:math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, and
out gates, respectively.
Parameters
----------
hidden_size : int
Number of units in cell state symbol.
projection_size : int
Number of units in output symbol.
i2h_weight_initializer : str or Initializer
Initializer for the input weights matrix, used for the linear
transformation of the inputs.
h2h_weight_initializer : str or Initializer
Initializer for the recurrent weights matrix, used for the linear
transformation of the hidden state.
h2r_weight_initializer : str or Initializer
Initializer for the projection weights matrix, used for the linear
transformation of the recurrent state.
i2h_bias_initializer : str or Initializer, default 'lstmbias'
Initializer for the bias vector. By default, bias for the forget
gate is initialized to 1 while all other biases are initialized
to zero.
h2h_bias_initializer : str or Initializer
Initializer for the bias vector.
prefix : str
Prefix for name of `Block`s
(and name of weight if params is `None`).
params : Parameter or None
Container for weight sharing between cells.
Created if `None`.
cell_clip : float
Clip cell state between `[-cell_clip, cell_clip]` in LSTMPCellWithClip cell
projection_clip : float
Clip projection between `[-projection_clip, projection_clip]` in LSTMPCellWithClip cell
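
    Examples
    --------
    A minimal usage sketch (assumes MXNet 1.x with the Gluon API; all shapes
    below are illustrative):

    >>> import mxnet as mx
    >>> cell = LSTMPCellWithClip(hidden_size=32, projection_size=16,
    ...                          cell_clip=3.0, projection_clip=3.0)
    >>> cell.initialize()
    >>> x = mx.nd.random.uniform(shape=(8, 50))    # (batch_size, input_size)
    >>> r0 = mx.nd.zeros((8, 16))                  # projected recurrent state
    >>> c0 = mx.nd.zeros((8, 32))                  # cell state
    >>> out, [r1, c1] = cell(x, [r0, c0])
    >>> out.shape
    (8, 16)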
"""
    def __init__(self, hidden_size, projection_size,
                 i2h_weight_initializer=None, h2h_weight_initializer=None,
                 h2r_weight_initializer=None,
                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                 input_size=0, cell_clip=None, projection_clip=None,
                 prefix=None, params=None):
        super(LSTMPCellWithClip, self).__init__(hidden_size,
                                                projection_size,
                                                i2h_weight_initializer,
                                                h2h_weight_initializer,
                                                h2r_weight_initializer,
                                                i2h_bias_initializer,
                                                h2h_bias_initializer,
                                                input_size,
                                                prefix=prefix,
                                                params=params)
        self._cell_clip = cell_clip
        self._projection_clip = projection_clip
    # pylint: disable= arguments-differ
    def hybrid_forward(self, F, inputs, states, i2h_weight,
                       h2h_weight, h2r_weight, i2h_bias, h2h_bias):
r"""Hybrid forward computation for Long-Short Term Memory Projected network cell
with cell clip and projection clip.
Parameters
----------
inputs : input tensor with shape `(batch_size, input_size)`.
states : a list of two initial recurrent state tensors, with shape
`(batch_size, projection_size)` and `(batch_size, hidden_size)` respectively.
Returns
--------
out : output tensor with shape `(batch_size, num_hidden)`.
next_states : a list of two output recurrent state tensors. Each has
the same shape as `states`.
"""
        prefix = 't%d_' % self._counter
        # Compute the input-to-hidden and (projected) hidden-to-hidden
        # transformations for all four gates with one fused FullyConnected each.
        i2h = F.FullyConnected(data=inputs, weight=i2h_weight, bias=i2h_bias,
                               num_hidden=self._hidden_size*4, name=prefix+'i2h')
        h2h = F.FullyConnected(data=states[0], weight=h2h_weight, bias=h2h_bias,
                               num_hidden=self._hidden_size*4, name=prefix+'h2h')
        gates = i2h + h2h
        # Split the fused gate pre-activations into input, forget,
        # cell (candidate), and output gates.
        slice_gates = F.SliceChannel(gates, num_outputs=4, name=prefix+'slice')
        in_gate = F.Activation(slice_gates[0], act_type='sigmoid', name=prefix+'i')
        forget_gate = F.Activation(slice_gates[1], act_type='sigmoid', name=prefix+'f')
        in_transform = F.Activation(slice_gates[2], act_type='tanh', name=prefix+'c')
        out_gate = F.Activation(slice_gates[3], act_type='sigmoid', name=prefix+'o')
        # New cell state, optionally clipped to [-cell_clip, cell_clip].
        next_c = F._internal._plus(forget_gate * states[1], in_gate * in_transform,
                                   name=prefix+'state')
        if self._cell_clip is not None:
            next_c = next_c.clip(-self._cell_clip, self._cell_clip)
        hidden = F._internal._mul(out_gate, F.Activation(next_c, act_type='tanh'),
                                  name=prefix+'hidden')
        # Project the hidden state down to projection_size and optionally clip.
        next_r = F.FullyConnected(data=hidden, num_hidden=self._projection_size,
                                  weight=h2r_weight, no_bias=True, name=prefix+'out')
        if self._projection_clip is not None:
            next_r = next_r.clip(-self._projection_clip, self._projection_clip)
        return next_r, [next_r, next_c]
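

if __name__ == '__main__':
    # Not part of the original module: a minimal smoke-test sketch (assuming
    # MXNet 1.x) showing the cell unrolled over a sequence. The shapes and
    # clip values below are illustrative.
    import mxnet as mx

    cell = LSTMPCellWithClip(hidden_size=32, projection_size=16,
                             cell_clip=3.0, projection_clip=3.0)
    cell.initialize()
    # (batch_size, seq_len, input_size) with the default 'NTC' layout.
    seq = mx.nd.random.uniform(shape=(8, 10, 50))
    outputs, states = cell.unroll(10, seq, layout='NTC', merge_outputs=True)
    print(outputs.shape)                      # (8, 10, 16): one r_t per step
    print(states[0].shape, states[1].shape)   # (8, 16) and (8, 32)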