Source code for gluonnlp.loss.label_smoothing
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Label Smoothing"""
__all__ = ['LabelSmoothing']
import mxnet as mx
from mxnet.gluon import HybridBlock

# pylint: disable=unused-argument
class _SmoothingWithDim(mx.operator.CustomOp):
    """CustomOp that smooths labels along `axis`, inferring the number of
    classes from the input's shape at runtime."""

    def __init__(self, epsilon=0.1, axis=-1):
        # mx.operator.CustomOp.__init__ takes no extra arguments
        # (need_top_grad belongs to CustomOpProp, below).
        super(_SmoothingWithDim, self).__init__()
        self._epsilon = epsilon
        self._axis = axis

    def forward(self, is_train, req, in_data, out_data, aux):
        inputs = in_data[0]
        # smoothed = (1 - epsilon) * inputs + epsilon / V, where V is the
        # size of the smoothed axis.
        outputs = ((1 - self._epsilon) * inputs) \
                  + (self._epsilon / float(inputs.shape[self._axis]))
        self.assign(out_data[0], req[0], outputs)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # The forward map is affine with slope (1 - epsilon), so the
        # incoming gradient is simply rescaled.
        self.assign(in_grad[0], req[0], (1 - self._epsilon) * out_grad[0])


@mx.operator.register('_smoothing_with_dim')
class _SmoothingWithDimProp(mx.operator.CustomOpProp):
    """Operator property class for `_SmoothingWithDim`."""

    def __init__(self, epsilon=0.1, axis=-1):
        # need_top_grad=True: the backward pass consumes the output gradient.
        super(_SmoothingWithDimProp, self).__init__(True)
        self._epsilon = float(epsilon)
        self._axis = int(axis)

    def list_arguments(self):
        return ['data']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # Smoothing is elementwise: the output shape equals the input shape.
        data_shape = in_shape[0]
        output_shape = data_shape
        return (data_shape,), (output_shape,), ()

    def declare_backward_dependency(self, out_grad, in_data, out_data):
        return out_grad

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # Create and return an instance of the CustomOp.
        return _SmoothingWithDim(self._epsilon, self._axis)
# pylint: enable=unused-argument
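
# LabelSmoothing below uses the plain elementwise formula whenever the number
# of classes is known up front; the custom operator above is a fallback that
# reads the class count from the input's shape at runtime.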

class LabelSmoothing(HybridBlock):
"""Applies label smoothing. See https://arxiv.org/abs/1512.00567.
It changes the construction of the probability to (1 - epsilon) for the true class,
epsilon / (num_classes - 1) otherwise.

    Parameters
    ----------
    axis : int, default -1
        The axis along which to smooth.
    epsilon : float, default 0.1
        The epsilon parameter for label smoothing.
    units : int or None
        Vocabulary size. If units is not given, it will be inferred from the
        input at runtime.
    sparse_label : bool, default True
        Whether the input is an integer array of class indices instead of a
        one-hot array.
    prefix : str
        Prefix for name of `Block`s
        (and name of weight if params is `None`).
    params : Parameter or None
        Container for weight sharing between cells.
        Created if `None`.
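
    Examples
    --------
    A minimal usage sketch; the values below assume ``epsilon=0.1`` and
    ``units=5``.

    >>> import mxnet as mx
    >>> smoother = LabelSmoothing(epsilon=0.1, units=5)
    >>> labels = mx.nd.array([[0, 2]])     # (batch_size=1, length=2) class ids
    >>> smoothed = smoother(labels)        # shape (1, 2, 5)
    >>> # smoothed[0, 0] is [0.92, 0.02, 0.02, 0.02, 0.02]: the true class
    >>> # gets 1 - epsilon + epsilon / 5, every other class epsilon / 5.
    >>> # With sparse_label=False and no units, V is read from the dense
    >>> # input's shape through the `_smoothing_with_dim` custom operator:
    >>> dense = mx.nd.one_hot(mx.nd.array([[0, 2]]), depth=5)
    >>> LabelSmoothing(epsilon=0.1, sparse_label=False)(dense).shape
    (1, 2, 5)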
"""
def __init__(self, axis=-1, epsilon=0.1, units=None,
sparse_label=True, prefix=None, params=None):
super(LabelSmoothing, self).__init__(prefix=prefix, params=params)
self._axis = axis
self._epsilon = epsilon
self._sparse_label = sparse_label
self._units = units

    def hybrid_forward(self, F, inputs, units=None): # pylint: disable=arguments-differ
"""
Parameters
----------
inputs : Symbol or NDArray
Shape (batch_size, length) or (batch_size, length, V)
units : int or None
Returns
-------
smoothed_label : Symbol or NDArray
Shape (batch_size, length, V)
"""
        if self._sparse_label:
            assert units is not None or self._units is not None, \
                'units needs to be given in the function call or at ' \
                'instance initialization when sparse_label is True'
            if units is None:
                units = self._units
            # Expand integer class indices into a one-hot array.
            inputs = F.one_hot(inputs, depth=units)
        if units is None and self._units is None:
            # V is unknown here, so defer to the custom operator, which reads
            # it from the input's shape at runtime.
            return F.Custom(inputs, epsilon=self._epsilon, axis=self._axis,
                            op_type='_smoothing_with_dim')
        else:
            if units is None:
                units = self._units
            return ((1 - self._epsilon) * inputs) + (self._epsilon / units)
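
# A typical downstream use (a sketch, not part of this module): feed the
# smoothed targets to a soft-label loss such as
# mx.gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False), passing the
# smoothed array as the label argument.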