Source code for gluonnlp.loss.label_smoothing

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Label Smoothing"""

__all__ = ['LabelSmoothing']

import mxnet as mx
from mxnet.gluon import HybridBlock

# pylint: disable=unused-argument
class _SmoothingWithDim(mx.operator.CustomOp):
    def __init__(self, epsilon=0.1, axis=-1):
        super(_SmoothingWithDim, self).__init__()
        self._epsilon = epsilon
        self._axis = axis

    def forward(self, is_train, req, in_data, out_data, aux):
        # Blend the input distribution with the uniform distribution over the
        # smoothed axis: (1 - epsilon) * p + epsilon / V, where V is the size
        # of the input along that axis.
        inputs = in_data[0]
        outputs = ((1 - self._epsilon) * inputs
                   + self._epsilon / float(inputs.shape[self._axis]))
        self.assign(out_data[0], req[0], outputs)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # The forward pass is affine in the input, so the input gradient is
        # the output gradient scaled by (1 - epsilon).
        self.assign(in_grad[0], req[0], (1 - self._epsilon) * out_grad[0])


@mx.operator.register('_smoothing_with_dim')
class _SmoothingWithDimProp(mx.operator.CustomOpProp):
    def __init__(self, epsilon=0.1, axis=-1):
        super(_SmoothingWithDimProp, self).__init__(need_top_grad=True)
        self._epsilon = float(epsilon)
        self._axis = int(axis)

    def list_arguments(self):
        return ['data']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # The output has the same shape as the input; there are no auxiliary
        # states.
        data_shape = in_shape[0]
        output_shape = data_shape
        return (data_shape,), (output_shape,), ()

    def declare_backward_dependency(self, out_grad, in_data, out_data):
        # The backward pass only depends on the gradient of the output.
        return out_grad

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # Create and return an instance of the custom op.
        return _SmoothingWithDim(self._epsilon, self._axis)
# pylint: enable=unused-argument
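
# A minimal usage sketch (illustrative, not part of the original module): the
# op registered above can also be invoked imperatively through mx.nd.Custom.
# With epsilon=0.1 over 4 classes, a one-hot row maps to
# (1 - 0.1) * [0, 1, 0, 0] + 0.1 / 4 = [0.025, 0.925, 0.025, 0.025]:
#
#   probs = mx.nd.array([[0., 1., 0., 0.]])
#   smoothed = mx.nd.Custom(probs, epsilon=0.1, axis=-1,
#                           op_type='_smoothing_with_dim')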


class LabelSmoothing(HybridBlock):
    """Applies label smoothing. See https://arxiv.org/abs/1512.00567.

    With smoothing parameter `epsilon` and `V` classes along `axis`, the
    smoothed distribution assigns probability ``1 - epsilon + epsilon / V``
    to the true class and ``epsilon / V`` to every other class.

    Parameters
    ----------
    axis : int, default -1
        The axis to smooth.
    epsilon : float, default 0.1
        The epsilon parameter in label smoothing.
    sparse_label : bool, default True
        Whether the input is an integer array instead of a one-hot array.
    units : int or None
        Vocabulary size. If units is not given, it will be inferred from
        the input.
    prefix : str
        Prefix for name of `Block`s (and name of weight if params is `None`).
    params : Parameter or None
        Container for weight sharing between cells. Created if `None`.
    """

    def __init__(self, axis=-1, epsilon=0.1, units=None,
                 sparse_label=True, prefix=None, params=None):
        super(LabelSmoothing, self).__init__(prefix=prefix, params=params)
        self._axis = axis
        self._epsilon = epsilon
        self._sparse_label = sparse_label
        self._units = units
    def hybrid_forward(self, F, inputs, units=None):  # pylint: disable=arguments-differ
        """Smooth the input labels.

        Parameters
        ----------
        inputs : Symbol or NDArray
            Shape (batch_size, length) or (batch_size, length, V)
        units : int or None
            Vocabulary size V. Takes precedence over the value given at
            instance initialization.

        Returns
        -------
        smoothed_label : Symbol or NDArray
            Shape (batch_size, length, V)
        """
        if self._sparse_label:
            assert units is not None or self._units is not None, \
                'units needs to be given in function call or ' \
                'instance initialization when sparse_label is True'
            if units is None:
                units = self._units
            # Expand the integer labels into one-hot vectors before smoothing.
            inputs = F.one_hot(inputs, depth=units)
        if units is None and self._units is None:
            # The number of classes is unknown here, so fall back to the
            # custom op, which reads it from the input shape at runtime.
            return F.Custom(inputs, epsilon=self._epsilon, axis=self._axis,
                            op_type='_smoothing_with_dim')
        else:
            if units is None:
                units = self._units
            return ((1 - self._epsilon) * inputs) + (self._epsilon / units)
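
# A usage sketch (illustrative, not part of the original module): smoothing
# sparse integer labels over an assumed vocabulary of 5 classes. With
# epsilon=0.1, the true class receives 0.9 + 0.1 / 5 = 0.92 and every other
# class 0.1 / 5 = 0.02:
#
#   smoother = LabelSmoothing(epsilon=0.1, units=5)
#   labels = mx.nd.array([[1, 2]])   # shape (batch_size, length)
#   smoothed = smoother(labels)      # shape (batch_size, length, 5)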