# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Loss functions."""
__all__ = ['MaskedSoftmaxCrossEntropyLoss', 'MaskedSoftmaxCELoss']
import numpy as np
from mxnet.gluon.loss import SoftmaxCELoss

class MaskedSoftmaxCrossEntropyLoss(SoftmaxCELoss):
    r"""Wrapper of SoftmaxCELoss that accepts valid_length as an additional
    input (alias: MaskedSoftmaxCELoss).

    If `sparse_label` is `True` (default), `label` should contain integer
    category indices:

    .. math::

        \DeclareMathOperator{\softmax}{softmax}

        p = \softmax({pred})

        L = -\sum_i \log p_{i,{label}_i}

    `label`'s shape should be `pred`'s shape with the channel dimension
    removed, i.e. for `pred` with shape (1, 2, 3), `label`'s shape should
    be (1, 2).

    If `sparse_label` is `False`, `label` should contain a probability
    distribution over the classes, and `label`'s shape should be the same
    as `pred`'s:

    .. math::

        p = \softmax({pred})

        L = -\sum_i \sum_j {label}_{ij} \log p_{ij}

    Parameters
    ----------
    sparse_label : bool, default True
        Whether `label` is an integer array instead of a probability
        distribution.
    from_logits : bool, default False
        Whether the input is a log probability (usually from log_softmax)
        instead of unnormalized numbers.
    weight : float or None
        Global scalar weight for the loss.

    Inputs:
        - **pred**: the prediction tensor, of shape (N, T, C).
        - **label**: the truth tensor. When `sparse_label` is True, `label`'s
          shape should be `pred`'s shape with the channel dimension C removed,
          i.e. for `pred` with shape (1, 2, 3), `label`'s shape should be
          (1, 2) and its values should be integers between 0 and 2.
          If `sparse_label` is False, `label`'s shape must be the same as
          `pred`'s and its values should be floats in the range `[0, 1]`.
        - **valid_length**: valid length of each sequence, of shape
          (batch_size, ). Prediction elements beyond a sequence's valid
          length are masked out.

    Outputs:
        - **loss**: loss tensor with shape (batch_size,). Dimensions other
          than batch_axis are averaged out.
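
    Examples
    --------
    A minimal usage sketch; the shapes and values below are illustrative,
    not taken from the original documentation:

    >>> import mxnet as mx
    >>> loss = MaskedSoftmaxCrossEntropyLoss()
    >>> pred = mx.nd.random.uniform(shape=(2, 3, 4))   # (N, T, C)
    >>> label = mx.nd.array([[0, 1, 2], [3, 2, 1]])    # (N, T)
    >>> valid_length = mx.nd.array([2, 3])             # (batch_size,)
    >>> loss(pred, label, valid_length).shape
    (2,)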
"""

    def __init__(self, sparse_label=True, from_logits=False, weight=None,
                 **kwargs):
        # The current technique only works with NTC data.
        axis = -1
        batch_axis = 0
        super(MaskedSoftmaxCrossEntropyLoss, self).__init__(axis, sparse_label, from_logits,
                                                            weight, batch_axis, **kwargs)

    def hybrid_forward(self, F, pred, label, valid_length):  # pylint: disable=arguments-differ
        if self._sparse_label:
            # Sparse labels have shape (N, T); add a trailing axis so the
            # weight lines up with the per-position loss, which keeps the
            # picked channel axis.
            sample_weight = F.cast(F.expand_dims(F.ones_like(label), axis=-1), dtype=np.float32)
        else:
            # Dense labels already share pred's shape, so the mask can be
            # built on label directly.
            sample_weight = F.ones_like(label)
        # Zero the weights of all positions at or beyond each sequence's
        # valid length; axis=1 is the time axis in NTC layout.
        sample_weight = F.SequenceMask(sample_weight,
                                       sequence_length=valid_length,
                                       use_sequence_length=True,
                                       axis=1)
        return super(MaskedSoftmaxCrossEntropyLoss, self).hybrid_forward(
            F, pred, label, sample_weight)


MaskedSoftmaxCELoss = MaskedSoftmaxCrossEntropyLoss
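

# A minimal sketch of the masking behaviour (this demo block is an
# illustrative addition, not part of the original module): positions at or
# beyond valid_length should not contribute to the loss.
if __name__ == '__main__':
    import mxnet as mx

    loss_fn = MaskedSoftmaxCELoss()
    pred = mx.nd.random.uniform(shape=(1, 3, 4))   # (N, T, C)
    label = mx.nd.array([[0, 1, 2]])               # (N, T)
    valid_length = mx.nd.array([2])                # time step t=2 is masked
    base = loss_fn(pred, label, valid_length)
    # Perturbing only the masked time step leaves the loss unchanged,
    # because its sample weight was zeroed by SequenceMask.
    perturbed = pred.copy()
    perturbed[0, 2, :] = 100.0
    delta = mx.nd.abs(base - loss_fn(perturbed, label, valid_length))
    assert delta.asscalar() < 1e-6
    print('loss:', base.asnumpy(), '(masked positions do not contribute)')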