Source code for gluonnlp.embedding.evaluation

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=eval-used, redefined-outer-name
"""Models for intrinsic and extrinsic word embedding evaluation"""

import mxnet as mx
from mxnet import registry
from mxnet.gluon import HybridBlock

# Public API of this module, grouped by role.
__all__ = [
    # Registry helpers
    'register',
    'create',
    'list_evaluation_functions',
    # Base classes for custom evaluation functions
    'WordEmbeddingSimilarityFunction',
    'WordEmbeddingAnalogyFunction',
    # Built-in evaluation functions
    'CosineSimilarity',
    'ThreeCosMul',
    'ThreeCosAdd',
    # Evaluation blocks
    'WordEmbeddingSimilarity',
    'WordEmbeddingAnalogy',
]


class _WordEmbeddingEvaluationFunction(HybridBlock):  # pylint: disable=abstract-method
    """Common ancestor of all word embedding evaluation functions.

    The class adds nothing on top of ``HybridBlock``; it only gives the
    similarity and analogy function hierarchies a shared parent.
    """


class WordEmbeddingSimilarityFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
    """Abstract parent of every word embedding similarity function.

    Classes deriving from this base are filed under the ``'similarity'`` kind
    when they are passed to :func:`register`.
    """
class WordEmbeddingAnalogyFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
    """Abstract parent of every word embedding analogy function.

    Classes deriving from this base are filed under the ``'analogy'`` kind
    when they are passed to :func:`register`. The base class itself defines
    no constructor; the parameters below describe what concrete analogy
    functions are expected to accept.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    k : int, default 1
        Number of analogies to predict per input triple.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.
    """
###############################################################################
# Similarity and analogy functions registry helpers
###############################################################################
# Maps the user-facing `kind` string to the base class whose MXNet registry
# holds the evaluation functions of that kind.
# NOTE: the identifier is misspelled ("REGSITRY"); it is kept as-is because
# the registry helpers in this module look it up under this exact name.
_REGSITRY_KIND_CLASS_MAP = {
    'similarity': WordEmbeddingSimilarityFunction,
    'analogy': WordEmbeddingAnalogyFunction,
}
def register(class_):
    """Add a custom word embedding evaluation function to the registry.

    The class is filed under the registry of the first matching base class
    (similarity is checked before analogy). After registration an instance
    can be obtained through :func:`~gluonnlp.embedding.evaluation.create`.

    Parameters
    ----------
    class_ : type
        A subclass of WordEmbeddingSimilarityFunction or
        WordEmbeddingAnalogyFunction.

    Returns
    -------
    The value returned by the MXNet register function for `class_`, which
    allows this function to be used as a class decorator.

    Raises
    ------
    RuntimeError
        If `class_` derives from neither of the two base classes.

    Examples
    --------
    >>> @gluonnlp.embedding.evaluation.register
    ... class MySimilarityFunction(gluonnlp.embedding.evaluation.WordEmbeddingSimilarityFunction):
    ...     def __init__(self, eps=1e-10):
    ...         pass
    >>> similarity_function = gluonnlp.embedding.evaluation.create('similarity',
    ...                                                            'MySimilarityFunction')
    >>> print(type(similarity_function))
    <class 'gluonnlp.embedding.evaluation.MySimilarityFunction'>

    >>> @gluonnlp.embedding.evaluation.register
    ... class MyAnalogyFunction(gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction):
    ...     def __init__(self, k=1, eps=1E-10):
    ...         pass
    >>> analogy_function = gluonnlp.embedding.evaluation.create('analogy', 'MyAnalogyFunction')
    >>> print(type(analogy_function))
    <class 'gluonnlp.embedding.evaluation.MyAnalogyFunction'>

    """
    # (base class, registry nickname) pairs, tried in order.
    candidates = (
        (WordEmbeddingSimilarityFunction,
         'word embedding similarity evaluation function'),
        (WordEmbeddingAnalogyFunction,
         'word embedding analogy evaluation function'),
    )
    for base_class, nickname in candidates:
        if issubclass(class_, base_class):
            register_ = registry.get_register_func(base_class, nickname)
            return register_(class_)

    raise RuntimeError(
        'The custom function must either subclass '
        'WordEmbeddingSimilarityFunction or WordEmbeddingAnalogyFunction')
def create(kind, name, **kwargs):
    """Creates an instance of a registered word embedding evaluation function.

    Parameters
    ----------
    kind : ['similarity', 'analogy']
        Kind of evaluation function to create; selects the registry that
        `name` is looked up in.
    name : str
        The evaluation function name (case-insensitive).
    **kwargs
        Passed through, together with `name`, to the create function of the
        MXNet registry selected by `kind`.

    Returns
    -------
    An instance of
    :class:`gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction`:
    or
    :class:`gluonnlp.embedding.evaluation.WordEmbeddingSimilarityFunction`:
        An instance of the specified evaluation function.

    Raises
    ------
    KeyError
        If `kind` is not one of the known kinds.

    """
    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # The trailing space after "to get" matters: the adjacent literals are
        # concatenated verbatim by the parser.
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get '
            'all the valid kinds of evaluation functions.'.format(kind))

    create_ = registry.get_create_func(
        _REGSITRY_KIND_CLASS_MAP[kind],
        'word embedding {} evaluation function'.format(kind))

    return create_(name, **kwargs)
def list_evaluation_functions(kind=None):
    """Get valid word embedding functions names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None]
        Return only valid names for similarity, analogy or both kinds of
        functions. A tuple of kinds is also accepted and is handled like
        None, restricted to the kinds it contains.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None, returns a dict mapping each valid kind
        to its respective list of names. The valid names can be plugged in
        `gluonnlp.embedding.evaluation.create(kind, name)`.

    Raises
    ------
    KeyError
        If `kind` is neither None, a tuple, nor one of the known kinds.

    """
    if kind is None:
        kind = tuple(_REGSITRY_KIND_CLASS_MAP)

    if isinstance(kind, tuple):
        # One recursive lookup per kind; each returns the list of names.
        return {name: list_evaluation_functions(kind=name) for name in kind}

    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # The trailing space after "all the" matters: the adjacent literals
        # are concatenated verbatim by the parser.
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get all the '
            'valid kinds of evaluation functions.'.format(kind))

    reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
    return list(reg.keys())
###############################################################################
# Word embedding similarity functions
###############################################################################
@register
class CosineSimilarity(WordEmbeddingSimilarityFunction):
    """Cosine similarity between paired vectors.

    Parameters
    ----------
    eps : float, optional, default=1e-10
        A small constant for numerical stability; forwarded to the L2
        normalization of both inputs.
    """

    def __init__(self, eps=1e-10, **kwargs):
        super(CosineSimilarity, self).__init__(**kwargs)
        self.eps = eps

    def hybrid_forward(self, F, x, y):  # pylint: disable=arguments-differ
        """Compute the cosine similarity between two batches of vectors.

        Both inputs are L2 normalized and then multiplied row by row, i.e.
        the i-th output is the dot product of the i-th normalized vector of
        `x` with the i-th normalized vector of `y`.

        Parameters
        ----------
        x : Symbol or NDArray
            First batch of vectors; presumably of shape
            (batch_size, embed_size).
        y : Symbol or NDArray
            Second batch of vectors, same layout as `x`.

        Returns
        -------
        similarity : Symbol or NDArray
            The similarities, flattened to one dimension.

        """
        unit_x = F.L2Normalization(x, eps=self.eps)
        unit_y = F.L2Normalization(y, eps=self.eps)

        # Turn every vector of x into a row and every vector of y into a
        # column so that batch_dot yields one scalar product per pair.
        as_rows = F.expand_dims(unit_x, axis=1)
        as_columns = F.expand_dims(unit_y, axis=2)
        pairwise_dot = F.batch_dot(as_rows, as_columns)

        return pairwise_dot.reshape((-1, ))
###############################################################################
# Word embedding analogy functions
###############################################################################
@register
class ThreeCosMul(WordEmbeddingAnalogyFunction):
    """The 3CosMul analogy function.

    For an analogy question "a is to a^* as b is to b^*", the 3CosMul
    analogy function is defined as

    .. math::
        \\arg\\max_{b^* \\in V}\\frac{\\cos(b^*, b) \\cos(b^*, a^*)}{\\cos(b^*, a) + \\epsilon}

    where the cosine similarities are first mapped from [-1, 1] to [0, 1].

    See the following paper for more details:

    - Levy, O., & Goldberg, Y. (2014). Linguistic regularities in sparse and
      explicit word representations. In R. Morante, & W. Yih, Proceedings of the
      Eighteenth Conference on Computational Natural Language Learning, CoNLL 2014,
      Baltimore, Maryland, USA, June 26-27, 2014 (pp. 171-180). : ACL.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix. It is L2 normalized row-wise before being stored.
    k : int, default 1
        Number of analogies to predict per input triple.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.

    """

    def __init__(self, idx_to_vec, k=1, eps=1E-10, exclude_question_words=True, **kwargs):
        super(ThreeCosMul, self).__init__(**kwargs)

        self.k = k
        self.eps = eps
        self._exclude_question_words = exclude_question_words

        self._vocab_size, self._embed_size = idx_to_vec.shape

        # Normalize once up front so that plain dot products against the
        # stored matrix are cosine similarities.
        idx_to_vec = mx.nd.L2Normalization(idx_to_vec, eps=self.eps)
        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)

    def hybrid_forward(self, F, words1, words2, words3, weight):  # pylint: disable=arguments-differ
        """Compute ThreeCosMul for given question words.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Question words at first position (a). Shape (batch_size, )
        words2 : Symbol or NDArray
            Question words at second position (a^*). Shape (batch_size, )
        words3 : Symbol or NDArray
            Question words at third position (b). Shape (batch_size, )
        weight : Symbol or NDArray
            The normalized embedding matrix registered as the 'weight'
            constant in the constructor.

        Returns
        -------
        Symbol or NDArray
            Predicted answer words. Shape (batch_size, k).

        """
        # Look up all three question words in one go; the result is split
        # back into three equal parts along the batch axis below.
        words123 = F.concat(words1, words2, words3, dim=0)
        embeddings_words123 = F.Embedding(words123, weight,
                                          input_dim=self._vocab_size,
                                          output_dim=self._embed_size)
        # Dot product of every question word with every vocabulary word.
        similarities = F.FullyConnected(
            embeddings_words123, weight, no_bias=True,
            num_hidden=self._vocab_size, flatten=False)
        # Map cosine similarities to [0, 1]
        similarities = (similarities + 1) / 2

        sim_w1w4, sim_w2w4, sim_w3w4 = F.split(similarities, num_outputs=3,
                                               axis=0)

        sim = (sim_w2w4 * sim_w3w4) / (sim_w1w4 + self.eps)

        if self._exclude_question_words:
            for words in [words1, words2, words3]:
                # Mask is 0 at the question word and 1 elsewhere. Scores are
                # built from similarities shifted to [0, 1], so 0 is the
                # lowest score a candidate can get.
                sim = sim * F.one_hot(words, self._vocab_size,
                                      on_value=0, off_value=1)

        pred_idxs = F.topk(sim, k=self.k)
        return pred_idxs
@register
class ThreeCosAdd(WordEmbeddingAnalogyFunction):
    """The 3CosAdd analogy function.

    For an analogy question "a is to a^* as b is to b^*", the 3CosAdd
    analogy function is defined as

    .. math::
        \\arg\\max_{b^* \\in V}[\\cos(b^*, b - a + a^*)]

    See the following paper for more details:

    - Levy, O., & Goldberg, Y. (2014). Linguistic regularities in sparse and
      explicit word representations. In R. Morante, & W. Yih, Proceedings of the
      Eighteenth Conference on Computational Natural Language Learning, CoNLL 2014,
      Baltimore, Maryland, USA, June 26-27, 2014 (pp. 171-180). : ACL.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    normalize : bool, default True
        Normalize all word embeddings before computing the analogy. If False,
        candidates are ranked by the dot product of ``b - a + a^*`` with the
        raw embeddings.
    k : int, default 1
        Number of analogies to predict per input triple.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.

    """

    def __init__(self,
                 idx_to_vec,
                 normalize=True,
                 k=1,
                 eps=1E-10,
                 exclude_question_words=True,
                 **kwargs):
        super(ThreeCosAdd, self).__init__(**kwargs)

        self.k = k
        self.eps = eps
        self.normalize = normalize
        self._exclude_question_words = exclude_question_words
        self._vocab_size, self._embed_size = idx_to_vec.shape

        if self.normalize:
            idx_to_vec = mx.nd.L2Normalization(idx_to_vec, eps=self.eps)

        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)

    def hybrid_forward(self, F, words1, words2, words3, weight):  # pylint: disable=arguments-differ
        """Compute ThreeCosAdd for given question words.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Question words at first position (a). Shape (batch_size, )
        words2 : Symbol or NDArray
            Question words at second position (a^*). Shape (batch_size, )
        words3 : Symbol or NDArray
            Question words at third position (b). Shape (batch_size, )
        weight : Symbol or NDArray
            The embedding matrix registered as the 'weight' constant in the
            constructor.

        Returns
        -------
        Symbol or NDArray
            Predicted answer words. Shape (batch_size, k).

        """
        # Look up all three question words in one go; the result is split
        # back into three equal parts along the batch axis below.
        words123 = F.concat(words1, words2, words3, dim=0)
        embeddings_words123 = F.Embedding(words123, weight,
                                          input_dim=self._vocab_size,
                                          output_dim=self._embed_size)
        if self.normalize:
            # Score every vocabulary word against each question word, then
            # combine the three score vectors.
            similarities = F.FullyConnected(
                embeddings_words123, weight, no_bias=True,
                num_hidden=self._vocab_size, flatten=False)
            sim_w1w4, sim_w2w4, sim_w3w4 = F.split(
                similarities, num_outputs=3, axis=0)
            pred = sim_w3w4 - sim_w1w4 + sim_w2w4
        else:
            # Combine the question word vectors first, then score the
            # resulting vector against every vocabulary word.
            embeddings_word1, embeddings_word2, embeddings_word3 = F.split(
                embeddings_words123, num_outputs=3, axis=0)
            vector = (embeddings_word3 - embeddings_word1 + embeddings_word2)
            pred = F.FullyConnected(
                vector, weight, no_bias=True, num_hidden=self._vocab_size,
                flatten=False)

        if self._exclude_question_words:
            for words in [words1, words2, words3]:
                # 1 everywhere except a 0 at each row's question word.
                keep = F.one_hot(words, self._vocab_size,
                                 on_value=0, off_value=1)
                # Scores can be negative here, so merely zeroing a question
                # word could still leave it ranked above every valid
                # candidate. Push it far below any realistic score instead.
                pred = pred * keep + (keep - 1) * 1e30

        pred_idxs = F.topk(pred, k=self.k)
        return pred_idxs
###############################################################################
# Evaluation blocks
###############################################################################
class WordEmbeddingSimilarity(HybridBlock):
    """Word embeddings similarity task evaluator.

    Looks up the embeddings of two batches of word indices and scores each
    pair with a registered similarity function.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    similarity_function : str, default 'CosineSimilarity'
        Name of a registered WordEmbeddingSimilarityFunction.
    eps : float, optional, default=1e-10
        A small constant for numerical stability; passed on to the
        similarity function.

    Raises
    ------
    RuntimeError
        If the created similarity function is not a
        WordEmbeddingSimilarityFunction.

    """

    def __init__(self, idx_to_vec, similarity_function='CosineSimilarity',
                 eps=1e-10, **kwargs):
        super(WordEmbeddingSimilarity, self).__init__(**kwargs)

        self.eps = eps
        self._vocab_size, self._embed_size = idx_to_vec.shape

        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)
            self.similarity = create(kind='similarity',
                                     name=similarity_function,
                                     eps=self.eps)

        if not isinstance(self.similarity, WordEmbeddingSimilarityFunction):
            # Name the base class that is actually being checked.
            raise RuntimeError(
                '{} is not a WordEmbeddingSimilarityFunction'.format(
                    self.similarity.__class__.__name__))

    def hybrid_forward(self, F, words1, words2, weight):  # pylint: disable=arguments-differ
        """Predict the similarity of words1 and words2.

        Parameters
        ----------
        words1 : Symbol or NDArray
            The indices of the words that we wish to compare to the words in
            words2.
        words2 : Symbol or NDArray
            The indices of the words that we wish to compare to the words in
            words1.
        weight : Symbol or NDArray
            The embedding matrix registered as the 'weight' constant in the
            constructor.

        Returns
        -------
        similarity : Symbol or NDArray
            The similarity computed by WordEmbeddingSimilarity.similarity.

        """
        embeddings_words1 = F.Embedding(words1, weight,
                                        input_dim=self._vocab_size,
                                        output_dim=self._embed_size)
        embeddings_words2 = F.Embedding(words2, weight,
                                        input_dim=self._vocab_size,
                                        output_dim=self._embed_size)
        similarity = self.similarity(embeddings_words1, embeddings_words2)
        return similarity
class WordEmbeddingAnalogy(HybridBlock):
    """Evaluator block for the word analogy task.

    The block is a thin wrapper: it instantiates a registered analogy
    function and delegates every forward call to it.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    analogy_function : str, default 'ThreeCosMul'
        Name of a registered WordEmbeddingAnalogyFunction.
    k : int, default 1
        Number of analogies to predict per input triple. Must be at least 1.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.

    """

    def __init__(self, idx_to_vec, analogy_function='ThreeCosMul', k=1,
                 exclude_question_words=True, **kwargs):
        super(WordEmbeddingAnalogy, self).__init__(**kwargs)

        assert k >= 1
        self.k = k
        self.exclude_question_words = exclude_question_words

        # Constructor arguments handed to the selected analogy function.
        function_kwargs = {
            'idx_to_vec': idx_to_vec,
            'k': self.k,
            'exclude_question_words': exclude_question_words,
        }
        with self.name_scope():
            self.analogy = create('analogy', analogy_function,
                                  **function_kwargs)

        if isinstance(self.analogy, WordEmbeddingAnalogyFunction):
            return
        raise RuntimeError(
            '{} is not a WordEmbeddingAnalogyFunction'.format(
                self.analogy.__class__.__name__))

    def hybrid_forward(self, F, words1, words2, words3):  # pylint: disable=arguments-differ, unused-argument
        """Predict analogy answers for a batch of question triples.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Word indices of first question words. Shape (batch_size, ).
        words2 : Symbol or NDArray
            Word indices of second question words. Shape (batch_size, ).
        words3 : Symbol or NDArray
            Word indices of third question words. Shape (batch_size, ).

        Returns
        -------
        predicted_indices : Symbol or NDArray
            Indices of predicted analogies of shape (batch_size, k)

        """
        predicted_indices = self.analogy(words1, words2, words3)
        return predicted_indices