# Source code for gluonnlp.embedding.evaluation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=eval-used, redefined-outer-name
"""Models for intrinsic and extrinsic word embedding evaluation"""
import mxnet as mx
from mxnet import registry
from mxnet.gluon import HybridBlock
# Public API of this module: registry helpers, evaluation-function base
# classes and implementations, and the two evaluator blocks.
__all__ = [
    'register',
    'create',
    'list_evaluation_functions',
    'WordEmbeddingSimilarityFunction',
    'WordEmbeddingAnalogyFunction',
    'CosineSimilarity',
    'ThreeCosMul',
    'ThreeCosAdd',
    'WordEmbeddingSimilarity',
    'WordEmbeddingAnalogy',
]
class _WordEmbeddingEvaluationFunction(HybridBlock):  # pylint: disable=abstract-method
    """Base class for word embedding evaluation functions.

    Marker base class shared by the similarity and analogy function
    hierarchies below; carries no behavior of its own beyond HybridBlock.
    """
class WordEmbeddingSimilarityFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
    """Base class for word embedding similarity functions.

    Concrete subclasses (e.g. :class:`CosineSimilarity`) implement
    ``hybrid_forward`` taking two batches of word vectors and returning a
    batch of similarity scores.
    """
class WordEmbeddingAnalogyFunction(_WordEmbeddingEvaluationFunction):  # pylint: disable=abstract-method
    """Base class for word embedding analogy functions.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    k : int, default 1
        Number of analogies to predict per input triple.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.
    """
###############################################################################
# Similarity and analogy functions registry helpers
###############################################################################
# Maps the `kind` string accepted by `create` and `list_evaluation_functions`
# to the registry base class for that kind of evaluation function.
# NOTE(review): "REGSITRY" is a typo for "REGISTRY"; the misspelled name is
# used consistently throughout this module, so any rename must update every
# reference in one change.
_REGSITRY_KIND_CLASS_MAP = {
    'similarity': WordEmbeddingSimilarityFunction,
    'analogy': WordEmbeddingAnalogyFunction
}
def register(class_):
    """Registers a new word embedding evaluation function.

    Once registered, we can create an instance with
    :func:`~gluonnlp.embedding.evaluation.create`.

    Parameters
    ----------
    class_ : type
        A subclass of either WordEmbeddingSimilarityFunction or
        WordEmbeddingAnalogyFunction.

    Returns
    -------
    type
        The registered class, so this function can be used as a decorator.

    Raises
    ------
    RuntimeError
        If `class_` subclasses neither base class.

    Examples
    --------
    >>> @gluonnlp.embedding.evaluation.register
    ... class MySimilarityFunction(gluonnlp.embedding.evaluation.WordEmbeddingSimilarityFunction):
    ...     def __init__(self, eps=1e-10):
    ...         pass
    >>> similarity_function = gluonnlp.embedding.evaluation.create('similarity',
    ...                                                            'MySimilarityFunction')
    >>> print(type(similarity_function))
    <class 'gluonnlp.embedding.evaluation.MySimilarityFunction'>

    >>> @gluonnlp.embedding.evaluation.register
    ... class MyAnalogyFunction(gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction):
    ...     def __init__(self, k=1, eps=1E-10):
    ...         pass
    >>> analogy_function = gluonnlp.embedding.evaluation.create('analogy', 'MyAnalogyFunction')
    >>> print(type(analogy_function))
    <class 'gluonnlp.embedding.evaluation.MyAnalogyFunction'>
    """
    # Dispatch on the base class so similarity and analogy functions live in
    # separate registries.
    if issubclass(class_, WordEmbeddingSimilarityFunction):
        register_ = registry.get_register_func(
            WordEmbeddingSimilarityFunction,
            'word embedding similarity evaluation function')
    elif issubclass(class_, WordEmbeddingAnalogyFunction):
        register_ = registry.get_register_func(
            WordEmbeddingAnalogyFunction,
            'word embedding analogy evaluation function')
    else:
        raise RuntimeError(
            'The custom function must either subclass '
            'WordEmbeddingSimilarityFunction or WordEmbeddingAnalogyFunction')

    return register_(class_)
def create(kind, name, **kwargs):
    """Creates an instance of a registered word embedding evaluation function.

    Parameters
    ----------
    kind : ['similarity', 'analogy']
        Return only valid names for similarity, analogy or both kinds of
        functions.
    name : str
        The evaluation function name (case-insensitive).
    **kwargs
        Additional keyword arguments passed to the function's constructor.

    Returns
    -------
    An instance of
    :class:`gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction`:
    or
    :class:`gluonnlp.embedding.evaluation.WordEmbeddingSimilarityFunction`:
        An instance of the specified evaluation function.

    Raises
    ------
    KeyError
        If `kind` is not a known evaluation-function kind.
    """
    if kind not in _REGSITRY_KIND_CLASS_MAP:
        # Note the trailing space inside 'to get ' — these adjacent string
        # literals are concatenated into one message.
        raise KeyError(
            'Cannot find `kind` {}. Use '
            '`list_evaluation_functions(kind=None).keys()` to get '
            'all the valid kinds of evaluation functions.'.format(kind))

    create_ = registry.get_create_func(
        _REGSITRY_KIND_CLASS_MAP[kind],
        'word embedding {} evaluation function'.format(kind))

    return create_(name, **kwargs)
def list_evaluation_functions(kind=None):
    """Get valid word embedding functions names.

    Parameters
    ----------
    kind : ['similarity', 'analogy', None]
        Return only valid names for similarity, analogy or both kinds of functions.

    Returns
    -------
    dict or list:
        A list of all the valid evaluation function names for the specified
        kind. If kind is set to None, returns a dict mapping each valid name to
        its respective output list. The valid names can be plugged in
        `gluonnlp.model.word_evaluation_model.create(name)`.

    Raises
    ------
    KeyError
        If `kind` is neither None nor a known evaluation-function kind.
    """
    if kind is None:
        # Recurse once per kind and collect the results into a dict.
        kind = tuple(_REGSITRY_KIND_CLASS_MAP.keys())

    if not isinstance(kind, tuple):
        if kind not in _REGSITRY_KIND_CLASS_MAP:
            # Note the trailing space inside 'to get all the ' — these
            # adjacent string literals are concatenated into one message.
            raise KeyError(
                'Cannot find `kind` {}. Use '
                '`list_evaluation_functions(kind=None).keys()` to get all '
                'the valid kinds of evaluation functions.'.format(kind))
        reg = registry.get_registry(_REGSITRY_KIND_CLASS_MAP[kind])
        return list(reg.keys())
    else:
        return {name: list_evaluation_functions(kind=name) for name in kind}
###############################################################################
# Word embedding similarity functions
###############################################################################
@register
class CosineSimilarity(WordEmbeddingSimilarityFunction):
    """Computes the cosine similarity.

    Parameters
    ----------
    eps : float, optional, default=1e-10
        A small constant for numerical stability.
    """

    def __init__(self, eps=1e-10, **kwargs):
        super(CosineSimilarity, self).__init__(**kwargs)
        self.eps = eps

    def hybrid_forward(self, F, x, y):  # pylint: disable=arguments-differ
        """Compute the cosine similarity between two batches of vectors.

        The cosine similarity is the dot product between the L2 normalized
        vectors.

        Parameters
        ----------
        x : Symbol or NDArray
        y : Symbol or NDArray

        Returns
        -------
        similarity : Symbol or NDArray
            The similarity computed by WordEmbeddingSimilarity.similarity_function.
        """
        # After L2 normalization, the dot product equals the cosine of the
        # angle between the vectors.
        x = F.L2Normalization(x, eps=self.eps)
        y = F.L2Normalization(y, eps=self.eps)
        # batch_dot of (batch, 1, dim) with (batch, dim, 1) gives
        # (batch, 1, 1); reshape flattens it to (batch, ).
        x = F.expand_dims(x, axis=1)
        y = F.expand_dims(y, axis=2)
        return F.batch_dot(x, y).reshape((-1, ))
###############################################################################
# Word embedding analogy functions
###############################################################################
@register
class ThreeCosMul(WordEmbeddingAnalogyFunction):
    """The 3CosMul analogy function.

    The 3CosMul analogy function is defined as

    .. math::
        \\arg\\max_{b^* ∈ V}\\frac{\\cos(b^∗, b) \\cos(b^*, a)}{cos(b^*, a^*) + ε}

    See the following paper for more details:

    - Levy, O., & Goldberg, Y. (2014). Linguistic regularities in sparse and
      explicit word representations. In R. Morante, & W. Yih, Proceedings of the
      Eighteenth Conference on Computational Natural Language Learning, CoNLL 2014,
      Baltimore, Maryland, USA, June 26-27, 2014 (pp. 171–180). : ACL.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    k : int, default 1
        Number of analogies to predict per input triple.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.
    """

    def __init__(self, idx_to_vec, k=1, eps=1E-10, exclude_question_words=True,
                 **kwargs):
        super(ThreeCosMul, self).__init__(**kwargs)
        self.k = k
        self.eps = eps
        self._exclude_question_words = exclude_question_words
        self._vocab_size, self._embed_size = idx_to_vec.shape
        # Pre-normalize the embedding matrix once, so the dot products in
        # hybrid_forward are cosine similarities.
        idx_to_vec = mx.nd.L2Normalization(idx_to_vec, eps=self.eps)
        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)

    def hybrid_forward(self, F, words1, words2, words3, weight):  # pylint: disable=arguments-differ
        """Compute ThreeCosMul for given question words.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Question words at first position. Shape (batch_size, )
        words2 : Symbol or NDArray
            Question words at second position. Shape (batch_size, )
        words3 : Symbol or NDArray
            Question words at third position. Shape (batch_size, )

        Returns
        -------
        Symbol or NDArray
            Predicted answer words. Shape (batch_size, k).
        """
        # Embed all three question-word batches with a single lookup.
        words123 = F.concat(words1, words2, words3, dim=0)
        embeddings_words123 = F.Embedding(words123, weight,
                                          input_dim=self._vocab_size,
                                          output_dim=self._embed_size)
        # Cosine similarity of every question word against the full
        # vocabulary (weight rows were L2-normalized in __init__).
        similarities = F.FullyConnected(
            embeddings_words123, weight, no_bias=True,
            num_hidden=self._vocab_size, flatten=False)
        # Map cosine similarities from [-1, 1] to [0, 1] as 3CosMul requires.
        similarities = (similarities + 1) / 2

        sim_w1w4, sim_w2w4, sim_w3w4 = F.split(similarities, num_outputs=3,
                                               axis=0)

        sim = (sim_w2w4 * sim_w3w4) / (sim_w1w4 + self.eps)

        if self._exclude_question_words:
            # Zero out the scores of the question words themselves so they
            # cannot be predicted as answers.
            for words in [words1, words2, words3]:
                sim = sim * F.one_hot(words, self.weight.shape[0], 0, 1)

        pred_idxs = F.topk(sim, k=self.k)
        return pred_idxs
@register
class ThreeCosAdd(WordEmbeddingAnalogyFunction):
    """The 3CosAdd analogy function.

    The 3CosAdd analogy function is defined as

    .. math::
        \\arg\\max_{b^* ∈ V}[\\cos(b^∗, b - a + a^*)]

    See the following paper for more details:

    - Levy, O., & Goldberg, Y. (2014). Linguistic regularities in sparse and
      explicit word representations. In R. Morante, & W. Yih, Proceedings of the
      Eighteenth Conference on Computational Natural Language Learning, CoNLL 2014,
      Baltimore, Maryland, USA, June 26-27, 2014 (pp. 171–180). : ACL.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    normalize : bool, default True
        Normalize all word embeddings before computing the analogy.
    k : int, default 1
        Number of analogies to predict per input triple.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.
    """

    def __init__(self,
                 idx_to_vec,
                 normalize=True,
                 k=1,
                 eps=1E-10,
                 exclude_question_words=True,
                 **kwargs):
        super(ThreeCosAdd, self).__init__(**kwargs)
        self.k = k
        self.eps = eps
        self.normalize = normalize
        self._exclude_question_words = exclude_question_words
        self._vocab_size, self._embed_size = idx_to_vec.shape
        if self.normalize:
            # Pre-normalize once so hybrid_forward can take the cheaper
            # similarity-space path below.
            idx_to_vec = mx.nd.L2Normalization(idx_to_vec, eps=self.eps)
        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)

    def hybrid_forward(self, F, words1, words2, words3, weight):  # pylint: disable=arguments-differ
        """Compute ThreeCosAdd for given question words.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Question words at first position. Shape (batch_size, )
        words2 : Symbol or NDArray
            Question words at second position. Shape (batch_size, )
        words3 : Symbol or NDArray
            Question words at third position. Shape (batch_size, )

        Returns
        -------
        Symbol or NDArray
            Predicted answer words. Shape (batch_size, k).
        """
        # Embed all three question-word batches with a single lookup.
        words123 = F.concat(words1, words2, words3, dim=0)
        embeddings_words123 = F.Embedding(words123, weight,
                                          input_dim=self._vocab_size,
                                          output_dim=self._embed_size)
        if self.normalize:
            # With normalized embeddings, cos(b*, b - a + a*) decomposes into
            # cos(b*, b) - cos(b*, a) + cos(b*, a*), so compute similarities
            # first and combine them.
            similarities = F.FullyConnected(
                embeddings_words123, weight, no_bias=True,
                num_hidden=self._vocab_size, flatten=False)
            sim_w1w4, sim_w2w4, sim_w3w4 = F.split(similarities, num_outputs=3,
                                                   axis=0)
            pred = sim_w3w4 - sim_w1w4 + sim_w2w4
        else:
            # Unnormalized path: form the analogy vector in embedding space,
            # then score it against the vocabulary.
            embeddings_word1, embeddings_word2, embeddings_word3 = F.split(
                embeddings_words123, num_outputs=3, axis=0)
            vector = (embeddings_word3 - embeddings_word1 + embeddings_word2)
            pred = F.FullyConnected(
                vector, weight, no_bias=True, num_hidden=self._vocab_size,
                flatten=False)

        if self._exclude_question_words:
            # Zero out the scores of the question words themselves so they
            # cannot be predicted as answers.
            for words in [words1, words2, words3]:
                pred = pred * F.one_hot(words, self.weight.shape[0], 0, 1)

        pred_idxs = F.topk(pred, k=self.k)
        return pred_idxs
###############################################################################
# Evaluation blocks
###############################################################################
class WordEmbeddingSimilarity(HybridBlock):
    """Word embeddings similarity task evaluator.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    similarity_function : str, default 'CosineSimilarity'
        Name of a registered WordEmbeddingSimilarityFunction.
    eps : float, optional, default=1e-10
        A small constant for numerical stability.

    Raises
    ------
    RuntimeError
        If the created function is not a WordEmbeddingSimilarityFunction.
    """

    def __init__(self, idx_to_vec, similarity_function='CosineSimilarity',
                 eps=1e-10, **kwargs):
        super(WordEmbeddingSimilarity, self).__init__(**kwargs)

        self.eps = eps
        self._vocab_size, self._embed_size = idx_to_vec.shape

        with self.name_scope():
            self.weight = self.params.get_constant('weight', idx_to_vec)
            self.similarity = create(kind='similarity',
                                     name=similarity_function, eps=self.eps)

        if not isinstance(self.similarity, WordEmbeddingSimilarityFunction):
            # Guard against a registered function of the wrong kind.
            raise RuntimeError(
                '{} is not a WordEmbeddingSimilarityFunction'.format(
                    self.similarity.__class__.__name__))

    def hybrid_forward(self, F, words1, words2, weight):  # pylint: disable=arguments-differ
        """Predict the similarity of words1 and words2.

        Parameters
        ----------
        words1 : Symbol or NDArray
            The indices of the words the we wish to compare to the words in words2.
        words2 : Symbol or NDArray
            The indices of the words the we wish to compare to the words in words1.

        Returns
        -------
        similarity : Symbol or NDArray
            The similarity computed by WordEmbeddingSimilarity.similarity_function.
        """
        # Look up both index batches in the constant embedding matrix, then
        # delegate the pairwise comparison to the configured function.
        embeddings_words1 = F.Embedding(words1, weight,
                                        input_dim=self._vocab_size,
                                        output_dim=self._embed_size)
        embeddings_words2 = F.Embedding(words2, weight,
                                        input_dim=self._vocab_size,
                                        output_dim=self._embed_size)
        similarity = self.similarity(embeddings_words1, embeddings_words2)
        return similarity
class WordEmbeddingAnalogy(HybridBlock):
    """Word embeddings analogy task evaluator.

    Parameters
    ----------
    idx_to_vec : mxnet.ndarray.NDArray
        Embedding matrix.
    analogy_function : str, default 'ThreeCosMul'
        Name of a registered WordEmbeddingAnalogyFunction.
    k : int, default 1
        Number of analogies to predict per input triple.
    exclude_question_words : bool, default True
        Exclude the 3 question words from being a valid answer.

    Raises
    ------
    RuntimeError
        If the created function is not a WordEmbeddingAnalogyFunction.
    """

    def __init__(self, idx_to_vec, analogy_function='ThreeCosMul', k=1,
                 exclude_question_words=True, **kwargs):
        super(WordEmbeddingAnalogy, self).__init__(**kwargs)

        assert k >= 1
        self.k = k
        self.exclude_question_words = exclude_question_words

        with self.name_scope():
            self.analogy = create(
                kind='analogy',
                name=analogy_function,
                idx_to_vec=idx_to_vec,
                k=self.k,
                exclude_question_words=exclude_question_words)

        if not isinstance(self.analogy, WordEmbeddingAnalogyFunction):
            # Guard against a registered function of the wrong kind.
            raise RuntimeError(
                '{} is not a WordEmbeddingAnalogyFunction'.format(
                    self.analogy.__class__.__name__))

    def hybrid_forward(self, F, words1, words2, words3):  # pylint: disable=arguments-differ, unused-argument
        """Compute analogies for given question words.

        Parameters
        ----------
        words1 : Symbol or NDArray
            Word indices of first question words. Shape (batch_size, ).
        words2 : Symbol or NDArray
            Word indices of second question words. Shape (batch_size, ).
        words3 : Symbol or NDArray
            Word indices of third question words. Shape (batch_size, ).

        Returns
        -------
        predicted_indices : Symbol or NDArray
            Indices of predicted analogies of shape (batch_size, k)
        """
        # The analogy function holds the embedding matrix as a constant
        # parameter, so only the word indices are forwarded here.
        return self.analogy(words1, words2, words3)