def similarity(data, vocab, normalize=False):
    """Return the similarity between some data and the vocabulary.

    Computes the dot products between all data vectors and each
    vocabulary vector. If ``normalize=True``, normalizes all vectors
    to compute the cosine similarity.

    Parameters
    ----------
    data : array_like or SemanticPointer
        The data used for comparison. Either a single vector or a
        two-dimensional array with one vector per row.
    vocab : Vocabulary or array_like
        Vocabulary (or list of vectors) to use to calculate
        the similarity values.
    normalize : bool, optional (Default: False)
        Whether to normalize all vectors, to compute the cosine similarity.

    Returns
    -------
    ndarray
        The similarity values. For one-dimensional ``data`` this is a
        one-dimensional array with one entry per vocabulary vector; for
        two-dimensional ``data`` it has one row per data vector and one
        column per vocabulary vector.

    Raises
    ------
    ValidationError
        If ``vocab`` is neither a ``Vocabulary`` nor an iterable.
    """
    # NOTE(review): imported at call time rather than module level,
    # presumably to avoid a circular import -- confirm before moving it.
    from nengo_spa.vocab import Vocabulary

    if isinstance(data, SemanticPointer):
        data = data.v
    # Accept any array_like (e.g. nested lists); ``.T`` below needs an ndarray.
    data = np.asarray(data)

    if isinstance(vocab, Vocabulary):
        vectors = vocab.vectors
    elif is_iterable(vocab):
        # Peek at the first element only; the default avoids a bare
        # StopIteration escaping when the iterable is empty.
        first = next(iter(vocab), None)
        if isinstance(first, SemanticPointer):
            vocab = [p.v for p in vocab]
        # ``np.array(..., copy=False)`` raises on NumPy >= 2 whenever a copy
        # is required (e.g. for lists), so convert with ``asarray`` instead
        # and promote to at least two dimensions afterwards.
        vectors = np.atleast_2d(np.asarray(vocab))
    else:
        raise ValidationError("%r object is not a valid vocabulary"
                              % (type(vocab).__name__), attr='vocab')

    dots = np.dot(vectors, data.T)

    if normalize:
        # Zero-norm vectors should return zero, so avoid divide-by-zero error
        eps = np.nextafter(0, 1)  # smallest float above zero
        dnorm = np.maximum(npext.norm(data.T, axis=0, keepdims=True), eps)
        vnorm = np.maximum(npext.norm(vectors, axis=1, keepdims=True), eps)
        if dots.ndim == 1:
            # ``dots`` is 1-D only for 2-D vectors and 1-D data, so drop the
            # kept axis of ``vnorm`` to divide element-wise, not broadcast.
            vnorm = np.squeeze(vnorm, axis=1)
        # In-place division cannot store float quotients in an integer
        # (or boolean) array, so promote such results to float first.
        if not np.issubdtype(dots.dtype, np.inexact):
            dots = dots.astype(float)
        dots /= dnorm
        dots /= vnorm

    return dots.T
评论列表
文章目录