def process_options(args):
    """Parse command-line arguments, validate them and load the vectors.

    The arguments are parsed with ``argparser()``, checked for consistency,
    and the first entry of ``options.vectors`` is loaded through
    ``wvlib.load`` (limited to ``options.max_rank`` entries when given).
    The loaded vectors are optionally normalized in place and the extracted
    vector data is optionally whitened.

    Args:
        args: Sequence of argument strings handed to the argument parser.

    Returns:
        A ``(words, vectors, wv, options)`` tuple. When whitening is
        requested, ``vectors`` holds the output of ``whiten`` while ``wv``
        is returned as loaded (and normalized, if requested).

    Raises:
        ValueError: If ``max_rank`` is below 1, ``threshold`` or
            ``tolerance`` is negative, or ``approximate`` is requested
            without a (non-zero) threshold.
        NotImplementedError: If ``approximate`` is requested with a metric
            other than ``'cosine'``, or together with ``whiten``.
    """
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    # Truthiness test: a threshold of 0 is treated the same as no threshold.
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')
    # whitening should be implemented in wvlib to support together with
    # approximate similarity. Checked here, before loading, so that an
    # unsupported combination fails without reading the vector file.
    if options.whiten and options.approximate:
        raise NotImplementedError(
            'whiten is not supported together with approximate')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
评论列表
文章目录