def closest_contexts(self, w, n=10):
"""
Assumes the vectors have been normalized.
"""
scores = self.represent(w)
return heapq.nlargest(n, zip(scores.data, [self.ic[i] for i in scores.indices]))
def closest(self, w, n=10):
"""
Assumes the vectors have been normalized.
"""
scores = self.m.dot(self.represent(w).T).T.tocsr()
return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
def closest(self, w, n=10):
"""
Assumes the vectors have been normalized.
"""
scores = self.m.dot(self.represent(w))
return heapq.nlargest(n, zip(scores, self.iw))
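# A self-contained sketch (toy data, not taken from the snippets above) of the
# same pattern: rank a dense similarity vector against its vocabulary with
# heapq.nlargest instead of sorting all V scores.
import heapq
import numpy as np

def top_n_similar(vecs, vocab, query_vec, n=10):
    # vecs: (V, d) array of unit-normalized rows; vocab: list of V words
    scores = vecs.dot(query_vec)  # cosine similarities when rows are unit-normalized
    return heapq.nlargest(n, zip(scores, vocab))

toy_vecs = np.eye(3)  # three orthonormal "embeddings"
print(top_n_similar(toy_vecs, ['a', 'b', 'c'], np.array([1.0, 0.2, 0.0]), n=2))
# -> the two best (score, word) pairs: 'a' (1.0) then 'b' (0.2)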
def closest(self, w, n=10):
"""
Assumes the vectors have been normalized.
"""
if self.oov(w):
return []
scores = self.m.dot(self.represent(w).T).T.tocsr()
return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
def closest_first_order(self, w, n=10):
if self.oov(w):
return []
scores = self.m[self.wi[w], :]
return heapq.nlargest(n, zip(scores.data, [self.iw[i] for i in scores.indices]))
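# Sketch of the sparse variant used in the snippets above, assuming SciPy is
# available: take only the nonzero entries of a 1 x V CSR similarity row
# (scores.data / scores.indices) and rank them with nlargest.
import heapq
from scipy.sparse import csr_matrix

row = csr_matrix([[0.0, 0.9, 0.0, 0.4]])
vocab = ['a', 'b', 'c', 'd']
top = heapq.nlargest(2, zip(row.data, [vocab[i] for i in row.indices]))
print(top)  # the two nonzero scores, largest first: 'b' (0.9), then 'd' (0.4)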
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
if n is None:
return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
def most_frequent_terms(self, fieldname, number=5, prefix=''):
"""Returns the top 'number' most frequent terms in the given field as a
list of (frequency, text) tuples.
"""
gen = ((terminfo.weight(), text) for text, terminfo
in self.iter_prefix(fieldname, prefix))
return nlargest(number, gen)
def most_distinctive_terms(self, fieldname, number=5, prefix=''):
"""Returns the top 'number' terms with the highest `tf*idf` scores as
a list of (score, text) tuples.
"""
N = float(self.doc_count())
gen = ((terminfo.weight() * log(N / terminfo.doc_frequency()), text)
for text, terminfo in self.iter_prefix(fieldname, prefix))
return nlargest(number, gen)
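# Hedged illustration (plain dicts, not the Whoosh reader objects above) of the
# same idea: build a generator of (score, term) pairs and hand it to nlargest.
from heapq import nlargest
from math import log

def distinctive_terms(term_weight, term_docfreq, n_docs, number=5):
    # term_weight: {term: total weight}; term_docfreq: {term: #docs containing term}
    gen = ((w * log(n_docs / term_docfreq[t]), t) for t, w in term_weight.items())
    return nlargest(number, gen)

print(distinctive_terms({'cat': 10.0, 'the': 100.0}, {'cat': 2, 'the': 50}, 50, number=1))
# 'cat' wins: 'the' appears in every document, so its idf factor is log(1) = 0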
def top_fragments(fragments, count, scorer, order, minscore=1):
scored_fragments = ((scorer(f), f) for f in fragments)
scored_fragments = nlargest(count, scored_fragments)
best_fragments = [sf for score, sf in scored_fragments if score >= minscore]
best_fragments.sort(key=order)
return best_fragments
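# Minimal sketch of the pattern above with plain numbers standing in for the
# fragment objects: score everything, keep the best `count` via nlargest, drop
# anything under `minscore`, then restore a caller-defined order.
from heapq import nlargest

def top_items(items, count, scorer, order, minscore=1):
    scored = nlargest(count, ((scorer(x), x) for x in items))
    best = [x for score, x in scored if score >= minscore]
    best.sort(key=order)
    return best

print(top_items([5, 3, 9, 1], count=2, scorer=lambda x: x, order=lambda x: x))
# -> [5, 9]: the two largest values, re-sorted into ascending order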
def create_ranking3(edge_weight, k, adj, num):
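    # Top-k longest paths from node 0 to the sink (node len(adj)) in a DAG:
    # the forward pass keeps up to k (path_length, parent, parent_rank) entries
    # per node via heappush + nlargest; the backward pass follows the stored
    # parent/rank pointers from the sink to rebuild each ranked path as a
    # (length, tuple-of-edges) pair.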
sink = len(adj)
EMPTY = -2
ROOT = -1
MIN_LENGTH = float('-inf')
# heaps = [[(0, EMPTY, 0) for j in range(k)] for i in xrange(sink + 1)]
heaps = [[(MIN_LENGTH, EMPTY, 0) for j in range(k + 1)] for i in xrange(sink + 1)]
heaps[0][0] = (0, ROOT, 0)
# forward
for current in xrange(sink):
new_rank = 0
for length, parent, rank in heaps[current]:
if parent != EMPTY:
for child in adj[current]:
ew = edge_weight[0, num[(current, child)]]
new_length = length + ew
# heapq.heapreplace(heaps[child], (new_length, current, new_rank))
heapq.heappush(heaps[child], (new_length, current, new_rank))
heaps[child] = heapq.nlargest(k, heaps[child])
new_rank += 1
# backward
ranking = []
for rank in xrange(k):
path = []
current = sink
current_rank = rank
while current != ROOT:
path.append(current)
_, current, current_rank = heaps[current][current_rank]
length, _, _ = heaps[sink][rank]
path = list(reversed(path))
path = tuple(zip(path[:-1], path[1:]))
ranking.append((length, path))
return ranking
def S_diff(lst):
'''Given a list of int or float, calculate S_diff and S_point'''
    S_avg = float(sum(lst)) / len(lst)  # true division, even for int input on Python 2
S_dist = [i-S_avg for i in lst] #distance to average
S_cum=[] #list of cumulative sum
S_cum.append(0)
for i in range(0,len(S_dist)):
S_cum.append(S_cum[i] + S_dist[i])
return [nlargest(1,range(0,len(S_cum)),key=lambda i: S_cum[i]),(max(S_cum) - min(S_cum))]
    # returns [S_point, S_diff]: S_point is a one-element list holding the index of
    # the maximum cumulative sum, S_diff is max(S_cum) - min(S_cum)
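# Hypothetical usage of S_diff above: the index of the largest cumulative
# deviation from the mean marks a candidate change point in the series.
series = [10.0, 10.0, 10.0, 10.0, 1.0, 1.0, 1.0, 1.0]
s_point, s_diff = S_diff(series)
print(s_point, s_diff)  # -> [4] 18.0 : the level shift happens at index 4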
def similar_to_vec(self, v, N=10):
sims = self._vecs.dot(v)
sims = heapq.nlargest(N, zip(sims,self._vocab,self._vecs))
return sims
def most_similar(self, word, N=10):
w = self._vocab.index(word)
sims = self._vecs.dot(self._vecs[w])
sims = heapq.nlargest(N, zip(sims,self._vocab))
return sims
def analogy(self, pos1, neg1, pos2,N=10,mult=True):
wvecs, vocab = self._vecs, self._vocab
p1 = vocab.index(pos1)
p2 = vocab.index(pos2)
n1 = vocab.index(neg1)
if mult:
p1,p2,n1 = [(1+wvecs.dot(wvecs[i]))/2 for i in (p1,p2,n1)]
if N == 1:
return max(((v,w) for v,w in izip((p1 * p2 / n1),vocab) if w not in [pos1,pos2,neg1]))
return heapq.nlargest(N,((v,w) for v,w in izip((p1 * p2 / n1),vocab) if w not in [pos1,pos2,neg1]))
else:
p1,p2,n1 = [(wvecs.dot(wvecs[i])) for i in (p1,p2,n1)]
if N == 1:
return max(((v,w) for v,w in izip((p1 + p2 - n1),vocab) if w not in [pos1,pos2,neg1]))
return heapq.nlargest(N,((v,w) for v,w in izip((p1 + p2 - n1),vocab) if w not in [pos1,pos2,neg1]))
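# Self-contained sketch of the additive (else-branch) variant above, on toy
# vectors; the words and numbers are illustrative only, not real embeddings.
import heapq
import numpy as np

def analogy_add(vecs, vocab, pos1, neg1, pos2, n=3):
    idx = {w: i for i, w in enumerate(vocab)}
    target = vecs[idx[pos1]] + vecs[idx[pos2]] - vecs[idx[neg1]]
    scores = vecs.dot(target)
    exclude = {pos1, neg1, pos2}
    return heapq.nlargest(n, ((s, w) for s, w in zip(scores, vocab) if w not in exclude))

toy_vocab = ['king', 'queen', 'man', 'woman']
toy_vecs = np.array([[1.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])
print(analogy_add(toy_vecs, toy_vocab, 'king', 'man', 'woman', n=1))  # 'queen' scores highest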
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
if n is None:
return sorted(self.items(), key=_itemgetter(1), reverse=True)
return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
def run(self):
top_10 = nlargest(10, self._input_iterator())
with self.output().open('w') as out_file:
for streams, artist in top_10:
out_line = '\t'.join([
str(self.date_interval.date_a),
str(self.date_interval.date_b),
artist,
str(streams)
])
out_file.write((out_line + '\n'))
def vec_to_str(subvec, max_n):
sub_list_sorted = heapq.nlargest(max_n, subvec, key=lambda x: x[1])
sub_strs = [' '.join([word, wf2ws(weight)]) for word, weight in sub_list_sorted]
return '\t'.join(sub_strs)