def kNN_entity(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_e)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_e[i])
else:
dist = LA.norm(vec - self.vec_e[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_e[self.vec2e[item.index]], item.dist))
return rst
#given entity name, find kNN
python类heappush()的实例源码
def update_evaluations(formatter, # type: CodeFormatter
evaluations, # type: List[AttemptResult]
finished_styles, # type: List[AttemptResult]
bestdist # type: Sequence[int]
):
# type: (...) -> Tuple[bool, bool, Sequence[int]]
attemptresult = heapq.heappop(evaluations)
nested_round = False
if bestdist is None or (distquality(attemptresult.distance) < distquality(bestdist)):
bestdist = attemptresult.distance
heapq.heappush(evaluations, attemptresult)
else:
# We found a style that could no longer be improved by adding a single option value.
heapq.heappush(finished_styles, attemptresult)
nested_styles = formatter.nested_derivations(attemptresult.formatstyle)
if not nested_styles:
# This formatstyle does not unlock more options.
return True, nested_round, bestdist
# Restart the optimization from scratch with the attemptresult augmented with
# every nested option as seed styles.
bestdist = None
ndist = (HUGE_DISTANCE, HUGE_DISTANCE, HUGE_DISTANCE, HUGE_DISTANCE)
evaluations[:] = [AttemptResult(ndist, s) for s in nested_styles]
nested_round = True
return False, nested_round, bestdist
def __next__(self):
if not self._pq:
raise StopIteration
pair = heapq.heappop(self._pq)
t = pair.x
for i in range(self._n):
v = t[i] + 1
tp = t[:i] + (v,) + t[i + 1:]
if v > self.max_sizes[i]:
if i not in self.continuation:
self.continuation[i] = tp
continue
if tp not in self._memo:
pair = PairGen._Pair(*tp)
heapq.heappush(self._pq, pair)
self._memo[tp] = True
return t
def need_processing(self):
"""A utility to help determine which connections need
processing. Returns a triple of lists containing those connections that
0) need to read from the network, 1) need to write to the network, 2)
waiting for pending timers to expire. The timer list is sorted with
the connection next expiring at index 0.
"""
readers = []
writers = []
timer_heap = []
for c in iter(self._connections.values()):
if c.needs_input > 0:
readers.append(c)
if c.has_output > 0:
writers.append(c)
if c.deadline:
heapq.heappush(timer_heap, (c.next_tick, c))
timers = []
while timer_heap:
x = heapq.heappop(timer_heap)
timers.append(x[1])
return (readers, writers, timers)
def dijkstra(adj, costs, s, t):
''' Return predecessors and min distance if there exists a shortest path
from s to t; Otherwise, return None '''
Q = [] # priority queue of items; note item is mutable.
d = {s: 0} # vertex -> minimal distance
Qd = {} # vertex -> [d[v], parent_v, v]
p = {} # predecessor
visited_set = set([s])
for v in adj.get(s, []):
d[v] = costs[s, v]
item = [d[v], s, v]
heapq.heappush(Q, item)
Qd[v] = item
while Q:
print Q
cost, parent, u = heapq.heappop(Q)
if u not in visited_set:
print 'visit:', u
p[u]= parent
visited_set.add(u)
if u == t:
return p, d[u]
for v in adj.get(u, []):
if d.get(v):
if d[v] > costs[u, v] + d[u]:
d[v] = costs[u, v] + d[u]
Qd[v][0] = d[v] # decrease key
Qd[v][1] = u # update predecessor
heapq._siftdown(Q, 0, Q.index(Qd[v]))
else:
d[v] = costs[u, v] + d[u]
item = [d[v], u, v]
heapq.heappush(Q, item)
Qd[v] = item
return None
def _merge(sorted_iterables):
"""
>>> list(_merge([(1, 2, 3), (0, 2, 4)]))
[0, 1, 2, 2, 3, 4]
"""
heap = []
for iterable in sorted_iterables:
iterator = iter(iterable)
for value in iterator:
heapq.heappush(heap, (value, iterator))
break
while heap:
value, iterator = heapq.heappop(heap)
yield value
for value in iterator:
heapq.heappush(heap, (value, iterator))
break
def create_ranking2(edge_weight, k, adj, num):
sink = len(adj)
heaps = [[] for i in xrange(sink + 1)]
heaps[0] = [(0, [])]
for current in xrange(sink):
for child in adj[current]:
for length, path in heaps[current]:
new_path = list(path)
new_path.append(current)
# this can be done better using this heapreplace
ew = edge_weight[0, num[(current, child)]]
heapq.heappush(heaps[child], (length + ew, new_path))
heaps[child] = heapq.nlargest(k, heaps[child])
# TODO what with equal lenght paths?
# result: heaps[sink]
return [(length, tuple(zip(nodes, nodes[1:] + [sink]))) for length, nodes in heaps[sink]]
def kNN_relation(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_r)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_r[i])
else:
dist = LA.norm(vec - self.vec_r[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_r[self.vec2r[item.index]], item.dist))
return rst
#given relation name, find kNN
def add_number_to_heap(num):
"""Adds a number to a heap, assuming the heaps correctly represent the lower half of numbers and the upper half.
So the only factor taken into account is the number of items in the heaps.
If same number of items add to lower half, else add to half with the least number of items.
"""
if len(lower_half_max_heap) == 0:
heapq.heappush(lower_half_max_heap, num * -1)
elif len(upper_half_min_heap) == 0:
heapq.heappush(upper_half_min_heap, num)
else:
# Assume the heap is a valid split of the data.
# Difference will be positive if upper_half_min_heap has more elements, else negative.
difference = len(upper_half_min_heap) - len(lower_half_max_heap)
if difference >= 1:
# upper_half_min_heap has more elements, should put this next number in lower_half_max_heap.
heapq.heappush(lower_half_max_heap, num * -1)
elif difference <= -1:
# lower_half_max_heap has more elements, should put this next number in upper_half_min_heap.
heapq.heappush(upper_half_min_heap, num)
else:
# Can push to either, difference is 0.
heapq.heappush(lower_half_max_heap, num * -1)
def rebalance_heaps():
"""Ensures that no element in the upper half is less than any element in the lower half."""
if not (len(lower_half_max_heap) >= 1 and len(upper_half_min_heap) >= 1):
return # Don't need to rebalance if one of them is empty.
max_from_lower_half = lower_half_max_heap[0] * -1
min_from_upper_half = upper_half_min_heap[0]
if min_from_upper_half < max_from_lower_half:
# Upper half has smaller elements than that of lower half.
while min_from_upper_half < max_from_lower_half:
# Swap elements.
max_from_lower_half = heapq.heappop(lower_half_max_heap) * -1
min_from_upper_half = heapq.heappop(upper_half_min_heap)
heapq.heappush(lower_half_max_heap, min_from_upper_half * -1)
heapq.heappush(upper_half_min_heap, max_from_lower_half)
# heapq will handle restructuring heap so smallest values will be at index 0.
max_from_lower_half = lower_half_max_heap[0] * -1
min_from_upper_half = upper_half_min_heap[0]
def dijkstra(graph, source):
pq = []
nodes = graph.get_all_vertices()
distances = [math.inf] * len(nodes)
path = [-1] * len(nodes)
distances[source.index] = 0
for node in nodes:
# Store as (priority, task) tuples, heapq will sort on first element.
heapq.heappush(pq, (distances[node.index], node))
while pq:
# Assumes non negative weights, so when popping a node it is the best way to get there.
dist, node = heapq.heappop(pq)
for edge in graph.get_adjacent_edges(node):
# Note: can't terminate early and do this.
# Eg: (s) -3-> (c) -12-> (d)
# \-20->(d) will be wrong
# if distances[edge.destination.index] != math.inf: # We already have the shortest path to this node.
# continue
if relax(node, edge.destination, edge, distances):
# Found a better way to get to a next node, add that to the pq and set the parent.
heapq.heappush(pq, (distances[edge.destination.index], edge.destination))
path[edge.destination.index] = node.index
return distances, path # Shortest path from source to any other node in distances.
def dijkstra(s, t, g):
q = []
dis = collections.defaultdict(lambda: 0x7fffffff) # [0x7fffffff] * len(g)
vis = collections.defaultdict(bool) # [False] * len(g)
dis[s] = 0
heapq.heappush(q, Node(s, 0))
while q:
cur = heapq.heappop(q).to
if vis[cur]: continue
vis[cur] = True
for to, val in g[cur]:
if not vis[to] and dis[cur] + val < dis[to]:
dis[to] = dis[cur] + val
heapq.heappush(q, Node(to, dis[to]))
return dis
def getSkyline(self, buildings):
"""
:type buildings: List[List[int]]
:rtype: List[List[int]]
"""
hs = []
heap = []
for b in buildings:
hs.append((b[0], -b[2]))
hs.append((b[1], b[2]))
hs.sort()
ans = []
pre = cur = None
for h in hs:
pos = h[0]
height = h[1]
if height < 0:
heapq.heappush(heap, height)
else:
i = heap.index(-height)
heap[i] = heap[-1]
heap.pop()
if i < len(heap):
heapq._siftup(heap, i)
heapq._siftdown(heap, 0, i)
if heap:
cur = heap[0]
if cur != pre:
ans.append((pos, -1 * cur))
pre = cur
else:
ans.append((pos, 0))
return ans
find-median-from-data-stream.py 文件源码
项目:lc-all-solutions
作者: csujedihy
项目源码
文件源码
阅读 23
收藏 0
点赞 0
评论 0
def addNum(self, num):
"""
Adds a num into the data structure.
:type num: int
:rtype: void
"""
left = self.left
right = self.right
if self.median is None:
self.median = num
return
if num <= self.median:
heapq.heappush(left, -num)
else:
heapq.heappush(right, num)
if len(left) > len(right) + 1:
top = -heapq.heappop(left)
heapq.heappush(right, self.median)
self.median = top
if len(right) > len(left) + 1:
top = heapq.heappop(right)
heapq.heappush(left, -self.median)
self.median = top
def getNewsFeed(self, userId):
"""
Retrieve the 10 most recent tweet ids in the user's news feed. Each item in the news feed must be posted by users who the user followed or by the user herself. Tweets must be ordered from most recent to least recent.
:type userId: int
:rtype: List[int]
"""
ret = []
heap = []
if self.tweets[userId]:
heapq.heappush(heap, self.tweets[userId][-1])
for followeeId in self.friendship[userId]:
if self.tweets[followeeId]:
heapq.heappush(heap, self.tweets[followeeId][-1])
cnt = 10
while heap and cnt > 0:
_, tid, uid, idx = heapq.heappop(heap)
ret.append(tid)
if idx > 0:
heapq.heappush(heap, self.tweets[uid][idx - 1])
cnt -= 1
return ret
kth-smallest-element-in-a-sorted-matrix.py 文件源码
项目:lc-all-solutions
作者: csujedihy
项目源码
文件源码
阅读 28
收藏 0
点赞 0
评论 0
def kthSmallest(self, matrix, k):
"""
:type matrix: List[List[int]]
:type k: int
:rtype: int
"""
visited = {(0, 0)}
heap = [(matrix[0][0], (0, 0))]
while heap:
val, (i, j) = heapq.heappop(heap)
k -= 1
if k == 0:
return val
if i + 1 < len(matrix) and (i + 1, j) not in visited:
heapq.heappush(heap, (matrix[i + 1][j], (i + 1, j)))
visited.add((i + 1, j))
if j + 1 < len(matrix) and (i, j + 1) not in visited:
heapq.heappush(heap, (matrix[i][j + 1], (i, j + 1)))
visited.add((i, j + 1))
def nthSuperUglyNumber(self, n, primes):
"""
:type n: int
:type primes: List[int]
:rtype: int
"""
dp = [0] * (n + 1)
dp[1] = 1
heap = []
indexes = [1] * len(primes)
for i in xrange(0, len(primes)):
heapq.heappush(heap, (dp[indexes[i]] * primes[i],i))
for i in xrange(2, n + 1):
minV = heap[0][0]
dp[i] = minV
while heap[0][0] == minV:
value, xi = heapq.heappop(heap)
indexes[xi] += 1
heapq.heappush(heap, (dp[indexes[xi]] * primes[xi], xi))
return dp[-1]
def basic_usage():
h=[]
heapq.heappush(h,10)
print h
heapq.heappush(h,1)
print h
heapq.heappush(h,0)
print h
heapq.heappush(h,3)
print h
heapq.heappush(h,6)
print h
heapq.heappush(h,7)
print h
heapq.heappush(h,11)
print h
heapq.heappush(h,9)
print h
heapq.heappush(h,7)
print h
heapq.heappush(h,2)
print h
p=heapq.heappop(h)
print h
print p
def heatmap(self, source_x,source_y):
log.debug("Updating Heatmap.")
for tile in self.grid.values():
tile.value = sys.maxsize
tile.previous = None
tile.visited = False
l_open = []
source = self.lookup(source_x,source_y)
if source: l_open.append((0,source,source)) #(value,cell,previous)
while l_open:
value,cell,previous = heapq.heappop(l_open)
if cell.visited: continue
cell.visited = True
cell.previous = previous
cell.value = value
for x,y in self.get_neighbor_addrs(cell.x,cell.y):
c = self.lookup(x,y)
if c and not (c.visited or c.blocked):
heapq.heappush(l_open, (value+1, c, cell))
return self.render(0,self.width, 0,self.height,heatp=True)
def median_stream_ints(arr):
less_than_med = []
greater_than_med = []
for i in range(len(arr)):
print 'Median after adding:', arr[i]
if len(less_than_med) == 0 and len(greater_than_med) == 0:
heapq.heappush(less_than_med, -arr[i]) # max-heap in Python implemented by negation.
elif arr[i] < abs(less_than_med[0]):
heapq.heappush(less_than_med, -arr[i])
if len(less_than_med) - len(greater_than_med) > 1:
root = abs(heapq.heappop(less_than_med))
heapq.heappush(greater_than_med, root)
elif len(greater_than_med) == 0 or arr[i] > greater_than_med[0]:
heapq.heappush(greater_than_med, arr[i])
if len(greater_than_med) - len(less_than_med) > 1:
root = heapq.heappop(greater_than_med)
heapq.heappush(less_than_med, -root)
if len(less_than_med) == len(greater_than_med):
print (greater_than_med[0] - less_than_med[0]) / 2
else:
print abs(less_than_med[0]) if len(less_than_med) > len(greater_than_med) else greater_than_med[0]
def search_docs(inputs, max_ex=5, opts=None):
"""Given a set of document ids (returned by ranking for a question), search
for top N best matching (by heuristic) paragraphs that contain the answer.
"""
if not opts:
raise RuntimeError('Options dict must be supplied.')
doc_ids, q_tokens, answer = inputs
examples = []
for i, doc_id in enumerate(doc_ids):
for j, paragraph in enumerate(re.split(r'\n+', fetch_text(doc_id))):
found = find_answer(paragraph, q_tokens, answer, opts)
if found:
# Reverse ranking, giving priority to early docs + paragraphs
score = (found[0], -i, -j, random.random())
if len(examples) < max_ex:
heapq.heappush(examples, (score, found[1]))
else:
heapq.heappushpop(examples, (score, found[1]))
return [e[1] for e in examples]
def __iter__(self):
""" Sort the results by Whoosh rank; relevance. """
_iter = super(QueryProxy, self).__iter__()
if self._whoosh_results is None or self._order_by is not False:
return _iter
ordered = []
for row in _iter:
# we have to convert the primary-key, as stored in the SQL database
# into a string because the key is stored as an `ID` in whoosh.
# The ID field is string only; plus, this allows for uuid pk's.
str_pk = str(getattr(row, self._pk))
heapq.heappush(
ordered, (self._whoosh_results[str_pk], row))
def inner():
while ordered:
yield heapq.heappop(ordered)[1]
return inner()
def _create_clusterings(self,dendrogram):
clusterings = [[(-(dendrogram.distance),dendrogram)]]
for threshold in np.linspace(-self._max_dist,-self._min_dist,self.number_of_steps)[1:]:
new_clustering = clusterings[-1][:] # set new clustering to be equivalent to previous
# Expand previous clustering
while new_clustering[0][0] < threshold and new_clustering[0][1].is_cluster():
expanded_cluster = heapq.heappop(new_clustering)[1]
left = expanded_cluster.left
right = expanded_cluster.right
if left.is_cluster():
heapq.heappush(new_clustering,(-left.distance,left))
else:
heapq.heappush(new_clustering,(-(self._min_dist-1),left))
if right.is_cluster():
heapq.heappush(new_clustering,(-right.distance,right))
else:
heapq.heappush(new_clustering,(-(self._min_dist-1),right))
clusterings.append(new_clustering)
return clusterings
def nthSuperUglyNumber(self, n, primes):
"""
:type n: int
:type primes: List[int]
:rtype: int
"""
uglies, idx, heap, ugly_set = [0] * n, [0] * len(primes), [], set([1])
uglies[0] = 1
for k, p in enumerate(primes):
heapq.heappush(heap, (p, k))
ugly_set.add(p)
for i in xrange(1, n):
uglies[i], k = heapq.heappop(heap)
while (primes[k] * uglies[idx[k]]) in ugly_set:
idx[k] += 1
heapq.heappush(heap, (primes[k] * uglies[idx[k]], k))
ugly_set.add(primes[k] * uglies[idx[k]])
return uglies[-1]
def nthUglyNumber(self, n):
"""
:type n: int
:rtype: int
"""
l=[1]
primes=[2,3,5]
heapq.heapify(l)
set1=set(l)
n-=1
while n>0:
z=heapq.heappop(l)
n-=1
for i in primes:
if z*i not in set1:
heapq.heappush(l, z*i)
set1.add(z*i)
return heapq.heappop(l)
def get_skyline(LRH):
"""
Wortst Time Complexity: O(NlogN)
:type buildings: List[List[int]]
:rtype: List[List[int]]
"""
skyline, live = [], []
i, n = 0, len(LRH)
while i < n or live:
if not live or i < n and LRH[i][0] <= -live[0][1]:
x = LRH[i][0]
while i < n and LRH[i][0] == x:
heapq.heappush(live, (-LRH[i][2], -LRH[i][1]))
i += 1
else:
x = -live[0][1]
while live and -live[0][1] <= x:
heapq.heappop(live)
height = len(live) and -live[0][0]
if not skyline or height != skyline[-1][1]:
skyline += [x, height],
return skyline
def explore_frontier(sig, data_type, metric, max_size):
# closure for binding sig and rules easily
expand = producers.generate_expander(sig)
# generate appropriate starting node based on type
start_node = producers.e.Un(producers.parse_type(data_type))
frontier = [(metric(start_node), start_node)]
# go hog wild
while True:
# pull expression off heap, check expansions
m, expr = heapq.heappop(frontier)
expansions = expand(expr)
# if there are expansions, just add them back in
if expansions:
for new_expr in expansions:
heapq.heappush(frontier, (metric(new_expr), new_expr) )
# if there aren't any expansions and the program is closed, we're done
elif len(expr._values_from_kind("un")) == 0:
yield m[0], expr
#------------------------------------------------------------------------------
# now we treat this module as a script - time to execute!
#------------------------------------------------------------------------------
def create_beam_items(self, beam):
beam_items = []
for b in xrange(len(beam)):
config = beam[b]
prevScore = config.score
dense_feats = self.template.feat_template(config.nodes,
config.stack,
config.b0)
pr_scores = self.clf.predict_proba(dense_feats)[0]
pr_scores = np.log(pr_scores)
predictions = zip(pr_scores, self.clf.classes_)
valid_trans = self.get_valid_transitions(config)
for score, (action, label) in predictions:
if self.transitions[action] in valid_trans:
next_transition = valid_trans[self.transitions[action]]
heapq.heappush(
beam_items,
(prevScore + score, b, next_transition, label))
if len(beam_items) > self.beamwidth:
heapq.heappop(beam_items)
return beam_items
def find_k_nearest(self, k, s_query):
'''
Gets KNN using heap sort
'''
neighbors = []
for n in self.nodes:
dist = np.linalg.norm(s_query - n.state)
if dist > 0:
hn = HeapNode(n, dist)
heapq.heappush(neighbors, hn)
#Check if we have <= k to choose from
#in which case, simply return that list
ret_list = []
if len(neighbors) <= k:
for k in neighbors:
ret_list.append(k.node)
return ret_list
#get the k nearest neighbors
for i in xrange(k):
t = heapq.heappop(neighbors)
ret_list.append(t.node)
return ret_list
def largest_export_versions(n):
"""Creates a filter that keeps the largest n export versions.
Args:
n: number of versions to keep.
Returns:
A filter function that keeps the n largest paths.
"""
def keep(paths):
heap = []
for idx, path in enumerate(paths):
if path.export_version is not None:
heapq.heappush(heap, (path.export_version, idx))
keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
return sorted(keepers)
return keep