def kNN_entity(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_e)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_e[i])
else:
dist = LA.norm(vec - self.vec_e[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_e[self.vec2e[item.index]], item.dist))
return rst
#given entity name, find kNN
python类heapreplace()的实例源码
def try_improve_a_from_b(idx_a, idx_b, heap_a, heap_b,
patches, M, N, alpha, g):
k = len(heap_a)
p0 = patches[idx_a, :, :]# patch around pixel idx_a
for nn in range(0,k):
# compute the 2D offset corresponding idx_b's nn-est nearest neighbour
offs_b = heap_b[nn][1]
idx_d = int(idx_a + offs_b)
idx_d = max(idx_d, 0)
idx_d = min(idx_d, patches.shape[0]-1)
offs_b = idx_d - idx_a
p2 = patches[idx_d, :, :]# patch around the new pixel to compare to idx_a
w_b = patches_dissimilarity(p0, p2, alpha, g)# new weight
if w_b > heap_a[0][0] and not ((w_b, offs_b) in heap_a):
heapq.heapreplace(heap_a, (w_b, offs_b))
return heap_a
def imerge(iterables):
_hpop, _hreplace, _Stop = (heappop, heapreplace, StopIteration)
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
nx = it.next
h_append([nx(), itnum, nx])
except _Stop:
pass
heapify(h)
while 1:
try:
while 1:
v, itnum, nx = s = h[0]
yield v
s[0] = nx()
_hreplace(h, s)
except _Stop:
_hpop(h)
except IndexError:
return
def create_ranking2(edge_weight, k, adj, num):
sink = len(adj)
heaps = [[] for i in xrange(sink + 1)]
heaps[0] = [(0, [])]
for current in xrange(sink):
for child in adj[current]:
for length, path in heaps[current]:
new_path = list(path)
new_path.append(current)
# this can be done better using this heapreplace
ew = edge_weight[0, num[(current, child)]]
heapq.heappush(heaps[child], (length + ew, new_path))
heaps[child] = heapq.nlargest(k, heaps[child])
# TODO what with equal lenght paths?
# result: heaps[sink]
return [(length, tuple(zip(nodes, nodes[1:] + [sink]))) for length, nodes in heaps[sink]]
def kNN_relation(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_r)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_r[i])
else:
dist = LA.norm(vec - self.vec_r[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_r[self.vec2r[item.index]], item.dist))
return rst
#given relation name, find kNN
def imerge(iterables):
_hpop, _hreplace, _Stop = (heappop, heapreplace, StopIteration)
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
nx = it.next
h_append([nx(), itnum, nx])
except _Stop:
pass
heapify(h)
while 1:
try:
while 1:
v, itnum, nx = s = h[0]
yield v
s[0] = nx()
_hreplace(h, s)
except _Stop:
_hpop(h)
except IndexError:
return
def imerge(iterables):
_hpop, _hreplace, _Stop = (heappop, heapreplace, StopIteration)
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
nx = it.next
h_append([nx(), itnum, nx])
except _Stop:
pass
heapify(h)
while 1:
try:
while 1:
v, itnum, nx = s = h[0]
yield v
s[0] = nx()
_hreplace(h, s)
except _Stop:
_hpop(h)
except IndexError:
return
def assign_jobs(self):
# Trivial case
if len(self.jobs) <= self.num_workers:
self._assigned_workers = [i for i in range(len(self._jobs))]
self._start_times = [0] * len(self._jobs)
return
# Create Heap
from collections import namedtuple
import heapq
MyThread = namedtuple('MyThread', 'start_time, id')
heap = [MyThread(0, i) for i in range(self._num_workers)]
heapq.heapify(heap)
for i, job in enumerate(self._jobs):
# Read the root contents
sched_thread_id = heap[0].id
sched_thread_start = heap[0].start_time
# Append to output
self._assigned_workers[i] = sched_thread_id
self._start_times[i] = sched_thread_start
# Update heap with next start time
heapq.heapreplace(heap, MyThread(sched_thread_start + job, sched_thread_id))
def imerge(iterables):
_hpop, _hreplace, _Stop = (heappop, heapreplace, StopIteration)
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
nx = it.next
h_append([nx(), itnum, nx])
except _Stop:
pass
heapify(h)
while 1:
try:
while 1:
v, itnum, nx = s = h[0]
yield v
s[0] = nx()
_hreplace(h, s)
except _Stop:
_hpop(h)
except IndexError:
return
def mergeSort(seqs):
"""
perform merge sort on a list of sorted iterators
"""
queue = []
for s in seqs:
s = assertIsSorted(s)
it = iter(s)
try:
queue.append((it.next(), it.next))
except StopIteration:
pass
heapq.heapify(queue)
while queue:
item, it = queue[0]
yield item
try:
heapq.heapreplace(queue, (it(), it))
except StopIteration:
heapq.heappop(queue)
# ---------------------------------------------------------------------------
def push(self, elem):
'''Push new elem to heap
Args:
elem ?the elem to add
Returns:
if the elem have added to queue
'''
if len(self.data) < self.k:
heapq.heappush(self.data, elem)
return True
else:
topk_small = self.data[0]
if elem > topk_small:
heapq.heapreplace(self.data, elem)
return True
return False
def imerge(iterables):
_hpop, _hreplace, _Stop = (heappop, heapreplace, StopIteration)
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
nx = it.next
h_append([nx(), itnum, nx])
except _Stop:
pass
heapify(h)
while 1:
try:
while 1:
v, itnum, nx = s = h[0]
yield v
s[0] = nx()
_hreplace(h, s)
except _Stop:
_hpop(h)
except IndexError:
return
def replace(self, item):
return heapq.heapreplace(self._items, item)
def create_ranking3(edge_weight, k, adj, num):
sink = len(adj)
EMPTY = -2
ROOT = -1
MIN_LENGTH = float('-inf')
# heaps = [[(0, EMPTY, 0) for j in range(k)] for i in xrange(sink + 1)]
heaps = [[(MIN_LENGTH, EMPTY, 0) for j in range(k + 1)] for i in xrange(sink + 1)]
heaps[0][0] = (0, ROOT, 0)
# forward
for current in xrange(sink):
new_rank = 0
for length, parent, rank in heaps[current]:
if parent != EMPTY:
for child in adj[current]:
ew = edge_weight[0, num[(current, child)]]
new_length = length + ew
# heapq.heapreplace(heaps[child], (new_length, current, new_rank))
heapq.heappush(heaps[child], (new_length, current, new_rank))
heaps[child] = heapq.nlargest(k, heaps[child])
new_rank += 1
# backward
ranking = []
for rank in xrange(k):
path = []
current = sink
current_rank = rank
while current != ROOT:
path.append(current)
_, current, current_rank = heaps[current][current_rank]
length, _, _ = heaps[sink][rank]
path = list(reversed(path))
path = tuple(zip(path[:-1], path[1:]))
ranking.append((length, path))
return ranking
def kNN_entity(self, vec, tgt_lan='en', topk=10, method=0, self_vec_id=None, replace_q=True):
q = []
model = self.models.get(tgt_lan)
if model == None:
print "Model for language", tgt_lan," does not exist."
return None
for i in range(len(model.vec_e)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, model.vec_e[i])
else:
dist = LA.norm(vec - model.vec_e[i])
if (not replace_q) or len(q) < topk:
HP.heappush(q, model.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, model.index_dist(i, dist) )
rst = []
if replace_q:
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_e[model.vec2e[item.index]], item.dist))
else:
while len(q) > topk:
HP.heappop(q)
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_e[model.vec2e[item.index]], item.dist))
return rst
#given entity name, find kNN
def kNN_relation(self, vec, tgt_lan='fr', topk=10, method=0, self_vec_id=None):
q = []
model = self.models.get(tgt_lan)
if model == None:
print "Model for language", tgt_lan," does not exist."
return None
for i in range(len(model.vec_r)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, model.vec_r[i])
else:
dist = LA.norm(vec - model.vec_r[i])
if len(q) < topk:
HP.heappush(q, model.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, model.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_r[model.vec2r[item.index]], item.dist))
return rst
#given relation name, find kNN
def add(self, item, priority=None):
if priority is None:
priority = item
if len(self.lst) < self.capacity:
heapq.heappush(self.lst, (priority, item))
elif priority > self.lst[0][0]:
heapq.heapreplace(self.lst, (priority, item))
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def occurrences_after(self, after=None, tzinfo=pytz.utc):
"""
It is often useful to know what the next occurrence is given a list of
events. This function produces a generator that yields the
the most recent occurrence after the date ``after`` from any of the
events in ``self.events``
"""
from schedule.models import Occurrence
if after is None:
after = timezone.now()
occ_replacer = OccurrenceReplacer(
Occurrence.objects.filter(event__in=self.events))
generators = [event._occurrences_after_generator(after) for event in self.events]
occurrences = []
for generator in generators:
try:
heapq.heappush(occurrences, (next(generator), generator))
except StopIteration:
pass
while True:
if len(occurrences) == 0:
raise StopIteration
generator = occurrences[0][1]
try:
next_occurence = heapq.heapreplace(occurrences, (next(generator), generator))[0]
except StopIteration:
next_occurence = heapq.heappop(occurrences)[0]
yield occ_replacer.get_occurrence(next_occurence)
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def reservoir_weighted(it, k, weights):
"""Weighted reservoir Sampling from job posting iterator
Randomly choosing a sample of k items from a streaming iterator based on the weights.
Args:
it (iterator): Job posting iterator to sample from. The format should be (job_posting, label)
k (int): Sample size
weights (dict): a dictionary that has key-value pairs as label-weighting pairs. It expects every
label in the iterator to be present as a key in the weights dictionary For example,
weights = {'11': 2, '13', 1}. In this case, the label/key is the occupation major
group and the value is the weight you want to sample with.
Returns:
generator: The result sample of k items from weighted reservori sampling.
"""
heap = []
hkey = lambda w: np.power(np.random.uniform(0.0, 1.0), 1.0 / w)
for i, datum in enumerate(it):
weight = weights[datum[1]]
score = hkey(weight)
if len(heap) < k:
hq.heappush(heap, (hkey(weight), datum))
elif score > heap[0][0]:
hq.heapreplace(heap, (score, datum))
while len(heap) > 0:
yield hq.heappop(heap)[1]
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def _iter(self):
rlist = []
self._rdate.sort()
self._genitem(rlist, iter(self._rdate))
for gen in [iter(x) for x in self._rrule]:
self._genitem(rlist, gen)
exlist = []
self._exdate.sort()
self._genitem(exlist, iter(self._exdate))
for gen in [iter(x) for x in self._exrule]:
self._genitem(exlist, gen)
lastdt = None
total = 0
heapq.heapify(rlist)
heapq.heapify(exlist)
while rlist:
ritem = rlist[0]
if not lastdt or lastdt != ritem.dt:
while exlist and exlist[0] < ritem:
exitem = exlist[0]
advance_iterator(exitem)
if exlist and exlist[0] is exitem:
heapq.heapreplace(exlist, exitem)
if not exlist or ritem != exlist[0]:
total += 1
yield ritem.dt
lastdt = ritem.dt
advance_iterator(ritem)
if rlist and rlist[0] is ritem:
heapq.heapreplace(rlist, ritem)
self._len = total
def largest(X, k):
"""
Return the k largest elements from X.
"""
# maintain a min-heap of size k containing the largest elements so far
h = []
for x in X:
if len(h) < k:
heapq.heappush(h, x)
elif x > h[0]:
heapq.heapreplace(h, x)
return h
def smallest(X, k):
"""
Return the k smallest elements from X.
"""
h = []
for x in X:
if len(h) < k:
heapq.heappush(h, -x)
elif -x > h[0]:
heapq.heapreplace(h, -x)
return [-x for x in h]
def sort_almost_sorted(A, k):
h = list(A[:k+1])
heapq.heapify(h)
result = []
for a in A[k+1:]:
x = heapq.heapreplace(h, a)
result.append(x)
while len(h) > 0:
result.append(heapq.heappop(h))
return result