def kNN_entity(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_e)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_e[i])
else:
dist = LA.norm(vec - self.vec_e[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_e[self.vec2e[item.index]], item.dist))
return rst
#given entity name, find kNN
python类nsmallest()的实例源码
def kNN_relation(self, vec, topk=10, method=0, self_vec_id=None):
q = []
for i in range(len(self.vec_r)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, self.vec_r[i])
else:
dist = LA.norm(vec - self.vec_r[i])
if len(q) < topk:
HP.heappush(q, self.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, self.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (self.vocab_r[self.vec2r[item.index]], item.dist))
return rst
#given relation name, find kNN
def takeOrdered(self, num, key=None):
"""
Get the N elements from an RDD ordered in ascending order or as
specified by the optional key function.
.. note:: this method should only be used if the resulting array is expected
to be small, as all the data is loaded into the driver's memory.
>>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6)
[1, 2, 3, 4, 5, 6]
>>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7], 2).takeOrdered(6, key=lambda x: -x)
[10, 9, 7, 6, 5, 4]
"""
def merge(a, b):
return heapq.nsmallest(num, a + b, key)
return self.mapPartitions(lambda it: [heapq.nsmallest(num, it, key)]).reduce(merge)
def new_wrapper(cls, func, cache):
# define the wrapper...
def callable(*arguments, **keywords):
heap = [res for _,res in heapq.nsmallest(len(cache), cache)]
f, (a, w, k) = cls.match((arguments[:],keywords), heap)
return f(*arguments, **keywords)
#return f(*(arguments + tuple(w)), **keywords)
# swap out the original code object with our wrapper's
f,c = callable, callable.func_code
cargs = c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, \
c.co_code, c.co_consts, c.co_names, c.co_varnames, \
c.co_filename, '.'.join((func.__module__, func.func_name)), \
c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars
newcode = types.CodeType(*cargs)
res = types.FunctionType(newcode, f.func_globals, f.func_name, f.func_defaults, f.func_closure)
res.func_name, res.func_doc = func.func_name, func.func_doc
# assign the specified cache to it
setattr(res, cls.cache_name, cache)
# ...and finally add a default docstring
setattr(res, '__doc__', '')
return res
def _timer(self):
if not self._queue or not self._waiters:
return
ts = heapq.nsmallest(1, self._queue)[0][0]
t = ts - time.time()
if t < 0:
score, f = self._waiters.pop(0)
ts, val = heapq.heappop(self._queue)
if score:
f.set_result((val, ts))
else:
f.set_result(val)
else:
await asyncio.sleep(t, loop=self.loop)
self._future = self.loop.create_task(self._timer())
def knn(self, itemVector):
"""returns the predicted class of itemVector using k
Nearest Neighbors"""
# changed from min to heapq.nsmallest to get the
# k closest neighbors
neighbors = heapq.nsmallest(self.k,
[(self.manhattan(itemVector, item[1]), item)
for item in self.data])
# each neighbor gets a vote
results = {}
for neighbor in neighbors:
theClass = neighbor[1][0]
results.setdefault(theClass, 0)
results[theClass] += 1
resultList = sorted([(i[1], i[0]) for i in results.items()], reverse=True)
#get all the classes that have the maximum votes
maxVotes = resultList[0][0]
possibleAnswers = [i[1] for i in resultList if i[0] == maxVotes]
# randomly select one of the classes that received the max votes
answer = random.choice(possibleAnswers)
return( answer)
def GetLeastNumbers(self, tinput, k):
import heapq
if tinput == None or len(tinput) < k or len(tinput) <= 0 or k <= 0:
return []
output = []
for number in tinput:
if len(output) < k:
output.append(number)
else:
# ?????? ???
# output = heapq.nsmallest(k, output)
# if number >= output[-1]:
# continue
# else:
# output[-1] = number
# ?????? ??
output = heapq.nlargest(k, output)
if number >= output[0]:
continue
else:
output[0] = number
return output[::-1] # ???? return output
def search(lattice, ngrams, queues, beam_size, viterbi_size):
for i in range(len(lattice)):
for j in range(len(lattice[i])):
for target, source in lattice[i][j]:
word_queue = []
for previous_cost, previous_history in queues[j]:
history = previous_history + [(target, source)]
cost = previous_cost + get_ngram_cost(ngrams, tuple(history[-3:]))
hypothesis = (cost, history)
word_queue.append(hypothesis)
# prune word_queue to viterbi size
if viterbi_size > 0:
word_queue = heapq.nsmallest(viterbi_size, word_queue, key=operator.itemgetter(0))
queues[i] += word_queue
# prune queues[i] to beam size
if beam_size > 0:
queues[i] = heapq.nsmallest(beam_size, queues[i], key=operator.itemgetter(0))
return queues
def filter_batch(self, batch_index, *args):
selected_number = self.cost_threshold(self.iteration)
selected_batch_data = [data[batch_index] for data in self.all_data]
selected_batch_data = self.prepare_data(*selected_batch_data)
targets = selected_batch_data[-1]
cost_list = self.model.f_cost_list_without_decay(*selected_batch_data)
label_cost_lists = [cost_list[targets == label] for label in range(self.model.output_size)]
result = []
for i, label_cost_list in enumerate(label_cost_lists):
if label_cost_list.size != 0:
threshold = heapq.nsmallest(selected_number, label_cost_list)[-1]
for j in range(len(targets)):
if targets[j] == i and cost_list[j] <= threshold:
result.append(batch_index[j])
if Config['temp_job'] == 'log_data':
self.add_index(batch_index[j], cost_list[j])
return result
def report_best_styles(formatter, finished_styles, evaluations, bestofround, metricdesc,
roundnr):
# type: (CodeFormatter, List[AttemptResult], List[AttemptResult], int, str, int) -> None
"""Report the best style and its metric for the round.
Also report the next best styles with their metrics relative to the best style.
"""
attempts = finished_styles[:]
bestofround = max(0, bestofround)
for attempt in heapq.nsmallest(bestofround, evaluations):
heapq.heappush(attempts, attempt)
for idx, attemptresult in enumerate(heapq.nsmallest(bestofround, attempts)):
if idx == 0:
bestresult = attemptresult
bestmsg = '\nBest distance %s round %d: %s' % (metricdesc, roundnr,
attemptresult.distance)
iprint(INFO_USER, cyan(bestmsg))
iprint(INFO_USER, formatter.styletext(attemptresult.formatstyle))
else:
place = '%d. ' % (idx + 1)
m_diff = distdifference(attemptresult.distance, bestresult.distance)
iprint(INFO_USER, yellow('\n%sbest differential distance %s round %d: %s' %
(place, metricdesc, roundnr, m_diff)))
unique_from, unique_to = deep_difference(bestresult.formatstyle,
attemptresult.formatstyle)
text_from = formatter.styletext(style_make(unique_from))
text_to = formatter.styletext(style_make(unique_to))
separator = ' | '
block = alignedblocks(text_from, text_to, separator, color_right=YELLOW)
iprint(INFO_USER, block)
def kNN_entity(self, vec, tgt_lan='en', topk=10, method=0, self_vec_id=None, replace_q=True):
q = []
model = self.models.get(tgt_lan)
if model == None:
print "Model for language", tgt_lan," does not exist."
return None
for i in range(len(model.vec_e)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, model.vec_e[i])
else:
dist = LA.norm(vec - model.vec_e[i])
if (not replace_q) or len(q) < topk:
HP.heappush(q, model.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, model.index_dist(i, dist) )
rst = []
if replace_q:
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_e[model.vec2e[item.index]], item.dist))
else:
while len(q) > topk:
HP.heappop(q)
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_e[model.vec2e[item.index]], item.dist))
return rst
#given entity name, find kNN
def kNN_relation(self, vec, tgt_lan='fr', topk=10, method=0, self_vec_id=None):
q = []
model = self.models.get(tgt_lan)
if model == None:
print "Model for language", tgt_lan," does not exist."
return None
for i in range(len(model.vec_r)):
#skip self
if self_vec_id != None and i == self_vec_id:
continue
if method == 1:
dist = SP.distance.cosine(vec, model.vec_r[i])
else:
dist = LA.norm(vec - model.vec_r[i])
if len(q) < topk:
HP.heappush(q, model.index_dist(i, dist))
else:
#indeed it fetches the biggest
tmp = HP.nsmallest(1, q)[0]
if tmp.dist > dist:
HP.heapreplace(q, model.index_dist(i, dist) )
rst = []
while len(q) > 0:
item = HP.heappop(q)
rst.insert(0, (model.vocab_r[model.vec2r[item.index]], item.dist))
return rst
#given relation name, find kNN
def neighboring_msgs(msg):
"Generate nearby keys, hopefully better ones."
def swap(a,b): return msg.translate(string.maketrans(a+b, b+a))
for bigram in heapq.nsmallest(20, set(ngrams(msg, 2)), P2l):
b1,b2 = bigram
for c in alphabet:
if b1==b2:
if P2l(c+c) > P2l(bigram): yield swap(c,b1)
else:
if P2l(c+b2) > P2l(bigram): yield swap(c,b1)
if P2l(b1+c) > P2l(bigram): yield swap(c,b2)
while True:
yield swap(random.choice(alphabet), random.choice(alphabet))
################ Spelling Correction (p. 236-)
def solution(n, array):
if n < 3:
return False
three_max = heapq.nlargest(3, array)
two_min = heapq.nsmallest(2, array)
max_res = max(three_max[0]*three_max[1]*three_max[2], two_min[0]*two_min[1]*three_max[0])
return max_res
def _new_(self, name):
'''Overwrite the hook ``name`` with a priorityhook.'''
if not hasattr(self.object, name):
cls = self.__class__
raise AttributeError("{:s}._new_({!r}) : Unable to create a hook for unknown method.".format('.'.join(('internal',__name__,cls.__name__)), name))
def method(hookinstance, *args):
if name in self.__cache and name not in self.__disabled:
hookq = self.__cache[name][:]
for _,func in heapq.nsmallest(len(hookq), hookq):
try:
res = func(*args)
except:
cls = self.__class__
message = functools.partial("{:s}.callback : {:s}".format, '.'.join(('internal',__name__,cls.__name__)))
logging.fatal("{:s}.callback : Callback for {:s} raised an exception.".format('.'.join(('internal',__name__,cls.__name__)), '.'.join((self.__type__.__name__,name))))
res = map(message, traceback.format_exception(*sys.exc_info()))
map(logging.fatal, res)
logging.warn("{:s}.callback : Hook originated from -> ".format('.'.join(('internal',__name__,cls.__name__))))
res = map(message, traceback.format_list(self.__traceback[name,func]))
map(logging.warn, res)
res = self.STOP
if not isinstance(res, self.result) or res == self.CONTINUE:
continue
elif res == self.STOP:
break
cls = self.__class__
raise TypeError("{:s}.callback : Unable to determine result type. : {!r}".format('.'.join(('internal',__name__,cls.__name__)), res))
supermethod = getattr(super(hookinstance.__class__, hookinstance), name)
return supermethod(*args)
return types.MethodType(method, self.object, self.object.__class__)
def get(self, score=False):
waiter = self.loop.create_future()
if self._queue:
timestamp, value = heapq.nsmallest(1, self._queue)[0]
if time.time() >= timestamp:
ts, value = heapq.heappop(self._queue)
if score:
waiter.set_result((value, ts))
else:
waiter.set_result(value)
return waiter
if not self._future or self._future.done():
self._future = self.loop.create_task(self._timer())
self._waiters.append((score, waiter))
return waiter
def _n_best_results(self, results, n):
if self.hist_comparator.reversed:
function = heapq.nlargest
else:
function = heapq.nsmallest
return function(n, results, key=operator.itemgetter(1))
def __event_loop(self):
def dispatch_event(event):
for listener in self.__event_listeners:
try:
listener(event)
except Exception as e:
self._logger.exception("Event processing")
def get_delayed_events():
"""
:return: As many delayed events as are due to be executed, empty array if none.
"""
events = []
with self.__delayed_event_lock:
while len(self.__delayed_events) and heapq.nsmallest(1, self.__delayed_events)[0][0] <= time.time():
events.append(heapq.heappop(self.__delayed_events))
for ev in events:
ev[1].time = time.time()
self._logger.debug("Firing for t=%f (%d ms late)" % (ev[0], int((time.time() - ev[0]) * 1000)))
return list([x[1] for x in events])
# Here begins the body of __event_loop
while not self.stop or not self.__event_queue.empty():
try:
for e in get_delayed_events():
dispatch_event(e)
dispatch_event(self.__event_queue.get(block=True, timeout=0.05))
self.__event_queue.task_done()
except queue.Empty:
pass
self.__event_queue = None
def findKthLargest(self, nums, k):
"""
:type nums: List[int]
:type k: int
:rtype: int
"""
nums = [-x for x in nums]
heapq.heapify(nums)
# ret = None
# for i in range(k):
# ret = heapq.heappop(nums)
ret = heapq.nsmallest(k, nums)
return -ret.pop()
def kSmallestPairs(self, nums1, nums2, k ):
"""
:type nums1: List[int]
:type nums2: List[int]
:type k: int
:rtype: List[List[int]]
"""
return min(list, heapq.nsmallest(k, itertools.product(nums1, nums2), key=sum))
def apply_guess(tmp):
date = tmp["date"]
code = tmp["code"]
date_end = datetime.datetime.strptime(date, "%Y%m%d")
date_start = (date_end + datetime.timedelta(days=-300)).strftime("%Y-%m-%d")
date_end = date_end.strftime("%Y-%m-%d")
print(code, date_start, date_end)
# open, high, close, low, volume, price_change, p_change, ma5, ma10, ma20, v_ma5, v_ma10, v_ma20, turnover
# ??????????????
stock = common.get_hist_data_cache(code, date_start, date_end)
stock = pd.DataFrame({"close": stock["close"]}, index=stock.index.values)
stock = stock.sort_index(0) # ???????????
# print(stock.head(10))
arr = pd.Series(stock["close"].values)
# print(df_arr)
wave_mean = arr.mean()
# ?????????
wave_crest = heapq.nlargest(5, enumerate(arr), key=lambda x: x[1])
wave_crest_mean = pd.DataFrame(wave_crest).mean()
# ??????????index??????????? ????????
wave_base = heapq.nsmallest(5, enumerate(arr), key=lambda x: x[1])
wave_base_mean = pd.DataFrame(wave_base).mean()
# ????
# print("##############")
tmp = {"code": code, "wave_mean": wave_mean,
"wave_crest": wave_crest_mean[1], "wave_base": wave_base_mean[1]}
# print(tmp)
# code date wave_base wave_crest wave_mean ????????????????????
return list([code, date, wave_base_mean[1], wave_crest_mean[1], wave_mean])
# main????
def takeOrdered(self, num, key=None):
"""
Get the N elements from a RDD ordered in ascending order or as
specified by the optional key function.
>>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6)
[1, 2, 3, 4, 5, 6]
>>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7], 2).takeOrdered(6, key=lambda x: -x)
[10, 9, 7, 6, 5, 4]
"""
def merge(a, b):
return heapq.nsmallest(num, a + b, key)
return self.mapPartitions(lambda it: [heapq.nsmallest(num, it, key)]).reduce(merge)
def search(lattice, queues, rnn_predictor, ngrams, beam_size, viterbi_size):
# Breadth first search with beam pruning and viterbi-like pruning
for i in range(len(lattice)):
queue = []
# create hypotheses without predicting next word
for j in range(len(lattice[i])):
for target, source, word_id in lattice[i][j]:
word_queue = []
for previous_cost, previous_history, previous_state, previous_prediction in queues[j]:
history = previous_history + [(target, source)]
cost = previous_cost + interpolate(previous_prediction[word_id], get_ngram_cost(ngrams, history))
# Temporal hypothesis is tuple of (cost, history, word_id, previous_state)
# Lazy prediction replaces word_id and previous_state to state and prediction
hypothesis = (cost, history, word_id, previous_state)
word_queue.append(hypothesis)
# prune word_queue to viterbi size
if viterbi_size > 0:
word_queue = heapq.nsmallest(viterbi_size, word_queue, key=operator.itemgetter(0))
queue += word_queue
# prune queue to beam size
if beam_size > 0:
queue = heapq.nsmallest(beam_size, queue, key=operator.itemgetter(0))
# predict next word and state before continue
for cost, history, word_id, previous_state in queue:
predictions, states = rnn_predictor.predict([word_id], [previous_state])
hypothesis = (cost, history, states[0], predictions[0])
queues[i].append(hypothesis)
return queues
def simple_search(lattice, queues, rnn_predictor, beam_size):
# Simple but slow implementation of beam search
for i in range(len(lattice)):
for j in range(len(lattice[i])):
for target, word_id in lattice[i][j]:
for previous_cost, previous_string, previous_state, previous_prediction in queues[j]:
cost = previous_cost + previous_prediction[word_id]
string = previous_string + target
predictions, states = rnn_predictor.predict([word_id], [previous_state])
hypothesis = (cost, string, states[0], predictions[0])
queues[i].append(hypothesis)
# prune queues[i] to beam size
queues[i] = heapq.nsmallest(beam_size, queues[i], key=operator.itemgetter(0))
return queues
def search(lattice, queues, rnn_predictor, beam_size, viterbi_size):
# Breadth first search with beam pruning and viterbi-like pruning
for i in range(len(lattice)):
queue = []
# create hypotheses without predicting next word
for j in range(len(lattice[i])):
for target, word_id in lattice[i][j]:
word_queue = []
for previous_cost, previous_string, previous_state, previous_prediction in queues[j]:
cost = previous_cost + previous_prediction[word_id]
string = previous_string + target
hypothesis = (cost, string, word_id, previous_state)
word_queue.append(hypothesis)
# prune word_queue to viterbi size
if viterbi_size > 0:
word_queue = heapq.nsmallest(viterbi_size, word_queue, key=operator.itemgetter(0))
queue += word_queue
# prune queue to beam size
if beam_size > 0:
queue = heapq.nsmallest(beam_size, queue, key=operator.itemgetter(0))
# predict next word and state before continue
for cost, string, word_id, previous_state in queue:
predictions, states = rnn_predictor.predict([word_id], [previous_state])
hypothesis = (cost, string, states[0], predictions[0])
queues[i].append(hypothesis)
return queues
def selectCol(self, colName="", only_extendables=False, add=None):
fields = self.csv.getFieldsWithAutodetection() if not only_extendables else []
for f in self.csv.guesses.extendable_fields:
d = self.csv.guesses.getGraph().dijkstra(f, ignore_private=True)
s = "from " + ", ".join(sorted([k for k in nsmallest(3, d, key=d.get)]))
if len(d) > 3:
s += "..."
fields.append(("new " + f + "...", s))
col_i = Dialogue.pickOption(fields, colName)
if only_extendables or col_i >= len(self.csv.fields):
newFieldI = col_i if only_extendables else col_i - len(self.csv.fields)
col_i = self.extendColumn(self.csv.guesses.extendable_fields[newFieldI], add=add)
return col_i
def wiggleSort(self, nums):
"""
Improvement with three-way-partition and find-kth-largest.
:type nums: List[int]
:rtype: void Do not return anything, modify nums in-place instead.
"""
def find_kth_largest(nums, k):
import heapq
return heapq.nsmallest(k, nums)[-1]
def three_way_partition(nums, mid_val):
'''
Rearrange nums, so nums could be separated into 3 parts.
1 part < 2 part < 3 part.
And all numbers in 2 part are all equal to mid_val.
'''
i, j, k = 0, 0, len(nums)-1
while j <= k:
if nums[j] < mid_val:
nums[i], nums[j] = nums[j], nums[i] # Swop is necessary.
i += 1
j += 1
elif nums[j] > mid_val:
nums[j], nums[k] = nums[k], nums[j]
k -= 1
else:
j += 1
mid_val = find_kth_largest(nums, (len(nums)+1) // 2)
three_way_partition(nums, mid_val)
nums[::2], nums[1::2] = nums[:(len(nums)+1)//2][::-1], nums[(len(nums)+1)//2:][::-1]
def getMin(self):
"""
:rtype: int
"""
return heapq.nsmallest(1)[0]
# Your MinStack object will be instantiated and called as such:
def calculate_min_hashes(self, elements):
return heapq.nsmallest(n=self.sketch_size, iterable=self._hashes(elements, 1))
def heap_sort_batch(unsorted):
while unsorted:
yield from heapq.nsmallest(100, unsorted)