def sort(items, maxsize=100000, tempdir=None, maxfiles=128):
    """Sorts the given items using an external merge sort.

    :param maxsize: the maximum number of items to keep in memory at once.
    :param tempdir: the path of a directory to use for temporary file
        storage. The default is to use the system's temp directory.
    :param maxfiles: maximum number of files to open at once.
    """
    p = SortingPool(maxsize=maxsize, tempdir=tempdir)
    for item in items:
        p.add(item)
    return p.items(maxfiles=maxfiles)
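A minimal usage sketch for the function above, assuming SortingPool (this snippet appears to come from Whoosh's externalsort module) is defined alongside it:

import random

# Hypothetical driver: sort 50,000 random integers while keeping at
# most 10,000 in memory; anything beyond that spills to temp files.
data = (random.randrange(1_000_000) for _ in range(50_000))
for value in sort(data, maxsize=10_000):
    pass  # consume the sorted stream lazily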
def hamming_numbers():
    # Generate "5-smooth" numbers, also called "Hamming numbers" or
    # "regular numbers". See: http://en.wikipedia.org/wiki/Regular_number
    # Yields every number of the form 2**i * 3**j * 5**k for
    # non-negative integers i, j, and k, in increasing order.
    from heapq import merge
    from itertools import tee, chain, groupby

    def deferred_output():
        'Works like a forward reference to the "output" global variable'
        for i in output:
            yield i

    result, p2, p3, p5 = tee(deferred_output(), 4)  # split the output streams
    m2 = (2*x for x in p2)                          # multiples of 2
    m3 = (3*x for x in p3)                          # multiples of 3
    m5 = (5*x for x in p5)                          # multiples of 5
    merged = merge(m2, m3, m5)
    combined = chain([1], merged)                   # prepend starting point
    output = (k for k, v in groupby(combined))      # eliminate duplicates
    return result
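The recipe is easiest to sanity-check by slicing off the front of the stream:

from itertools import islice

# First fifteen Hamming numbers:
print(list(islice(hamming_numbers(), 15)))
# [1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24]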
def items(self, maxfiles=128):
    """Returns a sorted list or iterator of the items in the pool.

    :param maxfiles: maximum number of files to open at once.
    """
    if maxfiles < 2:
        raise ValueError("maxfiles=%s must be >= 2" % maxfiles)

    if not self.runs:
        # We never wrote a run to disk, so just sort the queue in memory
        # and return that
        return sorted(self.current)

    # Write a new run with the leftover items in the queue
    self.save()

    # If we have more runs than allowed open files, merge some of the runs
    if maxfiles < len(self.runs):
        self.reduce_to(maxfiles, maxfiles)

    # Take all the runs off the run list and merge them
    runs = self.runs
    self.runs = []  # Minor detail, makes this object reusable
    return self._merge_runs(runs)
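The pool's _merge_runs is not shown in this snippet. A plausible sketch, assuming a hypothetical _read_run helper that re-opens a saved run as a sorted iterator:

import heapq

def _merge_runs(self, runs):
    # Lazily k-way merge the sorted on-disk runs into one sorted stream.
    # _read_run is an assumed helper, not part of the snippet above.
    return heapq.merge(*(self._read_run(run) for run in runs))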
def _merge_sorted_lists(keyfn):
    def merge(l1, l2):
        # TODO: might be nice to have a specialized version of this which
        # reuses uninterrupted subtrees from one side; it would help with
        # lists that are mostly sorted already.
        min1, max1 = keyfn(l1[0]), keyfn(l1[-1])
        min2, max2 = keyfn(l2[0]), keyfn(l2[-1])
        if max1 <= min2:
            return l1 + l2
        if max2 < min1:  # not "<=" here because it would make the sort unstable
            return l2 + l1
        return viewablelist(list(_mergesorted(iter(l1), iter(l2), keyfn)))
    return merge
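The _mergesorted helper is not included here. Since Python 3.5, heapq.merge accepts a key argument and is stable, so a plausible stand-in is:

import heapq

def _mergesorted(it1, it2, keyfn):
    # Hypothetical stand-in: a stable two-way merge of the
    # already-sorted iterators, ordered by keyfn.
    return heapq.merge(it1, it2, key=keyfn)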
#
# Other
#
def nthSuperUglyNumber(self, n, primes):
    """
    :type n: int
    :type primes: List[int]
    :rtype: int
    """
    import heapq
    uglies = [1]

    def gen(prime):  # each gen(prime) lazily yields prime * u for every ugly u
        for ugly in uglies:
            yield ugly * prime

    merged = heapq.merge(*map(gen, primes))  # one sorted stream per prime
    while len(uglies) < n:
        ugly = next(merged)
        if ugly != uglies[-1]:  # skip duplicates such as 2*7 == 7*2
            uglies.append(ugly)
    return uglies[-1]
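A quick check against LeetCode 313's sample input, assuming the usual Solution wrapper class (hypothetical here):

# The 12th super ugly number for primes [2, 7, 13, 19] is 32:
# 1, 2, 4, 7, 8, 13, 14, 16, 19, 26, 28, 32
print(Solution().nthSuperUglyNumber(12, [2, 7, 13, 19]))  # 32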
From 313_super_ugly_number.py in the Machine_Learning_Playground project (author: yao23):
def nthSuperUglyNumber1(self, n, primes):
    """
    :type n: int
    :type primes: List[int]
    :rtype: int
    """
    import heapq
    uglies = [1]

    def gen(prime):
        for ugly in uglies:
            yield ugly * prime

    merged = heapq.merge(*map(gen, primes))
    while len(uglies) < n:
        ugly = next(merged)
        if ugly != uglies[-1]:
            uglies.append(ugly)
    return uglies[-1]
def __init__(self, width, height, rot=True, *args, **kwargs):
    """
    _skyline is the list used to store all the skyline segments; each
    one is a list with the format [x, y, width], where x is the x
    coordinate of the left-most point of the segment, y the y coordinate
    of the segment, and width the length of the segment. The initial
    segment is always [0, 0, surface_width].

    Arguments:
        width (int, float): Surface width
        height (int, float): Surface height
        rot (bool): Enable or disable rectangle rotation
    """
    self._waste_management = False
    self._waste = WasteManager(rot=rot)
    super(Skyline, self).__init__(width, height, rot, merge=False, *args, **kwargs)
def _placement_points_generator(self, skyline, width):
    """Returns a generator for the x coordinates of all the placement
    points on the skyline for a given rectangle.

    WARNING: Some points may be duplicated, but it is faster to compute
    them twice than to deduplicate them.

    Arguments:
        skyline (list): Skyline HSegment list
        width (int, float): Rectangle width

    Returns:
        generator
    """
    skyline_r = skyline[-1].right
    skyline_l = skyline[0].left

    # Placements using skyline segment left point
    ppointsl = (s.left for s in skyline if s.left+width <= skyline_r)

    # Placements using skyline segment right point
    ppointsr = (s.right-width for s in skyline if s.right-width >= skyline_l)

    # Merge positions
    return heapq.merge(ppointsl, ppointsr)
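A toy illustration of that final merge step, using two already-sorted coordinate streams as stand-ins for ppointsl and ppointsr:

import heapq

left_points = (x for x in (0, 4, 9))   # stand-in for ppointsl
right_points = (x for x in (2, 6))     # stand-in for ppointsr
print(list(heapq.merge(left_points, right_points)))  # [0, 2, 4, 6, 9]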
import heapq
import itertools
import typing

def lazysort(l: list) -> typing.Iterator:
    # Stage 1: repeatedly peel a sorted-ish run off the input with dropsort,
    # pushing each run onto a stack and re-scanning the dropped surplus.
    stack = []
    current_list = iter(l)
    sentinel = object()
    first = next(current_list, sentinel)
    while first is not sentinel:
        sortedish, surplus = dropsort(itertools.chain((first,), current_list))
        stack.append(sortedish)
        current_list = surplus
        first = next(current_list, sentinel)

    # Stage 2: lazily merge all the sorted runs together.
    if len(stack) < 2:  # the case where the list `l` is already sorted
        return iter(l)
    cur = heapq.merge(stack.pop(), stack.pop())
    while stack:
        cur = heapq.merge(cur, stack.pop())
    return cur
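dropsort itself is not shown. An eager sketch that satisfies the (sorted_run, surplus) contract used above (the original may well be lazier):

def dropsort(iterable):
    # Hypothetical eager version: keep a non-decreasing subsequence,
    # and collect everything that was "dropped" as the surplus.
    kept, dropped = [], []
    for item in iterable:
        if not kept or item >= kept[-1]:
            kept.append(item)
        else:
            dropped.append(item)
    return kept, iter(dropped)

print(list(lazysort([5, 1, 4, 2, 3])))  # [1, 2, 3, 4, 5]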
import heapq
import operator

def _get_merged_catchup_txns(existing_txns, new_txns):
    """
    Merge any newly received txns during catchup with already received txns

    :param existing_txns: (seq_no, txn) pairs already received, sorted by seq_no
    :param new_txns: (seq_no, txn) pairs just received, sorted by seq_no
    :return: a single list of (seq_no, txn) pairs sorted by seq_no
    """
    # Drop entries of new_txns whose seq_no already appears in existing_txns
    # (the index arithmetic assumes contiguous seq_nos in new_txns)
    idx_to_remove = []
    for seq_no, _ in existing_txns:
        if seq_no < new_txns[0][0]:
            continue
        if seq_no > new_txns[-1][0]:
            break
        idx_to_remove.append(seq_no - new_txns[0][0])
    for idx in reversed(idx_to_remove):
        new_txns.pop(idx)

    return list(heapq.merge(existing_txns, new_txns,
                            key=operator.itemgetter(0)))
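A small worked example of the merge-and-dedup behaviour:

existing = [(1, 'a'), (2, 'b'), (4, 'd')]
new = [(2, 'B'), (3, 'c')]
print(_get_merged_catchup_txns(existing, new))
# [(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]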
from heapq import merge
from itertools import islice
from typing import List, Optional

def posts_for_user(user: User, limit: Optional[int] = None) -> List[Post]:
    # Assumes each followee's post list is stored newest-first, so the
    # reverse merge yields a single newest-first timeline.
    relevant = merge(*[user_posts[u] for u in following[user]], reverse=True)
    return list(islice(relevant, limit))
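A hypothetical self-contained demo of the same pattern, with plain dicts standing in for the following and user_posts structures:

from heapq import merge
from itertools import islice

following = {'alice': ['bob', 'carol']}
user_posts = {
    'bob':   [(5, 'bob #2'), (2, 'bob #1')],      # (timestamp, text), newest first
    'carol': [(4, 'carol #2'), (3, 'carol #1')],
}
timeline = merge(*[user_posts[u] for u in following['alice']], reverse=True)
print(list(islice(timeline, 3)))
# [(5, 'bob #2'), (4, 'carol #2'), (3, 'carol #1')]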
import heapq

def date_sorted_sources(*sources):
    """
    Takes an iterable of sources, generating namestrings and
    piping their output into date_sort.
    """
    sorted_stream = heapq.merge(*(_decorate_source(s) for s in sources))

    # Strip out key decoration
    for _, message in sorted_stream:
        yield message
def apply(func, *mappings):
    """Return a new IntervalMapping applying func to all values of mappings.

    For example, apply(lambda x, y: x + y, m1, m2) returns a new
    mapping with the sum of values for m1 and
    m2. IntervalMapping.nothing is passed to func when the value is
    undefined.

    >>> m1 = IntervalMapping()
    >>> m2 = IntervalMapping()
    >>> m1[:] = m2[:] = 0  # avoid problems with undefined values
    >>> m1[0:2] = 1
    >>> m2[1:3] = 2
    >>> m3 = apply(lambda a, b: a + b, m1, m2)
    >>> m3[-1], m3[0], m3[1], m3[2], m3[3]
    (0, 1, 3, 2, 0)
    """
    from heapq import merge
    values = [m.leftmost() for m in mappings]

    def changes():
        def start_i_value(i_m):
            i, m = i_m
            res = ((k.start, i, v) for k, v in m.iteritems(True))
            next(res)  # skip the initial (leftmost) value
            return res

        change_points = merge(*map(start_i_value, enumerate(mappings)))
        lastbound = None
        for bound, i, v in change_points:
            values[i] = v
            if bound != lastbound:
                yield bound, func(*values)
                lastbound = bound
        yield bound, func(*values)

    return IntervalMapping.from_changes(func(*values), changes())
def imerge(iterables):
    return merge(*iterables)
import heapq
from collections import namedtuple

# Assumed helper: a namedtuple whose ordering compares key first, then obj.
Keyed = namedtuple("Keyed", ["key", "obj"])

def merge(key=None, *iterables):
    # based on code posted by Scott David Daniels in c.l.p.
    # http://groups.google.com/group/comp.lang.python/msg/484f01f1ea3c832d
    if key is None:
        # Plain elements carry no .obj attribute, so yield them directly
        for element in heapq.merge(*iterables):
            yield element
    else:
        keyed_iterables = [(Keyed(key(obj), obj) for obj in iterable)
                           for iterable in iterables]
        for element in heapq.merge(*keyed_iterables):
            yield element.obj
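A quick check of the keyed path:

print(list(merge(str.lower, ["B", "d"], ["a", "c"])))  # ['a', 'B', 'c', 'd']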
import os
from itertools import cycle, islice
from tempfile import gettempdir

def msort(input_, output, key=None, buffer_size=32000, tempdirs=None):
    if tempdirs is None:
        tempdirs = []
    if not tempdirs:
        tempdirs.append(gettempdir())

    chunks = []
    try:
        # Split the input into sorted chunks, one temp file per chunk,
        # cycling across the available temp directories.
        with open(input_, 'rb', 64*1024) as input_file:
            input_iterator = iter(input_file)
            for tempdir in cycle(tempdirs):
                current_chunk = list(islice(input_iterator, buffer_size))
                if not current_chunk:
                    break
                current_chunk.sort(key=key)
                output_chunk = open(os.path.join(tempdir, '%06i' % len(chunks)),
                                    'w+b', 64*1024)
                chunks.append(output_chunk)
                output_chunk.writelines(current_chunk)
                output_chunk.flush()
                output_chunk.seek(0)

        # k-way merge the sorted chunk files into the output file
        with open(output, 'wb', 64*1024) as output_file:
            output_file.writelines(merge(key, *chunks))
    finally:
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                pass
import heapq
from typing import Iterable, List

def sorted_idx_iter(self, types: List[int]) -> Iterable[int]:
    """
    Returns an iterator of file positions sorted by file position (across
    different message types)

    :param types: The message types to return; None returns all types
    :return: Generator object for sorted file positions
    """
    if types:
        idx_iters = [self.idx[key] for key in types if key in self.idx]
    else:
        idx_iters = [val for key, val in self.idx.items()]

    # Use heapq.merge to return a sorted iterator of file indices
    return heapq.merge(*idx_iters)
import heapq

def sorted_items(iterables):
    # Each iterable is decorated with a sort key, merged, then undecorated.
    sorted_iterable = heapq.merge(*(_decorate_items(s) for s in iterables))
    for _, item in sorted_iterable:
        yield item
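_decorate_items is not shown; a hypothetical version, assuming the decoration is simply a (key, item) pair:

def _decorate_items(iterable):
    # Hypothetical: pair each item with its sort key (here, the item
    # itself) so heapq.merge orders the streams by key.
    return ((item, item) for item in iterable)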
import heapq
import os
import tempfile
from itertools import islice

def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """batch sort helper with temporary files, supports sorting large stuff"""
    if not output_class:
        output_class = input_iterator.__class__

    chunks = []
    try:
        # Read the input in buffer_size pieces, sort each piece in memory,
        # and write it out as its own temporary chunk file.
        while True:
            current_chunk = list(islice(input_iterator, buffer_size))
            if not current_chunk:
                break
            current_chunk.sort()
            fd, filepath = tempfile.mkstemp()
            os.close(fd)
            output_chunk = output_class(filepath)
            chunks.append(output_chunk)
            for elem in current_chunk:
                output_chunk.write(elem.obj)
            output_chunk.close()

        # k-way merge the sorted chunks into the final output
        output_file = output_class(output_path)
        for elem in heapq.merge(*chunks):
            output_file.write(elem.obj)
        output_file.close()
    finally:
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                pass