def dump_status_strings(conf):
dontwi = Dontwi(conf)
in_cn = dontwi.get_connector("inbound")
in_cn.connect()
operation_cf = dontwi.config.items["operation"]
trigger_str = dontwi.get_trigger()
[since, until, limit] = [
dontwi.config.inbound.get(option, "")
for option in ["since", "until", "limit"]]
statuses, statuses2 = tee(in_cn.get_timeline_statuses_by_hashtag(
hashtag=trigger_str, since=since, until=until, limit=limit))
status_pr = StatusText(dontwi.config.outbound)
result_log = ResultLog(dontwi.config.items)
summaries = dontwi.summaries_to_be_listed_in_waiting_list(result_log=result_log,
status_pr=status_pr,
statuses=statuses,
trigger_str=trigger_str)
status_dc = {a_status.status["id"]: a_status.status["content"]
for a_status in statuses2}
dump_strs = ["{0}\n{1}\n{2}\n[{3}]".format(a_summary["inbound_status_id"], a_summary["status_string"],
a_summary["inbound_status_url"], status_dc[a_summary["inbound_status_id"]])
for a_summary in summaries]
for lint_str in dump_strs:
print(lint_str)
def get_supervised_data(self, preprocessed, bin_sites,
active_learning=False, random_state=1234,
n_jobs=-1):
"""Compute the feature matrix and the regression values."""
preprocessed, preprocessed_ = tee(preprocessed)
if self.mode == 'sequence':
dists = [attr['dist'] for attr, _ in preprocessed_]
else:
dists = [g.graph['id']['dist'] for g in preprocessed_]
vals = np.array([common.dist_to_val(d, self.max_dist) for d in dists])
if self.mode == 'sequence':
self.vectorizer = SeqVectorizer(auto_weights=True,
**self.vectorizer_args)
else:
self.vectorizer = GraphVectorizer(auto_weights=True,
**self.vectorizer_args)
matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
block_size=400, n_jobs=n_jobs)
return matrix, vals
def get_predict_data(self, preprocessed, n_jobs=-1):
"""Compute the feature matrix and extract the subseq info."""
def _subdict(dic):
subdict = dict((k, dic[k]) for k in [
'tr_name', 'center', 'tr_len'] if k in dic)
return subdict
preprocessed, preprocessed_ = tee(preprocessed)
if self.mode == 'sequence':
info = [_subdict(attr) for attr, _ in preprocessed_]
else:
info = [_subdict(g.graph['id']) for g in preprocessed_]
if self.mode == 'sequence':
self.vectorizer = SeqVectorizer(auto_weights=True,
**self.vectorizer_args)
else:
self.vectorizer = GraphVectorizer(auto_weights=True,
**self.vectorizer_args)
matrix = vectorize(preprocessed, vectorizer=self.vectorizer,
block_size=400, n_jobs=n_jobs)
return matrix, info
def graph_preprocessor(graphs, which_set, bin_sites=None, max_dist=None,
random_state=1234, **params):
"""Preprocess graphs."""
assert which_set == 'train' or which_set == 'test', \
"which_set must be either 'train' or 'test'."
if which_set == 'train':
graphs = add_distance(graphs, bin_sites)
graphs = split_iterator(graphs, **params)
graphs = add_type(graphs, max_dist)
return graphs
elif which_set == 'test':
graphs, graphs_ = tee(graphs)
full_graphs = transform_dictionary(graphs_)
graphs = split_iterator(graphs, **params)
return full_graphs, graphs
else:
raise Exception("ERROR: unrecognized which_set type: %s" %
which_set)
def hamming_numbers():
# Generate "5-smooth" numbers, also called "Hamming numbers"
# or "Regular numbers". See: http://en.wikipedia.org/wiki/Regular_number
    # Generates numbers of the form 2**i * 3**j * 5**k for nonnegative integers i, j, and k.
def deferred_output():
        'Works like a forward reference to the "output" variable defined below'
for i in output:
yield i
result, p2, p3, p5 = tee(deferred_output(), 4) # split the output streams
m2 = (2*x for x in p2) # multiples of 2
m3 = (3*x for x in p3) # multiples of 3
m5 = (5*x for x in p5) # multiples of 5
merged = merge(m2, m3, m5)
combined = chain([1], merged) # prepend starting point
output = (k for k, v in groupby(combined)) # eliminate duplicates
return result
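A minimal usage sketch of the recipe above; the imports are assumed to exist at module level in the original source (merge comes from heapq here):

from heapq import merge
from itertools import chain, groupby, islice, tee

print(list(islice(hamming_numbers(), 10)))
# [1, 2, 3, 4, 5, 6, 8, 9, 10, 12]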
def apply_inf_list(self, a:Node.infinite, b:Node.infinite):
def apply_iterator(a, b):
a, a_copy = tee(a, 2)
b, b_copy = tee(b, 2)
yield self.run(next(a_copy), [next(b_copy)])
size = 1
while 1:
next_a = next(a_copy)
next_b = next(b_copy)
a, new_a = tee(a, 2)
b, new_b = tee(b, 2)
yield from (self.run(next(new_a), [next_b]) for i in range(size))
yield from (self.run(next_a, [next(new_b)]) for i in range(size))
yield self.run(next_a, [next_b])
size += 1
return DummyList(apply_iterator(a, b))
def __call__(self, tokens):
from itertools import tee
count = len(self.filters)
# Tee the token iterator and wrap each teed iterator with the
# corresponding filter
gens = [filter(t.copy() for t in gen) for filter, gen
in zip(self.filters, tee(tokens, count))]
# Keep a count of the number of running iterators
running = count
while running:
for i, gen in enumerate(gens):
if gen is not None:
try:
yield next(gen)
except StopIteration:
gens[i] = None
running -= 1
def build_phrase_models(content, base_path, settings):
""" Build and save the phrase models
"""
ngram_level = int(settings['level'])
# According to tee() docs, this may be inefficient in terms of memory.
# We need to do this because we need multiple passes through the
# content stream.
content = chain.from_iterable(doc.tokenized_text for doc in content)
cs1, cs2 = tee(content, 2)
for i in range(ngram_level-1):
phrases = Phrases(cs1)
path = "%s.%s" % (base_path, i + 2) # save path as n-gram level
logger.info("Phrase processor: Saving %s", path)
phrases.save(path)
# TODO: gensim complains about not using Phraser(phrases)
content = phrases[cs2] # tokenize phrases in content stream
cs1, cs2 = tee(content, 2)
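The comment above is the key point: tee() is what lets the same token stream be consumed once to train Phrases() and again to re-tokenize. A minimal, self-contained sketch of that two-pass pattern, with a plain generator standing in for the gensim objects (the stand-in is an assumption, not part of the original):

from itertools import tee

def token_stream():
    # hypothetical stand-in for the tokenized content stream
    for sentence in (["new", "york", "city"], ["new", "york", "times"]):
        yield sentence

pass1, pass2 = tee(token_stream(), 2)
vocab = {tok for sentence in pass1 for tok in sentence}   # first pass: build a vocabulary
lengths = [len(sentence) for sentence in pass2]           # second pass: reuse the same stream
# note: as the comment above warns, tee() buffers whatever the slower copy has not read yet
print(sorted(vocab), lengths)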
def find_links(file):
"""Find all markdown links in a file object.
Yield (lineno, regexmatch) tuples.
"""
# don't yield same link twice
seen = set()
# we need to loop over the file two lines at a time to support
# multi-line (actually two-line) links, so this is kind of a mess
firsts, seconds = itertools.tee(file)
next(seconds) # first line is never second line
# we want 1-based indexing instead of 0-based and one-line links get
# caught from linepair[1], so we need to start at two
for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
lines = linepair[0] + linepair[1]
for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
if match.group(0) not in seen:
seen.add(match.group(0))
yield match, lineno
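The tee()/next() combination used over the file lines above is the standard "pairwise" itertools recipe; a minimal standalone sketch:

import itertools

def pairwise(iterable):
    # pairwise('ABCD') -> ('A', 'B'), ('B', 'C'), ('C', 'D')
    firsts, seconds = itertools.tee(iterable)
    next(seconds, None)  # advance the second copy by one
    return zip(firsts, seconds)

print(list(pairwise("ABCD")))  # [('A', 'B'), ('B', 'C'), ('C', 'D')]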
def __init__(self, l_hours, i_milis=2, i_sec=None):
'''
    Initialize a NextStopTime object. Save all parameters as attributes
    :param l_hours: list. Hours to be used in stoptime calculation
    :param i_milis*: integer. Number of milliseconds between each stoptime
    :param i_sec*: integer. Number of seconds between each stoptime. If
        defined, i_milis is not used
'''
i_noise = None
if i_milis > 4:
i_noise = min(1, i_milis/5)
self.gen_stoptime = get_next_stoptime(l_hours, i_milis, i_sec, i_noise)
self.gen_stoptime, self.gen_backup = itertools.tee(self.gen_stoptime)
self.s_last_stoptime = ''
self.s_stoptime_was_set = ''
self.s_time = "{:0>2}:{:0>2}:{:0>2}.{:0>3}"
self.b_use_last = False
def trailing_windows(window_size=24, window_units='hours', window_count=3):
"""
Args:
window_size (int):
window_units (str):
window_count (int):
Yields:
Dict[str,str]
"""
tos, froms = tee(trailing_periods(window_size, window_units, window_count))
next(froms, None)
for to, _from in zip(tos, froms):
yield {'_from': _from, 'to': to}
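trailing_periods() is not shown here; with a hypothetical stand-in that yields newest-first timestamps (an assumption), the tee()/next() pairing above turns consecutive periods into from/to window dicts like this:

from itertools import tee

def fake_trailing_periods():
    # hypothetical stand-in for trailing_periods(); yields newest-first timestamps
    yield "2024-01-04T00:00:00Z"
    yield "2024-01-03T00:00:00Z"
    yield "2024-01-02T00:00:00Z"

tos, froms = tee(fake_trailing_periods())
next(froms, None)  # shift the "from" stream back by one period
for to, _from in zip(tos, froms):
    print({'_from': _from, 'to': to})
# prints two windows, each spanning one period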
def iter_split_evaluate_wrapper(self, fn, local_vars, in_size, q_in, q_out):
    # Read (index, data) pairs from q_in, remember the indices, and split the
    # data stream into `in_size` per-argument generators with itertools.tee;
    # each result coming back from self.evaluate is paired with its saved
    # index and put onto q_out.
    l = Lock()
idx_q = Queue()
def split_iter():
try:
while True:
l.acquire()
i, data_in = q_in.get()
idx_q.put(i)
if data_in is EOFMessage:
return
yield data_in
except BaseException:
traceback.print_exc(file=sys.stdout)
gs = itertools.tee(split_iter(), in_size)
for data_out in self.evaluate((fn,) + tuple((lambda i: (x[i] for x in gs[i]))(i) for i in range(in_size)), local_vars=local_vars):
q_out.put((idx_q.get(), data_out))
l.release()
q_out.put((0, EOFMessage))
def iwindowed(iterable, n):
'''
Take successive n-tuples from an iterable using a sliding window
'''
# Take n copies of the iterable
iterables = tee(iterable, n)
# Advance each to the correct starting position
for step, it in enumerate(iterables):
for s in range(step):
next(it)
    # Zip the modified iterables and yield the elements as a generator
    # NOTE: not using zip_longest as we want to stop when we reach the end
for t in zip(*iterables):
yield t
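A quick usage sketch of iwindowed(), assuming `tee` is imported from itertools in the enclosing module:

from itertools import tee

print(list(iwindowed("abcde", 3)))
# [('a', 'b', 'c'), ('b', 'c', 'd'), ('c', 'd', 'e')]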
def dummy_type_tee():
"""Give itertools.tee(yielder)[0]
Edited date:
160704
Test:
160704
Returns:
        itertools.tee: this is used as self.type_generator_or_tee
"""
def dummy():
yield None
copy1, copy2 = itertools.tee(dummy())
return copy2
def approx_stabilities(instance, num, reps, random_instance=RandomState()):
"""
This function approximates the stability of the given `instance` for
`num` challenges evaluating it `reps` times per challenge. The stability
is the probability that the instance gives the correct response when
evaluated.
:param instance: pypuf.simulation.base.Simulation
The instance for the stability approximation
:param num: int
Amount of challenges to be evaluated
    :param reps: int
                 Amount of repetitions per challenge
    :param random_instance: numpy.random.RandomState
                            The random number generator used to sample the challenges
:return: array of float
Array of the stabilities for each challenge
"""
challenges = sample_inputs(instance.n, num, random_instance)
responses = zeros((reps, num))
for i in range(reps):
challenges, unpacked_challenges = itertools.tee(challenges)
responses[i, :] = instance.eval(array(list(unpacked_challenges)))
return 0.5 + 0.5 * np_abs(np_sum(responses, axis=0)) / reps
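The returned value is just the absolute mean response mapped onto [0.5, 1.0]; a tiny worked example of that final formula with a hand-written ±1 response matrix (3 repetitions, 3 challenges), written with plain numpy rather than the aliased imports above:

import numpy as np

responses = np.array([[ 1,  1, -1],
                      [ 1, -1, -1],
                      [ 1,  1, -1]])  # reps x num, entries in {-1, +1}
reps = responses.shape[0]
stability = 0.5 + 0.5 * np.abs(np.sum(responses, axis=0)) / reps
print(stability)  # approximately [1.0, 0.667, 1.0]: the second challenge flips once in three evaluations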
def prev_this_next(items):
"""
Loop over a collection with look-ahead and look-back.
From Thomas Guest,
http://wordaligned.org/articles/zippy-triples-served-with-python
Seriously useful looping tool (Google "zippy triples")
lets you loop a collection and see the previous and next items,
which get set to None at the ends.
To be used in layout algorithms where one wants a peek at the
next item coming down the pipe.
"""
extend = itertools.chain([None], items, [None])
    prev, this, nxt = itertools.tee(extend, 3)
    try:
        # The look-ahead iterator must not be named "next": that would shadow
        # the builtin next() used below to advance the iterators.
        next(this)
        next(nxt)
        next(nxt)
    except StopIteration:
        pass
    return zip(prev, this, nxt)
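With the look-ahead iterator renamed so it no longer shadows the builtin next(), the recipe behaves as described in the docstring; a quick check:

for prev_item, item, next_item in prev_this_next(["a", "b", "c"]):
    print(prev_item, item, next_item)
# None a b
# a b c
# b c None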
def parse_file_keeplines(lines, require_order=None):
r"""
>>> def gen_lines(x):
... yield "# field:int\n"
... for i in range(x):
... yield "%s\n" % (test_field,)
>>> parsed = parse_file_keeplines(gen_lines(2))
>>> next(parsed)
'# field:int\n'
>>> test_field = 1; next(parsed)
('1\n', Rec(field=1))
>>> test_field = 2; next(parsed)
('2\n', Rec(field=2))
"""
lines_iter, lines_iter_parse = tee(iter(lines), 2)
try:
yield next(lines_iter)
except StopIteration:
raise Exception("No header")
for line, rec in izip(lines_iter, parse_file(lines_iter_parse)):
yield line, rec
def __iter__(self):
""" Return the inner iterator
Example:
>>> from ww import g
>>> gen = g(range(10))
>>> iter(gen) == gen.iterator
True
Returns:
Inner iterator.
Raises:
RuntimeError: if trying call __iter__ after calling .tee()
"""
if self._tee_called:
raise RuntimeError("You can't iterate on a g object after g.tee "
"has been called on it.")
return self.iterator
# TODO: type self, and stuff that returns things depending on self
def __mul__(self, num):
# type: (int) -> IterableWrapper
""" Duplicate itself and concatenate the results.
It's basically a shortcut for `g().chain(*g().tee())`.
Args:
num: The number of times to duplicate.
Example:
>>> from ww import g
>>> (g(range(3)) * 3).list()
[0, 1, 2, 0, 1, 2, 0, 1, 2]
>>> (2 * g(range(3))).list()
[0, 1, 2, 0, 1, 2]
"""
clones = itertools.tee(self.iterator, num)
return self.__class__(itertools.chain(*clones))
def tee(self, num=2):
# type: (int) -> IterableWrapper
""" Return copies of this generator.
Proxy to itertools.tee().
If you want to concatenate the results afterwards, use
g() * x instead of g().tee(x) which does that for you.
Args:
num: The number of returned generators.
Example:
>>> from ww import g
>>> a, b, c = g(range(3)).tee(3)
>>> [tuple(a), tuple(b), tuple(c)]
[(0, 1, 2), (0, 1, 2), (0, 1, 2)]
"""
cls = self.__class__
gen = cls(cls(x) for x in itertools.tee(self.iterator, num))
self._tee_called = True
return gen
# TODO: allow negative end boundary
def copy(self):
# type: () -> IterableWrapper
""" Return an exact copy of the iterable.
The reference of the new iterable will be the same as the source
when `copy()` was called.
Example:
>>> from ww import g
>>> my_g_1 = g(range(3))
>>> my_g_2 = my_g_1.copy()
>>> next(my_g_1)
0
>>> next(my_g_1)
1
>>> next(my_g_2)
0
"""
self.iterator, new = itertools.tee(self.iterator)
return self.__class__(new)
def previous_current_next(items):
"""
From http://www.wordaligned.org/articles/zippy-triples-served-with-python
Creates an iterator which returns (previous, current, next) triples,
with ``None`` filling in when there is no previous or next
available.
"""
extend = itertools.chain([None], items, [None])
prev, cur, nex = itertools.tee(extend, 3)
# Advancing an iterator twice when we know there are two items (the
# two Nones at the start and at the end) will never fail except if
# `items` is some funny StopIteration-raising generator. There's no point
# in swallowing this exception.
next(cur)
next(nex)
next(nex)
return zip(prev, cur, nex)
def __call__(self, seq):
min_order = self.min_order
max_order = self.max_order
t = tee(seq, max_order)
for i in xrange(max_order):
for j in xrange(i):
# advance iterators, ignoring result
t[i].next()
while True:
token = ''.join(tn.next() for tn in t)
if len(token) < max_order: break
for n in xrange(min_order-1, max_order):
yield token[:n+1]
for a in xrange(max_order-1):
for b in xrange(min_order, max_order-a):
yield token[a:a+b]
def __call__(self, seq):
_seq = str.split(seq)
min_order = self.min_order
max_order = self.max_order
t = tee(_seq, max_order)
for i in xrange(max_order):
for j in xrange(i):
# advance iterators, ignoring result
t[i].next()
while True:
token = [tn.next() for tn in t]
if len(token) < max_order: break
for n in xrange(min_order-1, max_order):
yield ' '.join(token[:n+1])
for a in xrange(max_order-1):
for b in xrange(min_order, max_order-a):
yield ' '.join(token[a:a+b])
def itercopy(iterable, copies = 2):
"""
Split iterable into 'copies'. Once this is done, the original iterable *should
not* be used again.
Parameters
----------
iterable : iterable
Iterable to be split. Once it is split, the original iterable
should not be used again.
copies : int, optional
Number of copies. Also determines the number of returned iterables.
Returns
-------
iter1, iter2, ... : iterable
Copies of ``iterable``.
Examples
--------
By rebinding the name of the original iterable, we make sure that it
will never be used again.
>>> from npstreams import itercopy
>>> evens = (2*n for n in range(1000))
>>> evens, evens_copy = itercopy(evens, copies = 2)
See Also
--------
itertools.tee : equivalent function
"""
    # itercopy is included because the documentation of itertools.tee isn't
    # obvious to everyone
return tee(iterable, copies)
def nsmallest(n, iterable, key=None):
"""Find the n smallest elements in a dataset.
Equivalent to: sorted(iterable, key=key)[:n]
"""
# Short-cut for n==1 is to use min() when len(iterable)>0
if n == 1:
it = iter(iterable)
head = list(islice(it, 1))
if not head:
return []
if key is None:
return [min(chain(head, it))]
return [min(chain(head, it), key=key)]
# When n>=size, it's faster to use sorted()
try:
size = len(iterable)
except (TypeError, AttributeError):
pass
else:
if n >= size:
return sorted(iterable, key=key)[:n]
# When key is none, use simpler decoration
if key is None:
it = izip(iterable, count()) # decorate
result = _nsmallest(n, it)
return map(itemgetter(0), result) # undecorate
# General case, slowest method
in1, in2 = tee(iterable)
it = izip(imap(key, in1), count(), in2) # decorate
result = _nsmallest(n, it)
return map(itemgetter(2), result) # undecorate
def nlargest(n, iterable, key=None):
"""Find the n largest elements in a dataset.
Equivalent to: sorted(iterable, key=key, reverse=True)[:n]
"""
# Short-cut for n==1 is to use max() when len(iterable)>0
if n == 1:
it = iter(iterable)
head = list(islice(it, 1))
if not head:
return []
if key is None:
return [max(chain(head, it))]
return [max(chain(head, it), key=key)]
# When n>=size, it's faster to use sorted()
try:
size = len(iterable)
except (TypeError, AttributeError):
pass
else:
if n >= size:
return sorted(iterable, key=key, reverse=True)[:n]
# When key is none, use simpler decoration
if key is None:
it = izip(iterable, count(0,-1)) # decorate
result = _nlargest(n, it)
return map(itemgetter(0), result) # undecorate
# General case, slowest method
in1, in2 = tee(iterable)
it = izip(imap(key, in1), count(0,-1), in2) # decorate
result = _nlargest(n, it)
return map(itemgetter(2), result) # undecorate
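nsmallest() and nlargest() above are the Python 2 heapq fallbacks (hence izip/imap). Their general case uses tee() to "decorate" each element with its key while keeping the original value alongside. A Python 3 sketch of the same decorate/undecorate idea using heapq directly; this is an adaptation for illustration, not the code above:

from heapq import nsmallest as heap_nsmallest
from itertools import count, tee

def smallest_by_key(n, iterable, key):
    in1, in2 = tee(iterable)
    # decorate: (key, insertion index, original value); the index breaks ties
    # so the original values are never compared directly
    decorated = zip(map(key, in1), count(), in2)
    return [value for _, _, value in heap_nsmallest(n, decorated)]

print(smallest_by_key(2, ["apple", "fig", "banana"], key=len))
# ['fig', 'apple']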