def add_unk(self, thresh=0, unk_string='<UNK>'):
    if unk_string in self.s2t.keys():
        raise Exception("tried to add an UNK token that already existed")
    if self.unk is not None:
        raise Exception("already added an UNK token")
    strings = [unk_string]
    for token in self.tokens:
        if token.count >= thresh:
            strings.append(token.s)
    if self.START_TOK is not None and self.START_TOK not in strings:
        strings.append(self.START_TOK.s)
    if self.END_TOK is not None and self.END_TOK not in strings:
        strings.append(self.END_TOK.s)
    self.tokens = set([])
    self.strings = set([])
    self.i2t = defaultdict(lambda: self.unk)
    self.s2t = defaultdict(lambda: self.unk)
    for string in strings:
        self.add_string(string)
    self.unk = self.s2t[unk_string]
    if self.START_TOK is not None:
        self.START_TOK = self.s2t[self.START_TOK.s]
    if self.END_TOK is not None:
        self.END_TOK = self.s2t[self.END_TOK.s]

#### python count() usage examples
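# A quick refresher before the collected examples: itertools.count(start=0,
# step=1) is an infinite iterator yielding start, start+step, start+2*step, ...
import itertools

c = itertools.count(10, 2)
print(next(c), next(c), next(c))  # 10 12 14

# zip() with count() enumerates from an arbitrary offset:
for i, ch in zip(itertools.count(1), 'abc'):
    print(i, ch)  # 1 a / 2 b / 3 c
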
def load_from_corpus(cls, reader, remake=False, src_or_tgt="src"):
    vocab_fname = reader.fname + ".vocab-" + reader.mode + "-" + src_or_tgt
    if not remake and os.path.isfile(vocab_fname):
        return Vocab.load(vocab_fname)
    else:
        v = Vocab()
        count = 0  # count of sentences
        for item in reader:
            toklist = item
            for token in toklist:
                v.add(token)
            count += 1
            if count % 10000 == 0:
                print("...", count, end="")
        print("\nSaving " + src_or_tgt + " vocab of size", v.size)
        v.START_TOK = v[reader.begin] if reader.begin is not None else None
        v.END_TOK = v[reader.end] if reader.end is not None else None
        v.save(vocab_fname)
        return v

#### reader class
def __init__(self, cim_files):
    if type(cim_files) == str:
        cim_files = [cim_files]
    self.cim_files = cim_files
    xmlns = RDFXMLReader.xmlns(cim_files[0])
    self._package_map = RDFXMLReader.get_cim_ns(xmlns)[1]
    self._prim_onet = self._get_equipment_cls(CIM2Matpower._primary_one_t)
    self._prim_twot = self._get_equipment_cls(CIM2Matpower._primary_two_t)
    self._sec_onet = self._get_equipment_cls(CIM2Matpower._secondary_one_t)
    self._sec_twot = self._get_equipment_cls(CIM2Matpower._secondary_two_t)
    # reinitialize the automatically created ids
    Topology_BusBranch.Bus.id_generator = count(1)
    Topology_NodeBreaker.Node.id_generator = count(1)
    Topology_NodeBreaker.Area.id_generator = count(1)
    Topology_NodeBreaker.Zone.id_generator = count(1)
    Topology_NodeBreaker.Branch.id_generator = count(1)
    Topology_NodeBreaker.Generator.id_generator = count(1)

####################################################################################################################
def play(self, nb_rounds):
    # Python 2 snippet: generators are advanced with .next() and print is
    # still a statement.
    img_saver = save_image()
    img_saver.next()
    game_cnt = it.count(1)
    for i in xrange(nb_rounds):
        game = self.game(width=self.width, height=self.height)
        screen, _ = game.next()
        img_saver.send(screen)
        frame_cnt = it.count()
        try:
            state = np.asarray([screen] * self.nb_frames)
            while True:
                frame_cnt.next()
                act_idx = np.argmax(
                    self.model.predict(state[np.newaxis]), axis=-1)[0]
                screen, _ = game.send(self.actions[act_idx])
                state = np.roll(state, 1, axis=0)
                state[0] = screen
                img_saver.send(screen)
        except StopIteration:
            print 'Saved %4i frames for game %3i' % (
                frame_cnt.next(), game_cnt.next())
    img_saver.close()

#### client.py -- project: Daniel-Arbuckles-Mastering-Python, author: PacktPublishing

async def client(host, port):
    reader, writer = await asyncio.open_connection(host, port)
    for i in itertools.count():
        writer.write(b'ping\n')
        response = await reader.readline()
        if response == b'pong\n':
            print(i)
        else:
            return

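# A minimal sketch of a matching peer, assuming only the standard asyncio
# API; the handler name and port are illustrative, not from the original
# project.
import asyncio

async def pong_server(host='127.0.0.1', port=8888):
    async def handle(reader, writer):
        # Answer every b'ping\n' line with b'pong\n' until the client stops.
        while await reader.readline() == b'ping\n':
            writer.write(b'pong\n')
            await writer.drain()
        writer.close()
    server = await asyncio.start_server(handle, host, port)
    async with server:
        await server.serve_forever()

# e.g. asyncio.run(pong_server()) in one process, then
# asyncio.run(client('127.0.0.1', 8888)) in another.
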
def retry(fn):
    """ Barebones retry decorator
    """
    def wrapper(*args, **kwargs):
        exceptions = AssertionError
        max_retries = 3
        delay = 1
        counter = itertools.count()
        while True:
            try:
                return fn(*args, **kwargs)
            except exceptions:
                if next(counter) == max_retries:
                    raise
                time.sleep(delay)
    return wrapper

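# Usage sketch for the decorator above (the flaky function is illustrative
# and assumes retry and its time import are in scope):
import itertools

attempts = itertools.count(1)

@retry
def flaky():
    # Fails on the first two calls; retry() absorbs both AssertionErrors
    # and the third attempt succeeds.
    assert next(attempts) >= 3, "transient failure"
    return "ok"

print(flaky())  # "ok", after two retried failures with one-second delays
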
def __init__(self, maxSize=100, resizeTo=70):
    '''
    ============== =========================================================
    **Arguments:**
    maxSize        (int) This is the maximum size of the cache. When some
                   item is added and the cache would become bigger than
                   this, it's resized to the value passed on resizeTo.
    resizeTo       (int) When a resize operation happens, this is the size
                   of the final cache.
    ============== =========================================================
    '''
    assert resizeTo < maxSize
    self.maxSize = maxSize
    self.resizeTo = resizeTo
    self._counter = 0
    self._dict = {}
    if _IS_PY3:
        self._nextTime = itertools.count(0).__next__
    else:
        self._nextTime = itertools.count(0).next

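# How the count()-based clock might be used -- a hypothetical companion
# method, not part of the original class: stamp each entry with the next
# "time" and evict the oldest entries when the cache grows past maxSize.
def put(self, key, value):
    self._dict[key] = (self._nextTime(), value)
    if len(self._dict) > self.maxSize:
        oldest_first = sorted(self._dict.items(), key=lambda kv: kv[1][0])
        for k, _ in oldest_first[:len(self._dict) - self.resizeTo]:
            del self._dict[k]
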
def _load_image_data(self, file_, source):
    """Read image settings from SVG tags"""
    tree = etree.parse(source, self.parser)
    root = tree.getroot()
    xhtml = "{%s}" % root.nsmap[None]
    imagedata = ImageData(file_, tree)
    transform_tag = root.find(".//%s*[@id='transform']" % xhtml)
    imagedata.set_transform(transform_tag)
    background_tag = root.find(".//%s*[@id='background']" % xhtml)
    imagedata.set_background(background_tag)
    counter = count(1)
    while True:
        index = next(counter)
        id_ = "color" + str(index)
        tag = root.find(".//%s*[@id='%s']" % (xhtml, id_))
        if tag is None:
            break
        imagedata.set_color(tag, id_)
    return imagedata

def get_percent_identity(a_aln_seq, b_aln_seq):
    """Get the percent identity between two alignment strings"""
    if len(a_aln_seq) != len(b_aln_seq):
        raise ValueError('Sequence lengths not equal - was an alignment run?')
    count = 0
    gaps = 0
    for n in range(0, len(a_aln_seq)):
        if a_aln_seq[n] == b_aln_seq[n]:
            if a_aln_seq[n] != "-":
                count += 1
            else:
                gaps += 1
    return count / float((len(a_aln_seq) - gaps))

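# Worked call: positions 0 and 3 match ('A' and 'T'), position 2 is a shared
# gap and is excluded from the denominator, so the result is 2 / (4 - 1).
print(get_percent_identity('AC-T', 'AA-T'))  # 0.666...
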
def initialize(self, impl, time_func=None, **kwargs):
    super(PollIOLoop, self).initialize(**kwargs)
    self._impl = impl
    if hasattr(self._impl, 'fileno'):
        set_close_exec(self._impl.fileno())
    self.time_func = time_func or time.time
    self._handlers = {}
    self._events = {}
    self._callbacks = []
    self._callback_lock = threading.Lock()
    self._timeouts = []
    self._cancellations = 0
    self._running = False
    self._stopped = False
    self._closing = False
    self._thread_ident = None
    self._blocking_signal_threshold = None
    self._timeout_counter = itertools.count()
    # Create a pipe that we send bogus data to when we want to wake
    # the I/O loop when it is idle
    self._waker = Waker()
    self.add_handler(self._waker.fileno(),
                     lambda fd, events: self._waker.consume(),
                     self.READ)

def values(self):
    """The values of the registry key

    :type: [:class:`KeyValue`] - A list of values"""
    res = []
    with ExpectWindowsError(259):
        for i in itertools.count():
            name_value_type = _winreg.EnumValue(self.phkey, i)
            # _winreg does not support REG_QWORD
            # See http://bugs.python.org/issue23026
            if name_value_type[2] == REG_QWORD:
                name = name_value_type[0]
                value = struct.unpack("<Q", name_value_type[1])[0]
                type = name_value_type[2]
                name_value_type = name, value, type
            res.append(name_value_type)
    return [KeyValue(*r) for r in res]

def read_string(self, addr):
    """Read an ascii string at ``addr``"""
    res = []
    read_size = 0x100
    readden = 0
    for i in itertools.count():
        try:
            x = self.read_memory(addr + readden, read_size)
        except winproxy.Kernel32Error as e:
            if read_size == 2:
                raise
            # Handle a read at the end of a page:
            # if the read failed, retry with half the size.
            # read_size must remain a multiple of 2
            # (Python 2 integer division).
            read_size = read_size / 2
            continue
        readden += read_size
        if "\x00" in x:
            res.append(x.split("\x00", 1)[0])
            break
        res.append(x)
    return "".join(res)

def assert_mpa_identical(mpa1, mpa2, decimal=np.infty):
    """Verify that two MPAs are completely identical
    """
    assert len(mpa1) == len(mpa2)
    assert mpa1.canonical_form == mpa2.canonical_form
    assert mpa1.dtype == mpa2.dtype
    for i, lten1, lten2 in zip(it.count(), mpa1.lt, mpa2.lt):
        if decimal is np.infty:
            assert_array_equal(lten1, lten2,
                               err_msg='mismatch in lten {}'.format(i))
        else:
            assert_array_almost_equal(lten1, lten2, decimal=decimal,
                                      err_msg='mismatch in lten {}'.format(i))
    # TODO: We should make a comprehensive comparison between `mpa1`
    # and `mpa2`. Are we missing other things?

def get(word):
    '''
    make the word unique by appending a numbered suffix as necessary
    :param word: such as 'foo'
    :return: word with suffix such as 'foo_1'
    '''
    # global words
    # if word not in words:
    #     words.add(word)
    #     return word
    root = rootword(word)  # word if len(chop) > 1 and not chop[1].isdigit() else chop[0]
    for n in itertools.count(1):
        candidate = '%s_%d' % (root, n)
        if candidate in words:
            continue
        words.add(candidate)
        return candidate

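# The same suffixing idiom, self-contained (the module-level `words` set and
# rootword() helper are replaced by local stand-ins here):
import itertools

_seen = set()

def unique_name(root):
    # Try root_1, root_2, ... until an unused name is found.
    for n in itertools.count(1):
        candidate = '%s_%d' % (root, n)
        if candidate not in _seen:
            _seen.add(candidate)
            return candidate

print(unique_name('foo'))  # foo_1
print(unique_name('foo'))  # foo_2
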
def exclude_data_files(self, package, src_dir, files):
    """Filter filenames for package's data files in 'src_dir'"""
    globs = (
        self.exclude_package_data.get('', [])
        + self.exclude_package_data.get(package, [])
    )
    bad = set(
        item
        for pattern in globs
        for item in fnmatch.filter(
            files,
            os.path.join(src_dir, convert_path(pattern)),
        )
    )
    seen = collections.defaultdict(itertools.count)
    return [
        fn
        for fn in files
        if fn not in bad
        # ditch dupes
        and not next(seen[fn])
    ]

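# The defaultdict(itertools.count) trick above is worth isolating: the first
# lookup of a name creates a fresh counter whose first next() is 0 (falsy),
# so the name is kept; later lookups yield 1, 2, ... (truthy) and are dropped.
import collections
import itertools

def dedupe_keep_order(items):
    seen = collections.defaultdict(itertools.count)
    return [x for x in items if not next(seen[x])]

print(dedupe_keep_order(['a', 'b', 'a', 'c', 'b']))  # ['a', 'b', 'c']
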
def renumber_dfa(dfa, base=0):
    c = itertools.count(base)
    mapping = {}

    def remap(state):
        if state in mapping:
            newnum = mapping[state]
        else:
            newnum = next(c)
            mapping[state] = newnum
        return newnum

    newdfa = DFA(remap(dfa.initial))
    for src, trans in iteritems(dfa.transitions):
        for label, dest in iteritems(trans):
            newdfa.add_transition(remap(src), label, remap(dest))
    for finalstate in dfa.final_states:
        newdfa.add_final_state(remap(finalstate))
    for src, dest in iteritems(dfa.defaults):
        newdfa.set_default_transition(remap(src), remap(dest))
    return newdfa

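# The remap pattern standalone: hand out dense consecutive ids to arbitrary
# hashable states on first sight (a sketch, not part of the DFA module):
import itertools

def make_dense_id():
    ids = {}
    fresh = itertools.count()
    def dense_id(state):
        if state not in ids:
            ids[state] = next(fresh)
        return ids[state]
    return dense_id

dense_id = make_dense_id()
print(dense_id('q7'), dense_id('q3'), dense_id('q7'))  # 0 1 0
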
def u_to_utf8(dfa, base=0):
    c = itertools.count(base)
    transitions = dfa.transitions
    for src, trans in iteritems(transitions):
        trans = transitions[src]
        for label, dest in list(iteritems(trans)):
            if label is EPSILON:
                continue
            elif label is ANY:
                raise Exception
            else:
                assert isinstance(label, text_type)
                label8 = label.encode("utf8")
                for i, byte in enumerate(label8):
                    if i < len(label8) - 1:
                        st = next(c)
                        dfa.add_transition(src, byte, st)
                        src = st
                    else:
                        dfa.add_transition(src, byte, dest)
                del trans[label]

def old_win_idx(self, window_id):
    r = None
    for winnr, window in zip(count(1), vim.windows):
        curwindow_id = self._vim_getwinvar(winnr, 'powerline_window_id')
        if curwindow_id and not (r is not None and curwindow_id == window_id):
            curwindow_id = int(curwindow_id)
        else:
            curwindow_id = self.last_window_id
            self.last_window_id += 1
            self._vim_setwinvar(winnr, 'powerline_window_id', curwindow_id)
        statusline = self.construct_window_statusline(curwindow_id)
        if self._vim_getwinvar(winnr, '&statusline') != statusline:
            self._vim_setwinvar(winnr, '&statusline', statusline)
        if (curwindow_id == window_id) if window_id else (window is vim.current.window):
            r = (window, curwindow_id, winnr)
    return r

def _get_entries(self, list_id, list_id_type):
    name = None
    entries = []
    for pagenum in itertools.count(1):
        list_info = self._download_json(
            'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
                pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
        entries.extend([
            self.url_result(
                'http://tvpot.daum.net/v/%s' % clip['vid'])
            for clip in list_info['clip_list']
        ])
        if not name:
            name = list_info.get('playlist_bean', {}).get('name') or \
                list_info.get('potInfo', {}).get('name')
        if not list_info.get('has_more'):
            break
    return name, entries

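# The extractor snippets here and below share one shape: count(1) supplies
# page numbers until the API reports no more pages. A generic sketch, with
# fetch_page as a stand-in for the per-site download call:
import itertools

def fetch_all(fetch_page):
    items = []
    for pagenum in itertools.count(1):
        chunk, has_more = fetch_page(pagenum)
        items.extend(chunk)
        if not has_more:
            break
    return items
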
def _real_extract(self, url):
    mobj = re.match(self._VALID_URL, url)
    user = mobj.group('user')
    u = mobj.group('u')
    profile_url = "%sapi/users/profiles/%s%s" % (
        self._VINE_BASE_URL, 'vanity/' if not u else '', user)
    profile_data = self._download_json(
        profile_url, user, note='Downloading user profile data')
    user_id = profile_data['data']['userId']
    timeline_data = []
    for pagenum in itertools.count(1):
        timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
            self._VINE_BASE_URL, user_id, pagenum)
        timeline_page = self._download_json(
            timeline_url, user, note='Downloading page %d' % pagenum)
        timeline_data.extend(timeline_page['data']['records'])
        if timeline_page['data']['nextPage'] is None:
            break
    entries = [
        self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
    return self.playlist_result(entries, user)

def _extract_playlist(self, channel_id):
    info = self._download_json(
        '%s/kraken/channels/%s' % (self._API_BASE, channel_id),
        channel_id, 'Downloading channel info JSON')
    channel_name = info.get('display_name') or info.get('name')
    entries = []
    offset = 0
    limit = self._PAGE_LIMIT
    for counter in itertools.count(1):
        response = self._download_json(
            self._PLAYLIST_URL % (channel_id, offset, limit),
            channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
        page_entries = self._extract_playlist_page(response)
        if not page_entries:
            break
        entries.extend(page_entries)
        offset += limit
    return self.playlist_result(
        [self.url_result(entry) for entry in set(entries)],
        channel_id, channel_name)

def _get_n_results(self, query, n):
    """Get a specified number of results for a query"""
    entries = []
    for pagenum in itertools.count(0):
        result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
        info = self._download_json(result_url, query,
                                   note='Downloading results page ' + str(pagenum + 1))
        m = info['m']
        results = info['results']
        for (i, r) in enumerate(results):
            if (pagenum * 30) + i >= n:
                break
            mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
            e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
            entries.append(e)
        if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
            break
    return {
        '_type': 'playlist',
        'id': query,
        'entries': entries,
    }

def _extract_entries(self, id):
    video_ids = set()
    processed_urls = set()
    for pagenum in itertools.count(1):
        page_url = self._PAGE_TEMPLATE % (id, pagenum)
        webpage, urlh = self._download_webpage_handle_no_ff(
            page_url, id, 'Downloading page %s' % pagenum)
        if urlh.geturl() in processed_urls:
            self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
                page_url, urlh.geturl()), id)
            break
        processed_urls.add(urlh.geturl())
        for video_id in re.findall(r'data-xid="(.+?)"', webpage):
            if video_id not in video_ids:
                yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
                video_ids.add(video_id)
        if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
            break

def _entries(self, page, playlist_id):
    more_widget_html = content_html = page
    for page_num in itertools.count(1):
        for entry in self._process_page(content_html):
            yield entry
        mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
        if not mobj:
            break
        more = self._download_json(
            'https://youtube.com/%s' % mobj.group('more'), playlist_id,
            'Downloading page #%s' % page_num,
            transform_source=uppercase_escape)
        content_html = more['content_html']
        if not content_html.strip():
            # Some webpages show a "Load more" button but they don't
            # have more videos
            break
        more_widget_html = more['load_more_widget_html']

def convert_string_to_variable(address, length=None):
    variables = []
    current_value = []
    if length is None:
        for p_byte in itertools.count(address):
            c = getCurrentMemoryValue(p_byte)
            if c == 0:
                break
            current_value.append(chr(c))
            setConcreteMemoryValue(p_byte, c)
            var_id = convertMemoryToSymbolicVariable(MemoryAccess(p_byte, 1)).getId()
            variables.append(var_id)
    else:
        for p_byte in xrange(address, address + length):
            c = getCurrentMemoryValue(p_byte)
            current_value.append(chr(c))
            setConcreteMemoryValue(p_byte, c)
            var_id = convertMemoryToSymbolicVariable(MemoryAccess(p_byte, 1)).getId()
            variables.append(var_id)
    return variables, ''.join(current_value)

def _reset_local_state(self):
    # filter tracking
    self._filter_counter = itertools.count()
    self._log_filters = {}
    self._block_filters = {}
    self._pending_transaction_filters = {}
    # snapshot tracking
    self._snapshot_counter = itertools.count()
    self._snapshots = {}
    # raw accounts
    self._account_passwords = {}
    self._account_unlock = collections.defaultdict(lambda: False)

#
# Fork Rules
#

def generator_input(input_file, chunk_size):
    """Generator function to produce features and labels
    needed by keras fit_generator.
    """
    input_reader = pd.read_csv(tf.gfile.Open(input_file[0]),
                               names=CSV_COLUMNS,
                               chunksize=chunk_size,
                               na_values=" ?")
    for input_data in input_reader:
        input_data = input_data.dropna()
        label = pd.get_dummies(input_data.pop(LABEL_COLUMN))
        input_data = to_numeric_features(input_data)
        n_rows = input_data.shape[0]
        # Note: this returns on the first chunk, cycling its rows forever
        # via index % n_rows; the remaining chunks are never read.
        return ((input_data.iloc[[index % n_rows]], label.iloc[[index % n_rows]])
                for index in itertools.count())
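
# The count()-plus-modulo pattern above cycles a finite chunk forever, which
# is what fit_generator expects; the same shape on a plain list:
import itertools

rows = ['r0', 'r1', 'r2']
cycler = (rows[i % len(rows)] for i in itertools.count())
print([next(cycler) for _ in range(5)])  # ['r0', 'r1', 'r2', 'r0', 'r1']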