def _generate_negative_patches(self, negative_image_files, window_size, step, pyramid_scale, threshold_prob):
widgets = ["Generating negative samples which represent high probability: ",
progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(negative_image_files), widgets=widgets).start()
for i, image_file in enumerate(negative_image_files):
image = cv2.imread(image_file)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# detect objects in the image
(boxes, probs) = self.run(image,
window_size, step, pyramid_scale,
threshold_prob,
do_nms=False,
show_result=False,
show_operation=False)
pbar.update(i)
for (y1, y2, x1, x2), prob in zip(boxes, probs):
negative_patch = cv2.resize(image[y1:y2, x1:x2], (window_size[1], window_size[0]), interpolation=cv2.INTER_AREA)
yield negative_patch, prob
pbar.finish()
# todo: code review
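# Usage sketch for the generator above (hedged: `detector` stands in for an
# instance of the enclosing class, and the window/step/scale values are
# illustrative, not taken from the original project):
#
# hard_negatives = []
# for patch, prob in detector._generate_negative_patches(
#         negative_image_files=["bg_001.jpg", "bg_002.jpg"],
#         window_size=(32, 96), step=8,
#         pyramid_scale=0.7, threshold_prob=0.5):
#     hard_negatives.append(patch)   # collect false positives for retraining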
def createSQL(table,values,name='insert'):
'''
Generate the SQL INSERT statements, batching up to ~1k values per INSERT
and up to ~1k INSERT lines per file (~1M values total per SQL file)
'''
logger.info('Generating SQL file')
queryInsert='INSERT INTO %s (itemid,clock,num,value_min,value_avg,value_max) VALUES' % table
i=0 # Drives the progress bar
x=0 # Counts values in the current INSERT statement
y=0 # Counts INSERT lines in the current file
z=0 # File name suffix counter
valuesLen=values.__len__()
sqlFile='%s.sql.%d' % (name,z)
logger.debug('Total items for %s: %d' % (name,valuesLen))
if valuesLen > 0:
bar=ProgressBar(maxval=valuesLen,widgets=[Percentage(), ReverseBar(), ETA(), RotatingMarker(), Timer()]).start()
for value in values:
i+=1
x+=1
if x != 1: # Not the first value of this statement, so append with a comma
sqlInsert='%s,%s' % (sqlInsert,value)
else:
sqlInsert=value
if y >= 1000: # After 1k INSERT lines, roll over to a new file
z+=1
y=0
if x >= 1000 or i == valuesLen: # Write the statement once it has 1k values or the list is exhausted
sqlFile='%s.sql.%d' % (name,z)
fileAppend(f=sqlFile,content='%s %s;\n' % (queryInsert,sqlInsert))
x=0
y+=1
sqlInsert=''
if args.loglevel.upper() != 'DEBUG': # Don't print the progress bar in debug mode
bar.update(i)
bar.finish()
else:
logger.warning('No values received')
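# Batching sketch for createSQL above (the table name and value strings are
# illustrative; real entries arrive pre-formatted as "(itemid,clock,...)"
# tuples). With 2500 values it writes three INSERT statements to insert.sql.0
# (two with 1000 tuples, one with the remaining 500), and only rolls over to
# insert.sql.1 once 1000 statements have gone into a single file.
#
# values = ['(10001,1500000000,60,0.0,0.5,1.0)'] * 2500
# createSQL('trends', values, name='insert')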
def discovery_disable_all(status=0):
"""
Alterar status de todos os discoveries *auto*
Status 0 = enable
Status 1 = disable
"""
logger.info('Disabling all network discoveries')
druleids = zapi.drule.get(output=[ 'druleid', 'iprange', 'name', 'proxy_hostid', 'status' ],
selectDChecks='extend', filter={ 'status': 0 })
if ( druleids.__len__() == 0 ):
logger.info('Done')
return
bar = ProgressBar(maxval=druleids.__len__(),widgets=[Percentage(), ReverseBar(), ETA(), RotatingMarker(), Timer()]).start()
i = 0
for x in druleids:
params_disable = {
'druleid': x['druleid'],
'iprange': x['iprange'],
'name': x['name'],
'dchecks': x['dchecks'],
'status': 1
}
out = zapi.drule.update(**params_disable)
logger.echo = False
if out:
logger.debug('\tNew status: %s (%s) --> %d' % (x['name'],out['druleids'],status))
else:
logger.warning('\tFAILED to change status: %s (%s) --> %d' % (x['name'],out['druleids'],status))
i += 1
bar.update(i)
logger.echo = True
bar.finish()
logger.info('Done')
return
def desabilitaItensNaoSuportados():
query = {
"output": "extend",
"filter": {
"state": 1
},
"monitored": True
}
filtro = raw_input('Search filter for key_? [blank = ENTER] ')
if filtro.__len__() > 0:
query['search']={'key_': filtro}
limite = raw_input('Item limit? [blank = ENTER] ')
if limite.__len__() > 0:
try:
query['limit']=int(limite)
except ValueError:
print 'Invalid limit'
raw_input("Press ENTER to go back")
main()
opcao = raw_input("Confirm operation? [y/n]")
if opcao == 'y' or opcao == 'Y':
itens = zapi.item.get(query)
print 'Found {} items'.format(len(itens))
bar = ProgressBar(maxval=itens.__len__(),widgets=[Percentage(), ReverseBar(), ETA(), RotatingMarker(), Timer()]).start()
i = 0
for x in itens:
result = zapi.item.update({"itemid": x['itemid'], "status": 1})
i += 1
bar.update(i)
bar.finish()
print "Itens desabilitados!!!"
raw_input("Pressione ENTER para continuar")
main()
def __init__(self, maxval):
pbar.ProgressBar.__init__(self, widgets=[pbar.Percentage(), ' ',
pbar.Bar(), ' ', pbar.ETA(), ' ', GeneratorSpeed()],
maxval=maxval)
# def update(self, value=None):
# if value is None:
# pbar.ProgressBar.update(self, self.currval + 1)
# else:
# pbar.ProgressBar.update(self, value)
def find_samples_bounding_rect(path):
min_w = 0
min_h = 0
print ('finding bounding box:')
bar = progressbar.ProgressBar(maxval=num_classes*num_samples,
widgets=[
' [', progressbar.Timer(), '] ',
progressbar.Bar(),
' (', progressbar.ETA(), ') ',
])
bar.start()
counter = 0
for i in range(1, num_classes + 1):
for j in range(1, num_samples + 1):
filename = '{0}/Sample{1:03d}/img{1:03d}-{2:03d}.png'.format(path, i, j)
# opencv read -> Gray Image -> Bounding Rect
im = cv2.imread(filename)
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
imgray = cv2.bitwise_not(imgray)
_, contours, _ = cv2.findContours(imgray, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
_, _, w, h = cv2.boundingRect(contours[len(contours) - 1])
# track the largest bounding box seen so far
min_w = max(min_w, w)
min_h = max(min_h, h)
# update progress bar
counter = counter + 1
bar.update(counter)
bar.finish()
return min_w, min_h
def bar(ndigits=3, **kwargs):
if progressbar.__version__ > '3':
counterfmt = '%(value)'+str(ndigits)+'d'
else:
counterfmt = '%'+str(ndigits)+'d'
pbar = IncrementingProgressBar(widgets=[
progressbar.Percentage(), '|', progressbar.Counter(counterfmt),
progressbar.Bar(), progressbar.ETA()], **kwargs)
return pbar
def pbar(maxval):
widgets = [progressbar.Percentage(), ' ', progressbar.Bar(), progressbar.ETA()]
return progressbar.ProgressBar(widgets=widgets, maxval=maxval).start()
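# Usage sketch for the pbar() helper above (assumes `progressbar` is the
# progressbar2 package imported at module level, as elsewhere on this page):
#
# import time
# bar = pbar(100)
# for i in range(100):
#     time.sleep(0.01)   # stand-in for real work
#     bar.update(i + 1)
# bar.finish()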
def _init_pbar(self, ini_val, max_val, label):
self._pbar = progressbar.ProgressBar(
min_value=0,
max_value=max_val,
initial_value=ini_val,
widgets=[
label,
progressbar.Percentage(),
'(', progressbar.SimpleProgress(), ')',
progressbar.Bar(),
progressbar.Timer(), ' ',
'|', progressbar.ETA(),
]
)
self._pbar.start()
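# Usage sketch for _init_pbar above (the enclosing class is not shown on this
# page; `work_items`, `handle`, and the label are hypothetical). Inside another
# method of the same class one might write:
#
# self._init_pbar(0, len(work_items), 'processing ')
# for i, item in enumerate(work_items, 1):
#     handle(item)   # hypothetical per-item work
#     self._pbar.update(i)
# self._pbar.finish()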
def load_corpus(self, corenlpserver, process=True):
"""Load the CHEMDNER corpus file on the dir element"""
# open filename and parse lines
total_lines = sum(1 for line in open(self.path))
widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), ' ', pb.Timer()]
pbar = pb.ProgressBar(widgets=widgets, maxval=total_lines).start()
n_lines = 1
time_per_abs = []
with io.open(self.path, 'r', encoding="utf-8") as inputfile:
for line in inputfile:
t = time.time()
# each line is PMID title abs
tsv = line.split('\t')
doctext = tsv[1].strip().replace("<", "(").replace(">", ")").replace(". ", ", ") + ". "
doctext += tsv[2].strip().replace("<", "(").replace(">", ")")
newdoc = Document(doctext, process=False,
did=tsv[0], title=tsv[1].strip() + ".")
newdoc.sentence_tokenize("biomedical")
if process:
newdoc.process_document(corenlpserver, "biomedical")
self.documents[newdoc.did] = newdoc
abs_time = time.time() - t
time_per_abs.append(abs_time)
pbar.update(n_lines)
n_lines += 1
pbar.finish()
abs_avg = sum(time_per_abs)*1.0/len(time_per_abs)
logging.info("average time per abstract: %ss" % abs_avg)
def load_corpus(self, corenlpserver, process=True):
soup = BeautifulSoup(codecs.open(self.path, 'r', "utf-8"), 'html.parser')
docs = soup.find_all("article")
widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), ' ', pb.Timer()]
pbar = pb.ProgressBar(widgets=widgets, maxval=len(docs)).start()
n_lines = 1
time_per_abs = []
for doc in docs:
did = "GENIA" + doc.articleinfo.bibliomisc.text.split(":")[1]
title = doc.title.sentence.get_text()
sentences = doc.abstract.find_all("sentence")
doc_sentences = []
doc_text = title + " "
doc_offset = 0
for si, s in enumerate(sentences):
t = time.time()
stext = s.get_text()
sid = did + ".s" + str(si)
doc_text += stext + " "
this_sentence = Sentence(stext, offset=doc_offset, sid=sid, did=did)
doc_offset = len(doc_text)
doc_sentences.append(this_sentence)
newdoc = Document(doc_text, process=False, did=did)
newdoc.sentences = doc_sentences[:]
newdoc.process_document(corenlpserver, "biomedical")
#logging.info(len(newdoc.sentences))
self.documents[newdoc.did] = newdoc
abs_time = time.time() - t
time_per_abs.append(abs_time)
logging.debug("%s sentences, %ss processing time" % (len(newdoc.sentences), abs_time))
pbar.update(n_lines)
n_lines += 1
pbar.finish()
abs_avg = sum(time_per_abs)*1.0/len(time_per_abs)
logging.info("average time per abstract: %ss" % abs_avg)
def doUploadFileProgress(self,filePath, url):
startTime = getNow()
result = False
try:
widgets = ['Progress: ', Percentage(), ' ', Bar(
marker=RotatingMarker('>-=')), ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=widgets, maxval=os.path.getsize(filePath)).start()
progress = Progress()
fileSizeStr = formatSize(os.path.getsize(filePath))
logger.info("??????{0} ?? {1}".format(filePath,fileSizeStr))
stream = file_with_callback(filePath, 'rb', progress.update,os.path.basename(filePath),pbar)
params = {"filedata": stream}
datagen, headers = multipart_encode(params)
upload_request =urllib2.Request(url, datagen, headers)
response = urllib2.urlopen(upload_request).read()
endTime = getNow()
totalTime = caltime(startTime, endTime)
logger.info("Uploaded file {0}: start {1}, end {2}, size {3}, elapsed {4}, response {5}"
.format(filePath, startTime, endTime, fileSizeStr, totalTime, response))
# The response may contain extra data around the JSON body, which makes json.loads fail with errmsg("Extra data", s, end, len(s)), so check the raw string for a zero code first
if "code\":0" in response.replace(' ', ''):
result = True
else:
result = json.loads(response)["code"] == 0
except Exception as e:
logger.error("??????{0} exception: {1}".format(filePath,e))
return result
def _addresses_to_check_with_caching(self, show_progress=True):
num_addrs = len(list(self._addresses_to_check()))
widgets = ['ROP: ', progressbar.Percentage(), ' ',
progressbar.Bar(marker=progressbar.RotatingMarker()),
' ', progressbar.ETA(), ' ', progressbar.FileTransferSpeed()]
progress = progressbar.ProgressBar(widgets=widgets, maxval=num_addrs)
if show_progress:
progress.start()
self._cache = dict()
seen = dict()
for i, a in enumerate(self._addresses_to_check()):
if show_progress:
progress.update(i)
try:
bl = self.project.factory.block(a)
if bl.size > self._max_block_size:
continue
block_data = bl.bytes
except (SimEngineError, SimMemoryError):
continue
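# Blocks with identical bytes behave identically (unless they use IP-relative
# addressing), so duplicates are recorded against the first address analyzed
# rather than being yielded again.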
if block_data in seen:
self._cache[seen[block_data]].add(a)
continue
else:
if len(bl.vex.constant_jump_targets) == 0 and not self._block_has_ip_relative(a, bl):
seen[block_data] = a
self._cache[a] = set()
yield a
if show_progress:
progress.finish()
def update_salary_history(sport, min_date=None, max_date=None):
min_date = min_date or datetime.datetime.today() - datetime.timedelta(days=1)
max_date = max_date or datetime.datetime.today()
if isinstance(min_date, basestring):
min_date = parser.parse(min_date)
if isinstance(max_date, basestring):
max_date = parser.parse(max_date)
date = min_date
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()],
maxval=int((max_date-min_date).total_seconds() / (60*60*24)) + 1)
pbar.start()
saved = 0
hit = 0
while date <= max_date:
time.sleep(1)
day_salaries = load_positions_for_day(sport, date)
if len(day_salaries) > 0:
save_rg_salary_info(sport, date, day_salaries)
saved += 1
hit += 1
date += datetime.timedelta(days=1)
pbar.update(value=hit)
pbar.finish()
return saved
def load_overview_pages(players):
"""
Hit the overview page and load gamelog_url_list for each of the players in the player dict.
Maybe this should be in the webio submodule? I am leaving it here since it controls scraping program flow.
:param players: player dict
:return dict: player dict
"""
# Helper function to guess which position a player plays from the overview table of stats.
# Just grab the position from the most recent year in which it was defined, and return that.
def quick_position_guess(overview_table):
return overview_table.dropna(subset=['Pos'])['Pos'].iloc[-1]
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
print 'Accessing and parsing overview pages...'
for i, (bref_id, player_dict) in pbar(list(enumerate(players.items()))):
overview_soup = getSoupFromURL(players[bref_id]['overview_url'])
players[bref_id]['overview_url_content'] = overview_soup.text
# the links to each year's game logs are in <li> tags, and the text contains 'Game Logs'
# so we can use those to pull out our urls.
for li in overview_soup.find_all('li'):
if 'Game Logs' in li.getText():
game_log_links = li.findAll('a')
for game_log_link in game_log_links:
players[bref_id]['gamelog_url_list'].append('http://www.basketball-reference.com' + game_log_link.get('href'))
player_name = overview_soup.find('h1').text
players[bref_id]['info']['name'] = player_name
# Read (guess?) player's position
overview_table = dfFromOverviewPage(overview_soup)
if len(overview_table.dropna(subset=['Pos'])) > 0:
players[bref_id]['info']['pos'] = quick_position_guess(overview_table)
else:
players[bref_id]['info']['pos'] = '?' # this will only happen for chumps but by defining a value we should block exceptions
return players
def update_players(year=None):
year_to_update = year
if not year_to_update:
year_to_update = datetime.date.today().year
if datetime.date.today().month > 8: # it's really the 201X-201(x+1) season, we should use x+1 as year
year_to_update += 1
logging.info("update_players: Loading all stats for new players and re-examining stats from %d" % year_to_update)
scrape_overview_for_new_players()
players = load_overview_dict()
players = load_dataframes(players)
# Identify players we know of, but haven't loaded full stats for.
# This will include any players we just found with scrape_overview_for_new_players.
players_to_load = [p for p in players if 'gamelog_data' not in players[p]]
if players_to_load:
logging.info("update_players: loading first-time stats for %d players", len(players_to_load))
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
for player in pbar(players_to_load):
logging.info('update_players: loading first-time stats for %s', player)
players = load_player(players, player)
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
for player in pbar(players.keys()):
# found name, load player data
logging.info('update_players: updating player data for %s...', player)
players = update_player(players, player, year=year_to_update)
save_dataframes(players)
update_mapping_df(players)
def get_active_players(letters=list('abcdefghijklmnopqrstuvwxyz')):
players = []
print 'Loading currently active players from basketball-reference.com...'
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
for letter in pbar(letters):
letter_page = getSoupFromURL('http://www.basketball-reference.com/players/%s/' % (letter))
# we know that all the currently active players have <strong> tags, so we'll limit our names to those
current_names = letter_page.findAll('strong')
for n in current_names:
name_data = n.children.next()
full_url = 'http://www.basketball-reference.com' + name_data.attrs['href']
bref_id = bbr_id_regex.match(full_url).group('pid')
players.append((bref_id, full_url))
players = dict(players)
return players
def get_active_players():
letters = list('abcdefghijklmnopqrstuvwxyz')
player_and_url_list = []
print 'Checking currently active players on baseball-reference.com...'
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
for letter in pbar(letters):
letter_page = getSoupFromURL('http://www.baseball-reference.com/players/%s/' % (letter))
# we don't just need active players (<b> tags), we need anyone who played in 2015!
prefix_sections = letter_page.findAll('pre')
for section in prefix_sections:
player_and_url_list += list(_parse_bsbr_prefix_section(section))
bref_id_dict = dict(player_and_url_list)
return bref_id_dict
def update_numberfire_history():
# Start by updating our slug dict and overall numberfire player information
overall_stats = scrape_numberfire_overview_page()
save_nf_overview_data(sport, overall_stats)
# We only load & update numberfire slug information for players appearing in the most recent batch of overview data
# and only if we are also able to match this player to a BREF ID. A side effect of this is that we will make no
# predictions for any NBA players who haven't played yet this year.
pids_to_load = []
for ix, row in overall_stats.iterrows():
pid, confidence = name2nbaid(row['name_player'], player_team=row['name_team'], get_confidence=True)
if confidence > 75:
pids_to_load.append((pid, row['slug_player']))
old_predictions = load_nf_histplayerinfo(sport, identifiers_to_load=pids_to_load)
scraped_salaries = {}
new_dataframes, updated_dataframes = 0, 0
print "Scraping updated player predictions from Numberfire..."
pbar = progressbar.ProgressBar(widgets=[progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA()])
for pid, slug in pbar(pids_to_load):
time.sleep(1)
player_df, salary_df = load_stats_tables_from_history_page(nf_player_url.format(slug=slug))
old_player_df = old_predictions.get(pid)
if old_player_df is None:
old_predictions[pid] = player_df
new_dataframes += 1
else:
try:
new_data = old_player_df.combine_first(player_df)
old_predictions[pid] = new_data
except ValueError as ex:
ipdb.set_trace()
updated_dataframes += 1
scraped_salaries[pid] = salary_df
logging.info('Saving scraped predictions (%d updated, %d added)', updated_dataframes, new_dataframes)
save_nf_histplayerinfo(sport, old_predictions)
save_nf_salary_info(sport, scraped_salaries)
def make_progress_bar(name, size):
widgets = [
'%s: ' % name,
progressbar.Percentage(),
' ',
progressbar.Bar(),
' ',
progressbar.ETA(),
' ',
progressbar.DataSize(),
]
return progressbar.ProgressBar(widgets=widgets, max_value=size)
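# Usage sketch for make_progress_bar above (progressbar2 API; `stream`,
# `total_size`, and the chunk size are illustrative):
#
# bar = make_progress_bar('download', total_size)
# bar.start()
# received = 0
# for chunk in iter(lambda: stream.read(8192), b''):
#     received += len(chunk)
#     bar.update(received)
# bar.finish()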