def prepare_inception_data(o_dir, i_dir):
if not os.path.exists(o_dir):
os.makedirs(o_dir)
cnt = 0
bar = progressbar.ProgressBar(redirect_stdout=True,
max_value=progressbar.UnknownLength)
for root, subFolders, files in os.walk(i_dir):
if files:
for f in files:
                if f.lower().endswith('.jpg'):
                    f_name = str(cnt) + '_ins.' + f.split('.')[-1]
cnt += 1
                    file_dir = os.path.join(root, f)
                    dest_new_name = os.path.join(o_dir, f_name)
                    # copy straight to the numbered destination name
                    copy(file_dir, dest_new_name)
bar.update(cnt)
bar.finish()
print('Total number of files: {}'.format(cnt))
def load_images(o_dir, i_dir, n_images=3000, size=128):
prepare_inception_data(o_dir, i_dir)
image_list = []
done = False
cnt = 0
bar = progressbar.ProgressBar(redirect_stdout=True,
max_value=progressbar.UnknownLength)
for root, dirs, files in os.walk(o_dir):
if files:
for f in files:
cnt += 1
file_dir = os.path.join(root, f)
image_list.append(ip.load_image_inception(file_dir, 0))
bar.update(cnt)
if len(image_list) == n_images:
done = True
break
if done:
break
bar.finish()
print('Finished Loading Files')
return image_list
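A minimal usage sketch for the two loaders above; the directory paths are hypothetical and ip.load_image_inception is assumed to be the project's own preprocessing helper:

images = load_images('/tmp/inception_samples', '/data/gan_output', n_images=1000)
print('Loaded {} images for inception scoring'.format(len(images)))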
def render(self, length=None, progress=False):
"""
    Render this signal into a numpy array of floats. Return the array.
:param length: The length to render, in seconds. Optional.
:param progress: Whether to show a progress bar for rendering
"""
if progress and not progressbar:
print('Install the progressbar module to see a progress bar for rendering')
progress = False
duration = self.duration if length is None else length * SAMPLE_RATE
if duration == float('inf'):
duration = 3*SAMPLE_RATE
else:
duration = int(duration)
out = numpy.empty((duration, 1))
    pbar = progressbar.ProgressBar(
        widgets=['Rendering: ', progressbar.Percentage(), ' ',
                 progressbar.Bar(), ' ', progressbar.ETA()],
        maxval=duration - 1).start() if progress else None
for i in range(duration):
out[i] = self.amplitude(i)
if pbar: pbar.update(i)
if pbar: pbar.finish()
return out
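A hedged usage sketch, assuming a Signal subclass (SineWave here is hypothetical) that defines amplitude() and duration as the method above expects:

sig = SineWave(440)                              # hypothetical Signal subclass
samples = sig.render(length=2.0, progress=True)  # render 2 seconds of audio
print(samples.shape)                             # (2 * SAMPLE_RATE, 1)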
def multi_core_scrape(num_pages, db_coll):
'''
Map the API scrape across number of processors - 1 for performance boost.
INPUT:
num_pages: int, number of pages to scrape
db_coll: pymongo collection object, collection to add documents to
OUTPUT:
None, records inserted into MongoDB
'''
cpus = cpu_count() - 1
pool = Pool(processes=cpus)
pages = range(1, num_pages + 1)
employers = pool.map(scrape_api_page, pages)
pool.close()
pool.join()
    print('Inserting Employer Records into MongoDB . . .')
pbar = ProgressBar()
for page in pbar(employers):
db_coll.insert_many(page)
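A calling sketch, assuming a local MongoDB instance and the scrape_api_page worker defined elsewhere in the project; the database and collection names are placeholders:

from pymongo import MongoClient

client = MongoClient('localhost', 27017)
coll = client['scrape_db']['employers']
multi_core_scrape(50, coll)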
def main():
uri, outfile, dataset = get_arguments()
fd = tempfile.NamedTemporaryFile()
progress = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()])
def update(count, blockSize, totalSize):
if progress.maxval is None:
progress.maxval = totalSize
progress.start()
progress.update(min(count * blockSize, totalSize))
    urllib.request.urlretrieve(uri, fd.name, reporthook=update)  # urllib.urlretrieve on Python 2
if dataset == 'zinc12':
df = pandas.read_csv(fd.name, delimiter = '\t')
df = df.rename(columns={'SMILES':'structure'})
df.to_hdf(outfile, 'table', format = 'table', data_columns = True)
    elif dataset == 'chembl22':
        df = pandas.read_table(fd.name, compression='gzip')
        df = df.rename(columns={'canonical_smiles': 'structure'})
        df.to_hdf(outfile, 'table', format='table', data_columns=True)
else:
df = pandas.read_csv(fd.name, delimiter = '\t')
df.to_hdf(outfile, 'table', format = 'table', data_columns = True)
def main():
start = timer()
files = get_file_names()
smell_results = []
bar = progressbar.ProgressBar(max_value=len(files))
processed_files = 0
with concurrent.futures.ProcessPoolExecutor() as executor:
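        # executor.map returns results in input order, so zip() re-pairs each
        # file with the smells extracted from that same file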
for file, smell in zip(files, executor.map(worker, files)):
            smell_results.extend(smell)
processed_files += 1
bar.update(processed_files)
smell_results = [x for x in smell_results if x]
end = timer()
    print('Processing took {:.2f} seconds'.format(end - start))
dataminer = SmellDataMine()
dataminer.save_to_database(smell_results)
def knn_masked_data(trX,trY,missing_data_dir, input_shape, k):
raw_im_data = np.loadtxt(join(script_dir,missing_data_dir,'index.txt'),delimiter=' ',dtype=str)
raw_mask_data = np.loadtxt(join(script_dir,missing_data_dir,'index_mask.txt'),delimiter=' ',dtype=str)
# Using 'brute' method since we only want to do one query per classifier
# so this will be quicker as it avoids overhead of creating a search tree
knn_m = KNeighborsClassifier(algorithm='brute',n_neighbors=k)
prob_Y_hat = np.zeros((raw_im_data.shape[0],int(np.max(trY)+1)))
total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '),
                 progressbar.Bar()],
        maxval=total_images, term_width=50).start()
for i in range(total_images):
mask_im=load_image(join(script_dir,missing_data_dir,raw_mask_data[i][0]), input_shape,1).reshape(np.prod(input_shape))
mask = np.logical_not(mask_im > eps) # since mask is 1 at missing locations
v_im=load_image(join(script_dir,missing_data_dir,raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
rep_mask = np.tile(mask,(trX.shape[0],1))
# Corrupt whole training set according to the current mask
corr_trX = np.multiply(trX, rep_mask)
knn_m.fit(corr_trX, trY)
prob_Y_hat[i,:] = knn_m.predict_proba(v_im.reshape(1,-1))
pbar.update(i)
pbar.finish()
return prob_Y_hat
def preprocess(self, questions: List[QASetting],
answers: Optional[List[List[Answer]]] = None,
is_eval: bool = False) -> List[XQAAnnotation]:
if answers is None:
answers = [None] * len(questions)
preprocessed = []
if len(questions) > 1000:
bar = progressbar.ProgressBar(
max_value=len(questions),
widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') '])
for q, a in bar(zip(questions, answers)):
preprocessed.append(self.preprocess_instance(q, a))
else:
for q, a in zip(questions, answers):
preprocessed.append(self.preprocess_instance(q, a))
return preprocessed
def preprocess(self, questions: List[QASetting],
answers: Optional[List[List[Answer]]] = None,
is_eval: bool = False) -> List[MCAnnotation]:
if answers is None:
answers = [None] * len(questions)
preprocessed = []
if len(questions) > 1000:
bar = progressbar.ProgressBar(
max_value=len(questions),
widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') '])
for i, (q, a) in bar(enumerate(zip(questions, answers))):
preprocessed.append(self.preprocess_instance(i, q, a))
else:
for i, (q, a) in enumerate(zip(questions, answers)):
preprocessed.append(self.preprocess_instance(i, q, a))
return preprocessed
def evaluate_network(self, ckpt):
path = config.EVAL_DIR + '/Data/'
self.filename = path + 'coco_%s_%s_%i.json' % (self.loader.split, args.run_name, ckpt)
detections = []
filenames = self.loader.get_filenames()
bar = progressbar.ProgressBar()
for i in bar(range(len(filenames))):
img_id = filenames[i]
detections.extend(self.process_image(img_id, i))
with open(self.filename, 'w') as f:
json.dump(detections, f)
if args.segment:
iou = self.compute_mean_iou()
cocoEval = self.compute_ap()
return self.compact_results(cocoEval.stats, ckpt)
def to_pairs(self, pairs):
"""Copies labels and scores from self to pairs matrix.
Args:
pairs (SimilarityMatrix):
"""
six.print_('copy labels', flush=True)
self.build_label_cache()
pairs.labels.update(self.cache_l2i)
six.print_('copy matrix to pairs', flush=True)
limit = self.scores.shape[0]
bar = ProgressBar()
for query_id in bar(six.moves.range(0, limit)):
subjects = self.scores[query_id, ...]
filled_subjects_ids = subjects.nonzero()[0]
filled_subjects = [(query_id, i, subjects[i]) for i in filled_subjects_ids if query_id < i]
if filled_subjects:
pairs.pairs.table.append(filled_subjects)
def collect_mailids(server):
folders = server.list_folders()
#construct progressbar
progressbar_widgets = [
'[Searching for mails on server] ',
progressbar.Percentage(),
progressbar.Bar(marker=progressbar.RotatingMarker()), ' ']
progressbar_instance = progressbar.ProgressBar(widgets=progressbar_widgets, maxval=len(folders)).start()
#collect all mailids for all folders
folder_contents = {}
folder_progress = 0
for flags, delimiter, folder in folders:
#read all mailids for the folder
server.select_folder(folder, readonly=True)
folder_contents[folder] = server.search()
        # update progressbar
folder_progress += 1
progressbar_instance.update(folder_progress)
progressbar_instance.finish()
return folder_contents
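A usage sketch: the list_folders/select_folder/search calls above match the imapclient.IMAPClient API, so a caller might look like this (host and credentials are placeholders):

from imapclient import IMAPClient

server = IMAPClient('imap.example.com', ssl=True)
server.login('user@example.com', 'secret')
folder_contents = collect_mailids(server)
print(sum(len(ids) for ids in folder_contents.values()), 'mails found')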
def download(download_list, total_download_size):
progressbar_widgets = [
'[Downloading mails ] ',
progressbar.Percentage(),
progressbar.Bar(marker=progressbar.RotatingMarker()), ' ',
progressbar.ETA(), ' ',
bitmath.integrations.BitmathFileTransferSpeed()]
progressbar_instance = progressbar.ProgressBar(widgets=progressbar_widgets, maxval=int(total_download_size)).start()
downloaded_size = bitmath.Byte(0)
for folder, mails in download_list.items():
server.select_folder(folder, readonly=True)
for mailid, mailfilename, mailsize in mails:
#make parent directory
if not os.path.isdir(os.path.dirname(mailfilename)):
os.makedirs(os.path.dirname(mailfilename))
#download mail
with open(mailfilename, 'wb') as mailfile:
mailfile.write(server.fetch([mailid], ['RFC822'])[mailid][b'RFC822'])
#update progressbar
downloaded_size += mailsize
progressbar_instance.update(int(downloaded_size))
progressbar_instance.finish()
def validate(test_data, test_labels, model, batchsize, silent, gpu):
N_test = test_data.shape[0]
pbar = ProgressBar(0, N_test)
sum_accuracy = 0
sum_loss = 0
for i in range(0, N_test, batchsize):
x_batch = test_data[i:i + batchsize]
y_batch = test_labels[i:i + batchsize]
if gpu >= 0:
x_batch = cuda.to_gpu(x_batch.astype(np.float32))
y_batch = cuda.to_gpu(y_batch.astype(np.int32))
x = Variable(x_batch)
t = Variable(y_batch)
loss, acc = model(x, t, train=False)
sum_loss += float(cuda.to_cpu(loss.data)) * y_batch.size
sum_accuracy += float(cuda.to_cpu(acc.data)) * y_batch.size
if not silent:
pbar.update(i + y_batch.size)
return sum_loss, sum_accuracy
def __filter_regions_by_class(self, regions):
print("Filtering regions...")
act_regions = []
region_sub = {}
bar = progressbar.ProgressBar()
for region in bar(regions):
try:
reg_obj = self.region_objects[region.image.id][region.id]
reg_obj = frozenset([x.lower()
for x in reg_obj])
except KeyError:
            reg_obj = frozenset()
if reg_obj in self.obj_idx:
act_regions.append(region)
if region.image.id not in region_sub:
region_sub[region.image.id] = {}
reg_img = region_sub[region.image.id]
global_region_img = self.region_objects[region.image.id]
reg_img[region.id] = global_region_img[region.id]
return act_regions, region_sub
def load_data(path):
n_lines = count_lines(path)
bar = progressbar.ProgressBar()
train = []
test = []
print('loading...: %s' % path)
with open(path) as f:
i = 0
for line in bar(f, max_value=n_lines):
words = line.strip().split()
if i < 1000:
test.append(np.array(words))
                i += 1
else:
train.append(np.array(words))
return train, test
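Usage sketch; the corpus path is a placeholder and count_lines is assumed to be the project's own helper:

train, test = load_data('corpus.tok.txt')
print('train: %d sentences, test: %d sentences' % (len(train), len(test)))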
def load_data(word_voc, char_voc, path):
n_lines = count_lines(path)
bar = progressbar.ProgressBar()
data = []
print('loading...: %s' % path)
with open(path) as f:
for line in bar(f, max_value=n_lines):
words = line.strip().split()
'''
array = np.array([word_voc.get(w, UNK) for w in words], dtype=np.int32)
unk_words = np.array(words)[array==UNK]
unk_array = np.array([
np.array([char_voc.get(c, UNK) for c in list(w)], dtype=np.int32)
for w in unk_words])
array = np.array([array, unk_array])
if len(unk_array)!=0:
print(array)
'''
data.append(np.array(words))
return data
def progressbarize(iterable, progress=False):
"""Construct progressbar for loops if progressbar requested, otherwise return directly iterable.
:param iterable: iterable to use
:param progress: True if print progressbar
"""
if progress:
        # Cast to a list: a generator has no known length, which prevents
        # ProgressBar from computing an overall ETA
return progressbar.ProgressBar(widgets=[
progressbar.Timer(), ', ',
progressbar.Percentage(), ', ',
progressbar.SimpleProgress(), ', ',
progressbar.ETA()
])(list(iterable))
return iterable
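Usage sketch for the wrapper above; the loop body is a stand-in for real work:

for item in progressbarize(range(10000), progress=True):
    pass  # process item here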
def bruteforce():
    import progressbar
    from time import sleep
    bar = progressbar.ProgressBar(maxval=60,
                                  widgets=[progressbar.Bar('==', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for i in range(10):
        bar.update(i + 1)
        sleep(0.05)
    # note: wordlist is defined but never used; crackpkcs12 runs in pure
    # brute-force mode (-b) rather than dictionary mode
    wordlist = "/root/2fassassin/crack/wordlist/2fa-wordlist.txt"
    target = "/root/2fassassin/loot/*.pfx"
    sign = "crackpkcs12 -v -b " + target + " | tee crack.log"
    os.system(sign)
    bar.finish()
    sys.exit()
def __init__(self, n_estimators, learning_rate, min_samples_split,
min_impurity, max_depth, regression):
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.min_samples_split = min_samples_split
self.min_impurity = min_impurity
self.max_depth = max_depth
self.regression = regression
self.bar = progressbar.ProgressBar(widgets=bar_widgets)
# Square loss for regression
# Log loss for classification
self.loss = SquareLoss()
if not self.regression:
self.loss = CrossEntropy()
# Initialize regression trees
self.trees = []
for _ in range(n_estimators):
tree = RegressionTree(
min_samples_split=self.min_samples_split,
min_impurity=min_impurity,
max_depth=self.max_depth)
self.trees.append(tree)
def __init__(self, n_estimators=100, max_features=None, min_samples_split=2,
min_gain=0, max_depth=float("inf")):
self.n_estimators = n_estimators # Number of trees
    self.max_features = max_features  # Maximum number of features per tree
self.min_samples_split = min_samples_split
self.min_gain = min_gain # Minimum information gain req. to continue
self.max_depth = max_depth # Maximum depth for tree
self.progressbar = progressbar.ProgressBar(widgets=bar_widgets)
# Initialize decision trees
self.trees = []
for _ in range(n_estimators):
self.trees.append(
ClassificationTree(
min_samples_split=self.min_samples_split,
min_impurity=min_gain,
max_depth=self.max_depth))
def precompute(db, dir):
m = megatron.Megatron(db)
m.database.drop_all()
m.database.create_database()
importer = import_book.BookImporter(m)
progress = progressbar.ProgressBar()
importer.import_from(dir, progress)
counting_worker.run(m)
tfidf = tf_idf.TFIDF(m)
tfidf.compute_idf()
tfidf.compute_tfidf()
tfidf.compute_top_words()
def assembly(overlap_length, percent_identity, threads, wd, verbose):
"""
"""
manage = Manager()
queue = manage.Queue()
pool = Pool(processes=int(threads), maxtasksperchild=10)
new_commands = []
    for root, dirs, files in os.walk(wd):
        for fasta_file in files:
complete_data = (fasta_file, percent_identity, overlap_length, wd, verbose, queue)
new_commands.append(complete_data)
results = pool.map_async(iAssembler, new_commands)
with progressbar.ProgressBar(max_value=len(new_commands)) as bar:
while not results.ready():
size = queue.qsize()
bar.update(size)
time.sleep(1)
def __load_embeding_model(self, file_path, max_vocab_size=100000):
self.__embed_vectors = dict()
if not file_path:
print('Embeddings file not provided')
return
if not os.path.exists(file_path):
print('Embeddings file not found:', file_path)
return
print('Loading the embedding model from:', file_path)
bar = progressbar.ProgressBar(max_value=max_vocab_size)
with open(file_path, "r") as embed_f:
for line in embed_f:
try:
tab = line.rstrip().split()
word = tab[0].lower()
                if word not in self.__embed_vectors:
vec = numpy.array(tab[1:], dtype=float)
self.__embed_vectors[word] = vec
except ValueError:
continue
bar.update(len(self.__embed_vectors))
if len(self.__embed_vectors) == max_vocab_size:
bar.finish()
return
def Steg_brute(ifile, dicc):
i = 0
ofile = ifile.split('.')[0] + "_flag.txt"
    with open(dicc) as f:
        nlines = sum(1 for _ in f)
with open(dicc, 'r') as passFile:
pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=nlines).start()
for line in passFile.readlines():
password = line.strip('\n')
            r = subprocess.getoutput("steghide extract -sf %s -p '%s' -xf %s" % (ifile, password, ofile))  # commands.getoutput on Python 2
            if "no pude extraer" not in r and "could not extract" not in r:
print(color.GREEN + "\n\n " + r + color.ENDC)
print("\n\n [+] " + color.INFO + "Information obtained with password:" + color.GREEN + " %s\n" % password + color.ENDC)
if check_file(ofile):
with open(ofile, 'r') as outfile:
for line in outfile.readlines():
print(line)
break
pbar.update(i + 1)
i += 1
def emit(self, record):
import progressbar as pb
msg = json.loads(record.msg)
# print(msg)
    if msg[0] == 'SET':
        self.pbar.update(msg[1])
elif msg[0] == 'START':
print(msg[1] + ':', file=sys.stderr)
self.pbar = pb.ProgressBar(maxval=msg[2], **self.pbar_args)
self.pbar.start()
elif msg[0] == 'DONE':
self.pbar.finish()
del self.pbar
print('', file=sys.stderr)
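The emit method above implements a small JSON protocol over log records: ['START', label, total] opens a bar, ['SET', value] advances it, and ['DONE'] closes it. A producer sketch, assuming this handler (its class name is not shown) is attached to the logger:

import json
import logging

log = logging.getLogger('progress')  # assumed to have the handler above attached
log.info(json.dumps(['START', 'indexing', 100]))
for i in range(100):
    log.info(json.dumps(['SET', i + 1]))
log.info(json.dumps(['DONE']))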
def progress_bar(n):
import progressbar
return progressbar.ProgressBar(
max_value=n,
widgets=[
            progressbar.Percentage(),
' ',
'(',
progressbar.SimpleProgress(),
')',
' ',
progressbar.Bar(),
' ',
progressbar.AdaptiveETA(),
])
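Usage sketch for the factory above:

bar = progress_bar(200)
bar.start()
for i in range(200):
    bar.update(i + 1)
bar.finish()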
# http://code.activestate.com/recipes/577058/
def sub(self, msg=None, **kwargs):
"""Creates a new progress bar for tracking a sub-process.
Parameters
----------
msg : str, optional
Description of sub-process
"""
if self.sub_bar is not None and self.sub_bar.finished is False:
self.sub_bar.finish()
self.sub_bar = ProgressBar(
present="%s: %s" % (self.present, msg) if msg else self.present,
**kwargs)
self.sub_bar.finish = partial(self.sub_bar.finish, end="\r")
return self.sub_bar
def deleteHostsByHostgroup(groupname):
hostgroup = zapi.hostgroup.get(output=['groupid'],filter={'name': groupname})
    if len(hostgroup) != 1:
logger.error('Hostgroup not found: %s\n\tFound this: %s' % (groupname,hostgroup))
groupid = int(hostgroup[0]['groupid'])
hosts = zapi.host.get(output=['name','hostid'],groupids=groupid)
total = len(hosts)
logger.info('Hosts found: %d' % (total))
    if args.run:
x = 0
bar = ProgressBar(maxval=total,widgets=[Percentage(), ReverseBar(), ETA(), RotatingMarker(), Timer()]).start()
logger.echo = False
for host in hosts:
x = x + 1
bar.update(x)
logger.debug('(%d/%d) >> Removing >> %s' % (x, total, host))
out = zapi.globo.deleteMonitors(host['name'])
bar.finish()
logger.echo = True
else:
logger.info('No host removed due to --no-run arg. Full list of hosts:')
for host in hosts:
logger.info('%s' % host['name'])
return