def _generate_datasets(self):
    datasets = {'train': [], 'test': []}
    for dataset in self.datasets:
        print('Generate dataset from {0}.'.format(dataset.name))
        # load dataset.
        dataset.load()
        # generate dataset
        for index in tqdm.trange(len(dataset), ascii=True):
            # get i-th data in the dataset.
            label, joint, image_file, image = dataset.get_data(index)
            # pad and crop image
            image, joint = self._pad_image(image, joint)
            image, joint = self._crop_image(image, joint)
            if not self._validate(joint):
                continue
            # save the image
            image_path = self._save_image(dataset.name, image_file, image)
            # write database
            line = self._make_dataset_line(image_path, joint)
            datasets[label].append(line)
    return datasets
Python trange() usage examples
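All of the snippets collected here rely on tqdm.trange(n), which is shorthand for tqdm(range(n)): it iterates like range(n) while drawing a progress bar. As a quick orientation, here is a minimal, self-contained sketch of the keyword arguments that recur in the examples below (desc, leave, dynamic_ncols, disable) and of set_description(); the function name demo and the sleep inside the loop are placeholders, not taken from any of the projects.

import time
from tqdm import trange

def demo(n_steps=20, quiet=False):
    # trange(n) is tqdm(range(n)): desc labels the bar, leave=False removes it
    # when finished, dynamic_ncols adapts to the terminal, disable hides it entirely.
    t = trange(n_steps, desc='demo', leave=False, dynamic_ncols=True, disable=quiet)
    for step in t:
        time.sleep(0.01)                           # placeholder for real work
        t.set_description('demo step %d' % step)   # update the bar label on the fly

if __name__ == '__main__':
    demo()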
def time_hdf5():
    data_path = create_hdf5(BATCH_SIZE * NSTEPS)
    f = h5py.File(data_path)
    durs = []
    for step in tqdm.trange(NSTEPS, desc='running hdf5'):
        start_time = time.time()
        arr = f['data'][BATCH_SIZE * step: BATCH_SIZE * (step + 1)]
        read_time = time.time()
        arr = copy.deepcopy(arr)
        copy_time = time.time()
        durs.append(['hdf5 read', step, read_time - start_time])
        durs.append(['hdf5 copy', step, copy_time - read_time])
    f.close()
    os.remove(data_path)
    durs = pandas.DataFrame(durs, columns=['kind', 'stepno', 'dur'])
    return durs
def render(self, steps=1, verbose=False):
    if len(self.cameras) == 0:
        self.cameras.append(([], {}))
    images = [[] for _ in self.cameras]
    depths = [[] for _ in self.cameras]
    masks = [[] for _ in self.cameras]
    for _ in trange(steps) if verbose else range(steps):
        for i, (args, kwargs) in enumerate(self.cameras):
            _, _, image, depth, mask = p.getCameraImage(self.width, self.height, *args, **kwargs)
            images[i].append(image)
            depths[i].append(depth)
            masks[i].append(mask)
        p.stepSimulation()
    if len(self.cameras) == 1:
        return images[0], depths[0], masks[0]
    else:
        return images, depths, masks
def fit(self, dataset, target=None):
    """
    Fit loop.

    Args:
        target (np.ndarray, None): initial target of FQI.

    Returns:
        Last target computed.
    """
    if self._boosted:
        if self._target is None:
            self._prediction = 0.
            self._next_q = 0.
            self._idx = 0
        fit = self._fit_boosted
    else:
        fit = self._fit
    for _ in trange(self._n_iterations, dynamic_ncols=True,
                    disable=self._quiet, leave=False):
        fit(dataset)
def train(self):
    tf.logging.info("Training starts...")
    self.data_loader.run_input_queue(self.sess)
    summary_writer = None
    for k in trange(self.max_step, desc="train"):
        fetch = {
            'optim': self.model.optim,
        }
        result = self.model.train(self.sess, fetch, summary_writer)
        if result['step'] % self.log_step == 0:
            self._test(self.summary_writer)
        summary_writer = self._get_summary_writer(result)
    self.data_loader.stop_input_queue()
def train(self):
    z_fixed = np.random.normal(size=[self.batch_size * 10, self.z_dim])  # samples of 10 times batch size
    gen = inf_train_gen(self.lines, self.batch_size, self.charmap)
    for step in trange(self.max_step):
        # Train generator
        _data = next(gen)
        summary_str, _ = self.sess.run([self.summary_op, self.g_optim], feed_dict={self.real_data: _data})
        self.summary_writer.add_summary(summary_str, global_step=step)
        self.summary_writer.flush()
        # Train critic
        for i in range(self.critic_iters):
            _data = next(gen)
            self.sess.run(self.d_optim, feed_dict={self.real_data: _data})
        if step % 100 == 99:
            _data = next(gen)
            g_loss, d_loss, slope = self.sess.run([self.g_loss, self.d_loss, self.slope],
                                                  feed_dict={self.real_data: _data})
            print("[{}/{}] Loss_D: {:.6f} Loss_G: {:.6f} Slope: {:.6f}".
                  format(step + 1, self.max_step, d_loss, g_loss, slope))
            self.generate_samples(z_fixed, idx=step + 1)
def load_to_ram(self, is_training):
    len_keys = self.len_train_keys if is_training else self.len_val_keys
    labs = np.empty([len_keys, 4], dtype=np.int32)
    poses = np.empty([len_keys, self.pshape[0], self.pshape[1], self.max_plen], dtype=np.float32)
    random_crop_bkp = self.random_crop
    random_pick_bkp = self.random_pick
    self.random_crop = False
    self.random_pick = False
    splitname = 'train' if is_training else 'val'
    print('Loading "%s" data to ram...' % splitname)
    t = trange(len_keys, dynamic_ncols=True)
    for k in t:
        key_idx, subject, action, pose, plen = self.read_h5_data(k, is_training)
        pose = pose[:, :, :self.max_plen] if plen > self.max_plen else pose
        plen = self.max_plen if plen > self.max_plen else plen
        labs[k, :] = [key_idx, subject, action, plen]
        poses[k, :, :, :plen] = pose
    self.random_crop = random_crop_bkp
    self.random_pick = random_pick_bkp
    return labs, poses
def compute_moments(pose_seq_input):
    labs, poses = pose_seq_input.load_to_ram(True)
    mask = np.zeros(np.shape(poses), dtype=np.bool)
    print('Computing Masks...')
    t = trange(len(pose_seq_input.train_keys), dynamic_ncols=True)
    for k in t:
        mask[k, :, :, :labs[k, 3]] = True
    poses = np.transpose(poses, [1, 2, 3, 0])
    mask = np.transpose(mask, [1, 2, 3, 0])
    masked_pose = np.reshape(poses[mask], [pose_seq_input.pshape[0], pose_seq_input.pshape[1], -1])
    del mask
    del labs
    del poses
    print('Computing Moments...')
    skel_mean = np.reshape(np.mean(masked_pose, axis=(0, 2)), [1, 1, 1, 3])
    skel_std = np.reshape(np.std(masked_pose, axis=(0, 2)), [1, 1, 1, 3])
    del masked_pose
    print(skel_mean, skel_std)
    save_arr(pose_seq_input, skel_mean, 'skel_mean')
    save_arr(pose_seq_input, skel_std, 'skel_std')
def train(self, lr, iters, batch_size=256):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        inds = torch.floor(torch.rand(batch_size) * self.M).long().cuda()
        # bug: floor(rand()) sometimes gives 1
        inds[inds >= self.M] = self.M - 1
        inds = Variable(inds)
        loss = self.forward(inds)
        # print loss.data[0]
        t.set_description(str(loss.data[0]))
        loss.backward()
        optimizer.step()
    return self.state_model, self.goal_model
def train(self, lr, iters):
    optimizer = optim.Adam(self.parameters(), lr=lr)
    t = trange(iters)
    for i in t:
        optimizer.zero_grad()
        loss = self.forward(())
        # print loss.data[0]
        t.set_description('%.3f | %.3f | %.3f | %.3f' % (self.mse, self.divergence, self.world_mse, self.location_mse))
        loss.backward()
        optimizer.step()
    U, V = self.__lookup()
    recon = torch.mm(U, V.t())
    # print U, V, recon
    U = U.data.cpu().numpy()
    V = V.data.cpu().numpy()
    recon = recon.data.cpu().numpy()
    return U, V, recon
def train_loop(self, num_iter=None):
    '''
    This is a function for handling the training of either CausalBEGAN or
    CausalGAN models. The python function Model.train_step() is called
    num_iter times and some general image save features: intervening,
    conditioning, etc are done here too.
    '''
    num_iter = num_iter or self.model_config.num_iter
    # Train loop
    print('Entering train loop..')
    for counter in trange(num_iter):
        self.model.train_step(self.sess, counter)
        # scalar and histogram summaries
        if counter % self.config.log_step == 0:
            step, summ = self.sess.run([self.model.step, self.model.summary_op])
            self.summary_writer.add_summary(summ, step)
            self.summary_writer.flush()
        # expensive summaries
        if counter % (self.config.log_step * 50) == 0:
            self.causal_sampling([8, 16])
            self.label_interpolation()
            self.sample_diversity()
        # more rare events
        if counter % (self.config.log_step * 100) == 0:
            self.causal_sampling([2, 10])
##Wrapper methods
def create_hdf5(n_img, path=None, shape=(IMSIZE, IMSIZE, 3), im_dtype=np.float32):
    if path is None:
        path = '/tmp'
        # os.makedirs(path)
    tempf = tempfile.NamedTemporaryFile(suffix='.hdf5', dir=path, delete=False)
    tempf.close()
    with h5py.File(tempf.name, 'w') as f:
        f.create_dataset('data', ((n_img,) + shape), dtype=im_dtype)
        f.create_dataset('labels', (n_img,), dtype=np.int64)
        img = np.random.randn(*shape)
        label = np.random.randint(1000)
        for i in tqdm.trange(n_img, desc='creating hdf5 file'):
            f['data'][i] = img
            f['labels'][i] = label
    return tempf.name
def time_tf(data):
    m = model.alexnet_nonorm(data.batch['data'])
    targets = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(m.output, data.batch['labels']))
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    # start our custom queue runner's threads
    if hasattr(data, 'start_threads'):
        data.start_threads(sess)
    durs = []
    for step in tqdm.trange(NSTEPS, desc='running ' + data.kind):
        start_time = time.time()
        if hasattr(data, 'start_threads') or not hasattr(data, 'next'):
            sess.run(targets)
        else:
            batch = data.next()
            feed_dict = {node: batch[name] for name, node in data.batch.items()}
            sess.run(targets, feed_dict=feed_dict)
        end_time = time.time()
        durs.append([data.kind, step, end_time - start_time])
    if hasattr(data, 'stop_threads'):
        data.stop_threads(sess)
    sess.close()
    durs = pandas.DataFrame(durs, columns=['kind', 'stepno', 'dur'])
    return durs
def run_targets(sess,
                dbinterface,
                target_name,
                target,
                valid_loop,
                num_steps,
                online_agg_func,
                agg_func,
                save_intermediate_freq=None,
                validation_only=False):
    """TODO: this code resembles the train() function, possibly want to unify."""
    agg_res = None
    if save_intermediate_freq is not None:
        n0 = len(dbinterface.outrecs)
    for _step in tqdm.trange(num_steps, desc=target_name):
        if valid_loop is not None:
            res = valid_loop(sess, target)
        else:
            res = sess.run(target)
        assert hasattr(res, 'keys'), 'result must be a dictionary'
        if save_intermediate_freq is not None and _step % save_intermediate_freq == 0:
            dbinterface.save(valid_res={target_name: res},
                             step=_step,
                             validation_only=validation_only)
        agg_res = online_agg_func(agg_res, res, _step)
    result = agg_func(agg_res)
    if save_intermediate_freq is not None:
        dbinterface.sync_with_host()
        n1 = len(dbinterface.outrecs)
        result['intermediate_steps'] = dbinterface.outrecs[n0: n1]
    return result
def csv_rel2abs_path_convertor(csv_filenames: list, delimiter: str=' ', encoding='utf8') -> None:
    """
    Convert relative paths into absolute paths
    :param csv_filenames: list of csv filenames to convert
    :param delimiter: character to delimit fields in csv
    :param encoding: encoding format of csv file
    :return:
    """
    for filename in tqdm(csv_filenames):
        absolute_path, basename = os.path.split(os.path.abspath(filename))
        relative_paths = list()
        labels = list()
        # Reading CSV
        with open(filename, 'r', encoding=encoding) as f:
            csvreader = csv.reader(f, delimiter=delimiter)
            for row in csvreader:
                relative_paths.append(row[0])
                labels.append(row[1])
        # Writing converted_paths CSV
        export_filename = os.path.join(absolute_path, '{}_abs{}'.format(*os.path.splitext(basename)))
        with open(export_filename, 'w', encoding=encoding) as f:
            csvwriter = csv.writer(f, delimiter=delimiter)
            for i in trange(0, len(relative_paths)):
                csvwriter.writerow([os.path.abspath(os.path.join(absolute_path, relative_paths[i])), labels[i]])
def make_forecast(model: Sequential, look_back_buffer: numpy.ndarray, timesteps: int=1, batch_size: int=1):
    forecast_predict = numpy.empty((0, 1), dtype=numpy.float32)
    for _ in trange(timesteps, desc='predicting data\t', mininterval=1.0):
        # make prediction with current lookback buffer
        cur_predict = model.predict(look_back_buffer, batch_size)
        # add prediction to result
        forecast_predict = numpy.concatenate([forecast_predict, cur_predict], axis=0)
        # add new axis to prediction to make it suitable as input
        cur_predict = numpy.reshape(cur_predict, (cur_predict.shape[1], cur_predict.shape[0], 1))
        # remove oldest prediction from buffer
        look_back_buffer = numpy.delete(look_back_buffer, 0, axis=1)
        # concat buffer with newest prediction
        look_back_buffer = numpy.concatenate([look_back_buffer, cur_predict], axis=1)
    return forecast_predict
def train(self):
    """
    Train the model on the training set.

    A checkpoint of the model is saved after each epoch
    and if the validation accuracy is improved upon,
    a separate ckpt is created for use on the test set.
    """
    # switch to train mode for dropout
    self.model.train()
    # load the most recent checkpoint
    if self.resume:
        self.load_checkpoint(best=False)
    for epoch in trange(self.start_epoch, self.epochs):
        # decay learning rate
        if self.is_decay:
            self.anneal_learning_rate(epoch)
        # train for 1 epoch
        self.train_one_epoch(epoch)
        # evaluate on validation set
        valid_acc = self.validate(epoch)
        is_best = valid_acc > self.best_valid_acc
        self.best_valid_acc = max(valid_acc, self.best_valid_acc)
        self.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'best_valid_acc': self.best_valid_acc}, is_best)
def save_imshow_grid(images, logs_dir, filename, shape):
    """
    Plot images in a grid of a given shape.
    """
    fig = plt.figure(1)
    grid = ImageGrid(fig, 111, nrows_ncols=shape, axes_pad=0.05)
    size = shape[0] * shape[1]
    for i in trange(size, desc="Saving images"):
        grid[i].axis('off')
        grid[i].imshow(images[i])
    plt.savefig(os.path.join(logs_dir, filename))
def get_bbox(bbox_dir, imglist, train_or_test):
    ret = []
    ret_label = []

    def parse_bbox(fname):
        root = ET.parse(fname).getroot()
        size = root.find('size').getchildren()
        size = map(int, [size[0].text, size[1].text])
        box = root.find('object').find('bndbox').getchildren()
        box = map(lambda x: float(x.text), box)
        label = root.find('object').find('name').text
        return np.asarray(list(box), dtype='float32'), label

    with timed_operation('Loading Bounding Boxes ...'):
        cnt = 0
        import tqdm
        for k in tqdm.trange(len(imglist)):
            fname = imglist[k][0]
            fname = fname[:-4] + 'xml'
            fname = os.path.join(bbox_dir, train_or_test, fname)
            try:
                box, label = parse_bbox(fname)
                ret.append(box)
                ret_label.append(label)
                cnt += 1
            except KeyboardInterrupt:
                raise
            except:
                ret.append(None)
                ret_label.append(-1)
        logger.info("{}/{} images have bounding box.".format(cnt, len(imglist)))
    return ret, ret_label
def __init__(self, cache=None, **kwargs):
    super(GTZAN, self).__init__(**kwargs)
    if kwargs.get('conf') is not None:
        conf = kwargs['conf']
        cache = conf.get('cache', None)
    data_set_path = osp.join(DEFAULT_IMAGEST_BASE, self.data_set)
    self.data_set_path = data_set_path
    self.cache = cache
    X, y = parse_anno_file(data_set_path)
    if cache == 'raw':
        import librosa
        from tqdm import trange
        X_new = np.zeros((len(X), 1, 661500, 1))
        for i in trange(len(X)):
            x, _ = librosa.load(osp.join(DEFAULT_DATA_BASE, X[i]))
            x_len = min(661500, len(x))
            X_new[i, :, :x_len, 0] = x[:x_len]
    if cache is not None and cache != 'raw':
        X = self.load_cache_X(X, cache)
        if cache == 'mfcc':
            X_new = np.zeros((len(X), X[0].shape[0], 1280, 1))
            for i, x in enumerate(X):
                x_len = min(x.shape[1], 1280)
                X_new[i, :, :x_len, 0] = x[:, :x_len]
            X = X_new
    # layout_X
    if self.layout_x == 'rel_path':
        self.X = X
    else:
        self.X = self.init_layout_X(X)
    # layout_y
    self.y = self.init_layout_y(y)
ilsvrc_cls_multithread_scipy.py (project: tensorflow_yolo2, author: wenxichen)
def prepare_multithread(self):
    """Preparation for multithread processing."""
    self.reset = False
    # num_batch_left should always be -1 until the last batch block of the epoch
    self.num_batch_left = -1
    self.num_child = 10
    self.child_processes = [None] * self.num_child
    self.batch_cursor_read = 0
    self.batch_cursor_fetched = 0
    # TODO: add this to cfg file
    self.prefetch_size = 5  # in terms of batch
    # TODO: may not need readed_batch after validating everything
    self.read_batch_array_size = self.total_batch + self.prefetch_size * self.batch_size
    self.readed_batch = Array('i', self.read_batch_array_size)
    for i in range(self.read_batch_array_size):
        self.readed_batch[i] = 0
    self.prefetched_images = np.zeros((self.batch_size * self.prefetch_size
                                       * self.num_child,
                                       self.image_size, self.image_size, 3))
    self.prefetched_labels = np.zeros(
        (self.batch_size * self.prefetch_size * self.num_child))
    self.queue_in = []
    self.queue_out = []
    for i in range(self.num_child):
        self.queue_in.append(Queue())
        self.queue_out.append(Queue())
        self.start_process(i)
        self.start_prefetch(i)
    # fetch the first one
    desc = 'receive the first half: ' + \
           str(self.num_child * self.prefetch_size // 2) + ' batches'
    for i in trange(self.num_child // 2, desc=desc):
        # print "collecting", i
        self.collect_prefetch(i)
core_process_time_evaluator.py (project: DeepPoseComparison, author: ynaka81)
def plot(self, samples, title):
    """ Plot core process time of chainer and pytorch. """
    batch_sizes = [2**m for m in range(self.max_batch_index + 1)]
    process_time = {'chainer': {'mean': [], 'std': []},
                    'pytorch': {'mean': [], 'std': []}}
    for batch_size in tqdm(batch_sizes, desc='batch size'):
        for name, process in tqdm(self.process.items(), desc='testers'):
            # set batch size.
            process.set_batch_size(batch_size)
            compute_time = []
            # get compute time.
            for index in trange(samples, desc='samples'):
                start = time.time()
                process.run(self.only_inference)
                compute_time.append(time.time() - start)
            # calculate mean and std.
            process_time[name]['mean'].append(np.mean(compute_time))
            process_time[name]['std'].append(np.std(compute_time))
    # plot core process time of each batch size.
    for name, p_t in process_time.items():
        plt.errorbar(batch_sizes, p_t['mean'], yerr=p_t['std'], label=name)
    # plot settings.
    plt.title(title)
    plt.legend(loc='lower right')
    plt.xlabel('batch size')
    plt.ylabel('core process time [sec]')
    # save plot.
    if self.debug:
        plt.show()
    else:
        filename = '_'.join(title.split(' ')) + '.png'
        plt.savefig(os.path.join(self.output, filename))
def segment(locs, info, segmentation, kwargs={}, callback=None):
    Y = info[0]['Height']
    X = info[0]['Width']
    n_frames = info[0]['Frames']
    n_seg = n_segments(info, segmentation)
    bounds = _np.linspace(0, n_frames - 1, n_seg + 1, dtype=_np.uint32)
    segments = _np.zeros((n_seg, Y, X))
    if callback is not None:
        callback(0)
    for i in _trange(n_seg, desc='Generating segments', unit='segments'):
        segment_locs = locs[(locs.frame >= bounds[i]) & (locs.frame < bounds[i+1])]
        _, segments[i] = render(segment_locs, info, **kwargs)
        if callback is not None:
            callback(i + 1)
    return bounds, segments
def _sequence_data(self, Column=None):
    data = self.row_data
    if Column is None:
        self.Column = []
    else:
        self.Column = Column
    for i in trange(data.nrows, desc='seq rows in memory'):
        Rows = {}
        for cell in trange(data.ncols, desc='seq cols in memory', leave=False):
            Rows[cell] = data.row_values(i)[cell]
        self.Column.append(Rows)
    return self.Column
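_sequence_data above (and matrix_data/config_data below) nests one trange inside another; passing leave=False to the inner bar makes it disappear once its pass is done, so at most two bars stay on screen. A minimal standalone sketch of that nesting follows; the table dimensions and cell values are hypothetical placeholders, not taken from the project.

from tqdm import trange

n_rows, n_cols = 5, 100                      # hypothetical table dimensions
table = []
for i in trange(n_rows, desc='rows'):
    row = {}
    for j in trange(n_cols, desc='cols', leave=False):  # inner bar clears itself
        row[j] = i * n_cols + j              # placeholder cell value
    table.append(row)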
def matrix_data(self, Column=None):
    data = self.sequence_data
    header = self._header_matrix_mapping(column=data)
    coordinate = self.coordinate
    row_start_point = coordinate['row_start_point']
    col_title_length = coordinate['col_title_length']
    if Column is None:
        self.Column = []
    else:
        self.Column = Column
    title, comp = '', ''
    for row in trange(len(data), desc='matrix rows in memory'):
        if row > row_start_point:
            if data[row][header[self.item]] == None or data[row][header[self.item]] == '':
                Rows, config = {}, ''
                for cell in trange(len(data[row]), desc='matrix cols in memory', leave=False):
                    if cell < col_title_length:
                        Rows[cell] = data[row][cell]
                    if cell >= col_title_length and str(data[row][cell]).lower() != self.tip:
                        config += data[coordinate['row_start_point']][cell] + ';'
                    # title
                    if cell == header[self.item]:
                        Rows[cell] = title
                    # component
                    if cell == header[self.component]:
                        if data[row][header[self.component]] == None or data[row][header[self.component]] == '':
                            Rows[cell] = comp
                        else:
                            comp = data[row][header[self.component]]
                Rows[col_title_length] = config
                self.Column.append(Rows)
            else:
                title = data[row][header[self.item]]
    return {'title': header, 'data': self.Column}
def config_data(self, Column=None, configColumn=None):
    data = self.sequence_data
    coordinate = self.coordinate
    sku_start_point = coordinate['sku_start_point']          # first row of the config/SKU block
    sku_end_point = coordinate['sku_end_point']              # last row of the config/SKU block
    sku_col_start_point = coordinate['sku_col_start_point']  # first column of the config/SKU block
    if Column is None:
        self.Column = []
    else:
        self.Column = Column
    if configColumn is None:
        self.configColumn = []
    else:
        self.configColumn = configColumn
    for i in tqdm(range(len(data))):  # collect the rows that belong to the config block
        if i >= sku_start_point and i <= sku_end_point:
            rows = OrderedDict()
            for k, v in dict(data[i]).items():
                if k >= sku_col_start_point:
                    rows.setdefault(k, v)
            self.Column.append(rows)
    for i in trange(sku_col_start_point + 1, sku_col_start_point + len(self.Column[0]), desc='config rows in memory'):
        rows = OrderedDict()
        for row in trange(len(self.Column), desc='config cols in memory', leave=False):
            for k, v in dict(self.Column[row]).items():
                if k == i:
                    rows.setdefault(row, str(v).upper())
        self.configColumn.append(rows)
    return {'column': self.Column, 'configcolumn': self.configColumn}
def create_copy_task_files(self, context_filename, answer_filename, vocab_size, num_examples, max_sequence_length):
    with open(context_filename, 'w') as file:
        for _ in trange(num_examples):
            num_tokens = np.random.randint(2, max_sequence_length, 1)
            tokens = np.random.randint(0, vocab_size, num_tokens)
            file.write(" ".join([str(x) for x in list(tokens)]) + "\n")
    shutil.copyfile(context_filename, answer_filename)
Filter_Stock_Cashflow_CHN.py (project: StockRecommendSystem, author: doncat99)
def summary_stock_tick_data(root_path, df, symbol, date_list):
    file_path = root_path + "/Data/CSV/tick/" + symbol + "/"
    out_file = root_path + "/Data/CSV/cashflow/" + symbol + ".csv"
    # pbar = trange(len(date_list), mininterval=0.1, smoothing=1, leave=False)
    # for i in pbar:
    for date in date_list:
        # date = date_list[i]
        start = time.time()
        file_name = file_path + symbol + "_" + date + ".csv"
        if os.path.exists(file_name) == False:
            continue
        try:
            data = pd.read_csv(file_name, index_col=0)
        except:
            print("error on symbol:", symbol, " date:", date)
            continue
        if (data is None) or data.empty or len(data) < 4:
            buy, sell, even = 0, 0, 0
        else:
            buy_amount, sell_amount, even_amount, buy_volume, sell_volume, even_volume, buy_max, buy_min, buy_average, sell_max, sell_min, sell_average, even_max, even_min, even_average = group_tick_data_to_cashflow(data)
            df.loc[len(df)] = [date, symbol, buy_amount, sell_amount, even_amount, buy_volume, sell_volume, even_volume, buy_max, buy_min, buy_average, sell_max, sell_min, sell_average, even_max, even_min, even_average]
        # outMessage = '%s processed in: %.3s seconds' % (date, (time.time() - start))
        # pbar.set_description(outMessage)
    df = df.sort_values(['symbol', 'date'], ascending=[True, True])
    df.to_csv(out_file)
def process_data(root_path, symbols, dates):
    negative_pect = {}
    stock_memory = {}
    symbol_memory = {}
    range_len = 3
    my_range = range(-1, -200, -1)
    # pbar = tqdm(total=len(my_range))
    pbar = trange(len(my_range))
    out_path = root_path + "/Data/CSV/target/"
    if os.path.exists(out_path) == False:
        os.mkdir(out_path)
    for index in my_range:
        day_range = [dates[idx] for idx in range(index - range_len, index + 1)]
        file_name = out_path + day_range[-1] + ".csv"
        if os.path.exists(file_name):
            stock_filter = pd.read_csv(file_name, index_col=0)
        else:
            db_cashflow = process_all_stocks_data(root_path, symbols, day_range, stock_memory, symbol_memory, index, range_len)
            stock_filter = filter_cashflow(db_cashflow)
            if len(stock_filter) > 0:
                stock_filter.to_csv(file_name)
        negative_pect[day_range[-1]] = get_result(stock_filter)
        # outMessage = '%-*s processed in: %.4s seconds' % (6, index, (time.time() - startTime))
        # pbar.set_description(outMessage)
        pbar.update(1)
    pbar.close()
    print(negative_pect)
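When the loop variable is not a plain 0..n-1 index (as in process_data above, which walks a negative range), the bar can be driven by hand: build it with trange(total), call update(1) once per iteration, and close() it when done. A minimal sketch of that manual pattern follows; the items list and the per-item work are placeholders, not from the project.

from tqdm import trange

items = ['a', 'b', 'c', 'd']                 # placeholder work items
pbar = trange(len(items))                    # bar sized to the number of items
for item in items:
    _ = item.upper()                         # placeholder for real per-item work
    pbar.set_description('processing %s' % item)
    pbar.update(1)                           # advance the bar by one step manually
pbar.close()                                 # close manually-driven bars when done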