import os
import re

import utils  # project-local helpers: mkdir(), push_data_to_stack()


def parse_testing_data(dataset, output):
    # strip the "/POS" tag appended to each token
    rm_postag = re.compile(r'/[\w\-.,?"\':!@#$%^&*()\[\]_+=\\`~]+')
    utils.mkdir(output)
    stack = os.listdir(dataset)
    print('loading data in ' + dataset)
    while stack:
        file_name = stack.pop()
        file_path = dataset + '/' + file_name
        if os.path.isdir(file_path):  # if it is a directory, push its contents onto the stack
            utils.push_data_to_stack(stack, file_path, file_name)
        else:  # otherwise read the file
            with open(file_path, 'r', encoding='utf-8') as ff:
                content = ff.read()
            content = rm_postag.sub(u'', content)
            content = content.replace(u'/“', u'').replace(u'/”', u'')\
                .replace(u'/…', u'').replace(u'…', u'...')
            with open(output + '/' + file_name, 'w', encoding='utf-8') as f:
                f.write(content)
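# The parse_* functions on this page call utils.push_data_to_stack, a
# project-local helper whose source is not included here. A minimal sketch of
# what it presumably does, inferred from the call sites: it must push paths
# that stay relative to the dataset root, so that dataset + '/' + file_name
# remains valid after a pop.
def push_data_to_stack(stack, dir_path, dir_name):
    for entry in os.listdir(dir_path):
        stack.append(dir_name + '/' + entry)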
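# Every snippet on this page also depends on utils.mkdir, which is not shown.
# Judging from the two call signatures used below, utils.mkdir(path) and
# utils.mkdir(path, user), it is presumably an idempotent mkdir with an
# optional ownership change; a sketch under that assumption:
import os
import pwd

def mkdir(path, owner=None):
    os.makedirs(str(path), exist_ok=True)  # no error if the directory already exists
    if owner is not None:
        pw = pwd.getpwnam(owner)  # hand the directory to another user (e.g. for rsync)
        os.chown(str(path), pw.pw_uid, pw.pw_gid)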
def save_model(self, model, path):
    # requires joblib (import joblib) and the utils.mkdir helper above
    print('saving %s ...' % path)
    utils.mkdir('model')  # ensure the model directory exists
    joblib.dump(model, path)
def save_training(self, X_train, y_train, X_test, y_test):
    utils.mkdir('model')
    self.save_model(X_train, 'model/X_train.pkl')
    self.save_model(X_test, 'model/X_test.pkl')
    self.save_model(y_train, 'model/y_train.pkl')
    self.save_model(y_test, 'model/y_test.pkl')
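# The persisted splits can be read back with joblib.load; a usage sketch,
# with paths following the save_training call above:
import joblib

X_train = joblib.load('model/X_train.pkl')
y_train = joblib.load('model/y_train.pkl')
X_test = joblib.load('model/X_test.pkl')
y_test = joblib.load('model/y_test.pkl')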
import os

import utils
from bs4 import BeautifulSoup


def parse_training_data(dataset, output):
    utils.mkdir(output)
    stack = os.listdir(dataset)
    print('loading data in ' + dataset)
    while stack:
        file_name = stack.pop()
        file_path = dataset + '/' + file_name
        if os.path.isdir(file_path):  # if it is a directory, push its contents onto the stack
            utils.push_data_to_stack(stack, file_path, file_name)
        else:  # otherwise read the file
            with open(file_path, 'r', encoding='utf-8') as ff:
                content = ff.read()
            bs = BeautifulSoup(content, 'html.parser')  # explicit parser avoids bs4's warning
            with open(output + '/' + file_name, 'w', encoding='utf-8') as f:
                f.write(bs.text)
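# Together the two parse functions form a small preprocessing pipeline; a
# hypothetical invocation (the directory names are made up for illustration):
parse_training_data('data/raw/train', 'data/clean/train')
parse_testing_data('data/raw/test', 'data/clean/test')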
import logging
import os
import time

import config
import utils


def init_logging(args, conf):
    '''Initialize the logging system. Uses the id from args; adds 'trackfile'
    to conf as a file object to which track data should be written.'''
    # setup log files
    filetimestamp = time.strftime("%Y%m%d-%H%M%S")
    if args.id:
        name = "%s-%s" % (filetimestamp, args.id)
    else:
        name = filetimestamp
    conf['name'] = name
    # ensure log and image directories exist
    utils.mkdir(config.TRACKDIR, config.REMOTE_USER)
    debugframe_dir = "%s/%s" % (config.DBGFRAMEDIR, name)
    # Make debugframe_dir world-writable so rsync can delete it.
    oldmask = os.umask(0)
    utils.mkdir(debugframe_dir, config.REMOTE_USER)
    os.umask(oldmask)
    conf['debugframe_dir'] = debugframe_dir
    trackfilename = "%s/%s-track.csv" % (config.TRACKDIR, name)
    logfilename = "%s/%s.log" % (config.TRACKDIR, name)
    # Send the root logger's output to both a log file and the console
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(filename=logfilename)
    fh.setFormatter(
        logging.Formatter(fmt="%(asctime)s [%(levelname)s] %(message)s"))
    sh = logging.StreamHandler()
    sh.setFormatter(
        logging.Formatter(fmt="[%(levelname)s] %(message)s"))
    logger.addHandler(fh)
    logger.addHandler(sh)
    logging.info("Logging started.")
    conf['trackfile'] = open(trackfilename, 'w')
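# A minimal driver for init_logging, assuming only that args carries an
# optional id attribute (config.TRACKDIR and friends come from the project's
# config module):
from argparse import Namespace

conf = {}
init_logging(Namespace(id='exp1'), conf)
logging.info("track data goes to %s", conf['trackfile'].name)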
import shutil

import utils


def _relative_move(srcroot, srcrel, destroot):
    '''Move a file relative to a given root to a given destination.
    E.g. _relative_move(Path('foo'), Path('bar/baz'), Path('bob')) will move
    foo/bar/baz to bob/bar/baz, creating bob/bar if needed. All three
    arguments are pathlib.Path objects.
    '''
    srcfile = srcroot / srcrel
    destfile = destroot / srcrel
    utils.mkdir(destfile.parent)  # make sure the target directory exists
    shutil.move(str(srcfile), str(destfile))
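# Usage sketch with illustrative paths: move reports/2024/summary.csv to
# archive/2024/summary.csv, creating archive/2024 on the way.
from pathlib import Path

_relative_move(Path('reports'), Path('2024/summary.csv'), Path('archive'))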
import sqlite3

import config
import utils


def init_db():
    utils.mkdir(config.db_dir)
    conn = sqlite3.connect("%s/%s" % (config.db_dir, config.db_file))
    c = conn.cursor()
    c.execute('''CREATE TABLE features
                 (digest TEXT, content TEXT, currency TEXT, t TEXT,
                  size INTEGER, n INTEGER, increase REAL,
                  feature_size INTEGER, point INTEGER)''')
    c.execute('''CREATE TABLE trade
                 (tid INTEGER PRIMARY KEY,
                  created_at INTEGER DEFAULT 0, updated_at INTEGER DEFAULT 0,
                  digest TEXT, buy REAL DEFAULT 0.0, sell REAL DEFAULT 0.0)''')
    c.execute('''CREATE TABLE training
                 (tid INTEGER PRIMARY KEY,
                  created_at INTEGER DEFAULT 0, updated_at INTEGER DEFAULT 0,
                  digest TEXT, buy REAL DEFAULT 0.0, sell REAL DEFAULT 0.0)''')
    conn.commit()
    conn.close()
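# A quick smoke test of the schema above, inserting one row into trade and
# reading it back (the values are invented):
init_db()
conn = sqlite3.connect("%s/%s" % (config.db_dir, config.db_file))
c = conn.cursor()
c.execute("INSERT INTO trade (tid, digest, buy, sell) VALUES (?, ?, ?, ?)",
          (1, 'abc123', 101.5, 102.0))
conn.commit()
print(c.execute("SELECT * FROM trade").fetchall())
conn.close()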
def set_name(self, name):
    if name is not None:
        self.name = name
        self.path = directories.MODELS + name + '/'
        utils.mkdir(self.path)
def __init__(self, name='clusterer',
             # model initialization
             load_weights_from=None, weights_file=None, randomize_weights=False,
             # network architecture
             top_layers=3, learnable_layers=3, pooling='maxavg', risk_objective=True,
             # dropout and learning rates
             input_dropout=0, dropout=0.0, learning_rate=1e-7):
    assert pooling in ['max', 'avg', 'maxavg']
    self.name = name
    self.path = directories.CLUSTERERS + '/'
    utils.mkdir(self.path)
    self.load_weights_from = load_weights_from
    self.weights_file = weights_file
    self.randomize_weights = randomize_weights
    self.top_layers = top_layers
    self.learnable_layers = learnable_layers
    self.pooling = pooling
    self.risk_objective = risk_objective
    self.input_dropout = input_dropout
    self.dropout = dropout
    self.learning_rate = learning_rate
    # feature vector widths differ between the Chinese and English setups
    self.single_size = 855 if directories.CHINESE else 674
    self.pair_size = 1733 if directories.CHINESE else 1370
    # layers below the learnable ones are kept static (frozen)
    self.static_layers = top_layers - learnable_layers
    if self.static_layers == 0:
        self.anaphoricity_input_size = self.single_size
        self.pair_input_size = self.pair_size
    elif self.static_layers == 1:
        self.anaphoricity_input_size = self.pair_input_size = 1000
    else:
        self.anaphoricity_input_size = self.pair_input_size = 500
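# Hypothetical instantiation, assuming the enclosing class is named Clusterer:
# with top_layers=3 and learnable_layers=2 there is one static layer, so both
# derived input sizes collapse to 1000.
model = Clusterer(name='my-clusterer', pooling='max',
                  top_layers=3, learnable_layers=2)
print(model.anaphoricity_input_size, model.pair_input_size)  # 1000 1000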