def __init__(self, *a, **kw):
    super(Movie, self).__init__(*a, **kw)
    self.log_dir = 'log/%s' % self.name
    self.sql = SqlHelper()
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'movie.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }
    utils.make_dir(self.log_dir)
    self.init()

Example source code for the Python helper make_dir()
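The snippets below are collected from several projects: Douban movie/book spiders, JD item and comment spiders, a proxy-pool crawler and validator, an MMVT Blender pipeline, a word2vec (skip-gram) training script, and a Unity Asset Store spider. Each one calls a make_dir() helper (defined further down in this listing) to create a directory tree before writing logs, checkpoints, or scraped output.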
def __init__(self, *a, **kw):
    super(Movieurls, self).__init__(*a, **kw)
    self.log_dir = 'log/%s' % self.name
    utils.make_dir(self.log_dir)
    self.sql = SqlHelper()
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'movie.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }
    self.init()

def __init__(self, *a, **kw):
    super(Bookurls, self).__init__(*a, **kw)
    self.log_dir = 'log/%s' % self.name
    utils.make_dir(self.log_dir)
    self.sql = SqlHelper()
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'book.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }
    self.init()

def __init__(self, *a, **kw):
    super(Book, self).__init__(*a, **kw)
    self.log_dir = 'log/%s' % self.name
    self.sql = SqlHelper()
    self.headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'book.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }
    utils.make_dir(self.log_dir)
    self.init()

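The four Douban spiders above are identical apart from the class name and the Host header (movie.douban.com vs. book.douban.com): each creates log/<spider name>, instantiates SqlHelper, installs browser-like headers, and then calls self.init().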
def __init__(self, name=None, **kwargs):
    super(JDCommentSpider, self).__init__(name, **kwargs)
    self.url = kwargs.get("url")
    self.guid = kwargs.get('guid', 'guid')
    self.product_id = kwargs.get('product_id')
    # self.url = 'https://item.jd.com/11478178241.html'
    # self.url = 'https://item.jd.com/4142680.html'
    # self.url = 'https://item.jd.com/3133859.html'
    # self.url = 'https://item.jd.com/3995645.html'
    # self.product_id = 3995645
    self.log('product_id:%s' % self.product_id)
    self.item_table = 'item_%s' % self.product_id
    self.urls_key = '%s_urls' % self.product_id
    self.log_dir = 'log/%s' % self.product_id
    self.is_record_page = False
    self.sql = kwargs.get('sql')
    self.red = kwargs.get('red')
    proxymng.red = self.red
    if self.is_record_page:
        utils.make_dir(self.log_dir)
    self.init()

def __init__(self, name=None, **kwargs):
    super(JDItemInfoSpider, self).__init__(name, **kwargs)
    self.url = kwargs.get("url")
    self.guid = kwargs.get('guid', 'guid')
    self.product_id = kwargs.get('product_id')
    # self.url = 'https://item.jd.com/11478178241.html'
    # self.url = 'https://item.jd.com/4142680.html'
    # self.url = 'https://item.jd.com/3133859.html'
    # self.url = 'https://item.jd.com/3995645.html'
    # self.product_id = 3995645
    self.log('product_id:%s' % self.product_id)
    self.item_table = 'item_%s' % self.product_id
    self.urls_key = '%s_urls' % self.product_id
    self.log_dir = 'log/%s' % self.product_id
    self.is_record_page = False
    self.sql = kwargs.get('sql')
    self.red = kwargs.get('red')
    if self.is_record_page:
        utils.make_dir(self.log_dir)

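Both JD spiders gate the utils.make_dir() call behind is_record_page, which is hard-coded to False here, so the log directory is only created when page bodies are actually being archived.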
def init(self):
    self.meta = {
        'download_timeout': self.timeout,
    }
    self.dir_log = 'log/proxy/%s' % self.name
    utils.make_dir(self.dir_log)
    self.sql.init_proxy_table(config.free_ipproxy_table)

def init(self):
    self.dir_log = 'log/validator/%s' % self.name
    utils.make_dir(self.dir_log)
    self.sql.init_proxy_table(self.name)

def __init__(self, name=None, **kwargs):
    super(JDSpider, self).__init__(name, **kwargs)
    self.product_id = kwargs.get('product_id', -1)
    self.log('product_id:%s' % self.product_id)
    self.item_table = 'item_%s' % self.product_id
    self.product_page = '%s_page' % self.product_id
    self.log_dir = 'log/%s' % self.product_id
    self.is_record_page = False
    if self.is_record_page:
        utils.make_dir(self.log_dir)
    self.sql = SqlHelper()
    self.red = redis.StrictRedis(host=config.redis_host, port=config.redis_part, db=config.redis_db,
                                 password=config.redis_pass)

def make_dir(fol):
    # create the folder if it doesn't exist yet, then hand the path back to the caller
    if not os.path.isdir(fol):
        os.makedirs(fol)
    return fol

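Since every snippet in this listing leans on this helper, here is a minimal usage sketch (the path is hypothetical). The os.path.isdir() guard makes the call idempotent, and returning the folder allows inline chaining, as call_script() below does when building logs_fol:

fol = make_dir('log/example_spider')   # created on the first call
fol = make_dir('log/example_spider')   # already exists: the guard skips makedirs, no error
print(fol)                             # -> 'log/example_spider'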
def call_script(script_fname, args, log_name=''):
    if args.blender_fol == '':
        args.blender_fol = get_blender_dir()
    if not op.isdir(args.blender_fol):
        print('No Blender folder!')
        return
    logs_fol = utils.make_dir(op.join(utils.get_parent_fol(__file__, 4), 'logs'))
    if log_name == '':
        log_name = utils.namebase(script_fname)
    if len(args.subjects) == 0:
        args.subjects = [args.subject]
    for subject in args.subjects:
        args.subject = subject
        args.subjects = ''
        print('*********** {} ***********'.format(subject))
        call_args = create_call_args(args)
        blend_fname = get_subject_fname(args)
        log_fname = op.join(logs_fol, '{}.log'.format(log_name))
        cmd = '{blender_exe} {blend_fname} --background --python {script_fname} {call_args}'.format(  # > {log_fname}
            blender_exe=op.join(args.blender_fol, 'blender'),
            blend_fname=blend_fname, script_fname=script_fname, call_args=call_args, log_fname=log_fname)
        mmvt_addon_fol = utils.get_parent_fol(__file__, 2)
        os.chdir(mmvt_addon_fol)
        print(cmd)
        utils.run_script(cmd)
    print('Finish! For more details look in {}'.format(log_fname))

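For orientation, a command assembled by the format string above looks roughly like this (all paths and arguments here are hypothetical):

    /path/to/blender/blender sub01.blend --background --python render_script.py --subject sub01

Note that log_fname is passed to format() but has no placeholder in the template; the trailing '# > {log_fname}' comment suggests redirecting output to the log file was the original intent, and the final print still reports the log path.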
def get_figures_dir(args):
    figures_dir = op.join(get_mmvt_dir(), args.subject, 'figures')
    make_dir(figures_dir)
    return figures_dir

def build_vocab(words, vocab_size):
    """ Build vocabulary of VOCAB_SIZE most frequent words """
    dictionary = dict()
    count = [('UNK', -1)]
    count.extend(Counter(words).most_common(vocab_size - 1))
    index = 0
    utils.make_dir('processed')
    with open('processed/vocab_1000.tsv', "w") as f:
        for word, _ in count:
            dictionary[word] = index
            if index < 1000:
                f.write(word + "\n")
            index += 1
    index_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, index_dictionary

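A quick sketch of calling build_vocab() on toy data (assuming from collections import Counter and the utils module used above are importable; ties between equal counts follow insertion order in modern CPython):

words = ['the', 'cat', 'sat', 'on', 'the', 'mat', 'the']
dictionary, index_dictionary = build_vocab(words, vocab_size=4)
# 'UNK' always takes index 0; the remaining vocab_size - 1 slots go to the most frequent words.
# As a side effect, the first 1000 entries are also written to processed/vocab_1000.tsv.
# dictionary       -> {'UNK': 0, 'the': 1, 'cat': 2, 'sat': 3}
# index_dictionary -> {0: 'UNK', 1: 'the', 2: 'cat', 3: 'sat'}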
def __init__(self, *a, **kwargs):
    super(AssetStoreSpider, self).__init__(*a, **kwargs)
    # output directories for the scraped plugin data
    self.dir_plugins = 'Plugins/'
    self.dir_all = self.dir_plugins + 'all'
    utils.make_dir(self.dir_plugins)
    utils.make_dir(self.dir_all)
    # list of plugins collected while walking the category tree
    self.plugin_list = []
    self.sql = SqlHelper()
    self.table_name = config.assetstore_table_name
    self.priority_adjust = 2
    # unity version, sent to the store via the X-Kharma-Version header
    self.unity_version = ''
    # request headers
    self.headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection': 'keep-alive',
        'Host': 'www.assetstore.unity3d.com',
        'Referer': 'https://www.assetstore.unity3d.com/en/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0',
        'X-Kharma-Version': self.unity_version,
        'X-Requested-With': 'UnityAssetStore',
        'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
    }
    self.init()

def train_model(model, batch_gen, num_train_steps, weights_fld):
    saver = tf.train.Saver()  # defaults to saving all variables - in this case embed_matrix, nce_weight, nce_bias
    initial_step = 0
    utils.make_dir('checkpoints')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
        # if that checkpoint exists, restore from checkpoint
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('improved_graph/lr' + str(LEARNING_RATE), sess.graph)
        initial_step = model.global_step.eval()
        for index in range(initial_step, initial_step + num_train_steps):
            centers, targets = next(batch_gen)
            feed_dict = {model.center_words: centers, model.target_words: targets}
            loss_batch, _, summary = sess.run([model.loss, model.optimizer, model.summary_op],
                                              feed_dict=feed_dict)
            writer.add_summary(summary, global_step=index)
            total_loss += loss_batch
            if (index + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
                total_loss = 0.0
                saver.save(sess, 'checkpoints/skip-gram', index)

        ####################
        # code to visualize the embeddings. uncomment the block below to visualize them;
        # then run "tensorboard --logdir=processed" to see the embeddings
        # final_embed_matrix = sess.run(model.embed_matrix)
        # # it has to be a variable: constants don't work here, and you can't reuse model.embed_matrix
        # embedding_var = tf.Variable(final_embed_matrix[:1000], name='embedding')
        # sess.run(embedding_var.initializer)
        # config = projector.ProjectorConfig()
        # summary_writer = tf.summary.FileWriter('processed')
        # # add embedding to the config file
        # embedding = config.embeddings.add()
        # embedding.tensor_name = embedding_var.name
        # # link this tensor to its metadata file, in this case the first 1000 words of the vocab
        # embedding.metadata_path = 'processed/vocab_1000.tsv'
        # # saves a configuration file that TensorBoard will read during startup
        # projector.visualize_embeddings(summary_writer, config)
        # saver_embed = tf.train.Saver([embedding_var])
        # saver_embed.save(sess, 'processed/model3.ckpt', 1)

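The train_model() variant at the end of this listing is the same training loop with the projector block active instead of commented out. In both versions, saver.save(sess, 'checkpoints/skip-gram', index) passes index as the global_step argument, so each checkpoint file is suffixed with the step at which it was written.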
def get_categories(self, response):
    self.write_file(self.dir_plugins + 'categories.json', response.body)
    # parse the returned json data
    categories = json.loads(response.body)
    for category in categories.get('categories'):
        name = category.get('name', '')
        subs = category.get('subs', '')
        dir_name = self.dir_plugins + name
        utils.make_dir(dir_name)
        if subs != '':
            self.get_all_subs(subs, dir_name)
        else:
            # leaf category: record it for the query loop below
            name = category.get('name', '')
            count = category.get('count', 0)
            id = category.get('id', 0)
            child_subs = category.get('subs', '')
            plugin = {}
            plugin['name'] = name
            plugin['count'] = count
            plugin['id'] = id
            plugin['dir_name'] = dir_name
            if child_subs == '':
                plugin['child'] = 'yes'
            else:
                plugin['child'] = 'no'
            self.plugin_list.append(plugin)

    for plugin in self.plugin_list:
        id = plugin.get('id', '')
        count = plugin.get('count')
        dir_name = plugin.get('dir_name')
        name = plugin.get('name')
        yield Request(
            # id and count may be numbers in the json, so stringify them for the query string
            url='https://www.assetstore.unity3d.com/api/en-US/search/results.json?q=' + 'category:' + str(id) +
                '&rows=' + str(count) + '&page=' + str(1) + '&order_by=popularity' + '&engine=solr',
            method='GET',
            dont_filter=True,
            headers=self.headers,
            meta={
                'dir_name': dir_name,
                'name': name,
                'id': id,
                'download_timeout': 60,
                'is_proxy': False,
            },
            callback=self.get_plugin_list,
            errback=self.error_parse,
        )

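For reference, this is the JSON shape get_categories() expects, reconstructed from the keys the parser reads; it is an illustrative guess, not a captured API response:

categories_example = {
    'categories': [
        {   # has sub-categories: walked recursively via get_all_subs()
            'name': '3D Models',
            'subs': [{'name': 'Characters', 'subs': '', 'count': 120, 'id': 9}],
        },
        {   # leaf category: appended to self.plugin_list and queried in the second loop
            'name': 'Audio',
            'subs': '',
            'count': 42,
            'id': 7,
        },
    ]
}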
# fetch the latest unity version before querying the store
def train_model(model, batch_gen, num_train_steps, weights_fld):
    saver = tf.train.Saver()  # defaults to saving all variables - in this case embed_matrix, nce_weight, nce_bias
    initial_step = 0
    utils.make_dir('checkpoints')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
        # if that checkpoint exists, restore from checkpoint
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        total_loss = 0.0  # we use this to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('improved_graph/lr' + str(LEARNING_RATE), sess.graph)
        initial_step = model.global_step.eval()
        for index in range(initial_step, initial_step + num_train_steps):
            centers, targets = next(batch_gen)
            feed_dict = {
                model.center_words: centers,
                model.target_words: targets
            }
            loss_batch, _, summary = sess.run(
                [model.loss, model.optimizer, model.summary_op],
                feed_dict=feed_dict)
            writer.add_summary(summary, global_step=index)
            total_loss += loss_batch
            if (index + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    index, total_loss / SKIP_STEP))
                total_loss = 0.0
                saver.save(sess, 'checkpoints/skip-gram', index)

        ####################
        # code to visualize the embeddings;
        # run "tensorboard --logdir=processed" to see them
        final_embed_matrix = sess.run(model.embed_matrix)
        # it has to be a variable: constants don't work here, and you can't reuse model.embed_matrix
        embedding_var = tf.Variable(final_embed_matrix[:1000], name='embedding')
        sess.run(embedding_var.initializer)
        config = projector.ProjectorConfig()
        summary_writer = tf.summary.FileWriter('processed')
        # add embedding to the config file
        embedding = config.embeddings.add()
        embedding.tensor_name = embedding_var.name
        # link this tensor to its metadata file, in this case the first 1000 words of the vocab
        embedding.metadata_path = 'processed/vocab_1000.tsv'
        # saves a configuration file that TensorBoard will read during startup
        projector.visualize_embeddings(summary_writer, config)
        saver_embed = tf.train.Saver([embedding_var])
        saver_embed.save(sess, 'processed/model3.ckpt', 1)