def get_per_sample_tf(self, texts, field, silent=0):
    """
    Load or build the per-sample term-frequency data for every entry of `texts`.

    Each sample is treated as a document. Results are stored in
    `self.sample_tf`, keyed by the entries of `texts`, and cached on disk as
    "<self.flags.data_path>/<self.name>_sample_tf_<text>.p".

    If `self.sample_tf` is already set (not None) the method returns
    immediately and does nothing.

    Args:
        texts: iterable of dataset names, e.g. ["train", ...]. Each entry is
            used as a key into `self.sample_words_count` / `self.sample_tf`
            and as part of the cache file name.
        field: forwarded unchanged to `self.get_per_sample_words_count`.
        silent: when equal to 0, a completion message is printed per entry.

    Returns:
        None. The result is written to `self.sample_tf`.
    """
    if self.sample_tf is not None:
        return
    self.sample_tf = {}
    # Populates self.sample_words_count, which the cache-miss branch reads.
    # The trailing 1 is presumably that method's own `silent` flag -- confirm.
    self.get_per_sample_words_count(texts, field, 1)
    for text in texts:
        name = "{}/{}_sample_tf_{}.p".format(self.flags.data_path, self.name, text)
        if os.path.exists(name):
            # NOTE(review): pickle executes arbitrary code on load; this is
            # only safe while data_path holds files this program wrote itself.
            with open(name, 'rb') as cache_file:
                self.sample_tf[text] = pickle.load(cache_file)
        else:
            print("gen", name)
            # Compute before opening the file so a failure in tf() does not
            # leave an empty cache file behind.
            tf_list = tf(self.sample_words_count[text], 0)
            # The context manager guarantees the cache is flushed and closed.
            with open(name, 'wb') as cache_file:
                pickle.dump(tf_list, cache_file)
            self.sample_tf[text] = tf_list
        if silent == 0:
            print("\n{} sample tf done".format(text))
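# Comment list  (appears to be leftover web-page navigation text, not code)
# Table of contents  (appears to be leftover web-page navigation text, not code)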