def fill_tf_idf_shelve(self):
    """Populate the persistent TF/DF tables from ``self.trigger_dict``.

    Opens the shelve at ``self.tf_idf_shelve_file_name``, makes sure the
    ``TF``, ``DF``, ``D``, ``TF_IDF`` and ``CENTROID`` entries exist, and
    then, for every action in ``self.trigger_dict`` that has no ``TF``
    entry yet:

    * tokenizes its trigger text with ``self.tokenize_text``;
    * stores the token counts under ``TF[action]``;
    * increments ``DF[word]`` once per distinct token (as returned by
      ``unique``).

    ``D`` is then set to the number of actions recorded in ``TF``.  After
    the shelve has been closed, ``self.compute_tf_idf()`` and
    ``self.compute_centroids()`` are called.

    Actions already present in ``TF`` are skipped, so calling this method
    again does not count the same trigger twice.
    """
    # writeback=True caches every accessed entry, so in-place mutation of
    # the nested dicts below is written back when the shelve is closed.
    shelf = shelve.open(self.tf_idf_shelve_file_name, writeback=True)
    try:
        # Create any missing top-level entry with its empty value.
        for key, empty in ((TF, {}), (DF, {}), (D, 0),
                           (TF_IDF, {}), (CENTROID, {})):
            if key not in shelf:
                shelf[key] = empty

        term_freqs = shelf[TF]
        doc_freqs = shelf[DF]
        # items() works on both Python 2 and 3 (iteritems() is 2-only).
        for action, trigger_txt in self.trigger_dict.items():
            if action in term_freqs:
                continue  # already indexed
            tokens = self.tokenize_text(trigger_txt)
            term_freqs[action] = Counter(tokens)
            for word in unique(tokens):
                doc_freqs[word] = doc_freqs.get(word, 0) + 1

        shelf[D] = len(term_freqs)
    finally:
        # Always release the shelve, even if tokenizing/counting failed.
        shelf.close()

    # NOTE(review): presumably these read the shelve that was just closed;
    # confirm against their definitions.
    self.compute_tf_idf()
    self.compute_centroids()
评论列表
文章目录