def set_new_path(self, new_idf_path):
    """Load the IDF table stored at *new_idf_path* into this object.

    Does nothing when *new_idf_path* equals the current ``self.path``.
    Otherwise sets ``self.idf_freq`` (word -> IDF weight as float),
    ``self.median_idf`` (the upper-median weight) and ``self.path``.

    The source file is UTF-8 text with one ``"<word> <freq>"`` pair per
    line, separated by a single space; blank lines are skipped.

    A marshal cache is kept in the system temp directory.  The cache file
    name is derived from the absolute source path, and the cache is only
    trusted while it is at least as new as the source file, so different
    or modified IDF files never receive each other's data.

    Raises:
        IOError / OSError: the IDF file cannot be read.
        ValueError: a non-blank line is not ``"<word> <freq>"``.
        IndexError: the IDF file contains no entries.

    On any of these errors the object's attributes are left unchanged.
    """
    import hashlib

    if self.path == new_idf_path:
        return

    # Key the cache on the source path so that switching IDF files cannot
    # load another file's table.
    source = os.path.abspath(new_idf_path)
    key = source if isinstance(source, bytes) else source.encode('utf-8')
    cache_file = os.path.join(
        tempfile.gettempdir(),
        "idf.%s.cache" % hashlib.sha256(key).hexdigest()[:16])

    idf_freq = None
    median_idf = None
    try:
        # A cache older than its source is stale; a missing source or cache
        # raises here and falls through to a fresh parse below.
        if os.path.getmtime(cache_file) >= os.path.getmtime(source):
            # NOTE(security): marshal data is read from a shared temp
            # directory; marshal is not safe against maliciously crafted
            # input, so do not rely on this cache in hostile environments.
            with open(cache_file, 'rb') as cf:
                idf_freq, median_idf = marshal.load(cf)
    except (IOError, OSError, EOFError, ValueError, TypeError):
        # Missing, unreadable or corrupt cache: rebuild from the source.
        idf_freq = None

    if idf_freq is None:
        with open(new_idf_path, 'rb') as src:
            content = src.read().decode('utf-8')
        idf_freq = {}
        for line in content.splitlines():
            line = line.strip()
            if not line:
                continue
            word, freq = line.split(' ')
            idf_freq[word] = float(freq)
        median_idf = sorted(idf_freq.values())[len(idf_freq) // 2]
        try:
            with open(cache_file, 'wb') as cf:
                marshal.dump((idf_freq, median_idf), cf)
        except (IOError, OSError, ValueError):
            # The cache is only an optimisation; an unwritable temp
            # directory must not make an otherwise successful load fail.
            pass

    # Commit only after everything succeeded, so a failed load never leaves
    # the object claiming a path whose table it does not hold.
    self.path = new_idf_path
    self.idf_freq = idf_freq
    self.median_idf = median_idf
评论列表
文章目录