def _map1(name: str) -> None:
    """Merge per-key term frequencies from one input file and save them.

    Each non-blank line of ``name`` is expected to be ``key<TAB>json``,
    where ``json`` decodes to an object mapping terms to numeric
    frequencies.  Frequencies of the same term under the same key are
    summed across all lines of the file.

    The merged ``{key: {term: freq}}`` dict is pickled, gzip-compressed
    and written to ``shrink/<basename>.pkl.gz`` relative to the current
    working directory, where ``<basename>`` is the part of ``name`` after
    the last ``/``.  The ``shrink`` directory must already exist.

    Args:
        name: Path of the input file to read.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields, or its second field is not valid JSON.
        OSError: If the input cannot be read or the output cannot be
            written.
    """
    key_term_freq = {}
    print(name)

    # Context manager guarantees the input handle is closed even when a
    # malformed line raises part-way through the file.
    with open(name) as source:
        for raw_line in source:
            line = raw_line.strip()
            if not line:
                # A blank line carries no record; skip it instead of
                # failing on the two-field unpack below.
                continue
            key, val = line.split('\t')
            term_freq = key_term_freq.setdefault(key, {})
            for term, freq in json.loads(val).items():
                term_freq[term] = term_freq.get(term, 0) + freq

    save_name = 'shrink/{}.pkl.gz'.format(name.split('/').pop())
    payload = gzip.compress(pickle.dumps(key_term_freq))
    # Close (and therefore flush) the output deterministically rather than
    # relying on garbage collection of an anonymous file object.
    with open(save_name, 'wb') as sink:
        sink.write(payload)
评论列表
文章目录