def _step5(arr):
kmeans = pickle.loads(open("kmeans.model", "rb").read())
key, lines, tipe = arr
print(key)
open("./tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe,key=key), "w").write("\n".join(lines))
res = os.popen("./fasttext print-sentence-vectors ./models/model.bin < tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key)).read()
w = open("tmp/tmp.{tipe}.{key}.json".format(tipe=tipe,key=key), "w")
for line in res.split("\n"):
try:
vec = list(map(float, line.split()[-100:]))
except:
print(line)
print(res)
continue
x = np.array(vec)
if np.isnan(x).any():
continue
cluster = kmeans.predict([vec])
txt = line.split()[:-100]
obj = {"txt": txt, "cluster": cluster.tolist()}
data = json.dumps(obj, ensure_ascii=False)
w.write( data + "\n" )
评论列表
文章目录