def compare_tweet_with_storage(tweet, storage=None, bow=False):
    """Score the entities of a tweet against stored entity vectors.

    The tweet is passed through ``transform_tweet(tweet, bow)``, which is
    unpacked here as ``(entity, entity_type, vector)`` triples.  For every
    entity, the rows of ``storage`` whose ``'Entity'`` column equals that
    entity are compared with the tweet's vector, and the best similarity
    found (starting from 0.0) is recorded.  The per-entity scores are then
    handed to ``combine_scores``.

    Args:
        tweet: The tweet to score; forwarded unchanged to ``transform_tweet``.
        storage: Table of stored vectors.  It is filtered with a boolean mask
            on ``'Entity'`` and iterated with ``iterrows()`` (presumably a
            pandas DataFrame -- confirm against the code that pickles it).
            When ``None``, it is unpickled from
            ``config.data_folder / config.model_file``.
        bow: When true, vectors are treated as dicts mapping a cluster id to
            a value and compared by overlap; otherwise cosine similarity is
            used (on three sub-vectors when the module flag ``SPLIT`` is set).

    Returns:
        Whatever ``combine_scores`` returns for the ``{entity: best score}``
        mapping.

    Raises:
        IOError: If ``storage`` is ``None`` and the model file does not exist.
    """
    if storage is None:
        model_path = os.path.join(config.data_folder, config.model_file)
        if not os.path.isfile(model_path):
            raise IOError('Model was not found!')
        # NOTE(security): unpickling can execute arbitrary code; the model
        # file must come from a trusted source.
        with open(model_path, 'rb') as model_handle:
            storage = pickle.load(model_handle)

    print(tweet)
    transformed_tweet = transform_tweet(tweet, bow)
    print([x[0] for x in transformed_tweet], [np.sum(y) for y in (x[2] for x in transformed_tweet)])

    scores = {}
    for entity, _entity_type, vector_array in transformed_tweet:
        temp_score = 0.0
        matching_rows = storage[storage['Entity'] == entity]
        for tweetid, item in matching_rows.iterrows():
            stored_vector = item['Vector array']
            if bow:
                similarity = _bow_overlap(vector_array, stored_vector)
                if similarity is None:
                    # Undefined comparison (an empty bag); leave the running
                    # best score untouched instead of turning it into NaN.
                    continue
            elif SPLIT:
                similarity = _split_cosine(vector_array, stored_vector)
                # print(entity, entity_type)
            else:
                similarity = 1 - cosine(vector_array, stored_vector)
                print(similarity, entity, tweet, str(tweetid))
            temp_score = np.max([similarity, temp_score])
        # Entities without any stored match keep the initial score of 0.0.
        scores[entity] = temp_score
    return combine_scores(scores)


def _bow_overlap(bag_a, bag_b):
    """Return the overlap ratio of two cluster-id -> value dicts, or None if undefined."""
    # Sorted union of keys; works on both Python 2 lists and Python 3 dict views.
    cluster_ids = sorted(set(bag_a) | set(bag_b))
    vector_a = np.zeros([len(cluster_ids)])
    vector_b = np.zeros([len(cluster_ids)])
    for k, cid in enumerate(cluster_ids):
        vector_a[k] = bag_a.get(cid, 0)
        vector_b[k] = bag_b.get(cid, 0)
    smaller_total = np.min([np.sum(vector_a), np.sum(vector_b)])
    if smaller_total == 0:
        # Dividing by zero would yield NaN/inf and poison the running maximum.
        return None
    shared = np.sum(np.logical_and(vector_a, vector_b))
    return 1.0 * shared / smaller_total


def _split_cosine(parts_a, parts_b):
    """Return the mean cosine similarity over the three sub-vectors, ignoring NaN parts."""
    similarities = [1 - cosine(parts_a[x], parts_b[x]) for x in range(3)]
    valid = [s for s in similarities if not np.isnan(s)]
    # The small epsilon keeps the division defined when every part is NaN.
    return 1.0 * sum(valid) / (len(valid) + 10 ** (-10))
评论列表
文章目录