utils.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:aihackathon 作者: nicoheidtke 项目源码 文件源码
def _bow_overlap(counts_a, counts_b):
    """Return the overlap coefficient of two bag-of-words mappings.

    Both arguments are dict-like mappings from cluster id to a count. The
    result is the number of cluster ids that are non-zero in *both* mappings
    divided by the smaller of the two total counts. When that denominator is
    zero (at least one side is empty) the overlap is defined as ``0.0``
    instead of dividing by zero and producing NaN.
    """
    # A plain set union works on Python 2 and 3 (``keys() + keys()`` does not)
    # and the ordering of the ids is irrelevant for the sums below.
    cluster_ids = set(counts_a) | set(counts_b)
    vector_a = np.array([counts_a.get(cid, 0) for cid in cluster_ids], dtype=float)
    vector_b = np.array([counts_b.get(cid, 0) for cid in cluster_ids], dtype=float)
    denominator = np.min([np.sum(vector_a), np.sum(vector_b)])
    if denominator == 0:
        return 0.0
    return 1.0 * np.sum(np.logical_and(vector_a, vector_b)) / denominator


def _split_cosine_similarity(parts_a, parts_b):
    """Return the mean cosine similarity over the three vector parts.

    Parts whose similarity is NaN are left out of the mean. The tiny epsilon
    in the denominator keeps the result at ``0.0`` when every part is NaN.
    """
    similarities = [1 - cosine(parts_a[k], parts_b[k]) for k in range(3)]
    valid = [value for value in similarities if not np.isnan(value)]
    return 1.0 * sum(valid) / (len(valid) + 10 ** (-10))


def compare_tweet_with_storage(tweet, storage=None, bow=False):
    """Score a tweet against the stored entity vectors.

    The tweet is converted with ``transform_tweet(tweet, bow)`` into
    ``(entity, entity_type, vector_array)`` triples. For every entity, the
    best (maximum) similarity against all rows of ``storage`` whose
    ``'Entity'`` value equals that entity is kept; entities without a
    matching row score ``0.0``. The per-entity scores are then passed to
    ``combine_scores`` and its result is returned.

    Args:
        tweet: The tweet to score; passed unchanged to ``transform_tweet``.
        storage: Table of known entities, indexed with a boolean mask on
            ``'Entity'`` and iterated with ``.iterrows()``; each row provides
            a ``'Vector array'`` value (presumably a pandas DataFrame --
            verify against callers). When ``None`` it is unpickled from
            ``config.data_folder`` / ``config.model_file``.
        bow: When true, vectors are treated as dict-like bag-of-words
            mappings and compared with an overlap coefficient. Otherwise
            cosine similarity is used (per part and averaged when the
            module-level ``SPLIT`` flag is set).

    Returns:
        Whatever ``combine_scores`` returns for the ``{entity: score}`` dict.

    Raises:
        IOError: If ``storage`` is ``None`` and the model file does not exist.
    """
    if storage is None:
        model_path = os.path.join(config.data_folder, config.model_file)
        if not os.path.isfile(model_path):
            # Raising a bare string is itself a TypeError; raise a real
            # exception so the message actually reaches the caller.
            raise IOError('Model was not found!')
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only point config.model_file at a trusted file.
        with open(model_path, 'rb') as model_file:
            storage = pickle.load(model_file)

    print(tweet)
    transformed_tweet = transform_tweet(tweet, bow)
    print([x[0] for x in transformed_tweet], [np.sum(x[2]) for x in transformed_tweet])

    scores = {}
    for entity, _entity_type, vector_array in transformed_tweet:
        best_score = 0.0
        matching_rows = storage[storage['Entity'] == entity]
        for tweetid, item in matching_rows.iterrows():
            stored_vector = item['Vector array']
            if bow:
                similarity = _bow_overlap(vector_array, stored_vector)
            elif SPLIT:
                similarity = _split_cosine_similarity(vector_array, stored_vector)
                # print(entity, entity_type)
            else:
                # Computed once and reused for both the score and the trace.
                similarity = 1 - cosine(vector_array, stored_vector)
                print(similarity, entity, tweet, str(tweetid))
            best_score = np.max([similarity, best_score])
        scores[entity] = best_score
    return combine_scores(scores)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号