finance_similarity.py 文件源码

python
阅读 21 收藏 0 点赞 0 评论 0

项目:Spark-in-Finance-Quantitative-Investing 作者: litaotao 项目源码 文件源码
def cal_minute_bar_similarity(line_data):
    """Score how closely one minute-bar line matches today's minute bars.

    The first ``today_length_share.value`` rows of today's ``ratio`` column
    are compared with the same number of rows of the candidate line's
    ``ratio`` column (candidate rows ordered by ``barTime`` ascending).
    The comparison is done twice: on the raw values and on values min-max
    scaled to [0, 1] independently for each series.

    Args:
        line_data: a ``(line_path, json_data)`` pair, where ``json_data`` is
            a JSON string that ``pandas.DataFrame.from_dict`` can load and
            that provides ``barTime`` and ``ratio`` columns.

    Returns:
        A flat list of six items, numeric values rounded to 5 decimals::

            [diff_square, diff_square_normalized, line_path,
             diff_var, diff_var_normalized, line_path]

        ``diff_square*`` is the sum of squared point-wise differences and
        ``diff_var*`` is the variance of the point-wise differences
        (``Series.var`` with the pandas default ``ddof=1``).

    Raises:
        ValueError: raised by pandas when the two truncated series have
            different lengths, e.g. the candidate line has fewer rows than
            ``today_length_share.value``.
    """
    # Function-scope import kept from the original; presumably so the module
    # is imported where the function actually runs -- confirm before hoisting.
    import sklearn.preprocessing

    line_path, line_json = line_data

    # Module-level shared values (objects exposing ``.value``) defined
    # elsewhere in the file.
    # NOTE(review): today's frame is used in the order it arrives; only the
    # candidate line is sorted here -- confirm the producer sorts it.
    today_data = pd.DataFrame.from_dict(json.loads(df_today_share.value))
    today_data_length = today_length_share.value

    line_df = pd.DataFrame.from_dict(json.loads(line_json))
    # DataFrame.sort(columns=...) was removed in pandas 0.20; sort_values is
    # the supported replacement with the same ordering semantics.
    line_df = line_df.sort_values(by='barTime', ascending=True)

    tmp = pd.DataFrame()
    tmp['first'] = list(today_data[:today_data_length]['ratio'])
    tmp['second'] = list(line_df[:today_data_length]['ratio'])

    scaler = sklearn.preprocessing.MinMaxScaler()
    for raw_col, scaled_col in (('first', 'first_normalized'),
                                ('second', 'second_normalized')):
        # scikit-learn estimators require 2-D input (n_samples, n_features):
        # feed one column, then flatten the result back to 1-D.
        column = np.array(list(tmp[raw_col])).reshape(-1, 1)
        tmp[scaled_col] = scaler.fit_transform(column).ravel()

    tmp['diff'] = tmp['first'] - tmp['second']
    tmp['diff_normalized'] = tmp['first_normalized'] - tmp['second_normalized']

    # skipna=False keeps NaN propagation identical to the builtin sum().
    diff_square = float((tmp['diff'] ** 2).sum(skipna=False))
    diff_square_normalized = float(
        (tmp['diff_normalized'] ** 2).sum(skipna=False))

    diff_var = float(tmp['diff'].var())
    diff_var_normalized = float(tmp['diff_normalized'].var())

    res_square = [round(diff_square, 5),
                  round(diff_square_normalized, 5),
                  line_path]
    res_var = [round(diff_var, 5),
               round(diff_var_normalized, 5),
               line_path]

    return res_square + res_var


# ### ???
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号