asset_writer.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:catalyst 作者: enigmampc 项目源码 文件源码
def _split_symbol_mappings(df):
    """Separate the symbol: sid mappings from the raw asset data.

    Before splitting, every symbol is checked for overlapping ownership:
    the (start_date, end_date) pairs of all rows sharing a symbol are
    turned into ranges and tested for intersections. Any intersection is
    treated as ambiguous ownership and reported in a single error.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with multiple rows for each symbol: sid pair.

    Returns
    -------
    asset_info : pd.DataFrame
        The asset info with one row per asset.
    symbol_mappings : pd.DataFrame
        The dataframe of just symbol: sid mappings. The index will be
        the sid, then there will be three columns: symbol, start_date, and
        end_date.

    Raises
    ------
    ValueError
        If the date ranges held under at least one symbol intersect. The
        message lists every offending symbol, sorted, together with the
        intersecting ranges and the rows involved.
    """
    mappings = df[list(mapping_columns)]

    # symbol -> (intersecting ranges, start/end dates of the rows involved)
    conflicts = {}
    for symbol in mappings.symbol.unique():
        rows = mappings[mappings.symbol == symbol]
        held_ranges = map(from_tuple, zip(rows.start_date, rows.end_date))
        overlaps = list(intersecting_ranges(held_ranges))
        if not overlaps:
            continue
        date_columns = rows[['start_date', 'end_date']]
        conflicts[symbol] = (overlaps, date_columns.astype('datetime64[ns]'))

    if conflicts:
        # Report symbols in sorted order so the message is deterministic.
        details = []
        for symbol, (overlaps, dates) in sorted(conflicts.items(), key=first):
            # indent the dataframe string
            indented_dates = '\n  '.join(str(dates).splitlines())
            details.append(
                '%s:\n  intersections: %s\n  %s' % (
                    symbol,
                    tuple(map(_format_range, overlaps)),
                    indented_dates,
                )
            )

        conflict_count = len(conflicts)
        plural_suffix = '' if conflict_count == 1 else 's'
        raise ValueError(
            'Ambiguous ownership for %d symbol%s, multiple assets held the'
            ' following symbols:\n%s' % (
                conflict_count,
                plural_suffix,
                '\n'.join(details),
            )
        )

    # Group on the first index level and reduce each group through
    # _check_asset_group to produce the per-asset frame.
    asset_info = df.groupby(level=0).apply(_check_asset_group)
    symbol_mappings = df[list(mapping_columns)]
    return asset_info, symbol_mappings
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号