def _split_symbol_mappings(df):
    """Split out the symbol: sid mappings from the raw data.

    Before anything is returned, the ``(start_date, end_date)`` pairs of
    each symbol are passed to ``intersecting_ranges``; any intersections it
    reports are treated as ambiguous ownership of that symbol.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with multiple rows for each symbol: sid pair. It must
        contain every column named in ``mapping_columns``, which in turn
        must include ``symbol``, ``start_date`` and ``end_date``.

    Returns
    -------
    asset_info : pd.DataFrame
        The asset info with one row per asset, produced by applying
        ``_check_asset_group`` to each group of rows that share a value in
        index level 0.
    symbol_mappings : pd.DataFrame
        The dataframe of just symbol: sid mappings, i.e. the
        ``mapping_columns`` columns of ``df`` with ``df``'s index kept
        as-is. Per the original documentation the index is the sid and the
        columns are symbol, start_date, and end_date.

    Raises
    ------
    ValueError
        If at least one symbol has intersecting date ranges. The message
        lists each such symbol (sorted) together with its intersections and
        its start/end dates.
    """
    mappings = df[list(mapping_columns)]

    # symbol -> (intersecting ranges, that symbol's start/end date frame)
    ambiguous = {}
    for symbol in mappings.symbol.unique():
        persymbol = mappings[mappings.symbol == symbol]
        intersections = list(intersecting_ranges(map(
            from_tuple,
            zip(persymbol.start_date, persymbol.end_date),
        )))
        if intersections:
            ambiguous[symbol] = (
                intersections,
                persymbol[['start_date', 'end_date']].astype('datetime64[ns]'),
            )

    if ambiguous:
        # NOTE: there must be no trailing comma after the generator
        # expression below; Python 3.7+ rejects an unparenthesized generator
        # argument that is followed by a comma with a SyntaxError.
        details = '\n'.join(
            '%s:\n intersections: %s\n %s' % (
                symbol,
                tuple(map(_format_range, intersections)),
                # indent the dataframe string
                '\n '.join(str(date_ranges).splitlines()),
            )
            # ``first`` is applied to each (symbol, value) item; presumably
            # it returns the symbol so the report is ordered by symbol.
            for symbol, (intersections, date_ranges) in sorted(
                ambiguous.items(),
                key=first,
            )
        )
        raise ValueError(
            'Ambiguous ownership for %d symbol%s, multiple assets held the'
            ' following symbols:\n%s' % (
                len(ambiguous),
                '' if len(ambiguous) == 1 else 's',
                details,
            )
        )

    return (
        df.groupby(level=0).apply(_check_asset_group),
        df[list(mapping_columns)],
    )
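# [web-page residue, not part of this module] Comment list
# [web-page residue, not part of this module] Table of contents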