def parse(self, entry):
    """Read a tab-separated file into a sorted, numeric DataFrame.

    Args:
        entry: Location of the file to read. It is converted with ``str()``
            before being handed to ``pd.read_csv``.

    Returns:
        A DataFrame whose first two file columns form a MultiIndex named
        ``("date", "srcid")``, sorted by that index. Every data column is
        passed through ``pd.to_numeric(errors="coerce")``, so cells that
        cannot be parsed as numbers become NaN.
    """
    data = pd.read_csv(
        str(entry),
        engine="c",
        sep="\t",
        parse_dates=False,
        index_col=[0, 1],
    )
    data.index.names = ["date", "srcid"]

    # AMT bug: some files end with a spurious ('nvsplDate', 'Total_All') row
    # of all zeros; drop it when present. The length guard keeps a
    # header-only (empty) file from raising IndexError on index[-1].
    if len(data.index) > 0 and data.index[-1][0] == "nvsplDate":
        data = data.iloc[:-1, :]

    # NOTE: the "date" level is deliberately left as plain strings
    # (parse_dates=False, no to_datetime conversion): slicing a MultiIndex
    # with a datetime level proved unreliable, so conversion is disabled.

    # Ensure MultiIndex sortedness. DataFrame.sortlevel() no longer exists in
    # current pandas; sort_index() sorts all index levels the same way.
    # Rebinding (instead of inplace=True) avoids mutating the iloc slice above.
    data = data.sort_index()

    # Convert column by column. raw=True is intentionally not used: it routes
    # through numpy.apply_along_axis, which takes the output dtype from the
    # first column's result and can corrupt later float/NaN values.
    return data.apply(pd.to_numeric, errors="coerce")
# (Web-page residue, not part of the code: "Comment list" / "Table of contents".)