def setUp(self):
    """Build the random fixture frame stored on ``self.df``.

    The frame has 1000 rows and four columns:

    * ``A`` / ``B`` -- standard-normal floats, with roughly 50% and 25%
      of the rows respectively overwritten by NaN.
    * ``AdmitDTS`` -- random timestamps, at one-second resolution, drawn
      from the half-open window [2015-05-01, 2015-05-20).
    * ``LastLoadDTS`` -- the constant timestamp 2015-05-20.
    """
    n_rows = 1000
    # ``pd.datetime`` was removed in pandas 2.0; ``pd.Timestamp`` is the
    # supported equivalent and needs no extra import.
    window_start = pd.Timestamp('2015-05-01')
    load_date = pd.Timestamp('2015-05-20')

    self.df = pd.DataFrame(np.random.randn(n_rows, 4),
                           columns=['A', 'B', 'AdmitDTS', 'LastLoadDTS'])

    # generate load date
    self.df['LastLoadDTS'] = load_date

    # generate datetime objects for admit date
    window_seconds = int((load_date - window_start).total_seconds())
    # Build the column in one go: ``pd.Series(1000)`` would create a single
    # integer element (not 1000 slots), and filling it label by label both
    # mixes dtypes and re-allocates the Series on every assignment.
    self.df['AdmitDTS'] = pd.Series(
        [window_start + timedelta(seconds=randrange(window_seconds))
         for _ in range(n_rows)],
        index=self.df.index,
    )

    # add nulls
    null_a = np.random.rand(n_rows) > .5
    self.df.loc[null_a, ['A']] = np.nan
    null_b = np.random.rand(n_rows) > .75
    self.df.loc[null_b, ['B']] = np.nan
test_feature_availability_profiler.py 文件源码
python
阅读 38
收藏 0
点赞 0
评论 0
评论列表
文章目录