def _chunk_to_dataframe(self):
    """Assemble the currently buffered chunk of rows into a DataFrame.

    Columns are created in ``self.column_names`` order. Each column is
    read from one of two buffers depending on ``self.column_types[j]``:

    * ``b'd'``: the next row of ``self._byte_chunk``, reinterpreted as
      8-byte floats in ``self.byte_order`` and stored as native float64.
      When ``self.convert_dates`` is true and the column format is
      ``"MMDDYY"``, the values are treated as day offsets from
      1960-01-01 and converted to datetimes.
    * ``b's'``: the next row of ``self._string_chunk``, with trailing
      NUL bytes and spaces stripped, decoded with ``self.encoding`` when
      one is set, and with empty values replaced by NaN when
      ``self.blank_missing`` is true.

    Returns
    -------
    pandas.DataFrame
        One row per buffered record, indexed by ``range(m - n, m)`` where
        ``n`` is ``self._current_row_in_chunk_index`` and ``m`` is
        ``self._current_row_in_file_index``.

    Raises
    ------
    ValueError
        If a column type is neither ``b'd'`` nor ``b's'``.
    """
    n = self._current_row_in_chunk_index
    m = self._current_row_in_file_index
    ix = range(m - n, m)
    rslt = pd.DataFrame(index=ix)

    # ``pd.datetime`` was removed in pandas 2.0; ``pd.Timestamp`` is
    # available on every pandas version and adds to timedeltas the same way.
    epoch = pd.Timestamp("1960-01-01")

    # js / jb track the next unread row of the string / byte buffers.
    js, jb = 0, 0
    for j in range(self.column_count):
        name = self.column_names[j]
        col_type = self.column_types[j]
        if col_type == b'd':
            raw = self._byte_chunk[jb, :].view(dtype=self.byte_order + 'd')
            # Convert to native-endian float64 *before* handing the data to
            # pandas, so a foreign byte order never reaches the DataFrame.
            rslt[name] = np.asarray(raw, dtype=np.float64)
            if self.convert_dates and self.column_formats[j] == "MMDDYY":
                rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d')
            jb += 1
        elif col_type == b's':
            rslt[name] = self._string_chunk[js, :]
            # Values are padded on the right with NULs and/or spaces.
            rslt[name] = rslt[name].apply(lambda x: x.rstrip(b'\x00 '))
            if self.encoding is not None:
                rslt[name] = rslt[name].apply(
                    lambda x: x.decode(encoding=self.encoding))
            if self.blank_missing:
                ii = rslt[name].str.len() == 0
                rslt.loc[ii, name] = np.nan
            js += 1
        else:
            raise ValueError("unknown column type %s" %
                             self.column_types[j])
    return rslt
sas7bdat.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录