# Imports this helper relies on (the "ct" constants module is assumed to be
# tushare.stock.cons, which defines PY3, HIST_FQ_COLS and NETWORK_URL_ERROR_MSG).
import time
from io import StringIO
from urllib.request import Request, urlopen  # Python 3 imports; a Python 2 fallback would use urllib2

import lxml.html
from lxml import etree
import numpy as np
import pandas as pd

from tushare.stock import cons as ct


def _parse_fq_data(url, index, retry_count, pause):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')  # the source page is GBK-encoded
            html = lxml.html.parse(StringIO(text))
            # Grab the table that holds the adjusted price data
            res = html.xpath('//table[@id="FundHoldSharesTable"]')
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            # Skip the two header rows of the HTML table
            df = pd.read_html(sarr, skiprows=[0, 1])[0]
            if len(df) == 0:
                return pd.DataFrame()
            # Index data carries fewer columns than individual stocks
            if index:
                df.columns = ct.HIST_FQ_COLS[0:7]
            else:
                df.columns = ct.HIST_FQ_COLS
            if df['date'].dtypes == object:  # np.object is removed in recent NumPy
                df['date'] = df['date'].astype(np.datetime64)
            df = df.drop_duplicates('date')
        except Exception as e:
            print(e)
        else:
            return df
    raise IOError(ct.NETWORK_URL_ERROR_MSG)
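
For context, a minimal call sketch follows. The URL below is a hypothetical placeholder: in tushare this helper is invoked by higher-level functions (such as get_h_data) that assemble the real query URL from the library's constants, so the exact query format here is an assumption.

# Hypothetical usage sketch -- the URL is a placeholder, not the real endpoint.
url = 'http://example.com/fq_data?code=600848&year=2014&quarter=1'
# index=False -> treat the result as an individual stock (full HIST_FQ_COLS);
# retry up to 3 times with a short pause between attempts.
df = _parse_fq_data(url, index=False, retry_count=3, pause=0.01)
print(df.head())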