def _newstocks(data, pageNo, retry_count, pause):
for _ in range(retry_count):
time.sleep(pause)
ct._write_console()
try:
html = lxml.html.parse(rv.NEW_STOCKS_URL%(ct.P_TYPE['http'],ct.DOMAINS['vsf'],
ct.PAGES['newstock'], pageNo))
res = html.xpath('//table[@id=\"NewStockTable\"]/tr')
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = sarr.replace('<font color="red">*</font>', '')
sarr = '<table>%s</table>'%sarr
df = pd.read_html(StringIO(sarr), skiprows=[0, 1])[0]
df = df.drop([df.columns[idx] for idx in [1, 12, 13, 14]], axis=1)
df.columns = rv.NEW_STOCKS_COLS
df['code'] = df['code'].map(lambda x : str(x).zfill(6))
res = html.xpath('//table[@class=\"table2\"]/tr[1]/td[1]/a/text()')
tag = '???' if ct.PY3 else unicode('???', 'utf-8')
hasNext = True if tag in res else False
data = data.append(df, ignore_index=True)
pageNo += 1
if hasNext:
data = _newstocks(data, pageNo, retry_count, pause)
except Exception as ex:
print(ex)
else:
return data
评论列表
文章目录