def _broker_tops(last=5, pageNo=1, retry_count=3, pause=0.001, dataArr=pd.DataFrame()):
ct._write_console()
for _ in range(retry_count):
time.sleep(pause)
try:
request = Request(rv.LHB_SINA_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'], rv.LHB_KINDS[1],
ct.PAGES['fd'], last, pageNo))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@id=\"dataTable\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = '<table>%s</table>'%sarr
df = pd.read_html(sarr)[0]
df.columns = rv.LHB_YYTJ_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _broker_tops(last, pageNo, retry_count, pause, dataArr)
else:
return dataArr
except Exception as e:
print(e)
评论列表
文章目录