def _get_growth_data(year, quarter, pageNo, dataArr):
ct._write_console()
try:
request = Request(ct.GROWTH_URL%(ct.P_TYPE['http'], ct.DOMAINS['vsf'],
ct.PAGES['fd'], year,
quarter, pageNo, ct.PAGE_NUM[1]))
text = urlopen(request, timeout=10).read()
text = text.decode('GBK')
text = text.replace('--', '')
html = lxml.html.parse(StringIO(text))
res = html.xpath("//table[@class=\"list_table\"]/tr")
if ct.PY3:
sarr = [etree.tostring(node).decode('utf-8') for node in res]
else:
sarr = [etree.tostring(node) for node in res]
sarr = ''.join(sarr)
sarr = '<table>%s</table>'%sarr
df = pd.read_html(sarr)[0]
df.columns=ct.GROWTH_COLS
dataArr = dataArr.append(df, ignore_index=True)
nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
if len(nextPage)>0:
pageNo = re.findall(r'\d+', nextPage[0])[0]
return _get_growth_data(year, quarter, pageNo, dataArr)
else:
return dataArr
except Exception as e:
print(e)
评论列表
文章目录