# get_nodes.py -- Python source code snippet

def _first_text(card, tag, css_class):
    """Return the text of the first ``tag`` with class ``css_class`` in ``card``, or None."""
    match = card.find(tag, {"class": css_class})
    if match is None:
        return None
    return match.text


def get_data(username, no):
    """Scrape the followers or following list of a GitHub user.

    Requests ``https://github.com/<username>?page=N&tab=<tab>`` for
    N = 1, 2, ... and collects every user card found on those pages.
    Paging stops as soon as a fetched page contains no ``pagination``
    element, or once the page number exceeds ``max_number / no_per_page``.

    NOTE(review): ``max_number`` and ``no_per_page`` are not defined in this
    function; they are assumed to be module-level values defined elsewhere
    in the file.

    Args:
        username: GitHub login whose list is scraped.
        no: ``0`` selects the ``followers`` tab; any other value selects
            the ``following`` tab.

    Returns:
        A list with one ``[href, company, area, name]`` entry per user card:
        ``href`` is the ``href`` of the card's first link, ``company`` and
        ``area`` are stripped element texts, and ``name`` is the unstripped
        element text. A field whose element is absent from the card is
        reported as the placeholder ``"xxxxx"``.

    Raises:
        IndexError: if a card contains no ``<a>`` element.
        KeyError: if a card's first ``<a>`` element has no ``href``.
    """
    tab = 'followers' if no == 0 else 'following'
    placeholder = "xxxxx"
    # Loop-invariant page limit; evaluated once instead of on every iteration.
    max_pages = max_number / no_per_page

    cards = []
    pagination = [""]  # non-empty sentinel so the first page is always fetched
    page = 1
    # The context manager closes the session's pooled connections on exit,
    # including when a request raises.
    with requests.Session() as session:
        while pagination and page <= max_pages:
            response = session.get(
                'https://github.com/' + username,
                params=[('page', page), ('tab', tab)],
            )
            # Name the parser explicitly: avoids bs4's "no parser specified"
            # warning and keeps results independent of which optional
            # parsers happen to be installed.
            soup = BeautifulSoup(response.text, "html.parser")
            cards.extend(soup.find_all(
                "div",
                {"class": "d-table col-12 width-full py-4 border-bottom border-gray-light"},
            ))
            pagination = soup.find_all("div", {"class": "pagination"})
            page += 1

    # Extract link, company, area and display name from each card.
    final = []
    for card in cards:
        href = card.find_all("a")[0]['href']
        company = _first_text(card, "span", "mr-3")
        area = _first_text(card, "p", "text-gray text-small mb-0")
        # Searched directly on the card; re-parsing its HTML is unnecessary.
        name = _first_text(card, "span", "f4 link-gray-dark")
        final.append([
            href,
            placeholder if company is None else company.strip(),
            placeholder if area is None else area.strip(),
            placeholder if name is None else name,
        ])
    return final