get_nodes.py 文件源码-python代码片段

get_nodes.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

def scrape_org_general(org,main_list,organisation):
    """Append GitHub user-search hits for ``org`` to ``main_list``.

    Walks the paginated GitHub user search (``type=Users``) starting at
    page 1 and stops at the first page that contains no
    ``div.user-list-info.ml-2`` elements.

    Args:
        org: Search term. Spaces are replaced with ``+`` before the term
            is placed in the query string.
        main_list: List mutated in place. One ``[username, name]`` pair is
            appended per result, where ``username`` is the ``href`` of the
            first ``<a>`` in the result entry and ``name`` is the stripped
            text of the first ``span.f4.ml-1`` in it.
        organisation: Unused by this function; kept so existing callers
            keep working.

    Returns:
        None. Results are delivered through ``main_list``.

    Raises:
        IndexError: If a result entry has no ``<a>`` or no matching
            ``<span>``.
    """
    # str.replace returns a new string; the result has to be kept,
    # otherwise the spaces are never actually replaced.
    query = org.replace(" ", "+")

    # The context manager releases the session's pooled connections even
    # if a request or the parsing below raises.
    with requests.Session() as session:
        page = 1
        while True:
            url = (
                "https://github.com/search?p=" + str(page)
                + "&q=" + query
                + "+type%3Auser&type=Users&utf8=%E2%9C%93"
            )
            response = session.get(url)
            soup = BeautifulSoup(response.text, "lxml")
            entries = soup.find_all("div", {"class": "user-list-info ml-2"})

            # A page without result entries ends the pagination. The HTTP
            # status is not inspected, so an error page ends it the same way.
            if not entries:
                break

            for entry in entries:
                username = entry.find_all("a")[0]["href"]
                name = entry.find_all("span", {"class": "f4 ml-1"})[0].text.strip()
                main_list.append([username, name])

            page += 1

# scraping the github pages