suneo-bot.py 文件源码-python代码片段

suneo-bot.py 文件源码
python
阅读 31 收藏 0 点赞 0 评论 0
def _readme_has_wp_logo(dominio):
    """Return True if http://<dominio>/readme.html has a non-empty <h1 id="logo">.

    Network failures while fetching the page are treated as "not found" and
    yield False.
    """
    url = "http://" + dominio + "/readme.html"
    try:
        response = urllib.urlopen(url)
        try:
            readme = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
    except (IOError, httplib.HTTPException):
        # urllib.urlopen reports connection problems as IOError.
        return False

    readme_soup = BeautifulSoup(readme, "html.parser")
    for heading in readme_soup.find_all("h1", {"id": "logo"}):
        # remove_tags is a helper defined elsewhere in this file; presumably it
        # strips the markup and leaves the heading text -- TODO confirm.
        if remove_tags(str(heading)):
            return True
    return False


def detect_wp(html, dominio):
    """Guess whether a site runs WordPress.

    The checks run from cheapest to most expensive and stop at the first hit:

    1. the raw HTML contains "wp-content";
    2. an element with name="generator" mentions WordPress (any letter case);
    3. a <link> element references xmlrpc.php;
    4. http://<dominio>/readme.html contains a non-empty <h1 id="logo">.

    Args:
        html: HTML source of the page that was already downloaded.
        dominio: Host name used to build the readme.html URL.

    Returns:
        True if any check matches, False otherwise (including when a check
        fails unexpectedly).
    """
    # Plain substring test; `in` also matches at index 0, which the previous
    # `find(...) > 0` comparison missed.
    if "wp-content" in html:
        return True

    soup = BeautifulSoup(html, "html.parser")
    try:
        # Look for the generator meta tag. Compare case-insensitively: the
        # product name is spelled "WordPress", so a literal "Wordpress" test
        # never matches.
        generators = soup.findAll(attrs={"name": "generator"})
        if "wordpress" in str(generators).lower():
            return True

        # Look for <link> elements pointing at xmlrpc.php.
        if any("xmlrpc.php" in str(link) for link in soup.findAll("link")):
            return True

        # Last resort: a single request for the readme.html file.
        return _readme_has_wp_logo(dominio)
    except Exception:
        # Detection is best-effort: any unexpected failure means "not detected".
        return False