extract.py 文件源码-python代码片段

extract.py 文件源码
python
阅读 20 收藏 0 点赞 0 评论 0
def parse(self, text, company_name):
        """Print and record the name and snippet of every search result in *text*.

        The markup is parsed with BeautifulSoup's ``lxml`` parser. For each
        ``<li class="search-result">`` element, the text of its
        ``<span class="actor-name">`` and ``<p class="search-result__snippets">``
        children is extracted, passed through ``unidecode`` and emitted as
        ``[+] :: <name> :: <occupation>``: once to stdout and once, with a
        trailing newline, to ``self.filewrite``.

        Args:
            text: HTML markup of a search-results page.
            company_name: Not used by this method; kept so existing callers
                keep working.
        """
        soup = BeautifulSoup(text, 'lxml')

        for item in soup.find_all('li', {'class': 'search-result'}):
            name_tag = item.find('span', {'class': 'actor-name'})
            snippet_tag = item.find('p', {'class': 'search-result__snippets'})

            # "??" marks a field whose element is missing from the result.
            name = name_tag.text if name_tag else "??"
            # Collapse embedded newlines so every result stays on one line.
            occupation = snippet_tag.text.replace('\n', ' ') if snippet_tag else "??"

            # Only the transliteration is guarded: the line is built once so a
            # failure in print/filewrite can no longer cause the same result
            # to be printed twice by the fallback path.
            try:
                line = '[+] :: {} :: {}'.format(unidecode(name), unidecode(occupation))
            except Exception:
                # Deliberate best-effort: if transliteration fails, degrade to
                # a lossy ASCII rendering (still text, not bytes) instead of
                # dropping the result.
                line = '[+] :: {} :: {}'.format(
                    name.encode('ascii', 'replace').decode('ascii'),
                    occupation.encode('ascii', 'replace').decode('ascii'))

            print(line)
            self.filewrite(line + '\n')