downloader.py 文件源码-python代码片段

downloader.py 文件源码

python

阅读 47 收藏 0 点赞 0 评论 0

def packages(self):
        """
        Fetch the XML package index and split its packages into two groups.

        The document at ``self._xml_url`` is downloaded and every
        ``<package>`` element is collected.  A package whose ``url``
        attribute starts with ``self._nltk_data_url`` is filed under the
        NLTK group; every other package is filed under the third-party
        group.

        Returns:
            A ``(nltk_packages, third_party)`` tuple.  Each item is a
            ``defaultdict(dict)`` mapping a package's ``id`` attribute to
            the full attribute dict of its ``<package>`` element.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with a 4xx/5xx status.
            KeyError: if a ``<package>`` element has no ``id`` or ``url``
                attribute.
        """
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(self._xml_url, timeout=30)
        # An HTTP error page contains no <package> tags; without this check
        # it would be reported as an empty (but seemingly valid) index.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        nltk_packages, third_party = defaultdict(dict), defaultdict(dict)
        for pack in soup.find_all('package'):
            package_attributes = pack.attrs
            name = package_attributes['id']
            # Keeps track of nltk_data packages vs third_party packages.
            if package_attributes['url'].startswith(self._nltk_data_url):
                nltk_packages[name] = package_attributes
            else:
                third_party[name] = package_attributes
        return nltk_packages, third_party