def packages(self):
    """
    Fetch the package index XML and split its packages by hosting origin.

    Downloads the document at ``self._xml_url``, walks every ``<package>``
    element and stores that element's attribute dict under its ``id``
    attribute. A package whose ``url`` attribute starts with
    ``self._nltk_data_url`` is filed in the first mapping; every other
    package is filed in the second.

    Returns:
        A ``(nltk_packages, third_party)`` tuple. Both are
        ``defaultdict(dict)`` instances mapping package id to the
        attribute dict of the corresponding ``<package>`` element.

    Raises:
        requests.exceptions.Timeout: the server did not answer in time.
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status.
        KeyError: a ``<package>`` element has no ``id`` or ``url``
            attribute.
    """
    # Without a timeout a stalled server would block the caller forever;
    # the limit applies to connecting and to each read from the socket.
    response = requests.get(self._xml_url, timeout=30)
    # Fail loudly on an error status instead of parsing the error page,
    # which would otherwise come back as two silently empty mappings.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    nltk_packages, third_party = defaultdict(dict), defaultdict(dict)
    for pack in soup.find_all("package"):
        package_attributes = pack.attrs
        name = package_attributes["id"]
        # Keeps track of nltk_data packages vs third_party packages.
        hosted_by_nltk = package_attributes["url"].startswith(self._nltk_data_url)
        bucket = nltk_packages if hosted_by_nltk else third_party
        bucket[name] = package_attributes
    return nltk_packages, third_party
评论列表
文章目录