def __init__(self, onet_source=OnetSourceDownloader):
    """Load the onet titles and fit a TF-IDF model on their descriptions.

    Args:
        onet_source: Zero-argument callable; its result is kept as
            ``self.onet_downloader``. Defaults to ``OnetSourceDownloader``.

    Attributes set:
        onet_downloader: The object returned by ``onet_source()``.
        onet_titles: Whatever ``self.retrieve_onet_titles()`` returns; it
            is indexed by ``'Description'`` and exposes ``.index`` below
            (presumably a pandas DataFrame -- confirm against the helper).
        tfidf_vectorizer: ``TfidfVectorizer`` using English stop words.
        tf: Result of fitting the vectorizer on the description values.
        concept_row: The index values of ``onet_titles``.
    """
    # The downloader is stored before the titles are retrieved, matching
    # the original statement order.
    downloader = onet_source()
    self.onet_downloader = downloader

    titles = self.retrieve_onet_titles()
    self.onet_titles = titles
    logging.info('Retrieved onet titles')

    # Modelled on explicit semantic analysis (ESA):
    # https://en.wikipedia.org/wiki/Explicit_semantic_analysis
    vectorizer = TfidfVectorizer(stop_words='english')
    self.tfidf_vectorizer = vectorizer

    # optimization note: convert from CSR to CSC
    descriptions = titles['Description'].values
    self.tf = vectorizer.fit_transform(descriptions)
    self.concept_row = titles.index.values

    # Probe for the WordNet data and download it if the probe fails.
    # NOTE(review): presumably ``wn`` is NLTK's lazily loaded WordNet
    # reader, so touching an attribute forces the load -- confirm against
    # the file's imports.
    try:
        getattr(wn, 'synset')
    except LookupError:
        nltk.download('wordnet')
esa_jobtitle_normalizer.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录