def create_keyword_regex(keyword):
    r"""Compile a case-insensitive, word-bounded regex for *keyword*.

    The keyword is split with NLTK's ``wordpunct_tokenize``. Each token is
    escaped so it is matched literally, the tokens are joined with ``\s+``
    (any run of whitespace between consecutive tokens), and the result is
    wrapped in ``\b`` anchors.

    Args:
        keyword: The text to build a search pattern for.

    Returns:
        A compiled pattern using the ``re.I | re.UNICODE`` flags.
    """
    print('create_keyword_regex')
    # Called before the nltk import; presumably makes the package
    # importable (e.g. by adjusting the search path) -- confirm against
    # the helper's definition.
    ensure_package_path()
    from nltk.tokenize import wordpunct_tokenize as tokenize

    # One-element tuple so a tuple-valued keyword cannot break formatting.
    print('tokenize ==> %s' % (keyword,))
    tokens = tokenize(keyword)

    # Escape every token: punctuation tokens such as "++", "." or "(" would
    # otherwise be interpreted as regex syntax and either raise re.error or
    # match unintended text.
    # NOTE: tokens are joined with \s+, so at least one whitespace character
    # is required between consecutive tokens in the searched text.
    pattern = '\\s+'.join(re.escape(token) for token in tokens)
    pattern = '\\b%s\\b' % pattern
    print('compile pattern ==> %s' % (pattern,))
    return re.compile(pattern, re.I | re.UNICODE)
评论列表
文章目录