def words(self, lang=None, fileids=None, ignore_lines_startswith='#'):
    """
    Return the list of nonbreaking prefixes for the specified language(s).

    >>> from nltk.corpus import nonbreaking_prefixes as nbp
    >>> nbp.words('en')[:10] == [u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H', u'I', u'J']
    True
    >>> nbp.words('ta')[:5] == [u'\u0b85', u'\u0b86', u'\u0b87', u'\u0b88', u'\u0b89']
    True

    :param lang: key looked up in ``self.available_langs``; when present,
        it selects the single file ``'nonbreaking_prefix.' + <mapped value>``
        and any ``fileids`` argument is overridden.
    :param fileids: passed to ``self.raw`` when ``lang`` is not a known key.
    :param ignore_lines_startswith: lines starting with this prefix are
        left out of the result.
    :return: a list of words for the specified language(s).
    """
    # A recognised *lang* pins the lookup to that language's prefix file.
    # Otherwise *fileids* is used as given; per the original note, that
    # means prefixes for all languages when fileids is None.
    if lang in self.available_langs:
        fileids = ['nonbreaking_prefix.' + self.available_langs[lang]]

    prefixes = []
    for entry in line_tokenize(self.raw(fileids)):
        if entry.startswith(ignore_lines_startswith):
            continue
        prefixes.append(entry)
    return prefixes
python类line_tokenize()的实例源码
def words(self, fileids=None):
    """
    Return the raw text of the given file(s) split up by ``line_tokenize``.

    :param fileids: passed unchanged to ``self.raw``; defaults to None.
    :return: whatever ``line_tokenize`` produces for that raw text.
    """
    raw_text = self.raw(fileids)
    return line_tokenize(raw_text)
wordlist.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def words(self, fileids=None):
    """
    Tokenize the raw contents of ``fileids`` with ``line_tokenize``.

    :param fileids: forwarded as-is to ``self.raw``; defaults to None.
    :return: the result of ``line_tokenize`` on that raw text.
    """
    contents = self.raw(fileids)
    return line_tokenize(contents)
def words(self, fileids=None):
    """
    Fetch the raw text for ``fileids`` and run it through ``line_tokenize``.

    :param fileids: handed straight to ``self.raw``; defaults to None.
    :return: the value returned by ``line_tokenize``.
    """
    text = self.raw(fileids)
    return line_tokenize(text)
def words(self, fileids=None, ignore_lines_startswith='\n'):
    """
    Return the line-tokenized raw text, minus lines with an ignored prefix.

    :param fileids: passed unchanged to ``self.raw``; defaults to None.
    :param ignore_lines_startswith: entries starting with this prefix are
        dropped; the default is a newline character.
    :return: a list of the entries from ``line_tokenize`` that were kept.
    """
    kept = []
    for entry in line_tokenize(self.raw(fileids)):
        if not entry.startswith(ignore_lines_startswith):
            kept.append(entry)
    return kept
def words(self, fileids=None):
    """
    Apply ``line_tokenize`` to the raw text obtained for ``fileids``.

    :param fileids: forwarded to ``self.raw``; defaults to None.
    :return: the output of ``line_tokenize`` for that text.
    """
    raw_data = self.raw(fileids)
    return line_tokenize(raw_data)
def words(self, fileids=None):
    """
    Read the raw text via ``self.raw`` and pass it to ``line_tokenize``.

    :param fileids: given unchanged to ``self.raw``; defaults to None.
    :return: the result produced by ``line_tokenize``.
    """
    source_text = self.raw(fileids)
    return line_tokenize(source_text)
def words(self, fileids=None):
    """
    Return ``line_tokenize`` applied to the raw text of ``fileids``.

    :param fileids: passed through to ``self.raw``; defaults to None.
    :return: whatever ``line_tokenize`` returns for the raw text.
    """
    raw_content = self.raw(fileids)
    return line_tokenize(raw_content)
def words(self, fileids=None):
    """
    Load the raw text for ``fileids`` and tokenize it with ``line_tokenize``.

    :param fileids: supplied unchanged to ``self.raw``; defaults to None.
    :return: the tokenization returned by ``line_tokenize``.
    """
    loaded = self.raw(fileids)
    return line_tokenize(loaded)