def __init__(self,lang=None,text=None,tokenize=None,chunkers=None,filters=None):
    """Set up a new SpellChecker.

    The first argument selects the dictionary and may take three forms:

        * None:          the default language is looked up and then
                         handled as if it had been passed in
        * a string:      treated as a language tag from which a new
                         enchant Dict is created
        * anything else: taken to be an enchant Dict object and used
                         as-is; the language is read from its 'tag'
                         attribute, falling back to the default
                         language when that attribute is missing

    Optional keyword arguments are:

        * text:      text to be checked, installed at creation time
        * tokenize:  a custom tokenization function to use
        * chunkers:  a list of chunkers to apply during tokenization
        * filters:   a list of filters to apply during tokenization

    <chunkers> and <filters> are only consulted when <tokenize> is not
    given; in that case a tokenizer is requested for the resolved
    language, and the default tokenizer is used instead if none is
    registered for that language.

    Raises DefaultLanguageNotFoundError if no language could be
    determined by any of the routes above.
    """
    if lang is None:
        lang = get_default_language()

    if not isinstance(lang,basestring):
        # Not a language tag: use the object itself as the dictionary
        # and recover the language from it where possible.
        speller = lang
        try:
            lang = speller.tag
        except AttributeError:
            lang = get_default_language()
    else:
        speller = enchant.Dict(lang)

    if lang is None:
        raise DefaultLanguageNotFoundError

    self.lang = lang
    self.dict = speller

    if tokenize is None:
        try:
            tokenize = get_tokenizer(lang,chunkers,filters)
        except TokenizerNotFoundError:
            # No tokenizer registered for 'lang'; use the default one
            tokenize = get_tokenizer(None,chunkers,filters)
    self._tokenize = tokenize

    # No word is under consideration until iteration starts
    self.word = self.wordpos = None
    self._ignore_words = {}
    self._replace_words = {}

    # Start out with the empty string as the text to be checked
    self._text = array.array('u')
    self._use_tostring = False
    self._tokens = iter([])

    if text is not None:
        self.set_text(text)
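# (Web-page navigation labels "Comment list" / "Table of contents" were
# captured here during extraction; they are not part of the code.)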