Python 类 enchant.Dict() 的实例源码

spelling.py 文件源码 项目:node-gn 作者: Shouqun 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def open(self):
        """Set up the spell-checking state from ``self.config``.

        ``self.initialized`` stays ``False`` (and nothing else is prepared)
        when the ``enchant`` module is unavailable or when no dictionary
        name is configured.  Otherwise the ignore list, the enchant
        dictionary, the optional private word-list file handle, the
        optional unknown-word set and the punctuation regex are created,
        and ``self.initialized`` is switched to ``True`` at the very end.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        language = self.config.spelling_dict
        if not language:
            return

        configured_words = self.config.spelling_ignore_words.split(",")
        # "param" shows up in docstring parameter descriptions and
        # "pylint" shows up in pylint pragma comments, so both are
        # always ignored.
        self.ignore_list = (
            [word.strip() for word in configured_words] + ["param", "pylint"])

        private_path = self.config.spelling_private_dict_file
        if not private_path:
            self.spelling_dict = enchant.Dict(language)
        else:
            self.spelling_dict = enchant.DictWithPWL(language, private_path)
            # Opened in append mode; the handle is kept on the instance.
            self.private_dict_file = open(private_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Build the regex used to strip punctuation from text.
        # ' and _ are handled separately, so they are left out of the class.
        strippable = "".join(
            char for char in string.punctuation if char not in "'_")
        self.punctuation_regex = re.compile('[%s]' % re.escape(strippable))
        self.initialized = True
spelling.py 文件源码 项目:depot_tools 作者: webrtc-uwp 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def open(self):
        """Set up the spell-checking state from ``self.config``.

        ``self.initialized`` stays ``False`` (and nothing else is prepared)
        when the ``enchant`` module is unavailable or when no dictionary
        name is configured.  Otherwise the ignore list, the enchant
        dictionary, the optional private word-list file handle, the
        optional unknown-word set and the punctuation regex are created,
        and ``self.initialized`` is switched to ``True`` at the very end.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        language = self.config.spelling_dict
        if not language:
            return

        configured_words = self.config.spelling_ignore_words.split(",")
        # "param" shows up in docstring parameter descriptions and
        # "pylint" shows up in pylint pragma comments, so both are
        # always ignored.
        self.ignore_list = (
            [word.strip() for word in configured_words] + ["param", "pylint"])

        private_path = self.config.spelling_private_dict_file
        if not private_path:
            self.spelling_dict = enchant.Dict(language)
        else:
            self.spelling_dict = enchant.DictWithPWL(language, private_path)
            # Opened in append mode; the handle is kept on the instance.
            self.private_dict_file = open(private_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Build the regex used to strip punctuation from text.
        # ' and _ are handled separately, so they are left out of the class.
        strippable = "".join(
            char for char in string.punctuation if char not in "'_")
        self.punctuation_regex = re.compile('[%s]' % re.escape(strippable))
        self.initialized = True
replacers.py 文件源码 项目:Notes2ppt 作者: gsengupta2810 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __init__(self, dict_name='en', max_dist=2):
    """Store an enchant dictionary for ``dict_name`` and the ``max_dist`` limit.

    ``dict_name`` is passed straight to ``enchant.Dict``; ``max_dist`` is
    kept as given (presumably a maximum edit distance -- confirm against
    the methods that read it).
    """
    dictionary = enchant.Dict(dict_name)
    self.spell_dict = dictionary
    self.max_dist = max_dist
spelling.py 文件源码 项目:wuye.vim 作者: zhaoyingnan911 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def open(self):
        """Set up the spell-checking state from ``self.config``.

        ``self.initialized`` stays ``False`` (and nothing else is prepared)
        when the ``enchant`` module is unavailable or when no dictionary
        name is configured.  Otherwise the ignore list, the enchant
        dictionary, the optional private word-list file handle, the
        optional unknown-word set and the punctuation regex are created,
        and ``self.initialized`` is switched to ``True`` at the very end.
        """
        self.initialized = False
        self.private_dict_file = None

        if enchant is None:
            return
        language = self.config.spelling_dict
        if not language:
            return

        configured_words = self.config.spelling_ignore_words.split(",")
        # "param" shows up in docstring parameter descriptions and
        # "pylint" shows up in pylint pragma comments, so both are
        # always ignored.
        self.ignore_list = (
            [word.strip() for word in configured_words] + ["param", "pylint"])

        private_path = self.config.spelling_private_dict_file
        if not private_path:
            self.spelling_dict = enchant.Dict(language)
        else:
            self.spelling_dict = enchant.DictWithPWL(language, private_path)
            # Opened in append mode; the handle is kept on the instance.
            self.private_dict_file = open(private_path, "a")

        if self.config.spelling_store_unknown_words:
            self.unknown_words = set()

        # Build the regex used to strip punctuation from text.
        # ' and _ are handled separately, so they are left out of the class.
        strippable = "".join(
            char for char in string.punctuation if char not in "'_")
        self.punctuation_regex = re.compile('[%s]' % re.escape(strippable))
        self.initialized = True
__init__.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def __init__(self,lang=None,text=None,tokenize=None,chunkers=None,filters=None):
        """Build a SpellChecker from a language tag or an existing Dict.

        The first argument decides how the dictionary is obtained:

            * a string is treated as a language tag and a new enchant
              Dict is created for it;
            * anything else is used directly as the dictionary object;
            * None means "use the default language".

        Optional keyword arguments:

            * text:  text to be checked, set immediately if given
            * tokenize:  a custom tokenization function
            * chunkers:  chunkers applied during tokenization
            * filters:  filters applied during tokenization

        When <tokenize> is omitted, a tokenizer is looked up for the
        language.  For a Dict argument the language comes from its 'tag'
        attribute; if that attribute is missing the default language is
        used instead.  DefaultLanguageNotFoundError is raised when no
        language can be determined at all.
        """
        if lang is None:
            lang = get_default_language()

        if not isinstance(lang, basestring):
            # A dictionary object was supplied: use it as-is and recover
            # the language tag from it when possible.
            checker_dict = lang
            try:
                lang = checker_dict.tag
            except AttributeError:
                lang = get_default_language()
        else:
            checker_dict = enchant.Dict(lang)

        if lang is None:
            raise DefaultLanguageNotFoundError

        self.lang = lang
        self.dict = checker_dict

        if tokenize is None:
            try:
                tokenize = get_tokenizer(lang, chunkers, filters)
            except TokenizerNotFoundError:
                # No tokenizer registered for 'lang': use the default one.
                tokenize = get_tokenizer(None, chunkers, filters)
        self._tokenize = tokenize

        self.word = self.wordpos = None
        self._ignore_words = {}
        self._replace_words = {}
        # The text to be checked starts out as an empty unicode array.
        self._text = array.array('u')
        self._use_tostring = False
        self._tokens = iter([])

        if text is not None:
            self.set_text(text)
emailControl.py 文件源码 项目:SWCheckIn 作者: gsugar87 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def getInfoFromEmail(emailData):
    """Extract confirmation number, passenger names and check-in data.

    The message texts come from ``getEmailText(emailData[0][1])``.  Each
    text is split on whitespace and the passenger names are read from the
    tokens that follow the confirmation number (either bare or wrapped in
    ``*``), with several fix-ups for known layout variations.

    Returns a list of dicts with the keys ``confNum``, ``firstName``,
    ``lastName``, ``datetime`` and ``city`` -- one entry per combination
    of check-in date and passenger.  The list is empty when assembling
    the entries fails.

    NOTE(review): the ``return`` sits inside the loop, so only the first
    message text is ever processed, and ``None`` is returned implicitly
    when there are no message texts.  That behaviour is kept unchanged
    here; confirm whether it is intended.
    """
    msgTextList = getEmailText(emailData[0][1])
    for msgText in msgTextList:
        confNum = getConfNum(msgText)
        # see if there are multiple itineraries
        msgTextSplit = msgText.split()
        if confNum in msgTextSplit:
            confNumIndex = msgTextSplit.index(confNum)
        else:
            # The confirmation number may be wrapped in asterisks.
            confNumIndex = msgTextSplit.index('*'+confNum+'*')
        firstName = msgTextSplit[confNumIndex+1]
        lastName = msgTextSplit[confNumIndex+2]
        if 'Passenger(s)' in firstName:
            # The name is given as a single LAST/FIRST token.
            if '/' in lastName:
                firstName = lastName[lastName.index('/')+1:]
                lastName = lastName[0:lastName.index('/')]
            else:
                print("PROBLEM PARSING THE FIRST AND LAST NAMES!")
        elif msgTextSplit[confNumIndex+4] == 'Date':
            # Three name tokens precede 'Date': take the third as last name.
            lastName = msgTextSplit[confNumIndex+3]
            print("Make sure user used a middle initial")

        # see if there are < formatting issues
        if firstName == '>':
            firstName = msgTextSplit[confNumIndex+2]
            lastName = msgTextSplit[confNumIndex+4]
        if lastName == '>':
            print("AAAH")
            print(msgTextSplit[confNumIndex+3])
            lastName = msgTextSplit[confNumIndex+3]

        # A 6-character token (after dropping its first and last character)
        # that is not an English word is treated as a second confirmation
        # number, followed by the second passenger's names.
        possible2ndConf = msgTextSplit[confNumIndex+3][1:-1]
        if len(possible2ndConf) == 6 and not enchant.Dict("en_US").check(possible2ndConf):
            confNum = [confNum,str(possible2ndConf)]
            firstName = [firstName, str(msgTextSplit[confNumIndex+4])]
            lastName = [lastName, str(msgTextSplit[confNumIndex+5])]
        else:
            confNum = [confNum]
            firstName = [firstName]
            lastName = [lastName]
        # get the time you need to check in
        checkInTime = getCheckInTime(msgText)
        checkInDate = getCheckInDate(msgText)
        checkInCity = getCheckInCity(msgText)

        try:
            infoList = []
            for j in xrange(len(checkInDate)):
                for i in xrange(len(firstName)):
                    info = {'confNum':confNum[i],
                            'firstName':firstName[i],
                            'lastName':lastName[i],
                            'datetime':parser.parse(checkInDate[j] + ' ' +
                                                    checkInTime[j]),
                            'city':checkInCity[j]}
                    infoList.append(info)
        except Exception:
            # Best effort: any parsing/indexing failure yields no results.
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            infoList = []
        print('info from email:')
        print(infoList)
        return infoList
correct_item_descriptions.py 文件源码 项目:event-cui-transfer 作者: mit-ddig 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def consolidate_carevue(carevue):
    """Consolidate items from CV.

    The ``label`` column of ``carevue`` is cleaned, each vocabulary word
    that is unknown to both the personal word list and the en_US
    dictionary is replaced by its best suggestion from the personal word
    list, and items whose corrected labels coincide are grouped together.

    Returns a 4-tuple:
        * the cleaned label text,
        * the spell-corrected labels (index cast to ``str``),
        * the corrected labels restricted to one representative item id
          per group (the smallest id, compared as strings),
        * a dict mapping every item id (as ``str``) to a list holding the
          representative id of its group.
    """
    cv_item_text = clean_text(carevue['label'])
    cv_vectorizer = CountVectorizer(analyzer = "word")
    # Only the fitted vocabulary is needed; the count matrix is discarded.
    # NOTE(review): newer scikit-learn releases expose
    # ``get_feature_names_out`` instead of ``get_feature_names`` -- confirm
    # which version this project targets.
    cv_vectorizer.fit_transform(cv_item_text)
    cv_vocab = cv_vectorizer.get_feature_names()

    # Map every vocabulary word to its corrected spelling, using the
    # personal word list as the "dictionary" for suggestions.
    corrected = {}
    d = enchant.request_pwl_dict(
        main_dir + "/metavision_ids_icds_vocab_new.txt")
    d_english = enchant.Dict("en_US")
    for word in cv_vocab:
        word = word.lower()
        if not d.check(word) and not d.check(word.upper()) \
           and not d_english.check(word):
            suggestions = d.suggest(word)
            if suggestions == []:
                corrected[word] = word
            else:
                corrected[word] = best_match(word, suggestions, [])
        else:
            corrected[word] = word

    # apply map to correct spellings
    cv_item_corrected = \
        cv_item_text.str.split().apply(translate_words, args=(corrected,))
    cv_items_spellcheck = cv_item_corrected.str.join(' ')
    cv_items_df = pd.DataFrame({'itemid': cv_items_spellcheck.index.values,
                                'label': cv_items_spellcheck.values})
    grouped = cv_items_df[['itemid', 'label']].groupby('label')

    # For every item id, record the smallest id (as a string) of the
    # group sharing its corrected label.
    dict_consolidate = {}
    for itemid in cv_items_df.itemid.astype(str):
        dict_consolidate[itemid] = []
    for key in grouped.groups.keys():
        values = grouped.get_group(key)
        min_val = min(values.itemid.astype(str))
        for val in values.itemid.astype(str):
            dict_consolidate[val].append(min_val)

    # One representative id per group; the set removes duplicates.
    map_to_unique = {min(group_ids) for group_ids in dict_consolidate.values()}
    cv_items_spellcheck.index = cv_items_spellcheck.index.astype(str)
    # filter cv_items_spellcheck so that there are no redundant items
    # (pandas no longer accepts a set as an indexer, so pass a list).
    cv_items_spellcheck2 = cv_items_spellcheck.loc[list(map_to_unique)]
    return cv_item_text, cv_items_spellcheck, \
        cv_items_spellcheck2, dict_consolidate


问题


面经


文章

微信
公众号

扫码关注公众号