python类unidecode()的实例源码

unidecoder.py 文件源码 项目:NLPre 作者: NIHOPA 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def __call__(self, unicode_text):
    '''
    Transliterate a document with unidecode.

    Args:
        unicode_text: a unicode document
    Returns:
        text: the result of unidecode.unidecode applied to unicode_text
    '''
    ascii_text = unidecode.unidecode(unicode_text)
    return ascii_text

# if __name__ == "__main__":
#    text = u"?-Helix ?-sheet ?? ?? ?? ?? ?? ??"
#    parser = unidecoder()
#    print(parser(text))
__init__.py 文件源码 项目:geoextract 作者: stadt-karlsruhe 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def normalize(self, s):
        '''
        Normalize text.

        The string is stripped and lower-cased, then a sequence of
        optional clean-up steps is applied, each one enabled by an
        attribute of ``self`` (``to_ascii``, ``rejoin_lines``,
        ``remove_hyphens``, ``remove_specials``, ``subs``,
        ``_stemmer``). The steps run in a fixed order because later
        patterns operate on the output of earlier ones. Finally every
        run of whitespace is collapsed to a single space.

        :param s: the text to normalize
        :return: the normalized text, without leading/trailing whitespace
        '''
        s = s.strip().lower()
        if self.to_ascii:
            s = unidecode(s)
        if self.rejoin_lines:
            # A word character plus hyphen followed by a line break: drop the
            # break and surrounding whitespace but keep the "x-" prefix.
            s = re.sub(r'(\w-)\s*\n\s*', r'\1', s, flags=_RE_FLAGS)
        if self.remove_hyphens:
            # [^\W\d_] matches word characters that are neither digits nor
            # underscores; hyphen runs directly between two of them are removed.
            s = re.sub(r'([^\W\d_])-+(?=[^\W\d_])', r'\1', s, flags=_RE_FLAGS)
        if self.remove_specials:
            # Runs of non-word, non-space characters (or underscores) that have
            # a non-digit (or the string boundary) on both sides become a space.
            s = re.sub(r'(\D|^)([^\w\s]|_)+(?=\D|$)', r'\1 ', s,
                       flags=_RE_FLAGS)
            # Special characters trailing a word character and followed by
            # whitespace are dropped.
            s = re.sub(r'(\w)([^\w\s]|_)+\s+', r'\1 ', s, flags=_RE_FLAGS)
            # NOTE(review): here group 1 is the special-character group, so
            # the last matched special character is written back before the
            # space -- confirm this is intended rather than dropping it.
            s = re.sub(r'\s+([^\w\s]|_)+(?=\w)', r'\1 ', s, flags=_RE_FLAGS)
        # Caller-supplied (pattern, replacement) pairs, applied in order.
        for pattern, replacement in self.subs:
            s = re.sub(pattern, replacement, s, flags=_RE_FLAGS)
        if self._stemmer:
            # Every run of letters and hyphens is replaced by its stem.
            callback = lambda m: self._stemmer.stem(m.group())
            s = re.sub(r'([^\W\d_]|-)+', callback, s, flags=_RE_FLAGS)
        s = re.sub(r'\s+', ' ', s, flags=_RE_FLAGS)
        return s.strip()
publications.py 文件源码 项目:czl-scrape 作者: code4romania 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def get_type(self, text):
    """
    Classify a title by the kind of act it names.

    The text is transliterated with unidecode, lower-cased and stripped.
    Only the part before the first "pentru" or "privind" is inspected;
    when neither word occurs the whole text is used (previously this
    case raised AttributeError because ``.start()`` was called on None).
    A title that starts with one of those words leaves nothing to
    inspect, matching the sibling implementation in tineret.py.

    :param text: the title to classify
    :return: 'OM', 'LEGE', 'HG', 'OUG', 'OG', or None when no keyword
        is found
    """
    text = unidecode(text).lower().strip()

    stop = re.search(r'(pentru|privind)', text)
    if stop:
        text = text[:stop.start()]

    # Later checks deliberately override earlier ones, so the last
    # matching keyword in this sequence wins.
    doc_type = None

    if re.search(r'ordin', text):
        doc_type = 'OM'

    if re.search(r'lege', text):
        doc_type = 'LEGE'

    if re.search(r'hotarare', text):
        doc_type = 'HG'

    if re.search(r'ordonanta', text):
        doc_type = 'OUG' if re.search(r'urgenta', text) else 'OG'

    return doc_type
tineret.py 文件源码 项目:czl-scrape 作者: code4romania 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def get_feedback_date(self, text):
    """
    Extract the date that follows "data limita" in a text.

    The text is stripped, lower-cased and transliterated with unidecode.
    A date is recognised either as ``dd.mm.20yy`` or as
    ``dd <word> 20yy`` and is parsed with ``'%d %B %Y'`` first, then
    ``'%d.%m.%Y'``.

    :param text: free text that may contain a deadline
    :return: a ``datetime.datetime``, or None when no phrase is found
        or the date cannot be parsed with either format
    """
    normalized = unidecode(text.strip().lower())

    phrase = re.search(r'data limita.*((\d\d?\.\d\d?\.20\d\d)|(\d\d?\s[a-z]+\s20\d\d))', normalized)
    if phrase is None:
        return None

    found = re.search(r'(\d\d?\.\d\d?\.20\d\d)|(\d\d?\s[a-z]+\s20\d\d)', phrase.group(0))
    if found is None:
        return None

    date_string = found.group(0)
    for fmt in ('%d %B %Y', '%d.%m.%Y'):
        try:
            return datetime.datetime.strptime(date_string, fmt)
        except ValueError:
            # Not in this format; try the next one.
            continue
    return None
tineret.py 文件源码 项目:czl-scrape 作者: code4romania 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_type(self, text):
    """
    Classify a title by the kind of act it names.

    The text is transliterated with unidecode, lower-cased and stripped,
    and only the part before the first "pentru" or "privind" (if any) is
    inspected. Keywords are tested from highest to lowest precedence:
    "ordonanta", "hotarare", "lege", "ordin".

    :param text: the title to classify
    :return: 'OUG', 'OG', 'HG', 'LEGE', 'OM', or None when no keyword
        is found
    """
    normalized = unidecode(text).lower().strip()

    cutoff = re.search(r'(pentru|privind)', normalized)
    head = normalized if cutoff is None else normalized[0:cutoff.start()]

    if 'ordonanta' in head:
        return 'OUG' if 'urgenta' in head else 'OG'
    if 'hotarare' in head:
        return 'HG'
    if 'lege' in head:
        return 'LEGE'
    if 'ordin' in head:
        return 'OM'
    return None
common.py 文件源码 项目:pyjam 作者: 10se1ucgo 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def __init__(self, audio_dir=os.curdir, audio_rate=11025, mod_path=os.curdir,
             name=None, play_key='F8', relay_key='=', use_aliases=True):
    """
    Args:
        audio_dir (str): Path for finding audio.
        audio_rate (int): The sample rate the game accepts.
        mod_path (str): Path to the mod folder (e.g. "Steam/SteamApps/common/Team Fortress 2/tf2")
        name (str): The name of the game. Transliterated with unidecode;
            left as None when not given.
        play_key (str): The key used to start/stop music in-game.
            Falls back to "F8" when the key is not bindable.
        relay_key (str): The key used to interact with the game.
            Falls back to "=" when the key is not bindable.
        use_aliases (bool): Whether or not to use aliases to select songs in-game.
    """
    self.audio_dir = audio_dir
    self.audio_rate = audio_rate
    self.mod_path = mod_path
    # The default name is None, which unidecode cannot process; only
    # transliterate when a name was actually supplied.
    self.name = unidecode.unidecode(name) if name is not None else None
    self.play_key = play_key if bindable(play_key) else "F8"
    self.relay_key = relay_key if bindable(relay_key) else "="
    self.use_aliases = use_aliases
preprocess2.py 文件源码 项目:Hanhan_NLP 作者: hanhanwu 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def output_preprocessed_data(self, json_input, file_name):
    '''
    Append preprocessed sentences to a file, one sentence per line.

    Each token is rendered as "originalText/pos" and the tokens of a
    sentence are joined with single spaces. Every line is passed
    through unidecode before it is written.
    :param json_input: mapping with a 'sentences' list; each sentence has
                       a 'tokens' list of dicts with 'originalText' and 'pos'
    :param file_name: output file name, created inside self.output_folder
    :return: None
    '''
    # Render everything first so a malformed sentence fails before the
    # output file is touched.
    tagged_sentences = [
        " ".join(token['originalText'] + "/" + token['pos'] for token in sentence['tokens'])
        for sentence in json_input['sentences']
    ]

    destination = self.output_folder + '/' + file_name
    with open(destination, 'a') as sink:
        for line in tagged_sentences:
            sink.write(unidecode.unidecode(line) + "\n")
metadataparser.py 文件源码 项目:aio 作者: pavhofman 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def parseToJsonStr(self, metadata: dict) -> Optional[str]:
    """
    Build a JSON string from the metadata entries selected by the rules.

    For every rule, its candidate keys are tried in order and the first
    one present in ``metadata`` with a non-empty value is used; the value
    is transliterated with unidecode and stored under ``md.value``.

    :param metadata: raw metadata to pick values from
    :return: json string or None if no matching non-empty metadata found
    """
    collected = {}
    for md, possibleKeys in self.__rulesDict.items():
        for candidate in possibleKeys:
            if candidate not in metadata:
                continue
            raw = metadata.get(candidate)
            if len(raw) > 0:
                collected[md.value] = unidecode(raw)
                # first usable value wins; ignore the remaining keys of this rule
                break

    return json.dumps(collected) if len(collected) > 0 else None
recom.py 文件源码 项目:recobot 作者: h4ck3rk3y 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def similar_users(user):
    """
    Return the users that the SVD model considers similar to ``user``.

    If the user's record in ``db.done_users`` is not yet flagged as
    recommended, their entries from ``db.user_list`` are appended to
    ``./dc_recom.dat`` as ``user::tth`` lines and the flag is set. The
    data file is then loaded and an SVD is computed over it on every call.

    :param user: user name; non-``str`` values are passed through unidecode
    :return: list with the first element of each item returned by
        ``svd.similar(user)``
    :raises TypeError: if the user has no record in ``db.done_users``
        (``find_one`` returns None) -- NOTE(review): confirm whether
        unknown users should be handled instead
    """
    if not isinstance(user, str):
        user = unidecode.unidecode(user)

    # Kept as an equality test so that stored values other than real
    # booleans behave exactly as before.
    if db.done_users.find_one({'user': user})['recommended'] == False:  # noqa: E712
        # The context manager closes the file even if a write fails.
        with open('./dc_recom.dat', 'a') as ratings_file:
            for entry in db.user_list.find({'user': user}):
                ratings_file.write(entry['user'] + '::' + entry['tth'])
                ratings_file.write('\n')
        db.done_users.update({'user': user}, {'user': user, 'recommended': True})

    data = Data()
    data.load('./dc_recom.dat', sep='::', format={'col': 1, 'row': 0})
    svd = SVD()
    svd.set_data(data)
    svd.compute(k=1000, min_values=0, pre_normalize=None, mean_center=False, post_normalize=True)
    return [i[0] for i in svd.similar(user)]
genderComputer.py 文件源码 项目:gender_classifier 作者: LU-C4i 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def initialCheckName(self, name):
    '''
    Look for gender-specific words in the first two parts of a name.

    Names written only in Cyrillic or Greek script are transliterated
    with unidecode first. The first word is checked before the second,
    and male words are checked before female words.

    :param name: the name to inspect; empty or whitespace-only names
        yield no result instead of raising IndexError
    :return: ('male', 1), ('female', 1), or (None, 0) when neither of
        the first two words is gender-specific
    '''
    if not name or not name.strip():
        return (None, 0)

    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # Slicing tolerates names with a single word (or none left after
    # transliteration).
    for word in name.split()[:2]:
        if word in self.maleWords:
            return ('male', 1)
        if word in self.femaleWords:
            return ('female', 1)
    return (None, 0)
politifact.py 文件源码 项目:indivisible 作者: danieltahara 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def get_statements_by_person(self, first_name, last_name, limit=0):
    """
    Fetch statements and ratings for a person identified by name.

    Both names are lower-cased and transliterated with unidecode before
    being placed in the request path.

    @param first_name: of MoC
    @param last_name: of MoC
    @param limit: optional limit; any value that is not greater than 0
        is replaced by 10
    @return: statements, or an empty list when the request yields
        nothing truthy
    """
    if not limit > 0:
        limit = 10

    template = ("statements/truth-o-meter/people/{first_name}-{last_name}/"
                "json/?n={limit}")
    path = template.format(
        first_name=unidecode(first_name.lower()),
        last_name=unidecode(last_name.lower()),
        limit=limit,
    )
    return self._get(path) or []
curl.py 文件源码 项目:telegram-yt_mp3-bot 作者: Javinator9889 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def descarga(full_name):
    """
    Upload a file to file.io and return its direct download link.

    The upload is retried until a response without an ``<html>`` marker
    is received and its JSON contains a non-None ``link``.
    NOTE(review): there is no retry limit, so a permanently failing
    service keeps this function looping -- confirm that is acceptable.

    :param full_name: path of the file to upload
    :return: the value of the ``link`` field of the JSON response
    """
    url = 'https://file.io/?expires=1w'
    print("\n\tSubiendo archivo a 'file.io'")
    link = None
    n = 0
    while link is None:                                                  # For ensuring that the file is uploaded correctly
        # Re-open the file for every attempt: a handle that was already
        # sent has been read to the end, so re-posting it would upload an
        # empty body. The context manager also closes the handle.
        with open(full_name, 'rb') as payload:
            response = requests.post(url, files={'file': payload})
        test = response.text
        print("JSON recibido: ", test)
        decoded = unidecode(test)                                        # Kept from the original: transliterates the response text to ASCII
        print("JSON decodificado: ", decoded)
        if '<html>' in decoded:                                          # When upload fails, 'file.io' sends a message with <html> header.
            print("\n\tFallo al subir el archivo. Reintentando... #", n)  # If it's detected, keep 'link = None' so the 'while' loop restarts
            link = None
            n = n + 1                                                    # Little counter
        else:
            json_data = json.loads(decoded)
            link = json_data['link']
            print("\n\nEnlace de descarga directa: ", link)
    return link
utils.py 文件源码 项目:openrefine-wikidata 作者: wetneb 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def fuzzy_match_strings(ref, val):
    """
    Returns the matching score of two values.

    Empty values score 0. If either value is recognised by ``to_q``,
    the score is 100 when both map to the same result and 0 otherwise.
    Otherwise both values are transliterated with unidecode,
    lower-cased, and compared with ``fuzz.token_sort_ratio``.

    :param ref: the reference value
    :param val: the value to compare against the reference
    :return: an integer score
    """
    if not ref or not val:
        return 0
    ref_q = to_q(ref)
    val_q = to_q(val)
    if ref_q or val_q:
        return 100 if ref_q == val_q else 0
    simplified_val = unidecode(val).lower()
    simplified_ref = unidecode(ref).lower()

    # The previous code also computed the ratio with the arguments
    # swapped but immediately overwrote it with this one, so the
    # returned score has always been this single ratio.
    # NOTE(review): if a truly symmetric score is wanted, average both
    # directions instead.
    score = fuzz.token_sort_ratio(simplified_val, simplified_ref)
    return int(score)
Fun.py 文件源码 项目:TwentyTwo 作者: EPITECH-2022 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
async def emoji(self, context):
    '''
    Sends a text and replace letters with regional indicators.

    Nothing is sent when there is no text, or when the command was
    invoked as 'riz' by someone who is not the bot owner. For 'ri' and
    'bi' the message is prefixed with the author; for 'bi' the letters
    b and p become ``:b:``. For 'ri', 'riz' and 'bi' the invoking
    message is deleted afterwards when the bot is allowed to.

    Declared ``async`` because the body awaits bot calls; ``await``
    inside a plain ``def`` is a syntax error.
    '''
    content = self.bot.get_text(context)
    if content in [None, '', ' '] or (
            context.invoked_with == 'riz'
            and not self.bot.is_owner(context.message.author)):
        return

    from unidecode import unidecode

    parts = []
    if context.invoked_with in ['ri', 'bi']:
        parts.append('`{}`: '.format(context.message.author))
    for c in content:
        if not c.isalpha():
            parts.append(c)
        elif context.invoked_with == 'bi' and c in ['b', 'B', 'p', 'P']:
            parts.append(':b:')
        else:
            # A single letter can transliterate to several characters
            # (or none); emit one indicator per resulting a-z letter
            # rather than one malformed emoji name. Presumably only
            # regional_indicator_a..z exist -- other characters are
            # passed through unchanged.
            for letter in unidecode(c.lower()).lower():
                if 'a' <= letter <= 'z':
                    parts.append(':regional_indicator_{}:'.format(letter))
                else:
                    parts.append(letter)
    msg = ''.join(parts)

    await self.bot.say(msg)
    await self.bot.replied(context)
    if context.invoked_with in ['ri', 'riz', 'bi']:
        try:
            await self.bot.delete_message(context.message)
        except discord.errors.Forbidden:
            # Missing permission to delete is not an error for this command.
            pass
models.py 文件源码 项目:Wagtail-Image-Folders 作者: anteatersa 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def validate_folder(self):
    """Check that this folder does not exist yet.

    Two checks are made, in this order:
    1. No ImageFolder with the same title exists under the same parent
       (or at the top level when there is no parent).
    2. No directory with the ASCII form of the title exists on disk,
       under the parent's path or under IMAGES_FOLDER_NAME inside
       settings.MEDIA_ROOT.

    Raises ValidationError with code 'db' or 'os' respectively."""

    ascii_title = "".join(
        (ch if ord(ch) < 128 else '_') for ch in unidecode(self.title)
    )
    parent = self.folder

    if parent:
        siblings = ImageFolder.objects.filter(folder=parent, title=self.title)
    else:
        siblings = ImageFolder.objects.filter(folder__isnull=True, title=self.title)
    if siblings.count() > 0:
        raise ValidationError("Folder exists in the DB!", code='db')

    base_dir = parent.path if parent else IMAGES_FOLDER_NAME
    candidate = os.path.join(settings.MEDIA_ROOT, base_dir, ascii_title)
    if os.path.isdir(candidate):
        raise ValidationError("Folder exists in the OS!", code='os')
models.py 文件源码 项目:Wagtail-Image-Folders 作者: anteatersa 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def get_upload_to(self, filename):
    """Return the storage path for an uploaded file.

    The name is first cleaned by the storage backend, then
    transliterated with unidecode, with any remaining non-ASCII
    character replaced by '_' to sidestep filesystem encoding issues.
    The file is placed under this object's folder path, or under
    IMAGES_FOLDER_NAME when it has no folder.
    """
    safe_name = self.file.field.storage.get_valid_name(filename)
    safe_name = "".join(
        (ch if ord(ch) < 128 else '_') for ch in unidecode(safe_name)
    )

    def _under_base(name):
        # Join a file name onto the folder this upload belongs to.
        base = self.folder.path if self.folder else IMAGES_FOLDER_NAME
        return os.path.join(base, name)

    full_path = _under_base(safe_name)
    if len(full_path) < 95:
        return full_path

    # Truncate filename so it fits in the 100 character limit
    # https://code.djangoproject.com/ticket/9893
    excess = len(full_path) - 94
    stem, extension = os.path.splitext(safe_name)
    return _under_base(stem[:-excess] + extension)
check.py 文件源码 项目:openkamer 作者: openkamer 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def get_members_missing(members_current, members_current_check):
    """
    List the reference members that have no counterpart among the current ones.

    Members are matched on surname (including prefix) and forename,
    both compared after transliteration with unidecode.

    :param members_current: objects exposing ``person.surname_including_prefix()``
        and ``person.forename``
    :param members_current_check: dicts with 'name', 'forename' and 'initials'
    :return: list of strings formatted as "<initials> <name> (<forename>)"
        for every unmatched reference member, in input order
    """
    # Normalize the current members once instead of once per reference
    # member, so each lookup is a set membership test.
    known = {
        (unidecode(member.person.surname_including_prefix()),
         unidecode(member.person.forename))
        for member in members_current
    }

    members_missing = []
    for member_check in members_current_check:
        key = (unidecode(member_check['name']), unidecode(member_check['forename']))
        if key not in known:
            members_missing.append(
                member_check['initials'] + ' ' + member_check['name'] + ' (' + member_check['forename'] + ')')
    return members_missing
check.py 文件源码 项目:openkamer 作者: openkamer 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def get_members_incorrect(members_current, members_current_check):
    """
    List the current members that have no counterpart in the reference data.

    Members are matched on surname (including prefix) and forename,
    both compared after transliteration with unidecode.

    :param members_current: objects exposing ``person.surname_including_prefix()``
        and ``person.forename``
    :param members_current_check: dicts with 'name' and 'forename'
    :return: list of the unmatched objects from ``members_current``,
        in input order
    """
    # Normalize the reference members once instead of once per current
    # member, so each lookup is a set membership test.
    expected = {
        (unidecode(member_check['name']), unidecode(member_check['forename']))
        for member_check in members_current_check
    }

    members_incorrect = []
    for member in members_current:
        key = (unidecode(member.person.surname_including_prefix()),
               unidecode(member.person.forename))
        if key not in expected:
            members_incorrect.append(member)
    return members_incorrect
models.py 文件源码 项目:openkamer 作者: openkamer 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def find_party(name):
    """
    Look up a political party by name, tolerating common spelling variants.

    Four variants are tried case-insensitively: the name as given, its
    unidecode transliteration, the name prefixed with 'Lid-', and the
    name with dashes replaced by spaces. The ``name`` field is searched
    first, then ``name_short``.

    :param name: the party name to look for
    :return: the first matching PoliticalParty, or None (a warning is
        logged) when nothing matches
    """
    variants = (
        name,
        unidecode(name),
        'Lid-' + name,
        name.replace('-', ' '),
    )
    for field in ('name', 'name_short'):
        lookup = field + '__iexact'
        parties = None
        for variant in variants:
            matches = PoliticalParty.objects.filter(**{lookup: variant})
            parties = matches if parties is None else parties | matches
        if parties.exists():
            return parties[0]
    logger.warning('party not found: ' + name)
    return None


问题


面经


文章

微信
公众号

扫码关注公众号