Python punctuation() usage examples (source code)

__init__.py (project: dupandas, author: shivam5992)
def clean_text(self, txt):
        """
        function to clean a text on the basis of configurations mentioned in clean config.
        """

        txt = str(txt)

        if self.cc['lower']:
            txt = txt.lower()

        if self.cc['punctuation']:
            txt = "".join([x for x in txt if x not in punctuations])

        if self.cc['whitespace']:
            txt = "".join(txt.split()).strip()

        if self.cc['digit']:
            txt = "".join(x for x in txt if x not in "0987654321")

        return txt
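The `punctuations` name used above is assumed to be `string.punctuation` imported at the dupandas module level; the import is not shown in this excerpt. A minimal standalone sketch of the same cleaning steps, with the `cc` keys mirroring the flags checked above:

import string

def clean_text_demo(txt, cc):
    txt = str(txt)
    if cc.get('lower'):
        txt = txt.lower()
    if cc.get('punctuation'):
        # drop every character that appears in string.punctuation
        txt = "".join(ch for ch in txt if ch not in string.punctuation)
    if cc.get('whitespace'):
        # note: this removes all whitespace rather than just trimming it
        txt = "".join(txt.split()).strip()
    if cc.get('digit'):
        txt = "".join(ch for ch in txt if ch not in "0123456789")
    return txt

print(clean_text_demo("Hello, World 42!", {'lower': True, 'punctuation': True, 'digit': True}))
# -> 'hello world ' (lower-cased, punctuation and digits removed)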
squad_evaluate.py (project: dict_based_learning, author: tombosc)
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
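This SQuAD-style normalization recurs verbatim in several projects below; it assumes `import re` and `import string` at module level. A quick check of what it produces:

print(normalize_answer("The Quick,   Brown Fox!"))
# -> 'quick brown fox' (lower-cased, punctuation and the article removed, whitespace collapsed)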
validationScript.py (project: bedrock-core, author: Bedrock-py)
def list_returns(fileToCheck, desiredInterface):
    returnsList = []
    newLine = ""
    with open(fileToCheck, 'r') as pyFile:
        for line in pyFile:
            if line.find("#") == -1:
                newFront = line.find("return")
                if newFront != -1:
                    possibleErrorMessageCheck1 = line.find("'")
                    bracketBefore = line.find("{")
                    lastBracket = line.find("}")
                    newLine = line[possibleErrorMessageCheck1:]
                    possibleErrorMessageCheck2 = newLine.find(" ")
                    if possibleErrorMessageCheck2 == -1:
                        line = line[newFront + 7:]
                        line.split()
                        line = [word.strip(punctuation) for word in line.split()]
                        returnsList.extend(line)
                    elif possibleErrorMessageCheck1 == bracketBefore + 1:
                        line = line[newFront + 7:lastBracket + 1]
                        line.split()
                        returnsList.append(line)
    return returnsList
squad_evaluate.py (project: MachineComprehension, author: sa-j)
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
ndeftool.py (project: nfcpy, author: nfcpy)
def make_wifipassword(args):
    import random, string, hashlib
    if args.password is None:
        printable = string.digits + string.letters + string.punctuation
        args.password = ''.join([random.choice(printable) for i in xrange(32)])
    if args.password_id is None:
        args.password_id = random.randint(0x0010, 0xFFFF)
    pkhash = hashlib.sha256(args.pubkey.read()).digest()[0:20]

    record = nfc.ndef.WifiPasswordRecord()
    record.password['public-key-hash'] = pkhash
    record.password['password-id'] = args.password_id
    record.password['password'] = args.password

    message = nfc.ndef.Message(record)
    if args.outfile.name == "<stdout>":
        args.outfile.write(str(message).encode("hex"))
    else:
        args.outfile.write(str(message))
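The snippet above is Python 2 code (`string.letters`, `xrange`, `.encode("hex")`). As a hedged sketch only, the random-password portion might look like this on Python 3, using `secrets` purely for illustration rather than anything nfcpy itself does:

import secrets
import string

# Python 3 sketch of the password generation above (illustrative, not nfcpy code)
printable = string.digits + string.ascii_letters + string.punctuation
password = ''.join(secrets.choice(printable) for _ in range(32))
password_id = secrets.randbelow(0xFFFF - 0x0010 + 1) + 0x0010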
evaluate.py (project: Question-Answering, author: MurtyShikhar)
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
dnd_list.py (project: PandasDataFrameGUI, author: bluenote10)
def _insert(self, x, y, text):
        """ Insert text at given x, y coordinates --- used with drag-and-drop. """

        # Clean text.
        import string
        text = filter(lambda x: x in (string.letters + string.digits + string.punctuation + ' '), text)

        # Find insertion point.
        index, flags = self.HitTest((x, y))

        if index == wx.NOT_FOUND:
            if flags & wx.LIST_HITTEST_NOWHERE:
                index = self.GetItemCount()
            else:
                return

        # Get bounding rectangle for the item the user is dropping over.
        rect = self.GetItemRect(index)

        # If the user is dropping into the lower half of the rect, we want to insert _after_ this item.
        if y > rect.y + rect.height/2:
            index += 1

        self.InsertStringItem(index, text)
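`string.letters` and the list-returning `filter` used above are also Python 2 behaviour. Under Python 3 the cleaning step could be written roughly as follows (an assumption about intent, not the project's code):

import string

def keep_printable(text):
    # keep letters, digits, punctuation and spaces; drop everything else
    allowed = set(string.ascii_letters + string.digits + string.punctuation + ' ')
    return ''.join(ch for ch in text if ch in allowed)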
bot.py (project: miptnews, author: fiztehradio)
def public_posts(self):
        now = datetime.now()
        # pull the latest items from the rss feed and the DB records that still have message_id=0
        posts_from_db = self.db.get_post_without_message_id()
        today_news = [i for i in self.src.news if (
            now - datetime.fromtimestamp(i.date)).days < 1]
        # publish only items present in both lists: today's news that are still unpublished
        for_publishing = list(set(today_news) & set(posts_from_db))
        for_publishing = sorted(for_publishing, key=lambda news: news.date)
        # for_publishing = sorted(today_news, key=lambda news: news.date)
        # publish each pending post
        for post in tqdm(for_publishing, desc="Posting news"):
            header = base64.b64decode(post.text).decode('utf8')
            header = ''.join(c for c in header if c not in set(punctuation + '—«»'))
            header = '#' + '_'.join(header.lower().split())
            text = '%s %s' % (header,
                              self.bit_ly.short_link(base64.b64decode(post.link).decode('utf8')))
            a = self.send_message(
                chat_id=self.chat_id, text=text)  # , parse_mode=telegram.ParseMode.HTML)
            message_id = a.message_id
            chat_id = a['chat']['id']
            self.db.update(post.link, chat_id, message_id)
            logging.info(u'Public: %s;%s;' %
                         (str(post), message_id))
            time.sleep(self.delay_between_messages)
string_funcs.py (project: PyperGrabber, author: pykong)
def rem_whitespace(string):
    """ careful to keep this order of patterns or duplicate whitespace created in first round
    will not be removed
    """
    unwanted_chars = punctuation + whitespace

    pat_l = [r'[' + unwanted_chars + ']',
             r'\s+',
             r'  ',
             r' \\',
             r' \ '
             ]

    for p in pat_l:
        rx = re.compile(p)
        string = re.sub(rx, ' ', string)

    return string.strip()
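A quick demonstration, assuming the module-level imports this snippet relies on (`import re` and `from string import punctuation, whitespace`):

from string import punctuation, whitespace
import re

print(rem_whitespace("foo,  bar -- baz!"))
# -> 'foo bar baz' (punctuation and runs of whitespace collapsed to single spaces)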
coq_install_ice_ng.py (project: coquery, author: gkunter)
def get_tag_translate(self, tag):
        translate_dict = {
            "p": "p",
            "punctuation": "",
            "heading": "span style='font-style: bold'",
            #"heading": "span style='font-style: bold; font-size:150%'",
            #"h1": "span style='font-style: bold; font-size:150%'",
            "boldface": "b",
            "italics": "i",
            "underline": "u",
            "superscript": "sup",
            "subscript": "sub",
            "object": "object",
            "text": "html"}

        if tag in translate_dict:
            return translate_dict[tag]
        else:
            print("unsupported tag: ", tag)
            return tag
coq_install_generic.py (project: coquery, author: gkunter)
def add_token(self, token_string, token_pos=None):
        # get lemma string:
        if all(x in string.punctuation for x in token_string):
            token_pos = "PUNCT"
            lemma = token_string
        else:
            try:
                # use the current lemmatizer to assign the token to a lemma:
                lemma = self._lemmatize(token_string, self._pos_translate(token_pos)).lower()
            except Exception:
                lemma = token_string.lower()

        # get word id, and create new word if necessary:
        word_dict = {self.word_lemma: lemma, self.word_label: token_string}
        if token_pos and self.arguments.use_nltk:
            word_dict[self.word_pos] = token_pos
        word_id = self.table(self.word_table).get_or_insert(word_dict, case=True)

        # store new token in corpus table:
        return self.add_token_to_corpus(
            {self.corpus_word_id: word_id,
             self.corpus_sentence: self._sentence_id,
             self.corpus_file_id: self._file_id})
compare_models.py (project: bi-att-flow, author: allenai)
def normalize_answer(self, s):
        """Lower text and remove punctuation, articles and extra whitespace."""
        def remove_articles(text):
            return re.sub(r'\b(a|an|the)\b', ' ', text)

        def white_space_fix(text):
            return ' '.join(text.split())

        def remove_punc(text):
            exclude = set(string.punctuation)
            return ''.join(ch for ch in text if ch not in exclude)

        def lower(text):
            return text.lower()

        return white_space_fix(remove_articles(remove_punc(lower(s))))
evaluate-v1.1.py (project: bi-att-flow, author: allenai)
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
evaluate.py (project: bi-att-flow, author: allenai)
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
text_helpers.py (project: TensorFlow-Machine-Learning-Cookbook, author: PacktPublishing)
def normalize_text(texts, stops):
    # Lower case
    texts = [x.lower() for x in texts]

    # Remove punctuation
    texts = [''.join(c for c in x if c not in string.punctuation) for x in texts]

    # Remove numbers
    texts = [''.join(c for c in x if c not in '0123456789') for x in texts]

    # Remove stopwords
    texts = [' '.join([word for word in x.split() if word not in (stops)]) for x in texts]

    # Trim extra whitespace
    texts = [' '.join(x.split()) for x in texts]

    return(texts)
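A small usage sketch for the function above, assuming `import string` at module level as in the cookbook code; the stopword list here is made up for the example:

stops = ['the', 'a', 'of']
print(normalize_text(["The Cat, the Hat!", "Route 66 of   America"], stops))
# -> ['cat hat', 'route america']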


word2vec_skipgram.py (project: TensorFlow-Machine-Learning-Cookbook, author: PacktPublishing)
def normalize_text(texts, stops):
    # Lower case
    texts = [x.lower() for x in texts]

    # Remove punctuation
    texts = [''.join(c for c in x if c not in string.punctuation) for x in texts]

    # Remove numbers
    texts = [''.join(c for c in x if c not in '0123456789') for x in texts]

    # Remove stopwords
    texts = [' '.join([word for word in x.split() if word not in (stops)]) for x in texts]

    # Trim extra whitespace
    texts = [' '.join(x.split()) for x in texts]

    return(texts)
trigram.py (project: IntroPython2016, author: UWPCE-PythonCert)
def strip_punctuation(text):
    """
    strips the punctuation from a bunch of text
    """
    # build a translation table for string.translate:
    # there are other ways to do this:

    # create a translation table to replace all punctuation with spaces
    #    -- then split() will remove the extra spaces
    punctuation = string.punctuation
    punctuation = punctuation.replace("'", "")  # keep apostrophes
    punctuation = punctuation.replace("-", "")  # keep hyphenated words
    # building a translation table
    table = {}
    for c in punctuation:
        table[ord(c)] = ' '
    # remove punctuation with the translation table
    text = text.translate(table)
    # remove "--" -- can't do multiple characters with translate
    text = text.replace("--", " ")

    return text
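Because the table maps code points to a replacement space, it is already in the form `str.translate` expects on Python 3. A quick check, assuming `import string` at module level:

cleaned = strip_punctuation("don't stop -- it's fine, really.")
# apostrophes and hyphens survive; all other punctuation becomes spaces
print(cleaned.split())
# -> ["don't", 'stop', "it's", 'fine', 'really']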
trigram.py (project: IntroPython2016, author: UWPCE-PythonCert)
def make_words(text):

    """
    make a list of words from a large bunch of text

    strips all the punctuation and other stuff from a string
    """
    text = strip_punctuation(text)

    # lower-case everything to remove that complication:
    text = text.lower()

    # split into words
    words = text.split()

    # remove the bare single quotes: "'" is both a quote and an apostrophe
    # and capitalize "i"
    words2 = []
    for word in words:
        if word != "'":  # remove quote by itself
            # "i" by itself should be capitalized
            words2.append("I" if word == 'i' else word)
    # could be done with list comprehension too -- next week!
    # words2 = [("I" if word == 'i' else word) for word in words if word != "'"]
    return words2
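The two helpers combine like this (a quick check, assuming both functions above are in scope):

print(make_words("I said: don't stop -- it's fine!"))
# -> ['I', 'said', "don't", 'stop', "it's", 'fine']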
translate.py (project: DogeGen, author: MemeTrash)
def _get_base_doge_words(self, eng_text):
        """
        Get all base words from text to make doge phrases from.
        eg. 'Hello there, I am happy' -> ['hello', 'are', 'happy']

        Args:
            eng_text (str): Text to get words from.

        Returns:
            list[str]: List of lower case words to use from text.
        """
        phrase_no_punct = "".join([ch for ch in eng_text if ch not in string.punctuation])
        tagged_words = nltk.pos_tag([w.lower() for w in phrase_no_punct.split(' ') if w.isalpha()])
        chosen_words = []
        for word, tag in tagged_words:
            if tag[0] in ['N', 'V', 'J']:
                # make noun singular
                if tag[0] == 'N':
                    word = self._lemmatizer.lemmatize(word, pos='n')
                # make verb infinitive
                elif tag[0] == 'V':
                    word = self._lemmatizer.lemmatize(word, pos='v')
                chosen_words.append(word.encode('ascii', 'ignore'))  # lemmatize makes word unicode
        return list(set(chosen_words))
data.py (project: Personal_AI_Assistant, author: PratylenClub)
def str2index(str_):

    # clean white space
    str_ = ' '.join(str_.split())
    # remove punctuation and make lower case
    str_ = str_.translate(None, string.punctuation).lower()

    res = []
    for ch in str_:
        try:
            res.append(byte2index[ch])
        except KeyError:
            # drop OOV
            pass
    return res
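`str_.translate(None, string.punctuation)` is the Python 2 form of `str.translate`. On Python 3 the same whitespace cleanup and punctuation stripping could be written as below (an illustrative sketch, not the project's code):

import string

str_ = "Hello,   World!"
str_ = ' '.join(str_.split())
str_ = str_.translate(str.maketrans('', '', string.punctuation)).lower()
print(str_)  # -> 'hello world'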

