python类escape()的实例源码-面圈网

utils.py 文件源码项目：SynThai 作者: KenjiroAI 项目源码文件源码阅读 43 收藏 0 点赞 0 评论 0

def _load(self):
        """Read every ``*.txt`` file of the corpus directory into memory.

        Files are visited in sorted path order. Each file's content is run
        through ``self._preprocessing`` and appended to the corpus as a
        ``Text`` built from the path, the file name and the cleaned content.
        """

        # Escape the directory so glob metacharacters in it are taken literally
        pattern = os.path.join(glob.escape(self.corpus_directory), "*.txt")

        for file_path in sorted(glob.glob(pattern)):
            # Read content from text file
            with open(file_path, "r", encoding="utf8") as handle:
                raw = handle.read()

            # Preprocessing
            cleaned = self._preprocessing(raw)

            # Create the text instance and add it to the corpus
            entry = Text(file_path, os.path.basename(file_path), cleaned)
            self.__corpus.append(entry)

glob.py 文件源码项目：CodingDojo 作者: ComputerSocietyUNB 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def glob_escape(pathname):
        """
        Return ``pathname`` with every ``_magic_check`` match bracketed.

        The drive part (as split off by ``os.path.splitdrive``) is passed
        through untouched; only the remainder is rewritten.
        """
        # NOTE(review): _magic_check is defined elsewhere in the module;
        # presumably it captures one glob metacharacter per match - confirm.
        drive, tail = os.path.splitdrive(pathname)
        return drive + _magic_check.sub(r'[\1]', tail)

utils.py 文件源码项目：SynThai 作者: KenjiroAI 项目源码文件源码阅读 32 收藏 0 点赞 0 评论 0

def _preprocessing(self, content):
        """Normalize raw corpus text before it is stored.

        The steps run in this order: drop line breaks, turn non-breaking
        spaces into plain spaces, collapse whitespace runs into one space,
        trim both ends, escape doubled delimiters (only when both
        ``self.word_delimiter`` and ``self.tag_delimiter`` are set), and
        replace curly quotation marks with straight ones.

        Args:
            content: Text read from a corpus file.

        Returns:
            The normalized text.
        """

        # Remove new line
        content = re.sub(r"[\r\n]+", "", content)

        # Convert one or multiple non-breaking space to space.
        # The replacement must be a plain " ": in a replacement template
        # "\s" is a bad escape (re.error on Python 3.7+), not a space.
        content = re.sub(r"\xa0+", " ", content)

        # Convert multiple spaces to only one space
        content = re.sub(r"\s{2,}", " ", content)

        # Trim whitespace from starting and ending of text
        content = content.strip(string.whitespace)

        if self.word_delimiter and self.tag_delimiter:
            word_delimiter = self.word_delimiter
            tag_delimiter = self.tag_delimiter

            # Trim word delimiter from starting and ending of text
            content = content.strip(word_delimiter)

            # Convert special characters (word and tag delimiter)
            # in text's content to escape character.
            # Plain str.replace is used because the search text is a literal;
            # passing re.escape() output as a regex replacement would leave
            # stray backslashes in the result.
            content = content.replace(
                word_delimiter + word_delimiter + tag_delimiter,
                word_delimiter + constant.ESCAPE_WORD_DELIMITER + tag_delimiter)

            content = content.replace(
                tag_delimiter + tag_delimiter,
                constant.ESCAPE_TAG_DELIMITER + tag_delimiter)

        # Replace distinct quotation mark into standard quotation
        # (no backslash in the replacement: it would be kept literally)
        content = re.sub(r"[\u2018\u2019]", "'", content)
        content = re.sub(r"[\u201c\u201d]", '"', content)

        return content

utils.py 文件源码项目：SynThai 作者: KenjiroAI 项目源码文件源码阅读 47 收藏 0 点赞 0 评论 0

def get_token_list(self, index):
        """Return the (word, tag) pairs of the text stored at ``index``.

        An empty list is returned when either delimiter is unset or when the
        selected text has no content.
        """

        if not (self.word_delimiter and self.tag_delimiter):
            return []

        # Get content by index; an empty file yields no tokens
        body = self.__corpus[index].content
        if not body:
            return []

        pairs = []
        for piece in body.split(self.word_delimiter):
            # Empty pieces and bare spacebars both become a spacebar entry
            if not piece or piece == constant.SPACEBAR:
                pairs.append((constant.SPACEBAR, constant.PAD_TAG_INDEX))
                continue

            # Split word and tag by tag delimiter.
            # NOTE(review): the tag is read from the second-to-last field, so
            # a piece without any tag delimiter raises IndexError - confirm
            # the expected token layout against the corpus format.
            fields = piece.split(self.tag_delimiter)

            # Replace escape character to proper character
            word = fields[0].replace(constant.ESCAPE_WORD_DELIMITER,
                                     self.word_delimiter)
            tag = fields[-2].replace(constant.ESCAPE_TAG_DELIMITER,
                                     self.tag_delimiter)
            pairs.append((word, tag))

        return pairs

glob.py 文件源码项目：lifesoundtrack 作者: MTG 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def glob_escape(pathname):
        """
        Bracket every ``_magic_check`` match in ``pathname``.

        ``os.path.splitdrive`` separates the drive first so that it is
        returned unchanged in front of the rewritten remainder.
        """
        # NOTE(review): _magic_check comes from elsewhere in the module;
        # presumably a regex with one group per glob metacharacter - confirm.
        prefix, remainder = os.path.splitdrive(pathname)
        escaped = _magic_check.sub(r'[\1]', remainder)
        return prefix + escaped

test_glob.py 文件源码项目：ouroboros 作者: pybee 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def check_escape(self, arg, expected):
        """Assert ``glob.escape(arg)`` equals ``expected`` for str and bytes."""
        # Text form
        escaped_text = glob.escape(arg)
        self.assertEqual(escaped_text, expected)

        # Same check on the filesystem-encoded bytes form
        escaped_bytes = glob.escape(os.fsencode(arg))
        self.assertEqual(escaped_bytes, os.fsencode(expected))

glob.py 文件源码项目：liberator 作者: libscie 项目源码文件源码阅读 44 收藏 0 点赞 0 评论 0

def glob_escape(pathname):
        """
        Rewrite ``pathname`` so each ``_magic_check`` match sits in brackets.

        The drive reported by ``os.path.splitdrive`` is kept as-is and
        re-attached in front of the rewritten rest of the path.
        """
        # NOTE(review): _magic_check is not defined in this block; presumably
        # it matches single glob metacharacters - verify in the module.
        parts = os.path.splitdrive(pathname)
        return parts[0] + _magic_check.sub(r'[\1]', parts[1])

glob.py 文件源码项目：djanoDoc 作者: JustinChavez 项目源码文件源码阅读 33 收藏 0 点赞 0 评论 0

def glob_escape(pathname):
        """
        Put brackets around every ``_magic_check`` match in ``pathname``.

        Only the part after the drive (per ``os.path.splitdrive``) is
        rewritten; the drive itself is returned unchanged.
        """
        # NOTE(review): _magic_check lives elsewhere in the module; presumably
        # a compiled pattern capturing one glob metacharacter - confirm.
        drive, rest = os.path.splitdrive(pathname)
        rest = _magic_check.sub(r'[\1]', rest)
        return "".join((drive, rest))

test_glob.py 文件源码项目：kbe_server 作者: xiaohaoppy 项目源码文件源码阅读 45 收藏 0 点赞 0 评论 0

def check_escape(self, arg, expected):
        """Check ``glob.escape`` against ``expected`` on str, then on bytes."""
        actual = glob.escape(arg)
        self.assertEqual(actual, expected)

        # Repeat with both sides converted by os.fsencode
        actual_encoded = glob.escape(os.fsencode(arg))
        expected_encoded = os.fsencode(expected)
        self.assertEqual(actual_encoded, expected_encoded)

glob.py 文件源码项目：django-next-train 作者: bitpixdigital 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def glob_escape(pathname):
        """
        Return ``pathname`` with each ``_magic_check`` match wrapped in ``[]``.

        The path is split with ``os.path.splitdrive``; the drive is left
        alone and the substitution is applied to the remainder only.
        """
        # NOTE(review): _magic_check is supplied by the surrounding module;
        # presumably it matches glob special characters - confirm there.
        drive_part, path_part = os.path.splitdrive(pathname)
        bracketed = _magic_check.sub(r'[\1]', path_part)
        return drive_part + bracketed

download_subtitles.py 文件源码项目：dopplerr 作者: Stibbons 项目源码文件源码阅读 39 收藏 0 点赞 0 评论 0

def search_file(root_dir, base_name):
        """Recursively search ``root_dir`` for files whose name contains ``base_name``.

        ``base_name`` is split at its last dot. When the part after the dot is
        listed in ``VIDEO_FILES_EXT`` the match must also end with it;
        otherwise only the part before the dot is searched for.

        Args:
            root_dir: Directory to search; taken literally, not as a pattern.
            base_name: File name (or fragment) to look for; taken literally.

        Returns:
            List of matching paths, in the order ``glob.iglob`` yields them.
        """
        # This won't work with python < 3.5 (glob.escape needs 3.4, recursive=True needs 3.5)
        found = []
        base_name = glob.escape(base_name)
        beforext, dot, ext = base_name.rpartition('.')
        if not dot:
            # Without a dot rpartition leaves the stem empty and puts the whole
            # name into ``ext``; the pattern would degrade to "**" and match
            # every file under root_dir.
            beforext, ext = base_name, ''

        if dot and ext.lower() in VIDEO_FILES_EXT:
            name_pattern = "*" + beforext + "*" + ext
        else:
            name_pattern = "*" + beforext + "*"

        # Escape the directory as well so characters such as "[" in a folder
        # name are not interpreted as glob syntax.
        protected_path = os.path.join(glob.escape(root_dir), "**", name_pattern)
        log.debug("Searching %r", protected_path)
        for filename in glob.iglob(protected_path, recursive=True):
            log.debug("Found: %s", filename)
            found.append(filename)
        return found