inputstream.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:chihu 作者: yelongyu 项目源码 文件源码
def detectEncoding(self, parseMeta=True, chardet=True):
    """Work out which character encoding to use for the raw stream.

    Sources are consulted in order until one yields an encoding:

    1. ``self.detectBOM()``; a hit is the only case reported as
       ``"certain"``.
    2. ``self.detectEncodingMeta()``, when ``parseMeta`` is true.
    3. ``UniversalDetector`` from ``charade`` (preferred) or ``chardet``,
       when ``chardet`` is true and one of the packages is importable.
       ``self.rawStream`` is read in chunks of ``self.numBytesChardet``
       bytes and rewound to offset 0 afterwards.
    4. ``self.defaultEncoding``.

    :arg parseMeta: whether to call ``self.detectEncodingMeta()``
    :arg chardet: whether to try statistical detection
    :returns: ``(encoding, confidence)`` where confidence is either
        ``"certain"`` or ``"tentative"``
    """
    # First look for a BOM. Per the original note, detectBOM also reads
    # past the BOM when one is present.
    encoding = self.detectBOM()
    confidence = "certain"

    # No BOM: look for meta elements carrying encoding information.
    if encoding is None and parseMeta:
        encoding = self.detectEncodingMeta()
        confidence = "tentative"

    # Guess with charade/chardet, if available.
    if encoding is None and chardet:
        confidence = "tentative"
        # Only the imports are guarded, so that an ImportError raised while
        # actually reading/feeding is not silently swallowed.
        try:
            try:
                from charade.universaldetector import UniversalDetector
            except ImportError:
                from chardet.universaldetector import UniversalDetector
        except ImportError:
            UniversalDetector = None

        if UniversalDetector is not None:
            detector = UniversalDetector()
            while not detector.done:
                chunk = self.rawStream.read(self.numBytesChardet)
                assert isinstance(chunk, bytes)
                if not chunk:
                    break
                detector.feed(chunk)
            detector.close()
            # May still be None, in which case the default is used below.
            encoding = detector.result['encoding']
            # Rewind so that later consumers see the stream from the start.
            self.rawStream.seek(0)

    # If all else fails use the default encoding.
    if encoding is None:
        confidence = "tentative"
        encoding = self.defaultEncoding

    # Substitute for equivalent encodings (lookup is case-insensitive).
    encodingSub = {"iso-8859-1": "windows-1252"}
    encoding = encodingSub.get(encoding.lower(), encoding)

    return encoding, confidence
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号