postag.py 文件源码

python
阅读 28 收藏 0 点赞 0 评论 0

项目:senti 作者: stevenxxiu 项目源码 文件源码
def transform(self, docs, buffer_size=100):
    """Stream ``docs`` through the external tagger and yield its parsed output.

    The command in ``self.RUN_TAGGER_CMD`` is started with
    ``--output-format conll``.  ``self._write_input`` runs on a helper
    thread and receives ``(docs, proc, buffer_sema)``; this generator reads
    the child's stdout concurrently.

    Args:
        docs: Passed through untouched to ``self._write_input``.
        buffer_size: Initial value of the bounded semaphore shared with the
            writer thread.  It is released once per yielded group, so
            presumably the writer acquires it once per document to cap how
            far it can run ahead of the reader -- confirm against
            ``_write_input``.

    Yields:
        One list per blank-line-terminated group of output lines, each item
        being a ``(word, tag, confidence)`` tuple with ``confidence`` as a
        float.

    Raises:
        ValueError: If an output line does not have exactly three
            tab-separated fields or the third field is not a float.

    Note:
        Iteration stops at the first empty group.  End-of-file and a blank
        line that directly follows another blank line are indistinguishable
        here, so both end the stream.
    """
    args = shlex.split(self.RUN_TAGGER_CMD) + ['--output-format', 'conll']
    # NOTE(review): stderr is piped but never read in this function; a child
    # that writes a lot to stderr could block on a full pipe -- confirm the
    # tagger stays quiet or that ``_write_input`` drains it.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    buffer_sema = threading.BoundedSemaphore(buffer_size)
    t = threading.Thread(target=self._write_input, args=(docs, proc, buffer_sema))
    t.start()
    completed = False
    try:
        while True:
            # Output for a document can only appear after the writer has sent
            # it, so each release below is expected to pair with an earlier
            # acquire in the writer thread.
            res = []
            while True:
                line = proc.stdout.readline().decode('utf-8').rstrip()
                if line == '':
                    break
                word, tag, confidence = line.split('\t')
                res.append((word, tag, float(confidence)))
            if not res:
                break
            yield res
            buffer_sema.release()
        t.join()
        completed = True
    finally:
        if not completed:
            # The consumer stopped early or parsing failed: do not leave the
            # child running.  The writer thread is not joined on this path
            # because it may be blocked waiting on the semaphore.
            proc.kill()
        for pipe in (proc.stdout, proc.stderr, proc.stdin):
            try:
                pipe.close()
            except OSError:
                # Best-effort close: flushing stdin into a dead child raises
                # BrokenPipeError, which must not mask the original outcome.
                pass
        # Reap the child so it does not linger as a zombie.
        proc.wait()
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号