tree_loader.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:BadParser 作者: stanojevic 项目源码 文件源码
def load_from_export_format(export_file, encoding):
    """Load the sentences of an export-format treebank file as trees.

    The file is read as a sequence of sentence blocks, each opened by a
    ``#BOS <id> ...`` line and closed by a ``#EOS <id>`` line.  The lines
    between the two markers are passed to
    ``_give_me_a_tree_from_export_format``; the resulting tree gets its
    ``sent_id`` attribute set and is then handed to ``HeadFinder.mark_head``.
    The ``HeadFinder`` is built from the ``negra.headrules`` file located in
    the same directory as this source file (so the input is presumably in
    NEGRA export format -- confirm against the corpus in use).

    A progress message is written to ``stderr`` whenever a sentence id that
    is a multiple of 1000 is closed.

    Lines of a sentence that is still open when the file ends are dropped
    without an error.

    Args:
        export_file: Path of the export file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list with one entry per ``#EOS`` marker, in file order: the tree
        built for that sentence, or ``None`` when the block had no lines.

    Raises:
        AssertionError: If the id on a ``#EOS`` line differs from the id of
            the currently open ``#BOS`` (including when no sentence is open).
        Exception: If a line appears outside any ``#BOS``/``#EOS`` block.
        ValueError, IndexError: If a marker line carries no parsable id.
    """
    # Directory holding this source file; the head rules are stored next to it.
    script_folder = os.path.realpath(
        os.path.abspath(
            os.path.dirname(inspect.getfile(inspect.currentframe()))))
    hf = HeadFinder(join(script_folder, "negra.headrules"))

    trees = []
    with codecs.open(export_file, encoding=encoding) as fh:
        sent_id = None  # id of the sentence currently open, None between sentences
        buffered_lines = []
        for line_no, line in enumerate(fh, 1):
            if line.startswith("#BOS"):
                # split() with no separator accepts any run of spaces/tabs,
                # so the id is found regardless of the delimiter used.
                sent_id = int(line.split()[1])
            elif line.startswith("#EOS"):
                closing_id = int(line.split()[1])
                # Explicit raise instead of `assert` so the check is still
                # performed when Python runs with -O.
                if sent_id != closing_id:
                    raise AssertionError(
                        "%s, line %d: #EOS %d does not match open #BOS %r"
                        % (export_file, line_no, closing_id, sent_id))
                if buffered_lines:
                    tree = _give_me_a_tree_from_export_format(buffered_lines)
                    tree.attributes["sent_id"] = sent_id
                    hf.mark_head(tree)
                    trees.append(tree)
                else:
                    # Keep a placeholder so positions still line up with the
                    # sequence of #EOS markers in the file.
                    trees.append(None)
                if sent_id % 1000 == 0:
                    print("loaded %d trees" % sent_id, file=stderr)
                    stderr.flush()
                sent_id = None
                buffered_lines = []
            elif sent_id is not None:
                buffered_lines.append(line)
            else:
                raise Exception(
                    "%s, line %d: content outside of a #BOS/#EOS block: %r"
                    % (export_file, line_no, line))

    return trees
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号