parse.py 文件源码-python代码片段

parse.py 文件源码
python
阅读 26 收藏 0 点赞 0 评论 0
def parse(model_file, embed_file):
    """Run an interactive segmentation loop using a saved tagger model.

    Builds a ``Preprocessor`` from ``embed_file``, restores a ``BLSTMCRF`` or
    ``BLSTM`` network (selected by the module-level ``_use_crf`` flag) from
    ``model_file``, then reads lines from standard input. For every non-empty
    line it prints the predicted per-character B/M/E/S tags, the text with a
    space inserted after each character tagged ``E`` or ``S``, and a ``-``
    separator line. The loop ends when the user enters ``q`` or when standard
    input is exhausted.

    Args:
        model_file: Path to the ``.npz`` file holding the saved parameters;
            it must contain an ``embed/W`` array.
        embed_file: Embedding file handed to ``Preprocessor``.
    """
    # Load files
    Log.i('initialize preprocessor with %s' % embed_file)
    processor = Preprocessor(embed_file)

    Log.v('')
    Log.v("initialize ...")
    Log.v('')

    # Only the shape of the saved embedding matrix is read here; a zero
    # placeholder is passed to the constructor (presumably overwritten by the
    # load_npz call below -- confirm against the model class).
    with np.load(model_file) as f:
        embeddings = np.zeros(f['embed/W'].shape, dtype=np.float32)

    # Set up a neural network
    cls = BLSTMCRF if _use_crf else BLSTM
    model = cls(
        embeddings=embeddings,
        n_labels=4,
        dropout=0.2,
        train=False,
    )
    Log.i("loading a model from %s ..." % model_file)
    serializers.load_npz(model_file, model)

    # Index -> tag name; the order must match the label ids the model emits.
    LABELS = ('B', 'M', 'E', 'S')
    # Tags after which a separating space is printed.
    word_end_tags = ('E', 'S')

    def _process(raw_text):
        """Tag one input line and print its tags and segmented form."""
        if not raw_text:
            return
        xs = [processor.transform_one(list(raw_text))]
        ys = model.parse(xs)
        labels = [LABELS[y] for y in ys[0]]
        print(' '.join(labels))
        # Emit each character, followed by a space when its tag closes a word.
        segmented = ''.join(
            c + ' ' if label in word_end_tags else c
            for c, label in zip(raw_text, labels)
        )
        print(segmented)
        print('-')

    print("Input a Chinese sentence! (use 'q' to exit)")
    while True:
        try:
            x = input()
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): leave the loop
            # cleanly instead of crashing with a traceback.
            break
        if x == 'q':
            break
        _process(x)