data_helpers.py 文件源码-python代码片段

data_helpers.py 文件源码

python

阅读 31 收藏 0 点赞 0 评论 0

项目：CNNChineseClassifyer 作者: winnerineast 项目源码文件源码

def _read_segmented_lines(data_file):
    """Read a UTF-8 text file and return each line as space-joined jieba tokens."""
    # The context manager guarantees the handle is closed even when
    # decoding fails part-way through the file.
    with codecs.open(data_file, "r", encoding='utf-8') as handle:
        lines = handle.readlines()
    return [" ".join(jieba.cut(line)) for line in lines]


def load_utf8_data_and_labels(positive_data_file, negative_data_file):
    """Load two UTF-8 text files and build a labelled two-class dataset.

    Every line of each file is treated as one example. Each line is
    segmented with ``jieba.cut``, the tokens are joined with single
    spaces, and the result is passed through ``clean_str`` (defined
    elsewhere in this module; its exact normalisation is not visible here).

    Args:
        positive_data_file: Path to the file holding the positive examples.
        negative_data_file: Path to the file holding the negative examples.

    Returns:
        A two-element list ``[x_text, y]``. ``x_text`` is the list of
        cleaned sentences, positive examples first, then negative ones.
        ``y`` is an integer NumPy array of shape ``(len(x_text), 2)`` whose
        rows are ``[0, 1]`` for a positive example and ``[1, 0]`` for a
        negative one, in the same order as ``x_text``.
    """
    positive_examples = _read_segmented_lines(positive_data_file)
    negative_examples = _read_segmented_lines(negative_data_file)

    # Positive examples come first so that rows of ``y`` line up with ``x_text``.
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]

    # One-hot labels: column 1 marks the positive class, column 0 the negative.
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]

    # Build the label matrix in one step and force two columns. Concatenating
    # the two lists separately fails when exactly one file is empty, because
    # an empty list becomes a 1-D array that cannot be joined to a 2-D one.
    y = np.array(positive_labels + negative_labels, dtype=int).reshape(-1, 2)
    return [x_text, y]