data_helpers.py 文件源码-python代码片段

data_helpers.py 文件源码

python

阅读 37 收藏 0 点赞 0 评论 0

项目：lstm-context-embeddings 作者: chaitjo 项目源码文件源码

def load_data_and_labels():
    """
    Load the polarity sentences, clean them and build labels and lengths.

    Reads ``./data/rt-polaritydata/rt-polarity.pos`` and
    ``./data/rt-polaritydata/rt-polarity.neg`` (one example per line),
    strips surrounding whitespace from every line and passes each one
    through ``clean_str``.

    Returns:
        A three-element list ``[x_text, y, seqlen]`` where

        * ``x_text`` holds the cleaned sentences, positive examples first
          and negative examples after them (sentences are NOT split into
          tokens here);
        * ``y`` is a NumPy array with one row per sentence, ``[0, 1]`` for
          a positive example and ``[1, 0]`` for a negative one, in the same
          order as ``x_text``;
        * ``seqlen`` is a NumPy array with the number of single-space
          separated pieces in each cleaned sentence.
    """

    # Load data from files; the context managers make sure both handles are
    # closed even if reading fails part-way through.
    with open("./data/rt-polaritydata/rt-polarity.pos", "r") as pos_file:
        positive_examples = [line.strip() for line in pos_file]
    with open("./data/rt-polaritydata/rt-polarity.neg", "r") as neg_file:
        negative_examples = [line.strip() for line in neg_file]

    # Clean every sentence (positives first, then negatives)
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]

    # Generate labels: two-column rows, column 1 set for positive examples
    # and column 0 set for negative ones.
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)

    # Generate sequence lengths. Splitting on a literal " " (not arbitrary
    # whitespace) is kept on purpose so the counts match the original.
    seqlen = np.array([len(sent.split(" ")) for sent in x_text])

    return [x_text, y, seqlen]