def load_data_and_labels_gameforum():
    """Load the gameforum posts and their one-hot sentiment labels.

    Reads ``./input/gameforum-1000.csv``, skips the header row, and keeps
    only the rows whose second column is '1', '2' or '3'.  The first column
    of every kept row is passed through ``clean_str`` and then tokenized
    with ``TweetTokenizer(reduce_len=True)``.

    Returns:
        A two-element list ``[x_text, y]``.  ``x_text`` is a list holding
        the tokenizer output for each kept post; ``y`` is the parallel list
        of one-hot labels: ``[1, 0, 0]`` for '1' (positive), ``[0, 1, 0]``
        for '2' (neutral) and ``[0, 0, 1]`` for '3' (negative).
    """
    # Single source of truth for the accepted labels and their encoding.
    one_hot = {
        '1': [1, 0, 0],  # positive
        '2': [0, 1, 0],  # neutral
        '3': [0, 0, 1],  # negative
    }

    # load
    # NOTE(review): the 'U' flag is kept for Python 2 compatibility; it is
    # deprecated in Python 3 and rejected from 3.11 on.  When porting, use
    # open(path, newline='') as recommended by the csv module docs.
    with open("./input/gameforum-1000.csv", 'rU') as f:
        rows = list(csv.reader(f))[1:]  # remove header

    # Keep only rows that carry a known label.  The length check skips rows
    # with fewer than two columns (e.g. blank lines) instead of raising
    # IndexError on row[1].  Rows with any other label are dropped silently.
    dataset = [row for row in rows if len(row) > 1 and row[1] in one_hot]

    # generate x
    tk = TweetTokenizer(reduce_len=True)
    cleaned = [clean_str(row[0]) for row in dataset]
    x_text = [tk.tokenize(post) for post in cleaned]

    # generate y
    # Copy the template so every sample owns an independent label list.
    y = [list(one_hot[row[1]]) for row in dataset]

    return [x_text, y]
# Comment list
# Article table of contents