def process_imdb(fname, setting):
    """Load a class-capped, tokenized subset of ``<fname>/<setting>.csv``.

    Each CSV row is read as follows: column 0 holds the raw label (the
    string ``"1"`` maps to label 0, any other value maps to label 1) and
    column 2 holds the text.  Double quotes are stripped from the text
    before it is tokenized with ``nltk.word_tokenize``.

    At most 5000 rows per label are kept when ``setting == 'test'``;
    otherwise at most 15000 rows per label are kept.  Rows beyond a
    label's cap are ignored, and reading stops once both caps are reached.

    Args:
        fname: Directory that contains the CSV file.
        setting: Base name of the CSV file (without ``.csv``); the value
            ``'test'`` also selects the smaller per-label cap.

    Returns:
        A ``(sentences, labels)`` tuple of parallel lists, where each
        sentence is the token list returned by ``nltk.word_tokenize`` and
        each label is the int 0 or 1.

    Raises:
        IndexError: If a non-empty row has fewer than three columns.
    """
    max_per_class = 5000 if setting == 'test' else 15000
    path = os.path.join(fname, setting + ".csv")

    # ``str is bytes`` only holds on Python 2, where the csv module needs a
    # binary file.  On Python 3 the csv module needs a text-mode file opened
    # with newline='' so that quoted embedded newlines are parsed correctly.
    if str is bytes:
        handle = open(path, 'rb')
    else:
        handle = open(path, 'r', encoding='utf-8', newline='')

    labels, sentences = [], []
    kept = [0, 0]  # number of accepted rows per label
    with handle as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            label = 0 if row[0] == "1" else 1
            if kept[label] >= max_per_class:
                if min(kept) >= max_per_class:
                    # Both labels are full: no later row can be accepted.
                    break
                continue
            kept[label] += 1
            sentence = row[2].replace("\"", "")
            if isinstance(sentence, bytes):
                # Python 2 rows are byte strings; decode once before tokenizing.
                sentence = sentence.decode('utf-8')
            sentences.append(nltk.word_tokenize(sentence))
            labels.append(label)
    return sentences, labels
评论列表
文章目录