def prepro_question(imgs, params):
    """Tokenize the question of every entry in ``imgs``, in place.

    For each entry the text under ``'question'`` is tokenized and the result
    is stored back on the same entry under ``'processed_tokens'``.

    Args:
        imgs: sequence of dict-like entries, each with a ``'question'`` key.
        params: mapping with a ``'token_method'`` key.  The value ``'nltk'``
            selects ``word_tokenize`` on the lower-cased question; any other
            value selects the module's own ``tokenize`` helper.

    Returns:
        The same ``imgs`` object that was passed in (entries are mutated).
    """
    # Single-argument print(...) produces identical output on Python 2 and
    # Python 3, unlike the print statement, which is Python 2 only.
    print('example processed tokens:')
    for idx, entry in enumerate(imgs):
        question = entry['question']
        if params['token_method'] == 'nltk':
            # NOTE(review): on Python 2, str() of a non-ASCII unicode
            # question would raise UnicodeEncodeError -- confirm the inputs.
            tokens = word_tokenize(str(question).lower())
        else:
            tokens = tokenize(question)
        entry['processed_tokens'] = tokens

        # Show the first few results as a sanity check.
        if idx < 10:
            print(tokens)

        # Progress line, refreshed every 100 entries; the trailing '\r'
        # returns the cursor so the next update overwrites this one.
        if idx % 100 == 0:
            total = len(imgs)
            sys.stdout.write("processing %d/%d (%.2f%% done) \r" % (idx, total, idx*100.0/total) )
            sys.stdout.flush()
    return imgs
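# Web-page residue from the original post, not part of the program:
# "评论列表" (comment list), "文章目录" (article table of contents).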