def extractVecs():
    """Load the pretrained word-vector text file named by ``options.pretrained``.

    Every non-blank line is expected to hold a token followed by its vector
    components, separated by single spaces. Lines are lower-cased before being
    split, so the returned tokens are lower-case.

    The file is parsed by hand on purpose: the original author found
    ``pandas.read_csv`` unreliable on this input.

    Returns:
        A ``(globalWordTokens, globalWordVectors)`` pair of ``np.matrix``
        objects: an ``(n, 1)`` string matrix of tokens and an ``(n, d)``
        float matrix whose row ``i`` is the vector of token ``i``.

    Raises:
        ValueError: if the file contains no data lines, a component is not a
            valid float, or rows carry differing numbers of components.
    """
    # time.clock() was removed in Python 3.8; time.time() exists everywhere.
    t0 = time.time()
    tokens = []
    vectors = []
    with open(options.pretrained, 'r') as f:
        # Stream line by line instead of materialising the whole file, and
        # convert each row to floats immediately so no full-size string matrix
        # is ever built.
        for line in f:
            fields = line.rstrip().lower().split(' ')
            if fields == ['']:
                # Blank line (e.g. trailing newline at end of file).
                continue
            tokens.append(fields[0])
            vectors.append(np.array([float(v) for v in fields[1:]], dtype=float))
    if not tokens:
        raise ValueError("no word vectors found in %s" % options.pretrained)
    # Keep the historical return shapes: column matrix of tokens, 2-D matrix
    # of float components.
    globalWordTokens = np.asmatrix(np.array(tokens, dtype=str)).T
    globalWordVectors = np.asmatrix(np.vstack(vectors))
    # Single-argument print works as a statement (Py2) and a function (Py3);
    # the double space reproduces the original message layout.
    print("%s  seconds taken for loading and slicing gLoVe Word Vectors" % (time.time() - t0))
    return globalWordTokens, globalWordVectors
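# Comment list        (web-page navigation residue, not part of the code)
# Table of contents   (web-page navigation residue, not part of the code)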