def assign(self, datafile=None, outputfile=None):
    """Tag every row of a CSV data file and write the result to a new CSV.

    For each non-empty input row the first two columns are copied through
    unchanged. The last column is split on single spaces, filtered with
    ``LdaUtils.filter_words`` and handed to ``self.query_tag``; the items it
    returns are converted with ``str`` and space-joined into a third output
    column. Progress and elapsed time are printed to stdout.

    Args:
        datafile: Input CSV name. When falsy, the user is prompted for it
            (default ``'sentences_all.csv'``). The value is passed through
            ``self.__dest`` and the result is stored on ``self.datafile``.
        outputfile: Output CSV path. When falsy, the user is prompted for
            it, with ``<datafile without extension>_result<extension>`` as
            the default. An existing file at that path is overwritten.

    Raises:
        AssertionError: If the resolved data file does not exist (the check
            is an ``assert``, so it is skipped when Python runs with -O).
    """
    if not datafile:
        datafile = click.prompt('Data file',
                                type=str,
                                default='sentences_all.csv')
    # NOTE(review): the original indentation was lost; these two statements
    # are assumed to run for caller-supplied names as well as prompted ones.
    # Confirm against the upstream file.
    datafile = self.__dest(datafile)
    self.datafile = datafile

    if not outputfile:
        datafilename, ext = os.path.splitext(datafile)
        default_outputfile = datafilename + '_result' + ext
        outputfile = click.prompt('output file',
                                  type=str,
                                  default=default_outputfile)

    assert os.path.isfile(datafile), 'No such file: %s' % datafile

    print('start assiging')
    start = time()
    # NOTE(review): the csv docs ask for binary mode ('rb'/'wb') on Python 2
    # and newline='' on Python 3; the original text-mode opens are kept here.
    with open(datafile) as fi, open(outputfile, 'w') as fo:
        csv_reader = csv.reader(fi, delimiter=',')
        csv_writer = csv.writer(fo, delimiter=',')
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; row[-1] would raise
                # IndexError and abort the whole run, so skip them.
                continue
            out_row = row[:2]  # post_id and sentence_seq
            filtered_words = LdaUtils.filter_words(row[-1].split(' '))
            tags = self.query_tag(filtered_words)
            out_row.append(' '.join(map(str, tags)))
            csv_writer.writerow(out_row)
    print('assigning takes: %s' % LdaUtils.human_readable_time(time() - start))
评论列表
文章目录