def run(self):
    """Vectorize the text files under the input directory and save the result.

    Every sub-directory of ``self.in_txtdir().path`` is treated as one class:
    each entry inside it becomes one document, labelled with the index of
    that sub-directory.  The document paths are passed to
    ``CountVectorizer(input='filename')``; the resulting matrix is saved to
    ``self.out_npy().path`` and the label array is saved under the
    hard-coded name ``'labels'``.

    Sub-directories and their files are visited in sorted order, and only
    sub-directories consume a label index, so the labels are contiguous
    (0, 1, 2, ...) and the folder -> label mapping is the same on every run.
    """
    root = self.in_txtdir().path

    # os.listdir() returns entries in arbitrary order, so sort them to get a
    # reproducible labelling.  Plain files at the top level are not classes
    # and must not use up a label index.
    entries = (os.path.join(root, name) for name in sorted(os.listdir(root)))
    class_dirs = [entry for entry in entries if os.path.isdir(entry)]

    all_file_names = []
    all_labels = []
    for label, class_dir in enumerate(class_dirs):
        for file_name in sorted(os.listdir(class_dir)):
            all_file_names.append(os.path.join(class_dir, file_name))
            all_labels.append(label)

    vectorizer = CountVectorizer(input='filename')
    vector = vectorizer.fit_transform(all_file_names)

    # NOTE(review): fit_transform presumably returns a scipy sparse matrix;
    # numpy.save would then store it as a pickled object array, and it also
    # appends ".npy" when the path lacks that suffix -- confirm that the
    # consumer of out_npy() expects both.
    numpy.save(self.out_npy().path, vector)
    # TODO: Where and how do we want to save this?  For now the labels are
    # written relative to the current working directory.
    numpy.save('labels', numpy.array(all_labels))
#This is just to test the tasks above
# (scraped web-page residue: "comment list")
# (scraped web-page residue: "article contents")