def get_data(self, labels_=None, data=None):
    """Load the document table, sample it, and return the selected rows.

    Reads the keyword/sentiment columns of the ``document`` table from the
    hard-coded SQLite file, attaches the first column of
    ``self.similarity`` as a ``Similarity`` column, replaces ``self.data``
    with a fixed-seed sample of 15000 rows, and rounds ``Polarity`` to two
    decimals.

    Args:
        labels_: Accepted for backward compatibility but ignored; the row
            mask is always taken from ``self.labels_``.
        data: Accepted for backward compatibility but unused.

    Returns:
        The rows of the sampled frame selected by
        ``self.labels_ & (cluster_labels > 0)``, restricted to the columns
        Keywords, Similarity, Polarity, Subjectivity, from and Email, and
        sorted by ``Similarity``.
    """
    print('Loading CleanText from DataBase from...')
    conn = connect('/home/gondin/metis/project/clinton-email-download/hrcemail3.sqlite')
    sql = """SELECT Keywords, Polarity, Subjectivity, "from", cluster_labels, pdf_path as "Email" FROM document;"""
    try:
        self.data = pd.read_sql_query(sql, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # NOTE(review): assumes self.similarity has one row per document row,
    # in table order -- confirm against wherever it is computed.
    self.data['Similarity'] = self.similarity[:, 0]

    # Fixed seed so the same subset is drawn on every call.
    self.data = self.data.sample(15000, random_state=44)

    # The labels_ argument is intentionally overridden (original behavior).
    # NOTE(review): presumably a boolean mask with one entry per sampled
    # row -- verify against the code that sets self.labels_.
    labels_ = self.labels_
    print(self.data.shape)
    print(labels_.shape)

    self.data['Polarity'] = self.data['Polarity'].apply(lambda x: round(x, 2))

    columns = ['Keywords', 'Similarity', 'Polarity', 'Subjectivity', "from", "Email"]
    mask = labels_ & (self.data['cluster_labels'] > 0)
    # .loc replaces DataFrame.ix, which was removed in pandas 1.0.
    return self.data.loc[mask, columns].sort_values('Similarity')
评论列表
文章目录