search_function.py 文件源码

python
阅读 30 收藏 0 点赞 0 评论 0

项目:email-sherlock 作者: jgondin 项目源码 文件源码
def get_data(self, labels_=None, data=None):
        """Load the document table, attach similarity scores and return the matching rows.

        Reads the ``document`` table from the hard-coded SQLite database, adds a
        ``Similarity`` column taken from the first column of ``self.similarity``,
        keeps a reproducible 15000-row sample (``random_state=44``) in
        ``self.data``, rounds ``Polarity`` to two decimals, and returns the rows
        selected by ``self.labels_`` whose ``cluster_labels`` is positive.

        Args:
            labels_: Ignored; the mask is always taken from ``self.labels_``.
                Kept only for interface compatibility.
            data: Ignored; kept only for interface compatibility.

        Returns:
            A DataFrame with the columns ``Keywords``, ``Similarity``,
            ``Polarity``, ``Subjectivity``, ``from`` and ``Email``, sorted by
            ``Similarity`` in ascending order.
        """
        print('Loading CleanText from DataBase from...')
        conn = connect('/home/gondin/metis/project/clinton-email-download/hrcemail3.sqlite')
        sql = """SELECT Keywords, Polarity, Subjectivity, "from", cluster_labels, pdf_path as "Email" FROM document;"""
        try:
            self.data = pd.read_sql_query(sql, conn)
        finally:
            # Release the connection even when the query fails.
            conn.close()

        # NOTE(review): assumes self.similarity has one row per document row,
        # in the same order as the table -- confirm against the caller.
        self.data['Similarity'] = self.similarity[:, 0]

        # Fixed seed keeps the sample reproducible between calls.
        self.data = self.data.sample(15000, random_state=44)

        # NOTE(review): presumably a boolean mask aligned with the sampled
        # rows -- confirm where self.labels_ is computed.
        labels_ = self.labels_
        print(self.data.shape)
        print(labels_.shape)
        self.data['Polarity'] = self.data['Polarity'].apply(lambda x: round(x, 2))

        columns = ['Keywords', 'Similarity', 'Polarity', 'Subjectivity', "from", "Email"]
        mask = labels_ & (self.data.cluster_labels > 0)
        # ``.ix`` was removed in pandas 1.0; ``.loc`` is the equivalent for a
        # boolean row mask combined with column labels.
        return self.data.loc[mask, columns].sort_values('Similarity')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号