# Content_Based.py -- source file, Python code snippet

def load_articles(self):
        """
        Load the articles spreadsheet into ``self.df``.

        The workbook location is built from ``self.ip_file_path`` (directory)
        and ``self.ip_file_name`` (file name). Only spreadsheet columns A, B
        and C are read, and cells containing the literal text ``NA`` are
        treated as missing values.

        Side effects: assigns the resulting DataFrame to ``self.df``, prints
        it to stdout and emits a debug log line with its row count and shape.
        The method itself returns ``None``; callers read ``self.df``.

        NOTE(review): earlier revisions sliced the frame to the columns
        'Sno', 'title' and 'content_text' -- presumably those are the three
        columns loaded here; confirm against the input workbook.
        """
        path = os.path.join(self.ip_file_path, self.ip_file_name)
        # ``usecols`` replaces the ``parse_cols`` keyword, which was deprecated
        # in pandas 0.21 and removed in 1.0 (it raises TypeError there).
        self.df = pd.read_excel(path, na_values=['NA'], usecols="A,B,C")
        print(self.df)
        # The second placeholder receives the full (rows, columns) shape tuple.
        logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df),self.df.shape))