def load_articles(self):
    """
    Load the articles spreadsheet into ``self.df``.

    The workbook located at ``os.path.join(self.ip_file_path, self.ip_file_name)``
    is read with ``pd.read_excel``. Only Excel columns A, B and C are kept and
    ``'NA'`` is passed as a missing-value marker.
    NOTE(review): an earlier revision sliced the columns 'Sno', 'title' and
    'content_text', so those are presumably columns A-C -- confirm against the
    input workbook.

    Return: the loaded DataFrame (the same object that is stored in ``self.df``).
    Errors raised by ``pd.read_excel`` (for example when the file does not
    exist) are not caught here and propagate to the caller.
    """
    path = os.path.join(self.ip_file_path, self.ip_file_name)
    # ``usecols`` is the replacement for the old ``parse_cols`` keyword, which
    # current pandas releases no longer accept.
    self.df = pd.read_excel(path, na_values=['NA'], usecols="A,B,C")
    # Dump the frame through the logger (lazy %-args) instead of printing it to
    # stdout unconditionally, so the output is governed by the log level.
    logging.debug("Loaded articles:\n%s", self.df)
    # ``shape`` is (rows, columns); index 1 is the column count the message names.
    logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df), self.df.shape[1]))
    return self.df
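# Comment list (web-page navigation residue, not part of the code)
# Article table of contents (web-page navigation residue, not part of the code)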