def execute_internal(self, context, **kwargs):
    """
    Load the movie metadata CSV and reduce it to the attributes of interest.

    :param context: execution context; not used by this implementation
    :param kwargs: extra keyword arguments; an optional ``csv_source`` entry
        (anything ``pandas.read_csv`` accepts: URL, path or file-like object)
        overrides the default remote dataset
    :return: a DataFrame holding the selected columns plus ``genres_root``,
        restricted to rows with a known ``title_year`` and a positive ``budget``
    """
    source = kwargs.get(
        'csv_source',
        'https://raw.githubusercontent.com/bailaohe/parade/master/assets/movie_metadata.csv',
    )
    df = pd.read_csv(source)

    # Projection: the attributes we are interested in
    columns = ['movie_title', 'genres', 'title_year', 'content_rating',
               'budget', 'num_voted_users', 'imdb_score']

    # Keep rows with a known title_year and a positive budget.
    # A NaN budget compares False against 0, so it is dropped as well.
    keep = df['title_year'].notna() & (df['budget'] > 0)

    # Take an explicit copy so that adding a column below does not write
    # into a slice of the original frame (SettingWithCopyWarning).
    df = df.loc[keep, columns].copy()

    # Extract the genres ROOT (first entry of the '|'-separated list).
    # Missing genres stay missing instead of raising on ``NaN.split``.
    df['genres_root'] = df['genres'].map(lambda g: g.split('|')[0], na_action='ignore')
    return df
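# Comment list        (navigation residue from the source web page, not code)
# Table of contents   (navigation residue from the source web page, not code)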