def get_training_features(potential_event, fb_event, fb_event_attending):
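    """Build a tuple of text feature strings for one event, for use as classifier training data."""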
    if 'owner' in fb_event['info']:
        owner_name = 'id%s' % fb_event['info']['owner']['id']
    else:
        owner_name = ''
    location = event_locations.get_address_for_fb_event(fb_event).encode('utf-8')

    def strip_text(s):
        return strip_punctuation(s.encode('utf-8')).lower()

    name = strip_text(fb_event['info'].get('name', ''))
    description = strip_text(fb_event['info'].get('description', ''))
    attendee_list = ' '.join('id%s' % x['id'] for x in fb_event_attending['attending']['data'])
    source_list = ' '.join('id%s' % x.id for x in potential_event.source_ids_only())
    #TODO(lambert): maybe include number-of-keywords and keyword-density?
    #TODO(lambert): someday write this as a proper mapreduce that reduces across languages and builds a classifier model per language?
    # for now we can just grep and build sub-models per-language on my client machine.
    # For now only the attendee list is used as a feature; the fuller feature tuple is
    # kept below (commented out, previously unreachable) in case the other signals are re-enabled.
    return (attendee_list,)
    # return (potential_event.language, owner_name, location, name, description, attendee_list, source_list)
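
# A minimal usage sketch with hypothetical data (none of the values below come from
# this codebase). The dict shapes simply mirror what this function indexes into: a
# Graph API-style event dict plus an attending list, while potential_event stands in
# for whatever model object exposes .language and .source_ids_only() here.
#
#   fb_event = {'info': {'name': 'House Dance Workshop',
#                        'description': 'Open session, all levels welcome',
#                        'owner': {'id': '123'}}}
#   fb_event_attending = {'attending': {'data': [{'id': '456'}, {'id': '789'}]}}
#   features = get_training_features(potential_event, fb_event, fb_event_attending)
#   row = '\t'.join(features)  # e.g. one line of a training file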