def _ner_features(self, row):
    """Add named-entity recognition (NER) features to a single row.

    The answer and the question are each split on whitespace and passed to
    ``self.st.tag``, which is expected to yield ``(token, label)`` pairs
    (presumably an NLTK Stanford NER tagger -- confirm against the class
    constructor).  Only the labels ``PERSON``, ``ORGANIZATION`` and
    ``LOCATION`` are kept.

    Args:
        row: Row object exposing ``Answer``, ``Question`` and ``Sentence``
            as attributes and supporting item assignment (e.g. the
            ``pandas.Series`` handed over by ``DataFrame.apply(axis=1)``).

    Returns:
        The same ``row`` with these entries added, or ``None`` when the
        answer or the question is ``None``:

        * ``NAMED_ENTITY_IN_ANSWER_COUNT_{PERS,ORG,LOC}``: 1 if the answer
          contains at least one entity of that type, else 0 (a presence
          flag, despite the ``COUNT`` in the name).
        * ``NAMED_ENTITY_OUT_ANSWER_COUNT_{PERS,ORG,LOC}``: the same flags
          computed on the question.
        * ``NUM_NAMED_ENTITIES_IN_ANSWER`` / ``NUM_NAMED_ENTITIES_OUT_ANSWER``:
          number of kept entity tokens in the answer / the question.
        * ``ANSWER_NAMED_ENTITY_DENSITY`` / ``QUESTION_NAMED_ENTITY_DENSITY``:
          those counts divided by the number of whitespace tokens in
          ``row.Sentence``; ``0.0`` when the sentence has no tokens.
    """
    answer = row.Answer
    question = row.Question
    # NOTE(review): only ``None`` is treated as missing; a non-string value
    # such as NaN would still fail at ``.split()`` below.
    if answer is None or question is None:
        return None

    sentence_len = len(row.Sentence.split())

    entity_labels = ('PERSON', 'ORGANIZATION', 'LOCATION')
    answer_entities = [label for _, label in self.st.tag(answer.split())
                       if label in entity_labels]
    question_entities = [label for _, label in self.st.tag(question.split())
                         if label in entity_labels]

    # Presence flags: answer columns first, then question columns, each in
    # PERS / ORG / LOC order so the resulting column order stays stable.
    column_suffixes = (('PERSON', 'PERS'),
                       ('ORGANIZATION', 'ORG'),
                       ('LOCATION', 'LOC'))
    flag_groups = (('NAMED_ENTITY_IN_ANSWER_COUNT_', answer_entities),
                   ('NAMED_ENTITY_OUT_ANSWER_COUNT_', question_entities))
    for column_prefix, found in flag_groups:
        for label, suffix in column_suffixes:
            row[column_prefix + suffix] = int(label in found)

    row['NUM_NAMED_ENTITIES_IN_ANSWER'] = len(answer_entities)
    row['NUM_NAMED_ENTITIES_OUT_ANSWER'] = len(question_entities)

    # An empty or whitespace-only sentence has zero tokens; report a density
    # of 0.0 instead of raising ZeroDivisionError.
    if sentence_len:
        answer_density = float(len(answer_entities)) / sentence_len
        question_density = float(len(question_entities)) / sentence_len
    else:
        answer_density = 0.0
        question_density = 0.0
    row['ANSWER_NAMED_ENTITY_DENSITY'] = answer_density
    row['QUESTION_NAMED_ENTITY_DENSITY'] = question_density
    return row
feature_construction.py 文件源码
python
阅读 53
收藏 0
点赞 0
评论 0
评论列表
文章目录