feature_construction.py 文件源码

python
阅读 53 收藏 0 点赞 0 评论 0

项目:Automatic-Question-Generation 作者: bwanglzu 项目源码 文件源码
def _ner_features(self, row):
        """Add named-entity-recognition (NER) features to a single row.

        The answer and the question are split on whitespace and passed to
        ``self.st.tag``, which must return ``(token, label)`` pairs
        (presumably an NLTK Stanford NER tagger -- confirm where ``self.st``
        is created). Only the labels PERSON, ORGANIZATION and LOCATION are
        counted; every other label is ignored.

        - Args:
            row: row-like object exposing ``Answer``, ``Question`` and
                ``Sentence`` attributes and supporting item assignment
                (e.g. a pandas Series).
        - Returns:
            The same ``row`` with these columns set, or ``None`` when either
            ``Answer`` or ``Question`` is ``None``:

            * ``NAMED_ENTITY_IN_ANSWER_COUNT_{PERS,ORG,LOC}``: 1 if the
              answer contains at least one entity of that type, else 0
              (a presence flag despite the ``COUNT`` in the name).
            * ``NAMED_ENTITY_OUT_ANSWER_COUNT_{PERS,ORG,LOC}``: the same
              flag computed on the question.
            * ``NUM_NAMED_ENTITIES_IN_ANSWER`` /
              ``NUM_NAMED_ENTITIES_OUT_ANSWER``: number of entity tokens in
              the answer / question.
            * ``ANSWER_NAMED_ENTITY_DENSITY`` /
              ``QUESTION_NAMED_ENTITY_DENSITY``: entity-token count divided
              by the number of whitespace tokens in ``Sentence``; 0.0 when
              the sentence has no tokens.
        """
        answer = row.Answer
        question = row.Question
        if answer is None or question is None:
            return None

        sentence_len = len(row.Sentence.split())

        # (column suffix, tagger label) for each entity type we track.
        entity_types = (
            ('PERS', 'PERSON'),
            ('ORG', 'ORGANIZATION'),
            ('LOC', 'LOCATION'),
        )
        tracked_labels = [label for _, label in entity_types]

        ner_values_answer = [
            label for _, label in self.st.tag(answer.split())
            if label in tracked_labels]
        ner_values_question = [
            label for _, label in self.st.tag(question.split())
            if label in tracked_labels]

        # Presence flags: answer columns first, then question columns, so the
        # order in which columns are added to the row stays stable.
        flag_groups = (
            ('NAMED_ENTITY_IN_ANSWER_COUNT_', ner_values_answer),
            ('NAMED_ENTITY_OUT_ANSWER_COUNT_', ner_values_question),
        )
        for prefix, found_labels in flag_groups:
            for suffix, label in entity_types:
                row[prefix + suffix] = 1 if label in found_labels else 0

        row['NUM_NAMED_ENTITIES_IN_ANSWER'] = len(ner_values_answer)
        row['NUM_NAMED_ENTITIES_OUT_ANSWER'] = len(ner_values_question)

        # An empty / whitespace-only sentence has no tokens; report a density
        # of 0.0 rather than raising ZeroDivisionError.
        if sentence_len:
            answer_density = float(len(ner_values_answer)) / sentence_len
            question_density = float(len(ner_values_question)) / sentence_len
        else:
            answer_density = 0.0
            question_density = 0.0
        row['ANSWER_NAMED_ENTITY_DENSITY'] = answer_density
        row['QUESTION_NAMED_ENTITY_DENSITY'] = question_density
        return row
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号