def _answer_stop_word_density(self, row):
    """Add the fraction of answer tokens that are English stopwords.

    The answer text is split on whitespace and each token is compared
    as-is (no lowercasing or punctuation stripping) against NLTK's English
    stopword list. The result is a ratio between 0 and 1, not a percentage
    scaled to 100.

    - Args:
        row: input row exposing an ``Answer`` field and supporting item
            assignment (presumably a pandas Series passed by
            ``DataFrame.apply`` -- confirm against the caller).
    - Returns:
        row: the same row with ``ANSWER_STOPWORD_DENSITY`` set. The value
            is 0 when the answer is empty/missing or contains only
            whitespace.
    """
    answer = row.Answer
    # ``split()`` without a separator yields [] for whitespace-only text,
    # so check the token list rather than the raw string to avoid a
    # division by zero below.
    tokens = answer.split() if answer else []
    if not tokens:
        row['ANSWER_STOPWORD_DENSITY'] = 0
        return row

    # Load the stopwords only when there is something to score, and keep
    # them in a set so each membership test is O(1) instead of a list scan.
    stop = set(stopwords.words('english'))
    num_stop_word_in_answer = sum(1 for token in tokens if token in stop)
    # float() keeps true division even under Python 2 semantics.
    row['ANSWER_STOPWORD_DENSITY'] = float(num_stop_word_in_answer) / len(tokens)
    return row
feature_construction.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录