def __div__(self, other, *args):
    """
    Classic division (``/`` under Python 2 without ``from __future__ import division``).

    Delegates to ``self._apply_operator`` with the operator name ``"__div__"``,
    forwarding ``other`` and any extra positional arguments unchanged, and
    returns whatever that call returns.
    """
    return self._apply_operator(other, "__div__", *args)
python类division()的实例源码
def __truediv__(self, other, *args):
    """
    True division (``/``).

    Delegates to ``self._apply_operator`` with the operator name
    ``"__truediv__"``, forwarding ``other`` and any extra positional arguments
    unchanged, and returns whatever that call returns.
    """
    return self._apply_operator(other, "__truediv__", *args)
def get_total_seconds(td):
    """Return the duration of ``td`` expressed in seconds.

    ``td`` must expose ``days``, ``seconds`` and ``microseconds`` attributes
    (as ``datetime.timedelta`` does).
    """
    # Same formula the standard library documents for timedelta.total_seconds().
    # The result follows the interpreter's ``/`` semantics: with true division
    # (Python 3, or ``from __future__ import division``) it is a float that
    # keeps the fractional seconds; with Python 2 classic division on ints it
    # is truncated to whole seconds.
    total_microseconds = (86400 * td.days + td.seconds) * 10 ** 6 + td.microseconds
    return total_microseconds / 10 ** 6
tf_idf_visualiser.py 文件源码
项目:scientific-paper-summarisation
作者: EdCo95
项目源码
文件源码
阅读 18
收藏 0
点赞 0
评论 0
def calculate_tf_idf(sentence, global_count_of_papers_words_occur_in, paper_bag_of_words):
    """
    Calculates the tf-idf score of every word in a sentence based on all of the papers.

    :param sentence: the sentence to score, as a list of words
    :param global_count_of_papers_words_occur_in: a dictionary of the form (word: number of papers the word occurs in)
    :param paper_bag_of_words: the bag of words representation for a paper, indexed by word
    :return: a list of (word, tf-idf score) tuples, one per word, in sentence order
    """
    tf_idfs = []

    for word in sentence:
        # Number of documents containing this word - the idf denominator
        # (1 is added to prevent division by 0).
        docs_containing_word = global_count_of_papers_words_occur_in[word] + 1

        # Count of the word in this paper - the tf score.
        count_word = paper_bag_of_words[word]

        idf = np.log(NUMBER_OF_PAPERS / docs_containing_word)
        tf_idfs.append(count_word * idf)

    return list(zip(sentence, tf_idfs))
useful_functions.py 文件源码
项目:scientific-paper-summarisation
作者: EdCo95
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def calculate_tf_idf(sentence, global_count_of_papers_words_occur_in, paper_bag_of_words):
    """
    Calculates the average tf-idf score of a sentence based on all of the papers.

    Words found in STOPWORDS are skipped and do not count towards the average.

    :param sentence: the sentence to calculate the score for, as a list of words
    :param global_count_of_papers_words_occur_in: a dictionary of the form (word: number of papers the word occurs in)
    :param paper_bag_of_words: the bag of words representation for a paper, indexed by word
    :return: the mean tf-idf score over the scored words, or 0 if no word was scored
    """
    sentence_tf_idf = 0
    length = 0

    for word in sentence:
        if word in STOPWORDS:
            continue

        # Number of documents containing this word - the idf denominator
        # (1 is added to prevent division by 0).
        docs_containing_word = global_count_of_papers_words_occur_in[word] + 1

        # Count of the word in this paper - the tf score.
        count_word = paper_bag_of_words[word]

        idf = np.log(NUMBER_OF_PAPERS / docs_containing_word)
        sentence_tf_idf += count_word * idf
        length += 1

    # An empty sentence, or one made only of stopwords, has nothing to average.
    if length == 0:
        return 0
    return sentence_tf_idf / length
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def weighted_avg_and_std(values, weights):
    """
    Return the weighted average and standard deviation as a tuple.

    values, weights -- Numpy ndarrays with the same shape.

    Returns ``(average, standard_deviation)``.

    References:
    - http://stackoverflow.com/questions/2413522/weighted-standard-deviation-in-numpy
    - https://en.wikipedia.org/wiki/Mean_square_weighted_deviation

    Note: The method is biased (division by N and not (N-1)).
    """
    average = NP.average(values, weights=weights)
    # Weighted mean of the squared deviations - fast and numerically precise.
    variance = NP.average((values - average) ** 2, weights=weights)
    return (average, math.sqrt(variance))
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def get_total_seconds(td):
    """Return the duration of ``td`` expressed in seconds.

    ``td`` must expose ``days``, ``seconds`` and ``microseconds`` attributes
    (as ``datetime.timedelta`` does).
    """
    # Same formula the standard library documents for timedelta.total_seconds().
    # The result follows the interpreter's ``/`` semantics: with true division
    # (Python 3, or ``from __future__ import division``) it is a float that
    # keeps the fractional seconds; with Python 2 classic division on ints it
    # is truncated to whole seconds.
    total_microseconds = (86400 * td.days + td.seconds) * 10 ** 6 + td.microseconds
    return total_microseconds / 10 ** 6
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def download_speed(self):
    """Return the download speed as display text.

    Returns the placeholder ``"..."`` while ``self.avg`` is zero; otherwise
    passes the reciprocal of ``self.avg`` to ``format_size`` and appends
    ``"/s"`` to the result.
    """
    # Avoid a ZeroDivisionError in the reciprocal below.
    if self.avg == 0.0:
        return "..."
    return format_size(1 / self.avg) + "/s"
def __div__(self, other):
    """self / other without __future__ division

    May promote to float.
    """
    # Placeholder only: this implementation always raises.
    raise NotImplementedError
def __rdiv__(self, other):
    """other / self without __future__ division"""
    # Placeholder only: this implementation always raises.
    raise NotImplementedError
def __truediv__(self, other):
    """self / other with __future__ division.

    Should promote to float when necessary.
    """
    # Placeholder only: this implementation always raises.
    raise NotImplementedError
def __rtruediv__(self, other):
    """other / self with __future__ division"""
    # Placeholder only: this implementation always raises.
    raise NotImplementedError