def cluster_words(words, service_name, size):
    """Cluster words by the pairwise similarity of their cleaned forms.

    Each word is first stripped of a fixed set of noise substrings (HTTP
    verbs, a few metric-name fragments, the service name and the ``-`` /
    ``_`` separators).  A dissimilarity of ``1 - jaro_distance(a, b)`` is
    then computed for every pair of cleaned words, and the resulting
    values are passed through ``linkage`` and ``cluster_of_size``.

    Args:
        words: Sized, iterable collection of strings to cluster.
        service_name: Substring removed from every word in addition to
            the built-in noise substrings.
        size: Forwarded unchanged as the second argument of
            ``cluster_of_size``.

    Returns:
        Whatever ``cluster_of_size`` returns for the computed linkage.

    NOTE(review): ``linkage`` is presumably
    ``scipy.cluster.hierarchy.linkage`` fed a condensed distance vector,
    and ``jaro_distance`` presumably returns a similarity in [0, 1] --
    confirm against the file's imports.
    """
    noise_tokens = ["GET", "POST", "total", "http-requests", service_name, "-", "_"]

    def strip_noise(text):
        # Removal is sequential, so list order matters: multi-character
        # tokens are dropped before the bare "-" and "_" separators.
        for token in noise_tokens:
            text = text.replace(token, "")
        return text

    normalized = [strip_noise(raw) for raw in words]

    def pair_dissimilarity(pair):
        first, second = pair
        return 1 - jaro_distance(normalized[first], normalized[second])

    # Row/column indices of the strict upper triangle: one (i, j) per
    # unordered pair with i < j.
    upper_pairs = np.triu_indices(len(words), 1)
    # Axis 0 holds the (i, j) coordinates, so the callback runs per pair.
    condensed = np.apply_along_axis(pair_dissimilarity, 0, upper_pairs)
    return cluster_of_size(linkage(condensed), size)
metricsnamecluster.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录