def cluster(self, data_set, threshold, verbose=True):
    """Greedily group samples by similarity to earlier samples.

    Samples are visited in order. Each one is scored against every sample
    that precedes it using ``Util.simscore``. If the best score is strictly
    greater than ``threshold``, the sample joins the group of that
    best-scoring earlier sample; otherwise it opens a new group.

    Args:
        data_set: Sliceable sequence of mappings. Each mapping must provide
            an ``"id"`` key (used as the sample identifier) and an
            ``"encoded"`` key (the value handed to ``Util.simscore``).
        threshold: Score a sample must exceed to join an existing group.
        verbose: When true, print a progress line per sample and a line
            whenever a sample is attached to an existing group.

    Returns:
        A dict mapping each group id (integers counting up from 1) to the
        list of sample ids assigned to that group. Empty input yields an
        empty dict. Samples sharing the same ``"id"`` collapse into one
        entry holding the most recent assignment.
    """
    group_of = {}  # sample id -> group id
    last_group = 0

    for index, sample in enumerate(data_set):
        if verbose:
            print("[+] Processing Sample: %s" % (sample["id"],))

        # Similarity of this sample to every sample seen before it.
        scores = {
            prev["id"]: Util.simscore(sample["encoded"], prev["encoded"])
            for prev in data_set[:index]
        }

        group = None  # real group ids are >= 1, so None is a safe sentinel
        if scores:
            # One pass finds the best match; ties resolve to the first
            # maximal entry in dict iteration order.
            closest = max(scores, key=scores.get)
            if scores[closest] > threshold:
                if verbose:
                    print("[+] Found Closet Cluster: %s" % (closest,))
                group = group_of[closest]

        if group is None:
            # Nothing similar enough (or first sample): start a new group.
            last_group += 1
            group = last_group

        group_of[sample["id"]] = group

    # Invert the id -> group mapping into group -> [ids].
    grp_info = {}
    for sid, gid in group_of.items():
        grp_info.setdefault(gid, []).append(sid)
    return grp_info
评论列表
文章目录