def get_similar_ssid_sets(ssid_sets, threshold):
    """Build a symmetric lookup from each SSID set to the sets similar to it.

    Every unordered pair drawn from ``ssid_sets`` is handed to
    ``jaccard_worker`` in a pool of worker processes. Each pair the worker
    reports as a match is recorded in both directions.

    :param ssid_sets: Iterable of SSID sets. The sets are used as dictionary
        keys and set members, so they must be hashable (presumably
        ``frozenset`` -- confirm against the caller).
    :param threshold: Minimum Jaccard index for two sets to be matched as
        similar; forwarded unchanged to ``jaccard_worker``.
    :return: ``defaultdict(set)`` mapping an SSID set to the set of SSID sets
        it was matched with. Sets without any match do not appear as keys.
    """
    matches_by_set = defaultdict(set)
    candidate_pairs = combinations(ssid_sets, r=2)
    compare = partial(jaccard_worker, threshold=threshold)

    # Spread the pairwise comparisons over worker processes; this is
    # significantly faster than running them in a single process.
    with multiprocessing.Pool() as workers:
        # imap_unordered returns a lazy iterator right away; results become
        # available as workers finish chunks, in no particular order.
        outcomes = workers.imap_unordered(
            compare, candidate_pairs, chunksize=300000
        )

        # The results have to be drained while the pool is still alive:
        # leaving the ``with`` block terminates the worker processes.
        for outcome in outcomes:
            # A falsy result means the worker did not report a match.
            if not outcome:
                continue
            first = outcome[0]
            second = outcome[1]
            # Similarity is recorded symmetrically.
            matches_by_set[first].add(second)
            matches_by_set[second].add(first)

    return matches_by_set
clusterSimilarSSIDSets.py 文件源码
python
阅读 24
收藏 0
点赞 0
评论 0
评论列表
文章目录