def __init__(self):
    """Load the ``entities`` table into the in-memory lookup indexes.

    Each row is read once and its ``id`` is registered under several keys:

    * ``self.name_address`` -- ``(hash(name), hash(address))`` and the
      same pair built from the original address, all after
      ``self.normalize``.
    * ``self.name_lat_lng`` -- ``(hash(name), hash((str(lat), str(lng))))``.
    * ``self.address_data`` -- ``hash`` of the normalized original address.

    The three mappings are not created in this method; they must already
    be reachable through ``self``.
    NOTE(review): presumably they are class-level dicts -- confirm, since
    that would make them shared between instances.

    Keys are stored with plain item assignment, so when two rows produce
    the same key the later row's id replaces the earlier one.
    """
    # Load the table into memory.
    logging.info("Loading ids")
    cur = getCursor()
    try:
        executeAndLog(
            cur,
            "SELECT id, entity_name, address, original_address, lat, lng FROM entities")
        for row in cur:
            norm_name = self.normalize(row["entity_name"])
            logging.info(norm_name)
            norm_address = self.normalize(row["address"])
            norm_orig_address = self.normalize(row["original_address"])

            # The name hash is part of every composite key; compute it once.
            name_hash = hash(norm_name)
            orig_address_hash = hash(norm_orig_address)
            entity_id = row["id"]

            # Both the current and the original address resolve to the
            # same entity.
            self.name_address[(name_hash, hash(norm_address))] = entity_id
            self.name_address[(name_hash, orig_address_hash)] = entity_id

            # Coordinates are compared through their string form.
            lat_lng_hash = hash((str(row["lat"]), str(row["lng"])))
            self.name_lat_lng[(name_hash, lat_lng_hash)] = entity_id

            self.address_data[orig_address_hash] = entity_id
    finally:
        # Release the cursor even when the query or a row fails, so an
        # error during loading does not leak it.
        cur.close()
    logging.info("Loading done")
# Normalize a string by removing spaces, dots and commas and converting everything to lower case.
# TODO: normalize upper-case letters with diacritics
评论列表
文章目录