def correct_orphan_records(self, provider='europeana', end=None):
    """[#185] Find search-engine records that are missing from the database.

    Queries the search engine for every record whose ``provider`` term
    matches, loads the set of identifiers known to the database from a JSON
    dump on disk, and walks the search results looking for identifiers that
    are not in that set. Each such orphan is fetched and logged, and a
    running count is reported at the end.

    Args:
        provider: Value matched against the ``provider`` term in the search
            engine. Note that the identifier dump path is fixed to the
            europeana file regardless of this value.
        end: Unused by this implementation; kept so existing callers that
            pass it keep working.

    Returns:
        None. Results are reported through the module logger only.
    """
    s = Search().query(Q('term', provider=provider))
    response = s.execute()
    total = response.hits.total

    # A file extracted from the production database listing all of the
    # europeana identifiers.
    identifier_file = '/tmp/europeana-identifiers.json'
    # Use a context manager so the file handle is closed as soon as the JSON
    # has been parsed, instead of lingering until garbage collection.
    with open(identifier_file) as fh:
        db_identifiers = set(json.load(fh))
    total_in_db = len(db_identifiers)

    log.info("Using search engine instance %s", settings.ELASTICSEARCH_URL)
    log.info("Total records: %d (search engine), %d (database) [diff=%d]", total, total_in_db, total - total_in_db)

    deleted_count = 0
    # scan() iterates over every matching hit rather than just the first page.
    for r in s.scan():
        if r.identifier not in db_identifiers:
            img = search.Image.get(id=r.identifier)
            # NOTE(review): nothing in this block actually deletes `img`; the
            # record is only fetched, logged and counted. Confirm whether a
            # delete call was intended here before relying on the count below.
            log.debug("Going to delete image %s", img)
            deleted_count += 1
    log.info("Deleted %d from search engine", deleted_count)
# (Web-page residue from the source listing: "Comment list" / "Article table of contents".)