def delete_duplicates(self, path):
    """Delete all but one Elasticsearch entry whose `path` equals ``path``.

    The index is searched with a ``match`` query on the ``path`` field and
    the hits are then narrowed to those whose stored ``path`` is exactly
    equal to ``path``. The first exact match (in the order the search
    returned it) is kept; every later exact match is deleted from the
    index and its thumbnail file, if it exists on disk, is removed.

    A thumbnail that is also the kept entry's thumbnail is left in place so
    the surviving document still has its file.

    Args:
        path (string): path value to compare to those in the elastic search

    NOTE(review): only the hits returned by a single ``search`` call are
    considered; no ``size`` is passed, so the server-side default page size
    applies -- confirm this is enough for the expected number of duplicates.
    """
    hits = self.es.search(
        body={'query': {'match': {'path': path}}},
        index=self.index,
    )['hits']['hits']

    # The search may return paths that are not identical to `path`, so keep
    # only exact matches, pairing each document id with its own thumbnail.
    matches = [
        (hit['_id'], hit['_source'].get('thumbnail'))
        for hit in hits
        if hit['_source']['path'] == path
    ]
    if len(matches) < 2:
        # Zero or one exact match: nothing is duplicated.
        return

    kept_thumbnail = matches[0][1]
    for doc_id, thumbnail in matches[1:]:
        self.es.delete(index=self.index, doc_type=self.doc_type, id=doc_id)
        # Skip entries without a thumbnail, and never remove the file the
        # surviving document still points to.
        if not thumbnail or thumbnail == kept_thumbnail:
            continue
        if os.path.isfile(thumbnail):
            os.remove(thumbnail)
评论列表
文章目录