def get_queryset(self, krs, nip, regon, google, no_regon, no_nip):
    """Build the list of lookup queries from explicit IDs and Google keywords.

    Each query is a single-key dict: ``{'krs': value}``, ``{'nip': value}``
    or ``{'regon': value}``. Values are de-duplicated per key, keeping the
    order in which they were first seen; KRS queries come first, then NIP,
    then REGON.

    As a side effect this sets ``self.processor`` (an ``html2text.HTML2Text``
    instance configured to ignore emphasis and links and to bypass tables)
    and ``self.session`` (a ``requests.Session``), even when ``google`` is
    empty.

    Args:
        krs: Iterable of KRS values, or a falsy value for none.
        nip: Iterable of NIP values, or a falsy value for none.
        regon: Iterable of REGON values, or a falsy value for none.
        google: Iterable of keywords to look up via ``self.search_google``,
            or a falsy value to skip the search.
        no_regon: When truthy, skip the "<keyword> REGON" search.
        no_nip: When truthy, skip the "<keyword> NIP" search.

    Returns:
        A list of single-key dicts as described above.
    """
    # Copy the inputs: the search results are appended below and the
    # caller's own lists must not be mutated (also lets tuples be passed).
    regon = list(regon or [])
    nip = list(nip or [])

    self.processor = html2text.HTML2Text()
    self.processor.ignore_emphasis = True
    self.processor.bypass_tables = True
    self.processor.ignore_links = True
    self.session = requests.Session()

    for keyword in tqdm(google or []):
        if not no_regon:
            # NOTE(review): search_google presumably returns an iterable of
            # matched strings -- confirm against its definition.
            result = self.search_google("{} REGON".format(keyword), REGON_PATTERN)
            print("For '{}' found {}".format(keyword, result))
            regon.extend(result)
        if not no_nip:
            result = self.search_google("{} NIP".format(keyword), NIP_PATTERN)
            print("For '{}' found {}".format(keyword, result))
            for candidate in result:
                # Strip dashes and keep only candidates of length 10.
                normalized = candidate.replace('-', '')
                if len(normalized) == 10:
                    nip.append(normalized)

    # dict.fromkeys de-duplicates like set() but keeps first-seen order,
    # so the resulting query list is deterministic across runs.
    queries = [{'krs': v} for v in dict.fromkeys(krs)] if krs else []
    queries += [{'nip': v} for v in dict.fromkeys(nip)]
    queries += [{'regon': v} for v in dict.fromkeys(regon)]
    return queries
评论列表
文章目录