def crawl_articles(self, numbers):
    """Create or update an Article for each BibTeX entry and return them all.

    ``numbers`` is handed unchanged to ``CitationLoader``; every entry that
    its ``get_bibtex()`` yields is processed as follows:

    1. An existing ``Article`` is looked up by ``entry_number`` (the entry's
       ``'ID'``). If the lookup raises ``DoesNotExist`` or
       ``ServerSelectionTimeoutError``, a fresh ``Article`` is created with
       that ``entry_number`` instead.
    2. The bibliographic fields are copied from the entry onto the article;
       a field that is absent from the entry is stored as an empty string.
    3. ``issue_reference`` is set to ``self.__issue``.
    4. The article is saved. A ``ServerSelectionTimeoutError`` raised by the
       save is logged and otherwise ignored, so the article is still part of
       the returned mapping even though it was not persisted.

    Args:
        numbers: Forwarded to ``CitationLoader`` (presumably the article
            numbers to fetch citations for -- confirm against
            ``CitationLoader``).

    Returns:
        A dict mapping each entry's ``'ID'`` to its ``Article`` object.
    """
    # Attributes copied one-to-one from the entry onto the article, in the
    # order they are assigned.
    copied_fields = (
        'title', 'author', 'journal', 'year', 'volume', 'number',
        'pages', 'abstract', 'keyword', 'doi', 'issn',
    )

    bibtex_entries = CitationLoader(numbers).get_bibtex()
    crawled = {}
    for entry in bibtex_entries:
        entry_id = entry['ID']

        # Reuse the stored article when there is one; otherwise (including
        # when the database cannot be reached) start from a blank article.
        try:
            record = Article.objects.get(entry_number=entry_id)
            logger.info('Article [%s] already exists, it will be updated.' % entry_id)
        except (DoesNotExist, ServerSelectionTimeoutError):
            record = Article()
            record.entry_number = entry_id
            logger.info('Article [%s] is a new article.' % entry_id)

        for field in copied_fields:
            if field in entry:
                value = entry[field]
            else:
                value = ''
            setattr(record, field, value)
        record.issue_reference = self.__issue

        # Saving is best-effort: an unreachable database must not abort
        # the crawl.
        try:
            record.save()
            logger.info('Article [%s] saved.' % entry_id)
        except ServerSelectionTimeoutError:
            logger.info('Cannot connect to database, Article [%s] will not be saved.' % entry_id)

        crawled[entry_id] = record
    return crawled
评论列表
文章目录