sciencedirect_collect.py 文件源码-python代码片段

sciencedirect_collect.py 文件源码

python

阅读 21 收藏 0 点赞 0 评论 0

项目：scientific-paper-summarisation 作者: EdCo95 项目源码文件源码

def downloadAllJournalArticles(skip = False, dontskip = ""):
    """Download the articles of every journal named in ``../compsci_journals.txt``.

    Each usable line of the list file is taken as one journal name. The name
    is lower-cased and its spaces are replaced by underscores to form the
    journal's folder name under ``../elsevier_papers_xml/``; that folder path
    and the URL returned by ``getJournalURL`` are handed to
    ``downloadArticles``.

    Args:
        skip: When true, a journal whose folder name already appears in
            ``../elsevier_papers_xml`` is skipped, on the assumption that its
            download already finished.
        dontskip: Name of one journal that is downloaded even if its folder
            already exists. It is compared against the journal name exactly as
            written in the list file. Passing a non-empty value turns
            ``skip`` on.
    """
    if dontskip != "":
        skip = True

    # The context manager guarantees the list file is closed, even when a
    # download raises part-way through the loop.
    with io.open("../compsci_journals.txt") as journal_list:  # all Elsevier CS journal names
        for line in journal_list:
            # Lines of two characters or fewer (blank lines, stray characters)
            # are not treated as journal names.
            if len(line) <= 2:
                continue

            journal = line.strip("\n")
            folder = journal.lower().replace(" ", "_")

            # The directory is re-listed for every journal on purpose, so that
            # folders appearing while this loop runs are taken into account.
            if (skip
                    and folder in os.listdir("../elsevier_papers_xml")
                    and journal != dontskip):
                print("Skipping journal:", journal)
                continue

            print("Downloading articles for journal:", journal)
            jurl = getJournalURL(journal)
            downloadArticles("../elsevier_papers_xml/" + folder + "/", jurl)