def buildSampleData(numPapers, inputDir, outputDir):
    """Write a reduced copy of the dataset in inputDir to outputDir.

    Rows of ``inputDir/PaperAuthor.csv`` are copied, header included, to
    ``outputDir/PaperAuthor.csv`` until ``numPapers`` distinct ``PaperId``
    values have been seen. The collected ids are then handed to
    ``copyFile`` (defined elsewhere in this file) for ``Author.csv`` and
    ``Paper.csv``.

    Args:
        numPapers: Maximum number of distinct PaperId values to sample.
        inputDir: Directory containing the full CSV files.
        outputDir: Directory that receives the sampled CSV files.

    Returns:
        A ``(papers, authors)`` tuple of sets holding the PaperId and
        AuthorId values (as read from the CSV) of the copied rows.

    Note:
        The limit is checked before each row is processed, so copying
        stops at the first row read after the limit is reached, even if
        that row belongs to a paper that is already in the sample.
    """
    papers = set()
    authors = set()
    # Use the directories passed in by the caller; the same ones are given
    # to copyFile below, so all three sampled files end up side by side.
    sourcePath = inputDir + "/PaperAuthor.csv"
    targetPath = outputDir + "/PaperAuthor.csv"
    # Distinct handle names: the reader keeps using the input file while
    # the output file is open.
    with open(sourcePath) as sourceFile, open(targetPath, 'w') as targetFile:
        reader = csv.DictReader(sourceFile)
        writer = csv.DictWriter(targetFile, fieldnames=reader.fieldnames)
        writer.writeheader()
        for row in reader:
            # make sure to stop after numPapers
            if len(papers) >= numPapers:
                break
            papers.add(row["PaperId"])
            authors.add(row["AuthorId"])
            writer.writerow(row)
    copyFile("Author.csv", authors, inputDir, outputDir)
    copyFile("Paper.csv", papers, inputDir, outputDir)
    return papers, authors
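# (Web page residue, not part of the code: "Comment list")
# (Web page residue, not part of the code: "Table of contents")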