def validate_turtle(contents, num_subjects=None, num_triples_per_subj=None):
    """Check that ``contents`` follows the expected Turtle layout.

    The text must contain ``@prefix`` declarations for ``books``, ``isbn``
    and ``xsd`` bound to the expected URLs.  After any leading prefix and
    blank lines it must contain ``num_subjects`` blocks of exactly
    ``num_triples_per_subj`` non-blank lines each; a blank (or
    whitespace-only) line terminates a block.  Within every block:

    * the first line starts with ``isbn:`` and ends with ``;``;
    * every following line starts with whitespace;
    * every line but the last ends with ``;`` and the last ends with ``.``;
    * every line contains ``books:``;
    * at least one line contains ``true`` or ``false``;
    * exactly two lines contain ``xsd:``.

    Args:
        contents: The Turtle document as a single string.
        num_subjects: Number of subject blocks to check.  Defaults to the
            module-level ``NUM_SUBJECTS``.
        num_triples_per_subj: Number of lines expected in each block.
            Defaults to the module-level ``NUM_TRIPLES_PER_SUBJ``.

    Returns:
        None.  Success is signalled by not raising.

    Raises:
        AssertionError: If any of the checks above fails.  Raised explicitly
            so the validation still runs under ``python -O``.
    """
    if num_subjects is None:
        num_subjects = NUM_SUBJECTS
    if num_triples_per_subj is None:
        num_triples_per_subj = NUM_TRIPLES_PER_SUBJ

    all_lines = contents.splitlines()

    # Separate prefixes from the body
    prefix_lines = [line for line in all_lines if line.startswith(u"@prefix")]
    prefixes = {
        "books": "http://www.books.org/",
        "isbn": "http://www.books.org/isbn/",
        "xsd": "http://www.w3.org/2001/XMLSchema#",
    }
    # Validate that specified prefixes are there
    for pre, url in prefixes.items():
        # Escape the interpolated text: an unescaped "." in the URL would
        # act as a wildcard and accept e.g. "wwwXbooksXorg".
        pattern = re.compile(
            r"@prefix {}:[\s]* <{}> \.".format(re.escape(pre), re.escape(url))
        )
        _check(
            any(pattern.match(line) is not None for line in prefix_lines),
            "{} is not found among prefixes".format(pre),
        )

    blank_re = re.compile(r"[\s]*$")

    def is_blank(line):
        # Empty or whitespace-only lines separate the subject blocks.
        return blank_re.match(line) is not None

    # Validate subject grouping
    # Move the cursor until the first subject
    cursor = dropwhile(
        lambda x: is_blank(x) or x.startswith(u"@prefix"), all_lines
    )

    # Check the block for each subject
    for index in range(num_subjects):
        label = "subject block #{}".format(index + 1)
        # takewhile also consumes the blank line that ends the block, so the
        # next iteration starts on the line right after that separator.
        block = list(takewhile(lambda x: not is_blank(x), cursor))
        _check(
            len(block) == num_triples_per_subj,
            "{}: expected {} lines, found {}".format(
                label, num_triples_per_subj, len(block)
            ),
        )
        # First line is where subject is defined
        subj_line = block[0]
        _check(
            subj_line.startswith(u"isbn:"),
            "{}: first line must start with 'isbn:'".format(label),
        )
        _check(
            subj_line.endswith(";"),
            "{}: first line must end with ';'".format(label),
        )
        # Rest of the lines starts with some whitespace
        _check(
            all(re.match(r"^[\s]+", x) for x in block[1:]),
            "{}: continuation lines must be indented".format(label),
        )
        # Every line between the first and the last ends with ;
        _check(
            all(x.endswith(u";") for x in block[1:(num_triples_per_subj - 1)]),
            "{}: every line but the last must end with ';'".format(label),
        )
        # Last line ends with a dot
        _check(
            block[-1].endswith(u"."),
            "{}: last line must end with '.'".format(label),
        )
        # Each line has a "books:" for the predicate
        _check(
            all("books:" in x for x in block),
            "{}: every line must contain 'books:'".format(label),
        )
        # One of the lines has true or false
        _check(
            any("true" in x or "false" in x for x in block),
            "{}: no line contains 'true' or 'false'".format(label),
        )
        # Two of the lines have xsd:
        xsd_count = sum(1 for x in block if "xsd:" in x)
        _check(
            xsd_count == 2,
            "{}: expected 2 lines containing 'xsd:', found {}".format(
                label, xsd_count
            ),
        )


def _check(condition, message):
    """Raise AssertionError carrying ``message`` unless ``condition`` is truthy."""
    if not condition:
        raise AssertionError(message)
评论列表
文章目录