# test_formats.py -- Python source code snippet

def validate_turtle(contents, num_subjects=None, num_triples_per_subj=None):
    """Assert that ``contents`` is laid out like the expected Turtle output.

    The checks are purely textual (no RDF parsing is done):

    * every expected ``@prefix`` declaration (``books``, ``isbn``, ``xsd``)
      is present with exactly the expected IRI;
    * after the prefixes and any blank lines, the body consists of
      ``num_subjects`` blocks, consecutive blocks being separated by a
      single blank (empty or whitespace-only) line;
    * each block has ``num_triples_per_subj`` lines: the first starts with
      ``isbn:``, the others are indented, every line but the last ends
      with ``;`` and the last ends with ``.``;
    * each line of a block mentions ``books:``, at least one line contains
      ``true`` or ``false``, and exactly two lines contain ``xsd:``.

    Args:
        contents: The Turtle document as a single string.
        num_subjects: Number of subject blocks to expect. Defaults to the
            module-level ``NUM_SUBJECTS`` when omitted.
        num_triples_per_subj: Number of lines expected in each block.
            Defaults to the module-level ``NUM_TRIPLES_PER_SUBJ`` when
            omitted.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    # Resolve the defaults at call time so existing one-argument callers keep
    # using the module-level constants.
    if num_subjects is None:
        num_subjects = NUM_SUBJECTS
    if num_triples_per_subj is None:
        num_triples_per_subj = NUM_TRIPLES_PER_SUBJ

    blank_re = re.compile(r"\s*$")
    indent_re = re.compile(r"\s+")

    def is_blank(line):
        # Empty or whitespace-only lines act as block separators.
        return blank_re.match(line) is not None

    all_lines = contents.splitlines()
    # Separate prefixes from the body
    prefix_lines = [line for line in all_lines if line.startswith(u"@prefix")]
    prefixes = {
        "books": "http://www.books.org/",
        "isbn": "http://www.books.org/isbn/",
        "xsd": "http://www.w3.org/2001/XMLSchema#",
    }
    # Validate that specified prefixes are there.  The name and IRI are
    # escaped so that regex metacharacters in them (notably the dots in the
    # IRIs) only match themselves.
    for pre, url in prefixes.items():
        pattern = r"@prefix {}:[\s]* <{}> \.".format(
            re.escape(pre), re.escape(url))
        assert any(re.match(pattern, x) is not None for x in prefix_lines), \
            "{} is not found among prefixes".format(pre)

    # Validate subject grouping
    # Move the cursor until the first subject
    remaining = dropwhile(
        lambda x: is_blank(x) or x.startswith(u"@prefix"), all_lines)

    # Check the block for each subject.  takewhile() also consumes the blank
    # line that terminates a block, so the shared iterator is positioned at
    # the start of the next block on the following iteration.
    for index in range(num_subjects):
        block = list(takewhile(lambda x: not is_blank(x), remaining))
        assert len(block) == num_triples_per_subj, \
            "subject block {} has {} lines, expected {}".format(
                index, len(block), num_triples_per_subj)
        # First line is where subject is defined
        subj_line = block[0]
        assert subj_line.startswith(u"isbn:"), \
            "subject block {} does not start with 'isbn:'".format(index)
        assert subj_line.endswith(u";"), \
            "subject line of block {} does not end with ';'".format(index)
        # Rest of the lines starts with some whitespace
        assert all(indent_re.match(x) for x in block[1:]), \
            "continuation lines of block {} are not indented".format(index)
        # Every line between the first and the last ends with ;
        assert all(x.endswith(u";") for x in block[1:-1]), \
            "inner lines of block {} do not all end with ';'".format(index)
        # Last line ends with a dot
        assert block[-1].endswith(u"."), \
            "last line of block {} does not end with '.'".format(index)
        # Each line has a "books:" for the predicate
        assert all("books:" in x for x in block), \
            "a line of block {} lacks a 'books:' predicate".format(index)
        # One of the lines has true or false
        assert any("true" in x or "false" in x for x in block), \
            "block {} has no boolean literal".format(index)
        # Two of the lines has xsd:
        assert sum(1 for x in block if "xsd:" in x) == 2, \
            "block {} does not have exactly two 'xsd:' lines".format(index)