def iter_json_batches(inputs, base_url, batch_size, keep_instance_path):
    """Group (href, resource) pairs into per-collection batches.

    Args:
        inputs: iterable of ``(href, resource)`` pairs, where ``resource``
            is a dict. Resources whose only key is ``"_links"`` (discovery
            stubs) are skipped.
        base_url: URL whose scheme and netloc override those of each href.
        batch_size: maximum number of resources per yielded batch. A size
            of 1 yields each resource immediately under its full URI.
        keep_instance_path: if true, batches are keyed by the rewritten
            href with its query string stripped; otherwise by the href
            with its last path segment stripped (the collection URI).

    Yields:
        ``(uri, resources)`` pairs, where ``resources`` is a list of at
        most ``batch_size`` dicts sharing the same batch key.
    """
    parsed_base_url = urlparse(base_url)
    current_uri = None
    current_batch = []
    for href, resource in inputs:
        # Skip over links-only (discovery) resources.
        # BUG FIX: in Python 3, dict.keys() returns a view that never
        # compares equal to a list, so the original
        # `resource.keys() == ["_links"]` check silently never matched.
        # Compare the key set instead.
        if set(resource.keys()) == {"_links"}:
            continue
        # Inject the base URL's scheme and netloc; `urljoin` should do
        # exactly this operation, but actually won't if the
        # right-hand-side term defines its own netloc.
        parsed_href = urlparse(href)
        uri = urlunparse(parsed_href._replace(
            scheme=parsed_base_url.scheme,
            netloc=parsed_base_url.netloc,
        ))
        if batch_size == 1:
            yield (uri, [resource])
        else:
            # Derive the batch key: instance URI minus its query string,
            # or the collection URI (instance URI minus the last path
            # segment).
            if keep_instance_path:
                collection_uri = uri.rsplit("?", 1)[0]
            else:
                collection_uri = uri.rsplit("/", 1)[0]
            # Flush the pending batch when the key changes or the batch
            # is full.
            if any((
                current_uri is not None and current_uri != collection_uri,
                len(current_batch) >= batch_size,
            )):
                yield (current_uri, current_batch)
                current_batch = []
            current_uri = collection_uri
            current_batch.append(resource)
    # Flush whatever remains once the input is exhausted.
    if current_batch:
        yield (current_uri, current_batch)
# NOTE(review): removed stray scraped-page footer text ("评论列表" / "文章目录")
# that was not Python and would have broken the module at import time.