parse.py 文件源码

python
阅读 47 收藏 0 点赞 0 评论 0

项目:wos_parser 作者: alexander-belikov 项目源码 文件源码
def parse_address(branch):
    """
    Parse one address element into a flat dictionary.

    expected address structure:

    required:
        organization : str
        add_no : int

    optional:
        full_address : str
        organization synonyms : list of str
        country : str
        city : str
        state : str
        zipcode : str
        street : str

    :param branch: ElementTree-style element exposing ``findall`` and
        ``attrib``.
    :return: tuple ``(success, result_dict)``.
        On success ``result_dict`` contains the keys
        ``'organizations_pref'``, ``'organizations'``,
        ``'suborganizations'`` and ``add_no_key``, plus whatever
        ``add_entry`` stores for the optional paths.
        If anything raises while parsing (e.g. a non-integer address
        number), ``success`` is False and ``result_dict`` is
        ``etree_to_dict(branch)`` instead.
    """
    success = True

    try:
        org_elems = branch.findall(org_path)

        def is_preferred(elem):
            # An organization is "preferred" when it carries pref='Y'.
            attrib = elem.attrib
            return bool(attrib) and attrib.get('pref') == 'Y'

        # Element text is kept as-is, so an empty element contributes None.
        result_dict = {
            'organizations_pref': [e.text for e in org_elems if is_preferred(e)],
            'organizations': [e.text for e in org_elems if not is_preferred(e)],
        }

        suborg_elems = branch.findall(suborg_path)
        result_dict['suborganizations'] = [
            e.text for e in suborg_elems if e is not None
        ]

        # add_no is a required field: fall back to 1 whenever the attribute
        # is absent, not only when the element has no attributes at all.
        if add_no_key in branch.attrib:
            # A value that is not int-able raises here and is reported
            # through the failure path below.
            result_dict[add_no_key] = int(branch.attrib[add_no_key])
        else:
            result_dict[add_no_key] = 1

        # entries below are optional
        for path in (full_address_path, country_path, city_path,
                     state_path, zipcode_path, street_path):
            add_entry(result_dict, branch, path)

    except Exception:
        # Deliberately broad: any parsing problem is reported to the caller
        # via the flag, but KeyboardInterrupt/SystemExit still propagate.
        success = False
        result_dict = etree_to_dict(branch)
    return success, result_dict
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号