stratdoc_parser3.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:policynet 作者: mitre 项目源码 文件源码
def standardize_act_target(tgt_url):
    """Build a label and normalized URLs for an Act target from its URL.

    Deriving the label from the URL is cleaner than pulling the link text
    and accounting for typos and inconsistencies.

    The URL is split on "/" and read positionally: component 3 must be a
    ``YYYY-MM-DD`` date, component 4 is an optional chapter ("ch" prefix) or
    section ("s" prefix), and component 5 is an optional section.
    NOTE(review): this presumably matches identifiers shaped like
    ``/us/act/1934-06-19/ch652/s1`` -- confirm against the callers.

    Args:
        tgt_url: Slash-separated target URL/identifier.

    Returns:
        A ``(tgt_title, tgt_url, tgt_url_broad)`` tuple:
        * ``tgt_title`` -- "Act of <Month> <day>, <year>" followed by the
          chapter and/or section when they can be identified.
        * ``tgt_url`` -- the input with a leading "ch" removed from
          component 4 and a leading "s" removed from component 5.
        * ``tgt_url_broad`` -- the first six components of ``tgt_url``.

    Raises:
        IndexError: if the URL has fewer than four components, the date has
            fewer than three "-"-separated fields, or the month number is
            outside the range of ``calendar.month_name``.
        ValueError: if the month field is not an integer.
    """

    def _drop_prefix(text, prefix):
        # str.strip()/lstrip() take a *set of characters*, not a prefix, and
        # would also eat matching characters further in (or at the end), so
        # the prefix is removed explicitly.
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    parts = tgt_url.split("/")

    date_fields = parts[3].split("-")
    date = "{} {}, {}".format(
        calendar.month_name[int(date_fields[1])], date_fields[2], date_fields[0]
    )

    # Missing and empty components (e.g. from a trailing slash) are treated
    # alike, so the title never ends with a dangling separator.
    first = parts[4] if len(parts) > 4 else ""
    second = parts[5] if len(parts) > 5 else ""

    if first and second:
        tgt_title = "Act of {}, ch. {} {}".format(
            date, _drop_prefix(first, "ch"), _drop_prefix(second, "s")
        )
    elif first.startswith("ch"):
        tgt_title = "Act of {}, ch. {}".format(date, _drop_prefix(first, "ch"))
    elif first.startswith("s"):
        tgt_title = "Act of {}, {}".format(date, _drop_prefix(first, "s"))
    else:
        # No recognizable chapter/section: fall back to the bare date rather
        # than leaving the title unassigned.
        tgt_title = "Act of {}".format(date)

    # Normalize the URL itself. Only a "ch" prefix is removed from component 4
    # (a section appearing there keeps its "s"), as in the original behavior.
    if len(parts) > 4:
        parts[4] = _drop_prefix(parts[4], "ch")
    if len(parts) > 5:
        parts[5] = _drop_prefix(parts[5], "s")

    tgt_url = "/".join(parts)
    # Slicing never raises, so a short URL simply yields fewer components.
    tgt_url_broad = "/".join(parts[:6])

    return tgt_title, tgt_url, tgt_url_broad
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号