obtain_date.py 文件源码

python
阅读 35 收藏 0 点赞 0 评论 0

项目:Broad_Crawler 作者: rafacheng 项目源码 文件源码
def pre_process(dates):
    """Normalize raw candidate date strings before date extraction.

    Steps, in order:

    1. Keep only the first half of ``dates``.
    2. Drop empty strings and strings of 100 or more characters (the
       length is measured on the raw, un-normalized text).
    3. Replace backspace characters with spaces, collapse every run of
       whitespace into a single space and trim both ends.
    4. Cut each string so that it starts at index ``ffdi - 1``, where
       ``ffdi`` is the value returned by ``find_first_digit_index``.
       Strings for which that helper returns a falsy value are dropped.

    Args:
        dates: Sliceable sequence of text fragments that may contain a date.

    Returns:
        A new list with the trimmed strings, in their original order.
    """
    # Only the first half of the candidates is examined.
    # NOTE(review): presumably because the publication date is expected
    # near the top of the page -- confirm with the caller.
    first_half = dates[:len(dates) // 2]

    shorter_dates = []
    for raw in first_half:
        if not 0 < len(raw) < 100:
            continue

        # Backspace is not whitespace, so it has to be mapped to a space
        # explicitly. str.split() without arguments then splits on any run
        # of Unicode whitespace (newlines, tabs, non-breaking and full-width
        # spaces included) and ignores leading/trailing whitespace, which
        # makes the join both collapse and trim in a single pass.
        text = ' '.join(raw.replace('\b', ' ').split())

        ffdi = find_first_digit_index(text)
        # NOTE(review): a falsy result (None or 0) discards the string, and
        # the ``ffdi - 1`` offset suggests the helper reports a 1-based
        # position -- verify against find_first_digit_index.
        if ffdi:
            shorter_dates.append(text[ffdi - 1:])
    return shorter_dates
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号