Mmrz-Sync.py 文件源码-python代码片段

def get_hujiang_tts():
    """Look up the TTS audio URL for a word on the Hujiang dictionary page.

    Request parameters (read from ``request.params``):
        key_word: the word to look up; required.
        job_id: optional value echoed back unchanged in the response.

    Returns:
        The plain string ``"key_word is null"`` when ``key_word`` is missing
        or empty. Otherwise a JSON string with the keys ``found`` (bool),
        ``message_str``, ``tts_url`` and ``job_id``. When the dictionary page
        cannot be fetched or decoded, ``found`` is false and ``message_str``
        says so, instead of an exception escaping the handler.
    """
    key_word = request.params.get('key_word', None)
    job_id = request.params.get('job_id', None)

    if not key_word:
        return "key_word is null"

    ret_info = {
        "found": False,
        "message_str": "",
        "tts_url": "",
        "job_id": job_id,
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
        # Advertise only gzip: it is the only content encoding decoded below,
        # so the server must not be invited to answer with deflate or sdch.
        'Accept-Encoding': 'gzip',
    }

    url = "http://dict.hjenglish.com/jp/jc/" + urllib.quote(key_word)
    req = urllib2.Request(url, None, headers)

    try:
        # A timeout keeps a stalled upstream from blocking this handler.
        response = urllib2.urlopen(req, timeout=10)
        try:
            body = response.read()
            content_encoding = response.info().get('Content-Encoding', '') or ''
        finally:
            response.close()

        # The server may ignore Accept-Encoding and reply uncompressed, so
        # only gunzip when the response actually says it is gzip-encoded.
        if 'gzip' in content_encoding.lower():
            html = gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
        else:
            html = body
    except IOError:
        # In Python 2, urllib2.URLError/HTTPError, socket errors (including
        # timeouts) and gzip decoding failures all derive from IOError.
        ret_info["message_str"] = "failed to fetch dictionary page"
        return json.dumps(ret_info)

    soup = BeautifulSoup(html, "html.parser")

    jp_sound_list = soup.select('span[class=jpSound]')
    if not jp_sound_list:
        ret_info["message_str"] = "jpSound not found"
        return json.dumps(ret_info)

    # The audio URL is embedded in the markup as GetTTSVoice("<url>").
    jp_sound = str(jp_sound_list[0])
    mc = re.search(r'GetTTSVoice\("(.*?)"\)', jp_sound)
    if not mc:
        ret_info["message_str"] = "tts_url not found"
        return json.dumps(ret_info)

    ret_info["found"] = True
    ret_info["message_str"] = "tts_url is found"
    ret_info["tts_url"] = mc.group(1)
    return json.dumps(ret_info)