Python findall() example source code
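
The snippets below come from real projects and show re.findall in context. As a quick refresher: findall returns every non-overlapping match, as a list of strings when the pattern has at most one capture group and as a list of tuples when it has several.

import re

# one capture group -> a list of strings
print(re.findall(r'`(\w+)`', 'CREATE TABLE `users` (`id` INT)'))  # ['users', 'id']

# several capture groups -> a list of tuples
print(re.findall(r'(\w+)=(\d+)', 'a=1 b=22'))  # [('a', '1'), ('b', '22')]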

database.py (project: ln2sql, author: FerreroJeremy)
def create_table(self, table_string):
        lines = table_string.split("\n")
        table = Table()
        for line in lines:
            if 'TABLE' in line:
                table_name = re.search("`(\w+)`", line)
                table.name = table_name.group(1)
                if self.thesaurus_object is not None:
                    table.equivalences = self.thesaurus_object.get_synonyms_of_a_word(table.name)
            elif 'PRIMARY KEY' in line:
                primary_key_columns = re.findall("`(\w+)`", line)
                for primary_key_column in primary_key_columns:
                    table.add_primary_key(primary_key_column)
            else:
                column_name = re.search("`(\w+)`", line)
                if column_name is not None:
                    column_type = self.predict_type(line)
                    if self.thesaurus_object is not None:
                        equivalences = self.thesaurus_object.get_synonyms_of_a_word(column_name.group(1))
                    else:
                        equivalences = []
                    table.add_column(column_name.group(1), column_type, equivalences)
        return table
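
A minimal sketch of the two regex calls create_table leans on, run against a made-up DDL line: re.search grabs the first backtick-quoted identifier, re.findall grabs all of them.

import re

line = 'PRIMARY KEY (`id`, `email`),'          # made-up DDL line
print(re.search(r'`(\w+)`', line).group(1))    # id
print(re.findall(r'`(\w+)`', line))            # ['id', 'email']
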
zhaifuliall.py (project: python-, author: secondtonone1)
def getDetailList(self,content):
        s2 = r'<h2><a target="_blank" href="(.*?)" title="(.*?)"'
        pattern = re.compile(s2, re.S)
        result = re.findall(pattern, content)
        with open('file.txt','w',encoding='gbk') as f:
            f.write(content)

        if not result:
            print('No matching results found..............')


        threadsList=[] 
        for item in result:
            t = threading.Thread(target = workthread, args=(item, self.user_agent, self.path))
            threadsList.append(t)
            t.start()

        for threadid in threadsList:
            threadid.join()
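
A sketch of what getDetailList extracts, on a made-up HTML fragment: with two capture groups, findall yields one (href, title) tuple per entry.

import re

html = '<h2><a target="_blank" href="/post/1" title="First post">First post</a></h2>'
pattern = re.compile(r'<h2><a target="_blank" href="(.*?)" title="(.*?)"', re.S)
print(re.findall(pattern, html))  # [('/post/1', 'First post')]
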
DY2018.py (project: Python, author: Guzi219)
def ParseHtml(self, html):
        soup = BeautifulSoup(html)
        links = soup.findAll('a', attrs={'class': 'ulink'})
        #print len(links)
        if len(links) == 0: #the js return
            # tmp_js = soup.find(name='script', attrs={'language': 'javascript'})
            js_str = soup.script.string #two ways to get the <script></script>
            new_url = js_str[16:-1] #get the new url
            new_url = eval(new_url) # eval strips the surrounding quotes, leaving the plain URL string
            self.ParseHtml(self.LoadPage(new_url))
        else:
            # print type(links)
            for link in links:
                # print type(link)
                # print type(link.string)
                # print unicode(link.string)
                titles = re.findall(r'《(.+?)》', str(link.string)) #unicode(link.string))
                if len(titles) != 0:
                    print titles[0]
                # print 'url is %s, title is %s.' %(link['href'], titles[0])
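
Assuming dy2018-style link texts that wrap the film name in CJK title marks, the findall above pulls out the part between them; a sketch with a made-up title:

# -*- coding: utf-8 -*-
import re

link_text = u'2018年剧情片《流浪地球》BD中英双字幕'   # made-up link text
print(re.findall(u'《(.+?)》', link_text)[0])        # 流浪地球
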
wifikeys.py (project: Stitch, author: nathanlopez)
def get_profiles():
    passwd=''
    netsh_output = run_command("netsh wlan show profiles")
    if "not running" in netsh_output:
        net_wlan = run_command("net start wlansvc")
        if "started successfully" in net_wlan:
            netsh_output = run_command("netsh wlan show profiles")
        else:
            return net_wlan
    if "no wireless interface" in netsh_output:
        return netsh_output
    else:
        profiles=re.findall(': (.*)\r',netsh_output)
        for x in profiles:
            output= run_command('netsh wlan show profiles "{}" key=clear'.format(x))
            #output=re.findall('(Key Content.*)\r',proc)
            if output:
                passwd += "\n{}\n{}\n\n".format(x,output)
        return passwd
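
The profile names come from netsh lines shaped like '    All User Profile     : HomeWifi\r'; a sketch against a canned sample:

import re

netsh_output = ('Profiles on interface Wi-Fi:\r\n'
                '    All User Profile     : HomeWifi\r\n'
                '    All User Profile     : CoffeeShop\r\n')  # canned sample
print(re.findall(': (.*)\r', netsh_output))  # ['HomeWifi', 'CoffeeShop']
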
generate_reaction_templates.py (project: ochem_predict_nn, author: connorcoley)
def reassign_atom_mapping(transform):
    '''This function takes an atom-mapped reaction and reassigns 
    the atom-mapping labels (numbers) from left to right, once 
    that transform has been canonicalized.'''

    all_labels = re.findall('\:([0-9]+)\]', transform)

    # Define list of replacements which matches all_labels *IN ORDER*
    replacements = []
    replacement_dict = {}
    counter = 1
    for label in all_labels: # keep in order! this is important
        if label not in replacement_dict:
            replacement_dict[label] = str(counter)
            counter += 1
        replacements.append(replacement_dict[label])

    # Perform replacements in order
    transform_newmaps = re.sub('\:[0-9]+\]', 
        lambda match: (':' + replacements.pop(0) + ']'),
        transform)

    return transform_newmaps
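
The trick worth copying here is re.sub with a callable replacement that consumes a precomputed list in match order; a toy run on a made-up atom-mapped string:

import re

transform = '[CH3:4][OH:2]>>[CH3:4][O-:2]'       # made-up atom-mapped string
labels = re.findall(r'\:([0-9]+)\]', transform)  # ['4', '2', '4', '2']
mapping = {}
replacements = [mapping.setdefault(l, str(len(mapping) + 1)) for l in labels]
print(re.sub(r'\:[0-9]+\]', lambda m: ':' + replacements.pop(0) + ']', transform))
# [CH3:1][OH:2]>>[CH3:1][O-:2]
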
mcs_raw_binary.py (project: spyking-circus, author: spyking-circus)
def _read_from_header(self):

        a, b, c                = self._get_header()
        header                 = a
        header['data_offset']  = b
        header['nb_channels']  = c
        #header['dtype_offset'] = int(header['ADC zero'])
        header['gain']         = float(re.findall("\d+\.\d+", header['El'])[0])
        header['data_dtype']   = self.params['data_dtype']

        self.data   = numpy.memmap(self.file_name, offset=header['data_offset'], dtype=header['data_dtype'], mode='r')
        self.size   = len(self.data)
        self._shape = (self.size//header['nb_channels'], header['nb_channels'])
        del self.data

        return header
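
The gain line pulls the first decimal number out of a free-text header field; a sketch with a made-up field value:

import re

el_field = 'El = 0.1042 microvolts/AD'              # made-up header field
print(float(re.findall(r'\d+\.\d+', el_field)[0]))  # 0.1042
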
new_url.py (project: sopel-modules, author: phixion)
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.
    """
    if not trigger.group(2):
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        matched = check_callbacks(bot, trigger,
                                  bot.memory['last_seen_url'][trigger.sender],
                                  True)
        if matched:
            return
        else:
            urls = [bot.memory['last_seen_url'][trigger.sender]]
    else:
        urls = re.findall(url_finder, trigger)

    results = process_urls(bot, trigger, urls)
    for title, domain in results[:4]:
        bot.reply('[ %s ] - %s' % (title, domain))
test_events.py (project: pyselenium-js, author: neetjn)
def test_trigger_single_event(self):
        """Test: Trigger click event on button, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event='click'
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+1,
            'Counter label was not modified as expected; %s clicks' % modified
        )
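
These tests run the matched digits through eval(); since the group can only ever match one to three digits, int() gives the same number without eval's downsides. A sketch:

import re

label_text = 'Clicked 41 times'                         # made-up counter label
print(int(re.findall(r'([0-9]{1,3})', label_text)[0]))  # 41
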
test_events.py (project: pyselenium-js, author: neetjn)
def test_trigger_multiple_events(self):
        """Test: Trigger click event on button twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event=('click', 'click')
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+2,
            'Counter label was not modified as expected; %s clicks' % modified
        )
test_events.py (project: pyselenium-js, author: neetjn)
def test_trigger_multiple_events_multiple_elements(self):
        """Test: Trigger click event on two buttons twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        num_counter_original = eval(re.findall(regex, self.page.counter_label.text)[0])
        num_users_original = len(self.page.user_cards)
        self.page.js.trigger_event(
            element=(self.page.add_counter_button, self.page.add_user_button),
            event=('click', 'click')
        )
        for i in range(10):
            if (num_counter_original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        num_counter_modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            num_counter_modified, num_counter_original+2,
            'Counter label was not modified as expected; %s clicks' % num_counter_modified
        )
        self.assertEqual(
            len(self.page.user_cards), num_users_original+2,
            'Expected %s user cards found %s' % (
                num_users_original+2, len(self.page.user_cards)
            )
        )
analyser.py (project: trf, author: aistairc)
def calc_rs_modality(self) -> Dict[str, float]:

        modality_counter = Counter()
        for i, s in enumerate(self.sentences):
            chunks = []
            for bnst in self.knp.parse(s).bnst_list():
                chunk = Chunk(chunk_id=bnst.bnst_id,
                              link=bnst.parent,
                              description=bnst.fstring)
                chunks.append(chunk)

            s = "".join([chunk.description for chunk in chunks])
            ms = set(re.findall("<モダリティ-(.+?)>", s))  # KNP modality feature tags
            modality_counter += Counter(ms)

        n = len(self.sentences)

        return dict([(k, float(c) / n)
                     for k, c in modality_counter.items()])
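
The counting idiom above (dedupe each sentence's matches with set(), accumulate into a Counter, then normalize) sketched on stand-in feature strings:

import re
from collections import Counter

sentences = ['<mod-A><mod-B>', '<mod-A>']   # stand-in for KNP feature strings
counter = Counter()
for s in sentences:
    counter += Counter(set(re.findall('<mod-(.+?)>', s)))
print({k: c / len(sentences) for k, c in counter.items()})  # {'A': 1.0, 'B': 0.5}
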
reference.py (project: cellranger, author: 10XGenomics)
def get_transcript_gc_content(self, transcript_obj):
        pattern = re.compile('[cCgG]')

        gc, length = 0, 0
        for interval in transcript_obj.intervals:
            if interval.chrom not in self.chroms:
                continue

            seq = self.chroms[interval.chrom][interval.start:interval.end]
            gc += len(re.findall(pattern, seq))
            length += interval.length

        if length > 0:
            return float(gc) / float(length)
        else:
            return 0

# NOTE: these stub classes are necessary to maintain backwards compatibility with old refdata (1.2 or older)
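
len(re.findall(...)) is a compact way to count matches; with the character class above it amounts to counting G/C bases case-insensitively:

import re

seq = 'AACGTTGC'                       # made-up sequence
gc = len(re.findall('[cCgG]', seq))    # 4
print(float(gc) / len(seq))            # 0.5
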
comic.py (project: ComicSpider, author: QuantumLiu)
def get_info(self):
        '''
        Get information about the comic
        return:
            comic title, description, cover url, chapters' urls
        '''
        headers={'user-agent':"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",'Referer':'http://manhua.dmzj.com/tags/s.shtml'}
        root='http://manhua.dmzj.com'
        r_title=r'<span class="anim_title_text"><a href=".*?"><h1>(.*?)</h1></a></span>'
        r_des=r'<meta name=\'description\' content=".*?(??.*?)"/>'# description
        r_cover=r'src="(.*?)" id="cover_pic"/></a>'# cover image URL
        r_cb=r'<div class="cartoon_online_border" >([\s\S]*?)<div class="clearfix"></div>'# chapter list block
        r_cs=r'<li><a title="(.*?)" href="(.*?)" .*?>.*?</a>'# chapter title and URL
        try:
            text=requests.get(self.comic_url,headers=headers).text
        except ConnectionError:
            traceback.print_exc()
            raise ConnectionError
        title=re.findall(r_title,text)[0]
        cb=re.findall(r_cb,text)[0]
        chapter_urls=[(c[0],root+c[1]+'#@page=1') for c in re.findall(r_cs,cb)]
        cover_url=re.findall(r_cover,text)[0]
        des=re.findall(r_des,text)
        return title,des,cover_url,chapter_urls
comment.py (project: Instagram, author: Fastcampus-WPS-5th)
def make_html_content_and_add_tags(self):
        # compile a pattern that matches hashtags
        p = re.compile(r'(#\w+)')
        # collect every hashtag in the content with findall
        tag_name_list = re.findall(p, self.content)
        # keep the original content (of this Comment) for rewriting
        ori_content = self.content
        # rewrite each extracted tag
        for tag_name in tag_name_list:
            # get or create the Tag object; the created flag is unused, hence _
            tag, _ = Tag.objects.get_or_create(name=tag_name.replace('#', ''))
            # build the link markup that replaces the raw tag text
            change_tag = '<a href="{url}" class="hash-tag">{tag_name}</a>'.format(
                # url=reverse('post:hashtag_post_list', args=[tag_name.replace('#', '')]),
                url=reverse('post:hashtag_post_list',
                            kwargs={'tag_name': tag_name.replace('#', '')}),
                tag_name=tag_name
            )
            ori_content = re.sub(r'{}(?![<\w])'.format(tag_name), change_tag, ori_content, count=1)
            # add any Tag found in the content to this comment's tags
            if not self.tags.filter(pk=tag.pk).exists():
                self.tags.add(tag)
        # save the rewritten content to html_content
        self.html_content = ori_content
        super().save(update_fields=['html_content'])
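
The negative lookahead in the re.sub above keeps #py from also rewriting the prefix of #python; a sketch with a made-up tag URL:

import re

content = 'love #py and #python'
change = '<a href="/tags/py/" class="hash-tag">#py</a>'   # made-up URL
print(re.sub(r'#py(?![<\w])', change, content, count=1))
# love <a href="/tags/py/" class="hash-tag">#py</a> and #python
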
test_sitemap_view.py (project: django-powerpages, author: Open-E-WEB)
def parse_sitemap(content):
    if not isinstance(content, six.text_type):
        content = content.decode('utf-8')
    urlset_match = re.search(
        r'<urlset[^>]*>(?P<urls>[\s\S]*)</urlset>', content
    )
    if urlset_match:
        results = []
        urlset_content = urlset_match.groupdict()['urls']
        for url_content in re.findall(r'<url>([\s\S]+)</url>', urlset_content):
            results.append(
                dict(
                    re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)
                )
            )
    else:
        results = None
    return results
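
With two capture groups, findall feeds straight into dict(); a sketch on a single <url> body:

import re

url_content = '<loc>http://example.com/</loc><priority>0.5</priority>'
print(dict(re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)))
# {'loc': 'http://example.com/', 'priority': '0.5'}
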
pubg_iemoakland_players.py (project: fantasy-dota-heroes, author: ThePianoDentist)
def get_players():
    with open("../../junk/iemoakland.html") as f:
        html = f.read()
    output = []
    teams = re.findall('(?s)<div class="influencer-card">(.*?)<!-- Card End  -->', html)
    counter = 1
    for t in teams:
        team_name = re.search('<h1 class="influencer-name">([^<]+)</h1>', t).group(1)
        player_section = re.search('(?s)<p class="influencer-description">(.*?)</p>', t).group(1)
        players = re.findall('(?:<a[^>]+>)?\s*(.*?)(?:</a>)?<br />', player_section)
        if len(players) < 4:
            print(team_name)
            print(players)
        for player in players:
            if '<a hre' in player:
                player = re.search('<a[^>]+>([^<]+)', player).group(1)
            output.append({"id": counter, "name": player, "team": team_name, "value": 10.0})
            counter += 1

    with open("../../lib/pubg_players.py", "w+") as f:
        f.write("pubg_init = " + repr(output))
    return
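
The (?s) prefix switches on DOTALL inside the pattern itself, letting .*? cross line breaks; a sketch:

import re

html = '<div class="influencer-card">line one\nline two</div>'   # made-up block
print(re.findall('(?s)<div class="influencer-card">(.*?)</div>', html))
# ['line one\nline two']
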
icourse.py (project: course-crawler, author: Foair)
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # term ID of the course
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution
    institution = names[1]

    # directory name
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name
study_mooc.py (project: course-crawler, author: Foair)
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # term ID of the course
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution
    institution = names[1]

    # directory name
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name
study_mooc.py (project: course-crawler, author: Foair)
def get_announce(term_id):
    """ ??????? """

    # batchId is the current timestamp in milliseconds: str(int(time.time() * 1000))
    post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'httpSessionId': 'dba4977be78d42a78a6e2c2dd2b9bb42', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getAllAnnouncementByTerm', 'c0-id': '0', 'c0-param0': 'number:' + term_id, 'c0-param1': 'number:1', 'batchId': str(int(time.time() * 1000))}
    res = CONNECTION.post('http://mooc.study.163.com/dwr/call/plaincall/CourseBean.getAllAnnouncementByTerm.dwr', data=post_data).text

    announcements = re.findall(r'content="(.*?[^\\])".*title="(.*?[^\\])"', res)

    with open(os.path.join(BASE_DIR, 'Announcements.html'), 'w', encoding='utf-8') as announce_file:
        for announcement in announcements:
            # announcement content
            announce_content = announcement[0].encode('utf-8').decode('unicode_escape')
            # announcement title
            announce_title = announcement[1].encode('utf-8').decode('unicode_escape')
            announce_file.write('<h1>' + announce_title + '</h1>\n' + announce_content + '\n')
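
DWR responses escape non-ASCII text as \uXXXX sequences, which the encode/decode round-trip above undoes; a sketch on a made-up fragment:

import re

res = r'content="Hello \u4e16\u754c" ... title="Week 1"'   # made-up DWR fragment
content, title = re.findall(r'content="(.*?[^\\])".*title="(.*?[^\\])"', res)[0]
print(content.encode('utf-8').decode('unicode_escape'))     # Hello 世界
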
test_pdf.py (project: segno, author: heuer)
def pdf_as_matrix(buff, border):
    """\
    Reads the path in the PDF and returns it as list of 0, 1 lists.

    :param io.BytesIO buff: Buffer to read the matrix from.
    """
    pdf = buff.getvalue()
    h, w = re.search(br'/MediaBox \[0 0 ([0-9]+) ([0-9]+)\]', pdf,
                     flags=re.MULTILINE).groups()
    if h != w:
        raise ValueError('Expected equal height/width, got height="{}" width="{}"'.format(h, w))
    size = int(w) - 2 * border

    graphic = _find_graphic(buff)
    res = [[0] * size for i in range(size)]
    for x1, y1, x2, y2 in re.findall(r'\s*(\-?\d+)\s+(\-?\d+)\s+m\s+'
                                        r'(\-?\d+)\s+(\-?\d+)\s+l', graphic):
        x1, y1, x2, y2 = [int(i) for i in (x1, y1, x2, y2)]
        y = abs(y1)
        res[y][x1:x2] = [1] * (x2 - x1)
    return res
client.py (project: plugin.video.exodus, author: lastship)
def get(self, netloc, ua, timeout):
        try:
            headers = {'User-Agent': ua, 'Referer': netloc}
            result = _basic_request(netloc, headers=headers, timeout=timeout)

            match = re.findall('xhr\.open\("GET","([^,]+),', result)
            if not match:
                return False

            url_Parts = match[0].split('"')
            url_Parts[1] = '1680'
            url = urlparse.urljoin(netloc, ''.join(url_Parts))

            match = re.findall('rid=([0-9a-zA-Z]+)', url_Parts[0])
            if not match:
                return False

            headers['Cookie'] = 'rcksid=%s' % match[0]
            result = _basic_request(url, headers=headers, timeout=timeout)
            return self.getCookieString(result, headers['Cookie'])
        except:
            return

    # not very robust but laziness...
client.py (project: plugin.video.exodus, author: lastship)
def get(self, result):
        try:
            s = re.compile("S\s*=\s*'([^']+)").findall(result)[0]
            s = base64.b64decode(s)
            s = s.replace(' ', '')
            s = re.sub('String\.fromCharCode\(([^)]+)\)', r'chr(\1)', s)
            s = re.sub('\.slice\((\d+),(\d+)\)', r'[\1:\2]', s)
            s = re.sub('\.charAt\(([^)]+)\)', r'[\1]', s)
            s = re.sub('\.substr\((\d+),(\d+)\)', r'[\1:\1+\2]', s)
            s = re.sub(';location.reload\(\);', '', s)
            s = re.sub(r'\n', '', s)
            s = re.sub(r'document\.cookie', 'cookie', s)

            cookie = '' ; exec(s)
            self.cookie = re.compile('([^=]+)=(.*)').findall(cookie)[0]
            self.cookie = '%s=%s' % (self.cookie[0], self.cookie[1])

            return self.cookie
        except:
            pass
directstream.py (project: plugin.video.exodus, author: lastship)
def odnoklassniki(url):
    try:
        media_id = re.compile('//.+?/.+?/([\w]+)').findall(url)[0]

        result = client.request('http://ok.ru/dk', post={'cmd': 'videoPlayerMetadata', 'mid': media_id})
        result = re.sub(r'[^\x00-\x7F]+', ' ', result)
        result = json.loads(result).get('videos', [])

        hd = []
        for name, quali in {'ultra': '4K', 'quad': '1440p', 'full': '1080p', 'hd': 'HD'}.items():
            hd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        sd = []
        for name, quali in {'sd': 'SD', 'low': 'SD', 'lowest': 'SD', 'mobile': 'SD'}.items():
            sd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        url = hd + sd[:1]
        if not url == []: return url
    except:
        return
directstream.py (project: plugin.video.exodus, author: lastship)
def cldmailru(url):
    try:
        v = url.split('public')[-1]

        r = client.request(url)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        tok = re.findall('"tokens"\s*:\s*{\s*"download"\s*:\s*"([^"]+)', r)[0]

        url = re.findall('"weblink_get"\s*:\s*\[.+?"url"\s*:\s*"([^"]+)', r)[0]

        url = '%s%s?key=%s' % (url, v, tok)

        return url
    except:
        return
directstream.py (project: plugin.video.exodus, author: lastship)
def yandex(url):
    try:
        cookie = client.request(url, output='cookie')

        r = client.request(url, cookie=cookie)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        sk = re.findall('"sk"\s*:\s*"([^"]+)', r)[0]

        idstring = re.findall('"id"\s*:\s*"([^"]+)', r)[0]

        idclient = binascii.b2a_hex(os.urandom(16))

        post = {'idClient': idclient, 'version': '3.9.2', 'sk': sk, '_model.0': 'do-get-resource-url', 'id.0': idstring}
        post = urllib.urlencode(post)

        r = client.request('https://yadi.sk/models/?_m=do-get-resource-url', post=post, cookie=cookie)
        r = json.loads(r)

        url = r['models'][0]['data']['file']

        return url
    except:
        return
proxy.py (project: plugin.video.exodus, author: lastship)
def geturl(url):
    try:
        r = client.request(url, output='geturl')
        if r == None: return r

        host1 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(url.strip().lower()).netloc)[0]
        host2 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(r.strip().lower()).netloc)[0]
        if host1 == host2: return r

        proxies = sorted(get(), key=lambda x: random.random())
        proxies = sorted(proxies, key=lambda x: random.random())
        proxies = proxies[:3]

        for p in proxies:
            p += urllib.quote_plus(url)
            r = client.request(p, output='geturl')
            if not r == None: return parse(r)

    except:
        pass
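
The host comparison extracts the second-level domain from each netloc; a sketch that runs on either Python version:

import re
try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2

url = 'http://www.example.co/path'
print(re.findall(r'([\w]+)[.][\w]+$', urlparse(url).netloc)[0])  # example
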
miradetodo.py (project: plugin.video.exodus, author: lastship)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            t = 'http://www.imdb.com/title/%s' % imdb
            t = client.request(t, headers={'Accept-Language': 'es-AR'})
            t = client.parseDOM(t, 'title')[0]
            t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip().encode('utf-8')

            q = self.search_link % urllib.quote_plus(t)
            q = urlparse.urljoin(self.base_link, q)

            r = client.request(q)

            r = client.parseDOM(r, 'div', attrs = {'class': 'item'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'span', attrs = {'class': 'tt'}), client.parseDOM(i, 'span', attrs = {'class': 'year'})) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
            r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            pass
solarmovie.py (project: plugin.video.exodus, author: lastship)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            return url
        except:
            return
kingmovies.py (project: plugin.video.exodus, author: lastship)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'item-detail'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch' % url
            return url
        except:
            return
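
Both searchMovie variants pull the release year from listing titles shaped like 'Title (2017)'; a sketch:

import re

title = 'Movie Title (2017) HD'            # made-up listing title
years = re.findall(r'\((\d{4})', title)
print(years[0] if years else None)         # 2017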

