python类Tag()的实例源码

mikan.py 文件源码 项目:BGmi 作者: RicterZ 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def parser_day_bangumi(soup):
    """
    Collect the bangumi entries listed under one element.

    Every ``li`` found under ``soup`` that contains an ``a`` element produces
    one entry; ``li`` elements without a link are skipped.

    :param soup: element whose ``li`` descendants are scanned
    :type soup: bs4.Tag
    :return: one dict per linked item with the keys ``name`` (the link's
        ``title`` attribute), ``keyword`` (last ``/``-separated segment of the
        link's ``href``) and ``cover`` (``data-src`` of the first ``span`` in
        the item, or ``''`` when the span or the attribute is missing)
    :rtype: list[dict]
    """
    bangumi_list = []
    for item in soup.find_all('li'):
        link = item.select_one('a')
        if not link:
            continue
        name = link['title']
        href = link['href']
        assert isinstance(href, str)  # narrows the type for static checkers
        span = item.find('span')
        # A missing cover must not abort parsing of the remaining items.
        cover = span.get('data-src', '') if span else ''
        bangumi_list.append({
            'name': name,
            'keyword': href.split('/')[-1],
            'cover': cover,
        })
    return bangumi_list
parser.py 文件源码 项目:netwars 作者: i008 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def process_post_bodies(bodies: List[Tag]) -> (str, list):
    """
    Yield a ``(text, cites)`` pair for every post body.

    ``cites`` holds the ``name`` attribute of each ``div`` with class ``cite``
    found in the body. ``text`` is assembled from the body's direct children
    whose ``name`` is neither ``div`` nor ``p``.

    :param bodies: post bodies to process
    :return: generator of ``(text, cites)`` tuples
    """
    for post in bodies:
        quoted = post.findAll('div', {'class': 'cite'})
        cites = [q['name'] for q in quoted] if quoted else list()

        pieces = []
        for node in post:
            # TODO: This is a suboptimal (and partially wrong) way to parse
            # cites in a post body (a lot to improve here)
            if node.name in ('div', 'p'):
                continue
            if hasattr(node, 'text'):
                pieces.append(node.text)
            elif isinstance(node, NavigableString):
                pieces.append(str(node))
            else:
                pieces.append('\n')

        yield ''.join(pieces), cites
html.py 文件源码 项目:PyBloqs 作者: manahl 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def append_to(parent, tag, **kwargs):
    """
    Create a new element and append it to the supplied parent.

    :param parent: Element that receives the new child.
    :param tag: Name of the tag to create.
    :param kwargs: Attributes of the new tag.
    :return: The newly created element.
    """
    # Use the soup stored on the parent when it has one; otherwise look up
    # the enclosing "html" element.
    soup = parent.soup if hasattr(parent, "soup") else parent.find_parent("html")

    # bs4.Tag is instantiated directly rather than through new_tag, because an
    # attribute called "name" would clash with the tag name in bs4.
    element = bs4.Tag(builder=soup.builder, name=tag, attrs=kwargs)
    element.soup = soup
    parent.append(element)
    return element
scp.py 文件源码 项目:nyx 作者: Cappycot 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def read_component(thing):
    """
    Render ``thing`` with a surrounding markup marker.

    Non-Tag values are returned unchanged. For a Tag the function recurses
    into ``thing.next_element`` and wraps the result: ``*`` for ``em``, ``**``
    for ``strong``, ``__`` for ``u`` and ``~~`` for a line-through style. A Tag
    whose ``id`` contains ``footnoteref`` yields an empty string; any other
    Tag yields the recursive result unwrapped.
    """
    if not isinstance(thing, Tag):
        return thing

    marker = {"em": "*", "strong": "**", "u": "__"}.get(thing.name)
    if marker is None and \
            thing.attrs.get("style") == "text-decoration: line-through;":
        marker = "~~"
    if marker is not None:
        return marker + read_component(thing.next_element) + marker

    element_id = thing.attrs.get("id")
    if element_id is not None and "footnoteref" in element_id:
        return ""
    return read_component(thing.next_element)
run.py 文件源码 项目:Weather 作者: dev4love 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def show_weather(cityinfo):
    """
    Fetch the weather page for a city and print its ``sevendays`` table.

    For every Tag child of the first ``table.sevendays`` the text of the
    ``date``, ``temp`` and ``desc`` cells is printed with all whitespace
    removed, followed by a separator line.

    :param cityinfo: mapping read via ``.get`` for the keys ``id``,
        ``parent_name_ch`` and ``city_name_ch``
    """
    print(u'?????? #%s,%s# ???...' % (cityinfo.get(u'parent_name_ch'), cityinfo.get(u'city_name_ch')))
    page = api.getWeather(cityinfo.get(u'id'))
    forecast = BeautifulSoup(page, u'html.parser').find_all(u'table', class_=u'sevendays')[0]

    rows = (node for node in forecast.children if isinstance(node, Tag))
    for row in rows:
        # Look up all three cells before printing anything for this row.
        cells = [row.find(u'td', class_=css).get_text()
                 for css in (u'date', u'temp', u'desc')]
        for cell in cells:
            print(''.join(cell.split()))
        print(u'=================')
gui.py 文件源码 项目:Weather 作者: dev4love 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def _showWeather(self, city):
    """
    Fetch the weather page for a city and write its ``sevendays`` table to
    ``self.info``.

    For every Tag child of the first ``table.sevendays`` the text of the
    ``date``, ``temp`` and ``desc`` cells is inserted at ``tk.INSERT`` with
    all whitespace removed, one per line, followed by a separator line.

    :param city: mapping read via ``.get`` for the keys ``id``,
        ``city_name_ch`` and ``parent_name_ch``
    """
    banner = u'?????? #%s, %s# ???...\n\n\n' % (
        city.get(u'city_name_ch'), city.get(u'parent_name_ch'))
    self.info.insert(tk.INSERT, banner)

    page = self.api.getWeather(city.get(u'id'))
    forecast = BeautifulSoup(page, u'html.parser').find_all(u'table', class_=u'sevendays')[0]

    rows = (node for node in forecast.children if isinstance(node, Tag))
    for row in rows:
        # Look up all three cells before writing anything for this row.
        cells = [row.find(u'td', class_=css).get_text()
                 for css in (u'date', u'temp', u'desc')]
        for cell in cells:
            self.info.insert(tk.INSERT, ''.join(cell.split()) + '\n')
        self.info.insert(tk.INSERT, u'=================' + '\n')
zimuku.py 文件源码 项目:PyZimuDog 作者: linheimx 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def get_movie_list(kw_movie, pageIndex=0):
    """
    Fetch one page of movie search results.

    :param kw_movie: value substituted for ``movie`` in ``api_movies``
    :param pageIndex: value substituted for ``page_index`` in ``api_movies``
    :return: ``Resp`` wrapping a ``PageMovie``; if parsing raises, a ``Resp``
        whose ``errorMsg`` is the repr of the exception
    """
    page_url = api_movies.format(movie=kw_movie, page_index=pageIndex)
    soup = BeautifulSoup(fetch_text(page_url), 'html.parser')
    try:
        # 1. one movie per result block
        movies = [process_movie_item(block)
                  for block in soup.find_all('div', 'item prel clearfix')]

        # 2. pagination state
        pager = soup.find('div', 'pagination l clearfix')
        index, haveNext = process_page_next(pager)
        return Resp(PageMovie(movies, index, haveNext))
    except Exception as err:
        return Resp(errorMsg=repr(err))
zimuku.py 文件源码 项目:PyZimuDog 作者: linheimx 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def process_movie_item(div_item: Tag) -> Movie:
    """
    Build a ``Movie`` from one search-result block.

    ``detail_url`` is ``base_url`` plus the ``href`` of the element returned by
    ``findChild()`` on the poster div; ``avatar_url`` is the ``data-original``
    attribute of that element's own ``findChild()``; ``name`` is the text of
    the first ``p a b`` match inside the ``title`` div.
    """
    movie = Movie()

    # Poster column: link and the image nested in it.
    link = div_item.find('div', 'litpic hidden-xs hidden-sm').findChild()
    movie.detail_url = base_url + link['href']
    movie.avatar_url = link.findChild()['data-original']

    # Title column.
    title_div = div_item.find('div', 'title')  # type:Tag
    movie.name = title_div.select("p a b")[0].text
    return movie
ZimuKu.py 文件源码 项目:PyZimuDog 作者: linheimx 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def get_MovieList(keyword: str) -> List[Movie]:
    '''
    Search the site for ``keyword`` and return the movies that were found.

    Parsing is best effort: if handling a result block raises, the movies
    collected up to that point are returned.

    :param keyword: search term placed in the ``q`` query parameter
    :return: list of the truthy values returned by ``get_Movie``
    '''
    r = requests.get(base_url + '/search?ad=1&q={0}'.format(keyword))

    dom = BeautifulSoup(r.text, 'html.parser')

    list_movie = []

    div_blocks = dom.find_all('div', class_='item prel clearfix')
    try:
        for div_block in div_blocks:  # type:Tag
            movie = get_Movie(div_block)
            if movie:
                list_movie.append(movie)
    except Exception:
        # Deliberately best effort, but only for ordinary errors:
        # KeyboardInterrupt and SystemExit must still propagate.
        pass
    return list_movie
ZimuKu.py 文件源码 项目:PyZimuDog 作者: linheimx 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_Movie(item: Tag) -> Movie:
    '''
    Build a ``Movie`` from one search-result block.

    ``detail_url`` is the ``href`` of the first ``div.title p a`` match and
    ``name`` is the text of that link's ``findChild()``. Extraction is best
    effort: when it fails, the movie is returned with whatever fields were
    set before the failure.

    :param item: result block to read
    :return: the (possibly partially filled) movie
    '''
    # Created outside the try so that `movie` is always bound at `return`.
    movie = Movie()

    try:
        a = item.select_one('div.title p a')  # type:Tag
        movie.detail_url = a['href']
        movie.name = a.findChild().text
    except Exception:
        # Best effort for ordinary parsing errors only; KeyboardInterrupt and
        # SystemExit must still propagate.
        pass

    return movie
ZimuKu.py 文件源码 项目:PyZimuDog 作者: linheimx 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_ZimusByMovie(url: str) -> List[Zimu]:
    """
    Fetch a movie page and return the entries listed in its table.

    One ``Zimu`` is built per ``tr`` of the first ``body tbody`` match, from
    the ``href`` and ``title`` of the row's first ``td a`` match. Rows that
    cannot be parsed are skipped.

    :param url: path appended to ``base_url`` (after a ``/``)
    :return: parsed entries; empty when the page has no ``tbody``
    """
    r = requests.get(base_url + "/" + url)
    dom = BeautifulSoup(r.text, 'html.parser')

    list_zimu = []

    father = dom.select_one('body tbody')  # type: Tag
    if father is None:
        # select_one found nothing: there are no rows to read.
        return list_zimu

    for tr in father.select('tr'):
        try:
            a = tr.select_one('td a')
            zimu = Zimu()
            zimu.detail_url = a['href']
            zimu.name = a['title']
        except Exception:
            # Skip malformed rows, but let KeyboardInterrupt/SystemExit through.
            continue
        list_zimu.append(zimu)
    return list_zimu
views.py 文件源码 项目:django-allauth-providers-ko 作者: askdjango 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def naver_complete_login(request, app, token):
    """
    Fetch the user's profile XML with the bearer token and build the login.

    The Tag children of the ``result`` and ``response`` elements are collected
    into ``{tag name: tag text}`` dicts; the ``response`` dict is passed to the
    provider as extra data.

    :return: value of ``provider.sociallogin_from_response``
    """
    provider = providers.registry.by_id(NaverProvider.id)
    headers = {'authorization': 'Bearer {}'.format(token.token)}
    resp = requests.get(API_URL + '/nid/getUserProfile.xml', headers=headers)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, 'xml')
    parsed = {
        section: {node.name: node.text
                  for node in soup.find(section)
                  if isinstance(node, Tag)}
        for section in ('result', 'response')
    }

    return provider.sociallogin_from_response(request, parsed['response'])
parse_nl.py 文件源码 项目:wiktionary-translations-parser 作者: elfxiong 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def parse_translation_table(self, table):
    """ Overrides GeneralParser's method.
    :param table: a Tag object. Not necessarily a table; can be a div.
    :return: yields (translation, language_name, language_code) tuples
    """
    # Walk every "li" element in the table.
    for item in table.find_all('li'):
        if not isinstance(item, Tag):
            continue
        parts = item.get_text().split(':')
        if len(parts) < 2:
            continue

        # The language name is the text before the first ":".
        lang_name = parts[0]

        # The language code is read from the "trad-sup-code" element, with its
        # first and last characters dropped.
        code_node = item.find(class_="trad-sup-code")
        lang_code = code_node.text.strip()[1:-1] if code_node else ""

        # There are two functions that remove parentheses. Not sure which one
        # to use.
        cleaned = remove_parenthesis(parts[1])

        # Each chunk is: translation <sup>(lang_code)</sup> (transliteration);
        # lang_code and transliteration may not exist.
        for chunk in re.split(COMMA_OR_SEMICOLON, cleaned):
            yield (chunk.split('(')[0].strip(), lang_name.strip(), lang_code)
general.py 文件源码 项目:wiktionary-translations-parser 作者: elfxiong 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def parse_translation_table(self, table):
    """
    Yield the translations found in the table together with their language.
    Hopefully this works for most editions. Override this method if needed.
    :param table: a Tag object. Not necessarily a table; can be a div.
    :return: yields (translation, language_name, language_code) tuples
    """
    for entry in table.find_all('li'):
        if not isinstance(entry, Tag):
            continue
        pieces = entry.get_text().split(':')

        # TBD: either this is not a translation table, or it is one that has
        # some <li> without a colon.
        if len(pieces) < 2:
            continue

        # The language name is the text before the first ":".
        lang_name = pieces[0].strip()

        # The language code is read from the superscript element.
        sup = entry.find("sup")
        lang_code = remove_all_punctuation(sup.text).strip() if sup else ""

        cleaned = remove_parenthesis(pieces[1])

        # Each candidate is: translation <sup>(lang_code)</sup> (transliteration);
        # lang_code and transliteration may not exist.
        for candidate in re.split(COMMA_OR_SEMICOLON, cleaned):
            translation = re.split(r'[(??]', candidate)[0].strip()
            # Tuples whose translation contains '[[' are thrown out.
            if "[[" not in translation:
                yield (translation, lang_name, lang_code)
parse_pl.py 文件源码 项目:wiktionary-translations-parser 作者: elfxiong 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def parse_unordered_list_polish(self, ulist):
    """
    Yield (translation, language_name, language_code) for each ``li`` in the
    list whose text contains a colon.

    The language name is the text before the first colon, the language code is
    always the empty string, and the text between the first and second colon
    is split on COMMA_OR_SEMICOLON into individual translations.
    """
    for item in ulist.find_all('li'):
        if not isinstance(item, Tag):
            continue
        raw = item.get_text()
        if raw == '':
            continue
        parts = raw.split(':')
        if len(parts) <= 1:
            continue
        lang_name, lang_code = parts[0], ''
        for chunk in re.split(COMMA_OR_SEMICOLON, parts[1]):
            yield (remove_parenthesis(chunk).strip(), lang_name, lang_code)
parse_ru.py 文件源码 项目:wiktionary-translations-parser 作者: elfxiong 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def parse_translation_table_russian(self, table):
    """
    Yield (translation, language_name, language_code) for each ``li`` in the
    table.

    The language name is the text before the first colon with the language
    code (text of the item's ``sub`` element, if any) cut off its end. The
    translations come from the text after the first colon, or from the whole
    first segment when the item has no colon; empty translations are skipped.

    :param table: element whose ``li`` descendants are scanned
    """
    for li in table.find_all('li'):
        if not isinstance(li, Tag):
            continue
        text = li.get_text().split(':')

        # language name is before ":"
        lang_name = text[0]

        sub = li.find("sub")
        lang_code = sub.get_text() if sub is not None else ''

        # Remove the lang code from the lang name. Only do so when there is a
        # code: with an empty code the slice would be lang_name[:-0], i.e.
        # lang_name[:0], which erases the whole name.
        if lang_code:
            lang_name = lang_name[:-len(lang_code)]

        if len(text) > 1:
            t = remove_parenthesis(text[1])
        else:
            t = remove_parenthesis(text[0])

        for trans in re.split(COMMA_OR_SEMICOLON, t):
            translation = trans.split('(')[0].strip()
            if translation != '':
                yield (translation, lang_name, lang_code)
hosters.py 文件源码 项目:tvlinker 作者: ozmartian 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def bs_tag_to_string(bstag: Tag) -> str:
    """Return the ``str()`` of every item in ``bstag.contents``, concatenated."""
    return ''.join(map(str, bstag.contents))
mikan.py 文件源码 项目:BGmi 作者: RicterZ 项目源码 文件源码 阅读 47 收藏 0 点赞 0 评论 0
def search_by_keyword(self, keyword, count=None):
    """
    Search the site for ``keyword`` and return the matching episodes.

    Each result is a dict with the keys ``download``, ``name``, ``title``,
    ``episode`` and ``time``, for example::

        [
            {
                'name': "<the search keyword>",
                'download': 'magnet:?xt=urn:btih:what ever',
                'title': "<text of the row's magnet-link-wrap anchor>",
                'episode': 12,
                'time': 1500000000,
            },
        ]

    ``time`` is the row's third cell parsed as ``%Y/%m/%d %H:%M`` and
    converted with ``time.mktime`` to an integer.

    :param keyword: search key word
    :type keyword: str
    :param count: how many page to fetch from website (not used by this
        implementation, which issues a single search request)
    :type count: int

    :return: list of episode search result
    :rtype: list[dict]
    """
    result = []
    r = network.get(server_root + "Home/Search", params={'searchstr': keyword}).text
    s = BeautifulSoup(r, 'lxml')
    tr_list = s.find_all('tr', attrs={'class': 'js-search-results-row'})  # type:list[bs4.Tag]
    for tr in tr_list:
        title = tr.find('a', class_='magnet-link-wrap').text
        time_string = tr.find_all('td')[2].string
        result.append({
            'download': tr.find('a', class_='magnet-link').attrs.get('data-clipboard-text', ''),
            'name': keyword,
            'title': title,
            'episode': self.parse_episode(title),
            'time': int(time.mktime(time.strptime(time_string, "%Y/%m/%d %H:%M")))
        })
    return result

```

works.py 文件源码 项目:ao3 作者: alexwlchan 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def author(self):
    """The author of this work."""
    # The byline holds the author, in the form
    #
    #     <h3 class="byline heading">
    #       <a href="/users/[author_name]" rel="author">[author_name]</a>
    #     </h3>
    #
    byline = self._soup.find('h3', attrs={'class': 'byline'})
    # Keep only the Tag children; exactly one is expected.
    child_tags = list(filter(lambda node: isinstance(node, Tag),
                             byline.contents))
    assert len(child_tags) == 1
    only_link = child_tags[0]
    return only_link.contents[0].strip()
BeautifulSoupHelper.py 文件源码 项目:secret 作者: jianlong108 项目源码 文件源码 阅读 53 收藏 0 点赞 0 评论 0
def isTagClass(obj):
    """Return True when ``obj`` is an instance of ``Tag``, False otherwise."""
    return isinstance(obj, Tag)
BeautifulSoupHelper.py 文件源码 项目:secret 作者: jianlong108 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def getelementlistwithlabel(tagObj, label, options=None):
    """
    Return the elements named ``label`` found under ``tagObj``.

    :param tagObj: element to search; must be a ``Tag``
    :param label: tag name passed to ``find_all``
    :param options: attribute filter passed to ``find_all`` as ``attrs``;
        defaults to no filter
    :return: list of matching elements, or None (after printing a message)
        when ``tagObj`` is not a ``Tag``
    """
    if not isinstance(tagObj, Tag):
        # str() is required: concatenating a non-string object to the message
        # raises TypeError, and this branch is reached precisely when tagObj
        # is not the expected type.
        print('??????,??Tag?? ????:' + str(tagObj))
        return None

    # A None default avoids sharing one mutable dict between calls.
    attrs = {} if options is None else options
    return list(tagObj.find_all(label, attrs=attrs))
BeautifulSoupHelper.py 文件源码 项目:secret 作者: jianlong108 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def gettextlistwithlabel(tagObj):
    """
    Return the text of ``tagObj`` encoded as UTF-8.

    :param tagObj: element to read; must be a ``Tag``
    :return: ``tagObj.get_text()`` encoded with UTF-8, or None (after printing
        a message) when ``tagObj`` is not a ``Tag``
    """
    if not isinstance(tagObj, Tag):
        # str() is required: concatenating a non-string object to the message
        # raises TypeError, and this branch is reached precisely when tagObj
        # is not the expected type.
        print('??????,??Tag?? ????:' + str(tagObj))
        return None

    return tagObj.get_text().encode('utf-8')
yukicoder.py 文件源码 项目:online-judge-tools 作者: kmyk 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def _parse_sample_tag(self, tag):
    """
    Turn a ``pre`` tag into a ``(sample, name)`` pair when it is a sample.

    The tag counts as a sample when its previous sibling tag is an ``h6``, its
    parent is a ``div`` whose class list is exactly ``['paragraph']``, and the
    parent's previous sibling tag is an ``h5``.

    :param tag: a ``pre`` element
    :return: ``(utils.textfile(text), "<h5 string> <h6 string>")`` for a
        sample, otherwise None
    """
    assert isinstance(tag, bs4.Tag)
    assert tag.name == 'pre'
    parent = tag.parent
    prv = utils.previous_sibling_tag(tag)
    pprv = parent and utils.previous_sibling_tag(parent)
    # Any of prv / parent / pprv may be missing (first child, detached tag),
    # and the parent may carry no class attribute; none of that is an error,
    # it just means this <pre> is not a sample.
    if (prv is not None and prv.name == 'h6'
            and parent is not None and parent.name == 'div'
            and parent.get('class') == ['paragraph']
            and pprv is not None and pprv.name == 'h5'):
        log.debug('h6: %s', str(prv))
        log.debug('name.encode(): %s', prv.string.encode())
        s = tag.string or ''  # tag.string for the tag "<pre></pre>" returns None
        return utils.textfile(s.lstrip()), pprv.string + ' ' + prv.string
    return None
utils.py 文件源码 项目:online-judge-tools 作者: kmyk 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def previous_sibling_tag(tag):
    """
    Walk backwards through the siblings of ``tag`` and return the first one
    that is falsy or a ``bs4.Tag``.
    """
    node = tag.previous_sibling
    while True:
        if not node or isinstance(node, bs4.Tag):
            return node
        node = node.previous_sibling
utils.py 文件源码 项目:online-judge-tools 作者: kmyk 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def next_sibling_tag(tag):
    """
    Walk forwards through the siblings of ``tag`` and return the first one
    that is falsy or a ``bs4.Tag``.
    """
    node = tag.next_sibling
    while True:
        if not node or isinstance(node, bs4.Tag):
            return node
        node = node.next_sibling
utils.py 文件源码 项目:online-judge-tools 作者: kmyk 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def __init__(self, form, url):
    """
    Remember the form and URL and prefill the payload from the form's inputs.

    Every ``input`` that is not a checkbox or radio button and that has both
    a ``name`` and a ``value`` attribute contributes ``name -> value`` to
    ``self.payload``. ``self.files`` starts empty.

    :param form: a ``form`` element
    :param url: stored as ``self.url``
    """
    assert isinstance(form, bs4.Tag)
    assert form.name == 'form'
    self.form = form
    self.url = url
    self.payload = {}
    self.files = {}
    for field in self.form.find_all('input'):
        log.debug('input: %s', str(field))
        attrs = field.attrs
        is_toggle = attrs.get('type') in [ 'checkbox', 'radio' ]
        if not is_toggle and 'name' in attrs and 'value' in attrs:
            self.payload[field['name']] = field['value']
anarchygolf.py 文件源码 项目:online-judge-tools 作者: kmyk 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def _parse_sample_tag(self, tag):
    """
    Turn an ``h2`` heading into a ``(sample, name)`` pair when it introduces
    a sample.

    The heading's first content, cut at the first ``:``, must be
    ``Sample input`` or ``Sample output``. The sample text is taken from the
    next sibling that is not whitespace-only text, provided it is a ``pre``;
    otherwise the sample is the empty string.

    :param tag: an ``h2`` element
    :return: ``(sample, name)`` for a sample heading, otherwise None
    """
    assert isinstance(tag, bs4.Tag)
    assert tag.name == 'h2'
    name = tag.contents[0]
    if ':' in name:
        name = name[:  name.find(':') ]
    if name in [ 'Sample input', 'Sample output' ]:
        nxt = tag.next_sibling
        # Skip whitespace-only siblings. A sibling whose .string is None (a
        # tag with several children) is not whitespace, so stop there instead
        # of failing on None.strip().
        while nxt and nxt.string is not None and nxt.string.strip() == '':
            nxt = nxt.next_sibling
        # nxt is None when the heading has nothing after it; getattr also
        # covers siblings that expose no `name` attribute.
        if getattr(nxt, 'name', None) == 'pre':
            s = utils.textfile(utils.dos2unix(nxt.string.lstrip()))
        else:
            s = ''
        return s, name
    return None
browser.py 文件源码 项目:biweeklybudget 作者: jantman 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def soupify(self, body):
    """
    Return ``body`` unchanged when it is already a ``Tag``; otherwise parse it
    with BeautifulSoup's ``html.parser`` and return the result.
    """
    # https://www.crummy.com/software/BeautifulSoup/
    # docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
    # bs4 codebase: http://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/files
    return body if isinstance(body, Tag) else BeautifulSoup(body, "html.parser")
courseparser.py 文件源码 项目:PyTaskHelper 作者: AvSinStudio 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def parse_tasks(tasks, year):
    """
    Build one record per task found in ``tasks``.

    Non-Tag items are ignored. For each remaining item its ``td`` is read:
    when the element two siblings after the ``strong`` heading is a ``span``,
    the cell describes a single task of category ``common``; otherwise every
    ``font`` element whose ``previous`` element is a ``div`` describes one
    task in the category named by the heading.

    :param tasks: iterable of elements to scan
    :param year: passed to ``parse_results`` and stored in every record
    :return: list of dicts with the keys ``category``, ``name``, ``max``,
        ``students`` and ``year``
    """
    records = []
    for row in tasks:
        if not isinstance(row, bs4.Tag):
            continue
        cell = row.td
        heading = cell.strong.text.strip()

        if cell.strong.next_sibling.next_sibling.name == 'span':
            max_text = cell.span.text.strip()
            students = parse_results(cell.table, year)
            records.append(dict(category='common', name=heading,
                                max=int(max_text), students=students,
                                year=year))
            continue

        for font in cell.findAll('font'):
            if font.previous.name != 'div':
                continue
            title = font.text.strip()
            max_text = font.findNext('span').text.strip()
            students = parse_results(font.findNext('table'), year)
            records.append(dict(category=heading, name=title,
                                max=int(max_text), students=students,
                                year=year))
    return records
html2xml.py 文件源码 项目:table2xml 作者: phiedulxp 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def is_leaf_table(table_soup):
    """
    Return True when ``table_soup`` is not a ``Tag`` or contains no ``table``
    descendants; False otherwise.
    """
    return (not isinstance(table_soup, Tag)
            or len(table_soup.find_all('table')) == 0)


问题


面经


文章

微信
公众号

扫码关注公众号