python类html()的实例源码-第2页-面圈网

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def html(self):
        """Return the value stored in ``self._html``."""
        stored = self._html
        return stored

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def export(self):
        """
        Render the document and write it under ``<export_dir>/<id>/``.

        The page returned by ``_process`` is saved UTF-8 encoded as
        ``index.html``; every (url, filename) pair returned alongside it is
        handed to ``_download_url`` with the filename placed in the same
        directory.
        """
        relative_root = ".."
        target_dir = os.path.join(self._export_dir, self._id)
        os.makedirs(target_dir, exist_ok=True)
        page, assets = self._process(root=relative_root)

        index_path = os.path.join(target_dir, 'index.html')
        with open(index_path, 'wb+') as index_file:
            index_file.write(page.encode('utf-8'))

        for asset_url, asset_name in assets:
            self._download_url(asset_url, os.path.join(target_dir, asset_name))

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def _process(self, root='..'):
        """
        Transform the parsed HTML and render it through the theme.

        Links starting with ``https://www.google.com/url`` are rewritten with
        ``process_link`` and every image ``src`` is replaced by a local file
        name derived from the MD5 of its URL. The body is then converted to a
        ``div`` and serialized into ``self._content`` before rendering.

        :param root: passed to ``process_link`` and to the template
        :returns: tuple of (rendered page, list of (original image URL,
            local file name) pairs)
        """
        files = []
        self._clean_html()
        self._annotate()

        for (element, attr, url, _) in self._html.iterlinks():
            if element.tag == 'a' and attr == 'href' and url.startswith('https://www.google.com/url'):
                element.set('href', process_link(url, root=root))
            elif element.tag == 'img' and attr == 'src':
                # Local file name: hex MD5 of the image URL, always with a .jpg suffix
                filetitle = hashlib.md5(url.encode()).hexdigest()
                filetitle += '.jpg'
                element.set('src', '../' +  self._id + '/' + filetitle) # We go up to the top level so the path also works when the document is used as an appliance
                files.append((url, filetitle))

        # The remaining helpers work on the tree whose links were rewritten above
        self._toc = self._get_toc()
        self._add_anchors()
        self._wrap_images()
        self._replace_youtube_videos()

        # Wrap the original body
        try:
            body = self._html.xpath('//body')[0]
        except (IndexError):
            # No <body> in the document: fall back to a fresh, empty element
            body = lxml.html.Element('body')
        # Renamed to a div before being serialized into self._content
        body.tag = 'div'

        if 'style' in body.attrib:
            del body.attrib['style']

        self._content = lxml.etree.tostring(body, pretty_print=True, method="html")
        return self._theme.render(self._template + '.html', document=self, root=root, config=self._config, appliances=self._appliances), files

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def _wrap_images(self):
        """
        Wrap images in a link that opens in a new tab (target blank).

        Each ``<img>`` element is turned in place into an ``<a>`` whose
        ``href`` is the former ``src`` and whose ``target`` is ``_blank``; a
        new ``<img>`` carrying the same ``src`` is appended inside it. The
        inline ``style`` attribute is removed from every image.

        Images without a ``src`` attribute are left unwrapped: there is
        nothing to link to, and lxml rejects ``None`` as an attribute value.
        """
        for element in self._html.iter('img'):
            element.attrib.pop('style', None)
            source = element.attrib.pop("src", None)
            if source is None:
                # Previously this path raised TypeError when assigning None to href
                continue

            # Re-purpose the existing element as the link so it keeps its
            # position (and tail text) in the tree.
            element.attrib["href"] = source
            element.attrib["target"] = "_blank"
            element.tag = "a"

            inner_img = lxml.html.builder.IMG()
            inner_img.attrib["src"] = source
            element.append(inner_img)

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def _add_anchors(self):
        """
        Turn h1, h2 and h3 headings into links pointing at themselves.

        Only headings that have no child elements and carry an ``id`` are
        changed: their text is moved into a new ``<a href="#<id>">`` child.
        """
        for heading in self._html.iter('h1', 'h2', 'h3'):
            heading_id = heading.attrib.get('id')
            if len(heading) != 0 or heading_id is None:
                continue
            link = lxml.html.builder.A()
            link.attrib['href'] = "#" + heading_id
            link.text = heading.text
            heading.text = None
            heading.append(link)

document.py 文件源码项目：gns3-documentation-template 作者: GNS3 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def main():
    """
    Run a test: export a tiny document into a temporary directory and print
    the generated ``index.html``.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        document = DriveDocument("42", "test", "<html><body style=\"test\"><h1>Hello</h1></body></html>", editable_by_anyone=True)
        document.export(tmpdir)
        # export() writes the page UTF-8 encoded, so decode it explicitly
        # instead of relying on the platform default encoding.
        with open(os.path.join(tmpdir, "42", "index.html"), encoding="utf-8") as f:
            print(f.read())

dailydownbyKeywords.py 文件源码项目：catchWecaht 作者: leon0204 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def get_keylist(self, search_url, i):
        """
        Download a search result page and process every link found in it.

        search_url: URL of the result page to fetch.
        i: forwarded unchanged to ``self.get_content`` together with each link.
        """
        response = requests.get(search_url, headers=self.headers, verify=False)
        tree = etree.HTML(response.content)
        # href of every entry title in the result list
        links = tree.xpath('//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href')
        for link in links:
            self.get_content(link, i)

dailydownbyKeywords.py 文件源码项目：catchWecaht 作者: leon0204 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def removeFile(self):
        """Delete the daily picture and HTML directories if they exist."""
        # Each entry: (path probed for existence, path handed to rmtree)
        targets = (
            ('/home/wwwroot/laravel/public/img/daily/', r'/home/wwwroot/laravel/public/img/daily'),  # pic
            ('/home/wwwroot/url/daily/', r'/home/wwwroot/url/daily'),  # html
        )
        for probe, tree_root in targets:
            if os.path.exists(probe):
                shutil.rmtree(tree_root)




    # ???????????

dailydown.py 文件源码项目：catchWecaht 作者: leon0204 项目源码文件源码阅读 17 收藏 0 点赞 0 评论 0

def get_list(self, search_url):
        """
        Download a search result page and pass every link found in it to
        ``self.get_content``.
        """
        page_bytes = requests.get(search_url, headers=self.headers, verify=False).content
        # href of every entry title in the result list
        entry_links = etree.HTML(page_bytes).xpath(
            '//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href'
        )
        for entry_link in entry_links:
            self.get_content(entry_link)



    # ?????????????

new_catch.py 文件源码项目：catchWecaht 作者: leon0204 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def get_list(self, search_url):
        """
        Fetch ``search_url`` and call ``self.get_content`` once per link
        matched in the result list.
        """
        response = requests.get(search_url, headers=self.headers, verify=False)
        document = etree.HTML(response.content)
        # href of every entry title in the result list
        hrefs = document.xpath('//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href')
        for href in hrefs:
            self.get_content(href)



    # ?????????????

ReserveProcessor.py 文件源码项目：FuME 作者: fupadev 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def reserve(self, match):
        """
        Request a reservation for *match* and report whether it is taken.

        Posts the reservation form for ``match['match_id']``, then scans the
        text and href values of the id-carrying table rows in the response
        for the row whose date, home team and guest team fit *match*. On a
        hit ``match['mafo_id']`` is filled in from the row's link.

        Returns ``(match, photographer)`` when somebody is already entered,
        ``(match, None)`` when the match can be reserved, and ``None`` when
        no fitting row is found.
        """
        payload = {'match_selected': match['match_id'],
                   'match_verein_id': '',
                   'as_values_match_verein_id': '',
                   'check_match': match['match_id']}

        r = self.driver.request("POST", self.baseUrl + '&act=new', data=payload)
        doc = lxml.html.fromstring(r.content)
        path_match = "/html/body//table//tr[@id]/*//text() | " \
                     "/html/body//table//tr[@id]/*//@href"
        raw = doc.xpath(path_match)

        # 2017-06-05 -> 05.06.17
        date = datetime.datetime.strptime(match['match_date'], '%Y-%m-%d %H:%M').strftime('%d.%m.%y %H:%M')

        # ---- raw snippet -----
        # 0 06.06.17 18:30 Uhr
        # 1 Relegation
        # 2 TSV Landsberg
        # 3 - TSV Bogen
        # 4 index.php?page=fotograf_spiele&mafo_id=43704&act=del
        # 5 Bereits jemand eingetragen:
        # 6 http://www.fupa.net/fupaner/abc-def-3
        # 7 abc def
        # ...

        for i, d in enumerate(raw):
            if date not in d:
                continue
            # Home, guest and link sit at fixed offsets after the date; a
            # date hit this close to the end cannot be a complete row, and
            # neither can any later one.
            if i + 4 >= len(raw):
                break
            if match['home'] in raw[i + 2] and match['guest'] in raw[i + 3]:
                url = raw[i + 4]
                # Value of the second query parameter (mafo_id in the snippet above)
                match['mafo_id'] = url.split("?")[1].split("&")[1].split("=")[1]
                try:
                    if 'Bereits jemand eingetragen' in raw[i + 5]:
                        # already reserved
                        return match, raw[i + 7]  # Photographer
                except IndexError:
                    # Row is the last one and has no reservation entries
                    pass
                # match can be reserved
                return match, None
        return None

utils.py 文件源码项目：htmltab 作者: flother 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def parse_html(html_file):
    """
    Parse HTML with lxml after converting it to Unicode with Beautiful
    Soup's UnicodeDammit class.

    Raises ValueError when UnicodeDammit yields no markup (``None``) or
    empty markup. Errors raised by lxml's parser propagate unchanged.
    """
    markup = UnicodeDammit(html_file, smart_quotes_to="html",
                           is_html=True).unicode_markup
    if markup is None:
        raise ValueError("no HTML provided")
    if not markup:
        raise ValueError("could not detect character encoding")
    return lxml.html.fromstring(markup)

test_frames.py 文件源码项目：Taigabot 作者: FrozenPigs 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def test_parse_fragments_fromstring(self):
        """A frameset fragment parses to one frameset root whose first child is the frame."""
        markup = """<frameset>
            <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
        </frameset>"""
        html_parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
        fragments = lxml.html.fragments_fromstring(markup, parser=html_parser)
        self.assertEqual(len(fragments), 1)
        frameset = fragments[0]
        self.assertEqual(frameset.tag, "frameset")
        first_child = frameset[0]
        self.assertEqual(first_child.tag, 'frame')

test_frames.py 文件源码项目：Taigabot 作者: FrozenPigs 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def test_parse_fromstring(self):
        """A full document keeps the html > frameset > frame nesting."""
        markup = """<html><frameset>
            <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
        </frameset></html>"""
        html_parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
        document_root = lxml.html.fromstring(markup, parser=html_parser)
        self.assertEqual(document_root.tag, 'html')
        self.assertEqual(len(document_root), 1)
        frameset = document_root[0]
        self.assertEqual(len(frameset), 1)
        frame = frameset[0]
        self.assertEqual(frame.tag, 'frame')

test_clean.py 文件源码项目：Taigabot 作者: FrozenPigs 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def test_allow_tags(self):
        """Clean a document with only table, tr and td allowed and count the remaining elements."""
        markup = """
            <html>
            <head>
            </head>
            <body>
            <p>some text</p>
            <table>
            <tr>
            <td>hello</td><td>world</td>
            </tr>
            <tr>
            <td>hello</td><td>world</td>
            </tr>
            </table>
            <img>
            </body>
            </html>
            """

        document_root = lxml.html.document_fromstring(markup)
        table_only_cleaner = Cleaner(
            remove_unknown_tags = False,
            allow_tags = ['table', 'tr', 'td'])
        cleaned = table_only_cleaner.clean_html(document_root)

        remaining_elements = list(cleaned.iter())
        self.assertEqual(12-5+1, len(remaining_elements))

test_clean.py 文件源码项目：Taigabot 作者: FrozenPigs 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def test_safe_attrs_included(self):
        """With ``style`` added to the safe attributes, cleaning returns the markup unchanged."""
        markup = """<p><span style="color: #00ffff;">Cyan</span></p>"""

        allowed_attrs = set(lxml.html.defs.safe_attrs)
        allowed_attrs.add('style')

        style_keeping_cleaner = Cleaner(
            safe_attrs_only=True,
            safe_attrs=allowed_attrs)
        cleaned = style_keeping_cleaner.clean_html(markup)

        self.assertEqual(markup, cleaned)

test_clean.py 文件源码项目：Taigabot 作者: FrozenPigs 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def test_safe_attrs_excluded(self):
        """With an empty safe-attribute set, cleaning drops the ``style`` attribute."""
        markup = """<p><span style="color: #00ffff;">Cyan</span></p>"""
        stripped_markup = """<p><span>Cyan</span></p>"""

        strict_cleaner = Cleaner(
            safe_attrs_only=True,
            safe_attrs=set())
        cleaned = strict_cleaner.clean_html(markup)

        self.assertEqual(stripped_markup, cleaned)

SessionAPI.py 文件源码项目：competitive-cli 作者: GDGVIT 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def submit(self, probNum, path=".", language=None):
        """
        Submit the solution file for problem number *probNum*.

        The file is located with ``UvaSession.find_file``. The language
        number is looked up in ``UvaSession.language_handler`` when
        *language* is given, otherwise derived from the file name with
        ``UvaSession.find_language``.

        Returns the submission details reported by ``self.check_result``, or
        ``None`` when no language number could be determined.
        """
        file_path, filename = UvaSession.find_file(probNum, path)
        # Read inside a context manager so the handle is closed on every
        # path, including the early return below.
        with open(file_path) as probFile:
            source_code = probFile.read()

        if language is None:
            language_number = UvaSession.find_language(filename)
        else:
            language_number = UvaSession.language_handler[language]

        if language_number is None:
            return

        payload = {
            "localid": probNum,
            "code": source_code,
            "language": language_number,
            "codeupl": "",
            "problemid": "",
            "category": "",
            "submit": "Submit"
        }

        updated_headers = {
            "Referer": UvaSession.UVA_HOST + "index.php?option=com_onlinejudge&Itemid=25",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Host": "uva.onlinejudge.org",
            "Origin": UvaSession.UVA_HOST
        }

        resp = self.uva_session.post(UvaSession.SUBMIT_PATH, data=payload, headers=updated_headers)
        # The submission id is whatever follows "ID" plus one separator character in the final URL
        submission_id = resp.url[resp.url.find('ID')+3:]
        return self.check_result(submission_id, probNum)

SessionAPI.py 文件源码项目：competitive-cli 作者: GDGVIT 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def login(self, username="", password=""):
        """
        Log in by replaying the site's form with the given credentials.

        The hidden form inputs of the start page are posted back together
        with *username* and *password*. While the response lands on the
        ``/session/limit`` page, that page's form is re-posted. The login
        counts as successful when *username* is found as text in the final
        page.

        Returns the resulting ``self.logged_in`` flag.
        """
        home_url = CodechefSession.codechef_url
        limit_url = home_url + '/session/limit'

        self.username = username
        start_page = self.codechef_session.get(home_url)
        start_tree = lxml.html.fromstring(start_page.text)

        payload = {}
        for field in start_tree.xpath(r'//form//input[@type="hidden"]'):
            field_name = field.attrib["name"]
            payload[field_name] = field.attrib["value"]
        payload['name'] = username
        payload['pass'] = password
        payload['op'] = 'Login'
        response = self.codechef_session.post(home_url, data=payload)

        # removing extra sessions using simple scraping and form handling
        while response.url == limit_url:
            limit_tree = lxml.html.fromstring(response.text)
            payload = {}
            # Walk the inputs back to front so earlier inputs win on duplicate names
            for field in reversed(limit_tree.xpath(r'//form//input')):
                field_name = field.attrib["name"]
                payload[field_name] = field.attrib["value"]
            response = self.codechef_session.post(limit_url, data=payload)

        final_page = bs(response.content, 'lxml')
        self.logged_in = bool(final_page.find(text=username))
        if self.logged_in:
            self.username = username
        return self.logged_in

SessionAPI.py 文件源码项目：competitive-cli 作者: GDGVIT 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def submit(self, question_code, path=".", language=None):
        """
        Upload the file at *path* as a solution for *question_code*.

        The submit URL is prefixed with the name of a running contest that
        lists the question, if there is one. The hidden inputs of the submit
        form are posted back together with the language, the problem code
        and the source file.

        Returns a tuple of (submission id, result of ``self.check_result``).
        """
        contest = ""
        for contest_info in self.info_present_contests():
            for contest_ques in CodechefSession.ques_in_contest(contest_info['contest_name']):
                if contest_ques == question_code:
                    contest = '/' + contest_info['contest_name']
                    # NOTE(review): this only leaves the inner loop, so the
                    # last contest containing the question wins - confirm
                    # that is intended.
                    break
        file_path = path
        lang = CodechefSession.language_handler[language]
        response = self.codechef_session.get(
            self.codechef_url + contest + '/submit/' + question_code
        )

        html_page = lxml.html.fromstring(response.text)
        hidden_inputs = html_page.xpath(r'//form//input[@type="hidden"]')
        payload = {i.attrib['name']: i.attrib['value'] for i in hidden_inputs}
        payload['language'] = lang
        payload['problem_code'] = question_code
        payload['op'] = 'Submit'

        # Keep the source file open only for the duration of the upload so
        # the handle is closed even when the request fails.
        with open(file_path) as source_file:
            response = self.codechef_session.post(
                CodechefSession.codechef_url + contest + '/submit/' + question_code,
                data=payload,
                files={"files[sourcefile]": source_file},
            )

        sub_id = response.url.split('/')[-1]
        return sub_id, self.check_result(sub_id, question_code)