python类html()的实例源码

document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def html(self):
        """Return the parsed HTML tree held in ``self._html``."""
        tree = self._html
        return tree
document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def export(self):
        """
        Render the document and write it below ``<export_dir>/<id>/``.

        The rendered page is written UTF-8 encoded to ``index.html`` and every
        ``(url, filename)`` pair returned by ``self._process`` is handed to
        ``self._download_url`` with a destination inside the same directory.
        """
        root = ".."
        target_dir = os.path.join(self._export_dir, self._id)
        os.makedirs(target_dir, exist_ok=True)

        rendered, assets = self._process(root=root)
        index_path = os.path.join(target_dir, 'index.html')
        with open(index_path, 'wb+') as handle:
            handle.write(rendered.encode('utf-8'))

        for source_url, filename in assets:
            destination = os.path.join(target_dir, filename)
            self._download_url(source_url, destination)
document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def _process(self, root='..'):
        """
        Transform the parsed document in place and render it with the theme.

        Anchors whose ``href`` starts with ``https://www.google.com/url`` are
        rewritten through ``process_link``; every image ``src`` is replaced by
        a local path whose file name is the MD5 hex digest of the original URL
        plus ``.jpg``.

        :param root: forwarded to ``process_link`` and to the theme renderer
        :returns: tuple ``(rendered, files)`` where ``rendered`` is the value
            returned by ``self._theme.render`` and ``files`` is a list of
            ``(original_url, local_filename)`` pairs for the images found
        """
        files = []
        self._clean_html()
        self._annotate()

        for (element, attr, url, _) in self._html.iterlinks():
            if element.tag == 'a' and attr == 'href' and url.startswith('https://www.google.com/url'):
                element.set('href', process_link(url, root=root))
            elif element.tag == 'img' and attr == 'src':
                # The local file name is derived from the URL hash; the
                # extension is always '.jpg' regardless of the source URL.
                filetitle = hashlib.md5(url.encode()).hexdigest()
                filetitle += '.jpg'
                element.set('src', '../' +  self._id + '/' + filetitle) # Go up to the top level so the path also works when the document is used as an appliance
                files.append((url, filetitle))

        # The table of contents is computed before anchors are added and
        # images are wrapped, i.e. from the tree as rewritten above.
        self._toc = self._get_toc()
        self._add_anchors()
        self._wrap_images()
        self._replace_youtube_videos()

        # Wrap the original body: reuse the <body> element (or an empty one
        # when the document has none) and turn it into a <div>.
        try:
            body = self._html.xpath('//body')[0]
        except (IndexError):
            body = lxml.html.Element('body')
        body.tag = 'div'

        # Drop any inline style carried by the original <body>.
        if 'style' in body.attrib:
            del body.attrib['style']

        self._content = lxml.etree.tostring(body, pretty_print=True, method="html")
        return self._theme.render(self._template + '.html', document=self, root=root, config=self._config, appliances=self._appliances), files
document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def _wrap_images(self):
        """
        Wrap images in a target blank

        Each ``<img>`` element is converted in place into an ``<a>`` that
        links to the image source and opens in a new window; a fresh ``<img>``
        carrying the same source is appended inside it.
        """
        for node in self._html.iter('img'):
            node.attrib.pop('style', None)
            source = node.attrib.pop("src", None)

            # Re-purpose the existing element as the link...
            node.attrib["href"] = source
            node.attrib["target"] = "_blank"
            node.tag = "a"

            # ...and nest a new image inside it.
            inner = lxml.html.builder.IMG()
            inner.attrib["src"] = node.attrib["href"]
            node.append(inner)
document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def _add_anchors(self):
        """
        Add anchors link to h1, h2, h3

        Only headings that have no child elements and carry an ``id``
        attribute are changed: their text is moved into a new ``<a>`` child
        whose ``href`` is ``#<id>``.
        """
        for heading in self._html.iter('h1', 'h2', 'h3'):
            if len(heading) != 0:
                continue
            anchor_id = heading.attrib.get('id')
            if anchor_id is None:
                continue

            link = lxml.html.builder.A()
            link.attrib['href'] = "#" + anchor_id
            link.text = heading.text
            heading.text = None
            heading.append(link)
document.py 文件源码 项目:gns3-documentation-template 作者: GNS3 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def main():
    """
    Run a test

    Builds a small ``DriveDocument``, exports it into a temporary directory
    and prints the generated ``index.html``.
    """
    import tempfile

    sample_html = "<html><body style=\"test\"><h1>Hello</h1></body></html>"
    with tempfile.TemporaryDirectory() as workdir:
        doc = DriveDocument("42", "test", sample_html, editable_by_anyone=True)
        doc.export(workdir)
        index_path = os.path.join(workdir, "42", "index.html")
        with open(index_path) as handle:
            print(handle.read())
dailydownbyKeywords.py 文件源码 项目:catchWecaht 作者: leon0204 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def get_keylist(self, search_url, i):
        """
        Fetch ``search_url`` and call ``self.get_content(link, i)`` for every
        result link found on the page.
        """
        # NOTE: certificate verification is disabled for this request.
        response = requests.get(search_url, headers=self.headers, verify=False)
        tree = etree.HTML(response.content)
        # hrefs of the heading links inside the "news-box" result list
        links = tree.xpath('//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href')
        for link in links:
            self.get_content(link, i)
dailydownbyKeywords.py 文件源码 项目:catchWecaht 作者: leon0204 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def removeFile(self):
        """Delete the ``daily`` image and HTML directories when they exist."""
        # (path probed for existence, path handed to rmtree)
        targets = (
            ('/home/wwwroot/laravel/public/img/daily/', r'/home/wwwroot/laravel/public/img/daily'),  # pic
            ('/home/wwwroot/url/daily/', r'/home/wwwroot/url/daily'),  # html
        )
        for probe, directory in targets:
            if os.path.exists(probe):
                shutil.rmtree(directory)




    # ???????????
dailydown.py 文件源码 项目:catchWecaht 作者: leon0204 项目源码 文件源码 阅读 17 收藏 0 点赞 0 评论 0
def get_list(self, search_url):
        """
        Fetch ``search_url`` and call ``self.get_content`` for every result
        link found on the page.
        """
        # NOTE: certificate verification is disabled for this request.
        response = requests.get(search_url, headers=self.headers, verify=False)
        tree = etree.HTML(response.content)
        # hrefs of the heading links inside the "news-box" result list
        links = tree.xpath('//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href')
        for link in links:
            self.get_content(link)



    # ?????????????
new_catch.py 文件源码 项目:catchWecaht 作者: leon0204 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def get_list(self, search_url):
        """
        Download the search page at ``search_url`` and pass each result link
        to ``self.get_content``.
        """
        # NOTE: certificate verification is disabled for this request.
        page = requests.get(search_url, headers=self.headers, verify=False)
        document = etree.HTML(page.content)
        # hrefs of the heading links inside the "news-box" result list
        hrefs = document.xpath('//div[@class="news-box"]/ul/li/div[@class="txt-box"]/h3/a/@href')
        for href in hrefs:
            self.get_content(href)



    # ?????????????
ReserveProcessor.py 文件源码 项目:FuME 作者: fupadev 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def reserve(self, match):
        """
        Post a reservation request for ``match`` and look the match up in the
        table returned by the server.

        ``match`` is a dict read via the keys ``match_id``, ``match_date``
        (``%Y-%m-%d %H:%M``), ``home`` and ``guest``; on success the key
        ``mafo_id`` is added to it.

        :returns: ``(match, photographer)`` when the row says somebody is
            already registered, ``(match, None)`` when the match can be
            reserved. If no table row matches, the function falls off the end
            and returns ``None``.
        """
        payload = {'match_selected': match['match_id'],
                   'match_verein_id': '',
                   'as_values_match_verein_id': '',
                   'check_match': match['match_id']}

        r = self.driver.request("POST", self.baseUrl + '&act=new', data=payload)
        doc = lxml.html.fromstring(r.content)
        path_match = "/html/body//table//tr[@id]/*//text() | " \
                     "/html/body//table//tr[@id]/*//@href"
        raw = doc.xpath(path_match)

        # 2017-06-05 -> 05.06.17
        date = datetime.datetime.strptime(match['match_date'], '%Y-%m-%d %H:%M').strftime('%d.%m.%y %H:%M')

        # ---- raw snippet -----
        # 0 06.06.17 18:30 Uhr
        # 1 Relegation
        # 2 TSV Landsberg
        # 3 - TSV Bogen
        # 4 index.php?page=fotograf_spiele&mafo_id=43704&act=del
        # 5 Bereits jemand eingetragen:
        # 6 http://www.fupa.net/fupaner/abc-def-3
        # 7 abc def
        # ...

        for i, d in enumerate(raw):
            if date in d:
                if match['home'] in raw[i + 2] and match['guest'] in raw[i + 3]:
                    url = raw[i + 4]
                    # second query parameter of the delete link carries the id
                    match['mafo_id'] = url.split("?")[1].split("&")[1].split("=")[1]
                    try:
                        if 'Bereits jemand eingetragen' in raw[i + 5]:
                            # already reserved
                            return match, raw[i + 7]  # Photographer
                    except IndexError:
                        # The list ended before the "already registered"
                        # cells: treat the match as free. Only IndexError is
                        # expected here, so nothing else is swallowed.
                        pass
                    # match can be reserved
                    return match, None
utils.py 文件源码 项目:htmltab 作者: flother 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def parse_html(html_file):
    """
    Convert ``html_file`` to Unicode with Beautiful Soup's UnicodeDammit
    class and parse the result with lxml's HTML parser.

    Raises ValueError when the converted markup is ``None`` or empty.
    Exceptions raised by ``lxml.html.fromstring`` itself are not caught here.
    """
    dammit = UnicodeDammit(html_file, smart_quotes_to="html", is_html=True)
    markup = dammit.unicode_markup
    # NOTE(review): the two messages below may be swapped relative to what
    # ``None`` vs. an empty string mean in UnicodeDammit -- confirm.
    if markup is None:
        raise ValueError("no HTML provided")
    if not markup:
        raise ValueError("could not detect character encoding")
    return lxml.html.fromstring(markup)
test_frames.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_parse_fragments_fromstring(self):
        # A bare <frameset> fragment must come back as a single root element
        # whose first child is the <frame>.
        markup = """<frameset>
            <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
        </frameset>"""
        html_parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
        fragments = lxml.html.fragments_fromstring(markup, parser=html_parser)
        self.assertEqual(len(fragments), 1)
        frameset = fragments[0]
        self.assertEqual(frameset.tag, "frameset")
        frame = frameset[0]
        self.assertEqual(frame.tag, 'frame')
test_frames.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def test_parse_fromstring(self):
        # A full document holding only a frameset must parse to
        # html > frameset > frame with exactly one child at each level.
        markup = """<html><frameset>
            <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
        </frameset></html>"""
        html_parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
        document = lxml.html.fromstring(markup, parser=html_parser)
        self.assertEqual(document.tag, 'html')
        self.assertEqual(len(document), 1)
        frameset = document[0]
        self.assertEqual(len(frameset), 1)
        frame = frameset[0]
        self.assertEqual(frame.tag, 'frame')
test_clean.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_allow_tags(self):
        # Clean a document while allowing only table/tr/td and check how many
        # elements remain in the result.
        markup = """
            <html>
            <head>
            </head>
            <body>
            <p>some text</p>
            <table>
            <tr>
            <td>hello</td><td>world</td>
            </tr>
            <tr>
            <td>hello</td><td>world</td>
            </tr>
            </table>
            <img>
            </body>
            </html>
            """

        tree = lxml.html.document_fromstring(markup)
        tag_cleaner = Cleaner(
            remove_unknown_tags=False,
            allow_tags=['table', 'tr', 'td'])
        cleaned = tag_cleaner.clean_html(tree)

        # presumably: 12 source elements, minus the 5 that are not allowed,
        # plus 1 remaining root element -- confirm against Cleaner behaviour
        expected_count = 12 - 5 + 1
        remaining = list(cleaned.iter())
        self.assertEqual(expected_count, len(remaining))
test_clean.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def test_safe_attrs_included(self):
        # With 'style' added to the safe attribute set, cleaning must leave
        # the markup unchanged.
        markup = """<p><span style="color: #00ffff;">Cyan</span></p>"""

        allowed_attrs = set(lxml.html.defs.safe_attrs)
        allowed_attrs.add('style')

        attr_cleaner = Cleaner(safe_attrs_only=True, safe_attrs=allowed_attrs)
        cleaned = attr_cleaner.clean_html(markup)

        self.assertEqual(markup, cleaned)
test_clean.py 文件源码 项目:Taigabot 作者: FrozenPigs 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def test_safe_attrs_excluded(self):
        # With an empty safe attribute set, the style attribute must be
        # stripped from the span.
        markup = """<p><span style="color: #00ffff;">Cyan</span></p>"""
        stripped = """<p><span>Cyan</span></p>"""

        attr_cleaner = Cleaner(safe_attrs_only=True, safe_attrs=set())
        cleaned = attr_cleaner.clean_html(markup)

        self.assertEqual(stripped, cleaned)
SessionAPI.py 文件源码 项目:competitive-cli 作者: GDGVIT 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def submit(self, probNum, path=".", language=None):
        """
        Submit the solution file for problem ``probNum`` to UVa.

        The source file is located with ``UvaSession.find_file(probNum, path)``.
        When ``language`` is ``None`` the language is derived from the file
        name via ``UvaSession.find_language``; otherwise it is looked up in
        ``UvaSession.language_handler``.

        :returns: the value of ``self.check_result(submission_id, probNum)``,
            or ``None`` when no language number could be determined.
        """
        file_path, filename = UvaSession.find_file(probNum, path)
        # Read the source inside a context manager so the file handle is
        # closed on every path, including the early return below.
        with open(file_path) as prob_file:
            source_code = prob_file.read()

        if language is None:
            language_number = UvaSession.find_language(filename)
        else:
            language_number = UvaSession.language_handler[language]

        if language_number is None:
            return None

        payload = {
            "localid": probNum,
            "code": source_code,
            "language": language_number,
            "codeupl": "",
            "problemid": "",
            "category": "",
            "submit": "Submit"
        }

        updated_headers = {
            "Referer": UvaSession.UVA_HOST + "index.php?option=com_onlinejudge&Itemid=25",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Host": "uva.onlinejudge.org",
            "Origin": UvaSession.UVA_HOST
        }

        resp = self.uva_session.post(UvaSession.SUBMIT_PATH, data=payload, headers=updated_headers)
        # The submission id is whatever follows "ID" plus one separator
        # character in the final response URL.
        submission_id = resp.url[resp.url.find('ID')+3:]
        return self.check_result(submission_id, probNum)
SessionAPI.py 文件源码 项目:competitive-cli 作者: GDGVIT 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def login(self, username="", password=""):
        """
        Log in to CodeChef through the site's login form.

        The hidden form fields of the landing page are posted back together
        with the credentials. While the site redirects to the session-limit
        page, that page's form is re-submitted. ``self.logged_in`` is set from
        whether ``username`` is found as text in the final page.

        :returns: ``self.logged_in``
        """
        self.username = username
        base_url = CodechefSession.codechef_url
        limit_url = base_url + '/session/limit'

        landing = self.codechef_session.get(base_url)
        landing_tree = lxml.html.fromstring(landing.text)
        payload = {}
        for field in landing_tree.xpath(r'//form//input[@type="hidden"]'):
            payload[field.attrib["name"]] = field.attrib["value"]
        payload.update({'name': username, 'pass': password, 'op': 'Login'})
        response = self.codechef_session.post(base_url, data=payload)

        # removing extra sessions using simple scraping and form handling
        while response.url == limit_url:
            limit_tree = lxml.html.fromstring(response.text)
            payload = {}
            # Walk the inputs back to front so that, for duplicate names, the
            # earliest input in the form wins.
            for field in reversed(limit_tree.xpath(r'//form//input')):
                payload[field.attrib["name"]] = field.attrib["value"]
            response = self.codechef_session.post(limit_url, data=payload)

        soup = bs(response.content, 'lxml')
        self.logged_in = bool(soup.find(text=username))
        if self.logged_in:
            self.username = username
        return self.logged_in
SessionAPI.py 文件源码 项目:competitive-cli 作者: GDGVIT 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def submit(self, question_code, path=".", language=None):
        """
        Submit the source file at ``path`` as a solution for ``question_code``.

        The running contests are scanned for the question; when one contains
        it, the submit URL is prefixed with that contest's name. ``language``
        is looked up in ``CodechefSession.language_handler``.

        :returns: tuple ``(sub_id, result)`` where ``sub_id`` is the last path
            segment of the response URL and ``result`` is the value of
            ``self.check_result(sub_id, question_code)``.
        """
        contest = ""
        for contests in self.info_present_contests():
            for contest_ques in CodechefSession.ques_in_contest(contests['contest_name']):
                if contest_ques == question_code:
                    contest = '/' + contests['contest_name']
                    # Only leaves the inner loop: a later contest that also
                    # lists the question overrides this one.
                    break
        # ``path`` is used directly as the path of the source file.
        file_path = path
        lang = CodechefSession.language_handler[language]
        response = self.codechef_session.get(
            self.codechef_url + contest + '/submit/' + question_code
        )

        html_page = lxml.html.fromstring(response.text)
        hidden_inputs = html_page.xpath(r'//form//input[@type="hidden"]')
        payload = {i.attrib['name']: i.attrib['value'] for i in hidden_inputs}
        payload['language'] = lang
        payload['problem_code'] = question_code
        payload['op'] = 'Submit'

        # Keep the upload handle inside a context manager so it is closed
        # once the request has been sent (or has failed).
        with open(file_path) as source_file:
            response = self.codechef_session.post(
                CodechefSession.codechef_url + contest + '/submit/' + question_code,
                data=payload,
                files={"files[sourcefile]": source_file}
            )

        sub_id = response.url.split('/')[-1]
        return sub_id, self.check_result(sub_id, question_code)


问题


面经


文章

微信
公众号

扫码关注公众号