python类parser()的实例源码

util.py 文件源码 项目:chandl 作者: gebn 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def unescape_html(html_):
    """
    Turn HTML entities (e.g. ``&pound;``) in a string back into characters.

    The unescaping routine lives in a different place depending on the
    running interpreter, so the implementation is chosen from
    ``sys.version_info``.

    :param html_: The escaped HTML.
    :return: The input string with entities replaced.
    """

    # http://stackoverflow.com/a/2360639

    version = sys.version_info

    if version.major != 2 and version.minor != 3:  # 3.4+
        # noinspection PyCompatibility
        import html
        return html.unescape(html_)

    # Only 2.7 and 3.3 get here; both expose unescape() on HTMLParser.
    if version.major == 2:  # 2.7
        # noinspection PyUnresolvedReferences,PyCompatibility
        from HTMLParser import HTMLParser
    else:  # 3.3
        # noinspection PyCompatibility
        from html.parser import HTMLParser

    # noinspection PyDeprecation
    return HTMLParser().unescape(html_)
nixiebotreader.py 文件源码 项目:NixieBot 作者: Zedsquared 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def processIncomingTweet(tweet): #check tweet that has come in via the filter stream, it might have commands in it
    """Queue an incoming tweet if it is tagged NixieBotShowMe and is actionable.

    A tweet is actionable when ``extractWord`` finds a word in its
    (HTML-unescaped) text or ``hasCommand`` reports a command. Actionable
    tweets are put on the word queue with the priority computed by
    ``prioritise``, the queue high-water mark ``maxWordQ`` is updated, and
    the tweet is appended to ``recentReqs``. Once ``recentReqs`` holds more
    than ``reqPickleFrequency`` entries it is handed to ``pickleMe`` and,
    if that reports success, emptied in place.

    :param tweet: tweet mapping; ``tweet['text']`` and
        ``tweet['user']['screen_name']`` are read here.
    :return: None
    """
    # print(tweet)
    global maxWordQ
    global wordq
    if not scanTags(tweet,"NixieBotShowMe") :
        return

    # html.unescape() is the supported replacement for
    # HTMLParser().unescape(), which no longer exists from Python 3.9 on.
    theWord = extractWord(html.unescape(tweet['text']))
    if theWord is None and not hasCommand(tweet) :
        return

    wordqPut(tweet,priority = prioritise(tweet))
    size = wordq.qsize()
    if size > maxWordQ :
        maxWordQ = size
    print("word request from", tweet['user']['screen_name'], "word = ", theWord, " Word queue at:", size, "maxqueue was ", maxWordQ)
    recentReqs.append(tweet) # store for sending to hard storage every now and then
    if len(recentReqs) > reqPickleFrequency :
        if pickleMe(recentReqs, "Requests", dateStamp=True) :
            # clear in place so other references to the list see it emptied
            recentReqs[:]=[]
    #userCounter.update(tweet['user']['screen_name'])

    # DMreceipt bad idea as it still counts against rate limit
    #for ht in tweet['entities']['hashtags']:
    #    if ht['text']=="NBreceipt" and not rct:
    #        sendReceipt(tweet,theWord,tt)
    #        rct=True
html2tele.py 文件源码 项目:progrobot 作者: petr-kalinin 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def html2tele(html):
    """Run ``html`` through ``_HTMLToText`` and tidy the extracted text.

    After parsing, three regex clean-ups are applied in order: a newline
    followed by whitespace that ends in further newlines becomes exactly
    one blank line, spaces directly before ``<pre>`` are dropped, and
    spaces directly after ``</pre>`` are dropped.

    :param html: the HTML markup to convert.
    :return: the cleaned-up text produced by the parser.
    """
    converter = _HTMLToText()
    converter.feed(html)
    converter.close()

    # (pattern, replacement) pairs, applied top to bottom
    cleanups = (
        (r'\n(\s*\n+)', '\n\n'),
        (r' +<pre>', '<pre>'),
        (r'</pre> +', '</pre>'),
    )
    text = converter.get_text()
    for pattern, replacement in cleanups:
        text = re.sub(pattern, replacement, text)
    return text

#----------
kludgy_html_parser.py 文件源码 项目:hangoutsbot 作者: das7pad 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def simple_parse_to_segments(html, debug=False, **kwargs):
    """Feed ``html`` to a ``simpleHTMLParser`` and return what ``feed`` returns.

    The markup is first passed through ``fix_urls`` and then wrapped in an
    ``<html>`` element. Extra keyword arguments are accepted and ignored.

    :param html: markup to parse.
    :param debug: forwarded to the ``simpleHTMLParser`` constructor.
    """
    # html.parser seems to ignore the final entityref without html closure
    wrapped = '<html>' + fix_urls(html) + '</html>'
    return simpleHTMLParser(debug).feed(wrapped)
nixiebot.py 文件源码 项目:NixieBot 作者: Zedsquared 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def on_success(self, tweet):
        """Handle one message delivered by the stream.

        Messages without a ``'text'`` key and retweets (those carrying a
        ``'retweeted_status'`` key) are ignored. Everything else is logged
        and, unless its ``id_str`` is already in ``recentIDDeque``, passed
        to ``processIncomingTweet`` and remembered in the deque.

        :param tweet: mapping for the incoming message.
        :return: None
        """
        global recentIDDeque
        if 'text' not in tweet or 'retweeted_status' in tweet :
            return

        # html.unescape() is the supported replacement for
        # HTMLParser().unescape(), which no longer exists from Python 3.9 on.
        print("<<<<<<<<<<<<<<<<<<<  Incoming!<<<<<<<<<<<<<<<<<< " + html.unescape(tweet['text']) + tweet['id_str'])
        if tweet['id_str'] not in recentIDDeque :
            processIncomingTweet(tweet)
            recentIDDeque.appendleft(tweet['id_str'])
        else :
            print("!!!! duplicate!  Ignored ")
        # NOTE(review): this binds a function-local name only (there is no
        # `global backOffTime` here), so it has no effect outside this call.
        # Presumably a module-level or instance back-off was meant - confirm.
        backOffTime = 60
nixiebot.py 文件源码 项目:NixieBot 作者: Zedsquared 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def processIncomingTweet(tweet): #check tweet that has come in via the filter stream, it might have commands in it
    """Route an incoming tweet to the word queue, the roll queue or the random stream.

    * Tagged ``NixieBotShowMe``: if ``extractWord`` finds a word in the
      (HTML-unescaped) text or ``hasCommand`` reports a command, the tweet
      is queued via ``wordqPut`` with the priority from ``prioritise``,
      ``botState['maxWordQ']`` is raised if the queue is now longer, and
      the tweet is appended to ``recentReqs``. Once ``recentReqs`` holds
      more than ``reqPickleFrequency`` entries it is handed to ``pickleMe``
      and, if that reports success, emptied in place.
    * Tagged ``NixieBotRollMe``: the tweet is put on ``rollq``.
    * Otherwise: the tweet is forwarded to ``randstream.on_success``.

    :param tweet: tweet mapping; ``tweet['text']`` and
        ``tweet['user']['screen_name']`` are read here.
    :return: None
    """
    # print(tweet)
    global botState
    global wordq
    global randstream
    if scanTags(tweet,"NixieBotShowMe") :
        # html.unescape() is the supported replacement for
        # HTMLParser().unescape(), which no longer exists from Python 3.9 on.
        theWord = extractWord(html.unescape(tweet['text']))
        if theWord is not None or hasCommand(tweet) :
            wordqPut(tweet,priority = prioritise(tweet))
            size = wordq.qsize()
            if size > botState['maxWordQ'] :
                botState['maxWordQ'] = size
            print("word request from", tweet['user']['screen_name'], "word = ", theWord, " Word queue at:", size, "maxqueue was ", botState['maxWordQ'])
            recentReqs.append(tweet) # store for sending to hard storage every now and then
            if len(recentReqs) > reqPickleFrequency :
                if pickleMe(recentReqs, "Requests", dateStamp=True) :
                    # clear in place so other references to the list see it emptied
                    recentReqs[:]=[]
            #userCounter.update(tweet['user']['screen_name'])
    elif scanTags(tweet,"NixieBotRollMe") :
        rollq.put(tweet)
        print("roll request incoming! Word queue at:", rollq.qsize())
    else :
        #must be a trump tweet so submit to random for now
        randstream.on_success(tweet)

    # DMreceipt bad idea as it still counts against rate limit
    #for ht in tweet['entities']['hashtags']:
    #    if ht['text']=="NBreceipt" and not rct:
    #        sendReceipt(tweet,theWord,tt)
    #        rct=True
nixiebotreader.py 文件源码 项目:NixieBot 作者: Zedsquared 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def on_success(self, tweet):
        """Handle one message delivered by the stream.

        Messages without a ``'text'`` key and retweets (those carrying a
        ``'retweeted_status'`` key) are ignored; everything else is logged
        and passed to ``processIncomingTweet``.

        :param tweet: mapping for the incoming message.
        :return: None
        """
        if 'text' not in tweet or 'retweeted_status' in tweet :
            return

        # html.unescape() is the supported replacement for
        # HTMLParser().unescape(), which no longer exists from Python 3.9 on.
        print("<<<<<<<<<<<<<<<<<<<  Incoming!<<<<<<<<<<<<<<<<<< " + html.unescape(tweet['text']))
        processIncomingTweet(tweet)
        # NOTE(review): this binds a function-local name only (there is no
        # `global backOffTime` here), so it has no effect outside this call.
        # Presumably a module-level or instance back-off was meant - confirm.
        backOffTime = 60
test_standard_library.py 文件源码 项目:packaging 作者: blockstack 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def test_html_import(self):
        # Smoke test: the test passes as long as the html package and its
        # entities / parser submodules can all be imported.
        import html.parser
        import html.entities
        import html
        self.assertTrue(True)
formatter.py 文件源码 项目:deep-learning-nlp-rl-papers 作者: madrugado 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def parse_args():
    """Parse the command line and make sure a ToC-making tool is available.

    Recognised options are ``--toc-maker``, ``--twitter-poster`` (default
    ``"t update"``) and ``-t/--use-twitter``. The first argument argparse
    does not recognise is taken as the path of the file to work with.

    When ``--toc-maker`` is not given, ``./gh-md-toc`` is used. If that
    file does not exist, the release tarball for the current platform is
    downloaded with ``wget`` (unless already present), unpacked with
    ``tar``, deleted, and the tool is made executable for its owner.

    :return: ``(known_args, filepath)`` - the argparse namespace and the
        path of the file to process.
    :raises EnvironmentError: if downloading or unpacking the tool fails.
    :raises SystemExit: with status 1 if no file path was supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--toc-maker", help="path to ToC making tool")
    parser.add_argument("--twitter-poster", default="t update", help="twitter poster command")
    parser.add_argument("-t", "--use-twitter", action="store_true")

    known_args, unknown_args = parser.parse_known_args()

    if not known_args.toc_maker:
        known_args.toc_maker = "./gh-md-toc"
        if not os.path.isfile(known_args.toc_maker):
            # `cmd` is whatever module the file imports under that name; it
            # must provide getoutput()/getstatusoutput().
            s = cmd.getoutput("uname -s").lower()
            f = "gh-md-toc.%s.amd64.tgz" % s
            URL = "https://github.com/ekalinin/github-markdown-toc.go/releases/download/0.6.0/%s" % f
            if not os.path.isfile(f):
                if cmd.getstatusoutput("wget %s" % URL)[0] != 0:
                    raise EnvironmentError("Cannot download toc maker from URL: %s" % URL)
            if cmd.getstatusoutput("tar xzf %s" % f)[0] != 0:
                raise EnvironmentError("Cannot untar toc maker from file %s" % f)
            os.remove(f)

            current_permissions = stat.S_IMODE(os.lstat(known_args.toc_maker).st_mode)
            # OR the owner-execute bit into the existing mode. AND-ing (as
            # before) wiped every other permission bit, leaving the file
            # unreadable or with no permissions at all.
            os.chmod(known_args.toc_maker, current_permissions | stat.S_IXUSR)

    if not unknown_args:
        print("You should specify the path for file to work with!")
        # quit() is an interactive helper injected by `site`; raise the
        # exception it would raise so this also works without `site`.
        raise SystemExit(1)

    return known_args, unknown_args[0]
test_imp.py 文件源码 项目:web_ctp 作者: molebot 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_with_deleted_parent(self):
        # see #18681
        # Reloading html.parser after its parent package has been removed
        # from sys.modules must raise an ImportError that mentions 'html'.
        from html import parser as html_parser
        removed_pkg = sys.modules.pop('html')
        # put the package back once the test is over
        self.addCleanup(lambda: sys.modules.__setitem__('html', removed_pkg))
        with self.assertRaisesRegex(ImportError, 'html'):
            imp.reload(html_parser)
notes.py 文件源码 项目:fondamentibook 作者: xelatihy 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def __init__(self):
        '''Create a parser for the HTMLNode class.'''
        # set up the base class before adding our own state
        super().__init__()
        # start with an empty stack and no root node
        self.stack = []
        self.root = None
notes.py 文件源码 项目:fondamentibook 作者: xelatihy 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def parse(html):
    '''Parse the HTML text `html` and return the root of the
    resulting tree.'''
    tree_builder = _MyHTMLParser()
    tree_builder.feed(html)
    return tree_builder.root
test_imp.py 文件源码 项目:ouroboros 作者: pybee 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def test_with_deleted_parent(self):
        # see #18681
        # With the 'html' package missing from sys.modules, imp.reload() of
        # its parser submodule is expected to fail with an ImportError
        # whose message matches 'html'.
        from html import parser as submodule
        parent_pkg = sys.modules.pop('html')

        def restore_parent():
            # reinstate the package removed above
            sys.modules['html'] = parent_pkg

        self.addCleanup(restore_parent)
        with self.assertRaisesRegex(ImportError, 'html'):
            imp.reload(submodule)
test_imp.py 文件源码 项目:kbe_server 作者: xiaohaoppy 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_with_deleted_parent(self):
        # see #18681
        # Remove the parent package from sys.modules, then check that
        # reloading the already-imported submodule raises ImportError
        # (message matching 'html').
        from html import parser as parser_mod
        saved_html = sys.modules.pop('html')
        # restore sys.modules['html'] when the test finishes
        self.addCleanup(lambda: sys.modules.__setitem__('html', saved_html))
        with self.assertRaisesRegex(ImportError, 'html'):
            imp.reload(parser_mod)
nixiebot.py 文件源码 项目:NixieBot 作者: Zedsquared 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def loadUserFont(fontfile) :
    """Read a font definition file and send every glyph in it over `com`.

    Each non-blank line of `fontfile` is split on "," into a hexadecimal bit
    pattern and a trailing part whose text after the first "-" names the
    character. The resulting character -> bit-pattern table (plus hard-coded
    entries for '-', ',' and '~') is written out glyph by glyph while
    holding `comLock`. The global `userProperChars` is rebuilt from the
    glyphs sent (plus a space) and saved to "uCharSet.txt".

    The current effect settings (`effx`, `fxspeed`) are stashed, effects are
    switched to (0, 0) for the duration, and the stashed values are
    reapplied at the end.

    NOTE(review): the meaning of the "$B7F" / "$B7W" / "$B7M" commands is
    not visible in this block - presumably display-controller commands;
    confirm against the hardware documentation.

    :param fontfile: path of the font definition file to load.
    """
    #load in font file generated from online font designer at http://b7971.lucsmall.com/
    #lines should look like: 0x7622, // 0 - A
    #and the bit order should be reversed using the button at the top of that page
    global comLock
    global userProperChars
    font = {}
    # remember the current effect settings so they can be reapplied afterwards
    stashfx = effx
    stashspeed = fxspeed
    setEffex(0,0)
    userProperChars = ""
    print("loading font")
    with open(fontfile) as ff :
        for line in ff :
            if line == '\n' : continue # cope with blank at end of file
            # parts[0] is the hex bit pattern, parts[1] the "// n - X" tail
            parts = line.split(",")
            print("parts = ",parts)
            bits = parts[0]
            # the character is whatever follows the first "-" in the tail
            letter = parts[1].split("-")[1].strip()
            bitval = int(bits,16) 
            print(bitval,letter)
            font[letter] = bitval
    font['-'] = 0x0022  #nasty hack as hyphen entry is broken by the split("-")
    font[','] = 0x0004  # ditto for comma
    font['~'] = 0x1310  # and tilde
    print(len(font)," characters loaded, now sending")
    with comLock :
        print("loadfont got comlock")
        # first command: "$B7F" followed by one "U" per tube
        cmd = "$B7F" + "U" * tubes
        print(cmd)
        com.write(bytes(cmd+"\r","utf-8")) 
        for glyph in font:
            userProperChars = userProperChars + glyph
            # "$B7W" + character + its bit pattern spelled out as '1'/'0'
            cmd="$B7W"+glyph
            mask =int('0b0100000000000000',2)
            # walk the mask down to bit 0, emitting the most significant bit first
            while mask > 0 :
                if int(font[glyph]) & int(mask) > 0 :
                    cmd = cmd + "1"
                else :
                    cmd = cmd + "0"
                mask = mask >> 1
            print(cmd) 
            com.write(bytes(cmd+"\r","utf-8"))
            # pause before the next command (presumably to let the device
            # process the write - TODO confirm)
            time.sleep(0.3)
            # "$B7M" followed by the glyph repeated once per tube
            cmd="$B7M"+ glyph * tubes
            print(cmd)
            com.write(bytes(cmd+"\r","utf-8"))
        # special case (ok, bodge!) for space as the strip command in the font file parser above will remove it, and all fonts need a space
        cmd="$B7W 000000000000000"
        print(cmd)
        com.write(bytes(cmd+"\r","utf-8"))
        cmd="$B7M                    "
        print(cmd)
        com.write(bytes(cmd+"\r","utf-8"))
        userProperChars = userProperChars + " "
        # put the effect settings back to what they were on entry
        setEffex(stashfx,stashspeed)
        # now write out character set file ( used by proper()  )
        with open("uCharSet.txt",'w' ) as cf :
            cf.write(userProperChars)       
    print("loadfont rel comlock")
test_standard_library.py 文件源码 项目:packaging 作者: blockstack 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def test_future_moves(self):
        """
        Ensure everything is available from the future.moves interface that we
        claim and expect. (Issue #104).
        """
        # The test consists only of import statements: it passes when every
        # one of them succeeds and fails with ImportError otherwise.

        # collections
        from future.moves.collections import Counter, OrderedDict   # backported to Py2.6
        from future.moves.collections import UserDict, UserList, UserString

        from future.moves import configparser
        from future.moves import copyreg

        from future.moves.itertools import filterfalse, zip_longest

        # html package and submodules
        from future.moves import html
        import future.moves.html.entities
        import future.moves.html.parser

        # http package and submodules
        from future.moves import http
        import future.moves.http.client
        import future.moves.http.cookies
        import future.moves.http.cookiejar
        import future.moves.http.server

        from future.moves import queue

        from future.moves import socketserver

        from future.moves.subprocess import check_output              # even on Py2.6
        from future.moves.subprocess import getoutput, getstatusoutput

        from future.moves.sys import intern

        # urllib package and submodules
        from future.moves import urllib
        import future.moves.urllib.error
        import future.moves.urllib.parse
        import future.moves.urllib.request
        import future.moves.urllib.response
        import future.moves.urllib.robotparser

        # winreg is only checked when the interpreter provides _winreg
        try:
            # Is _winreg available on Py2? If so, ensure future.moves._winreg is available too:
            import _winreg
        except ImportError:
            pass
        else:
            from future.moves import winreg

        # xmlrpc package and submodules
        from future.moves import xmlrpc
        import future.moves.xmlrpc.client
        import future.moves.xmlrpc.server

        # underscore-prefixed modules
        from future.moves import _dummy_thread
        from future.moves import _markupbase
        from future.moves import _thread


问题


面经


文章

微信
公众号

扫码关注公众号