Python page() example source code

wiki_summary.py (project: WikiSummary, author: Mikerah)
def get_random_articles_v1(number_of_articles_wanted):
    """Given the wanted number of articles, print summaries of random Wikipedia articles."""
    if number_of_articles_wanted == 1:
        print(wikipedia.summary(wikipedia.random()))
    else:
        list_of_articles = wikipedia.random(number_of_articles_wanted)
        for index, title in enumerate(list_of_articles, 1):
            try:
                # Replace disambiguation pages with a fresh random title.
                if 'disambiguation' in wikipedia.page(title).title:
                    title = wikipedia.random()
                print(index, "-", wikipedia.summary(title))
                print()
            except wikipedia.exceptions.DisambiguationError:
                # wikipedia.page() can also raise for ambiguous titles; draw a replacement.
                print(index, "-", wikipedia.summary(wikipedia.random()))
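A minimal usage sketch for the helper above (assumes the third-party wikipedia package is installed and network access is available):

import wikipedia  # pip install wikipedia

# Print numbered summaries of three random articles.
get_random_articles_v1(3)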
websearch.py (project: jarvis, author: anqxyr)
def tvtropes(inp, *, query):
    """Show laconic description of the trope, and a link to the full page."""
    query = query.title().replace(' ', '')
    baseurl = 'http://tvtropes.org/{}/' + query
    url = baseurl.format('Laconic')
    soup = bs4.BeautifulSoup(requests.get(url).text, 'lxml')
    text = soup.find(class_='page-content').find('hr')
    if text is None:
        return lex.tvtropes.not_found
    text = reversed(list(text.previous_siblings))
    text = [i.text if hasattr(i, 'text') else i for i in text]
    text = [str(i).strip() for i in text]
    return '{} {}'.format(' '.join(text), baseurl.format('Main'))
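A hypothetical invocation of this handler (assumes requests, bs4, and the module's lex object are importable; the trope name is illustrative):

# The inp argument is not used in the body shown above, so None suffices here.
print(tvtropes(None, query='chekhovs gun'))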

###############################################################################
# Kaktuskast
###############################################################################
wiki.py (project: Abb1t, author: k-freeman)
def run(self):
        while True:
            msg = self.queue_in.get()  # get() is blocking
            match = re.search(r'^(?:/|!)wiki (.*)$', msg.get_text().lower())
            if match:
                reply = ""
                try:
                    related_entries = wikipedia.search(match.group(1))
                    w = wikipedia.page(match.group(1))
                    reply1 = "*{}*\n".format(w.title)
                    reply2 = "{}\n".format(w.summary)
                    reply3 = "\n*related topics*:\n- {}".format("\n- ".join(related_entries))

                    if len(reply1+reply2+reply3)>4096:
                        reply = reply1 + reply2[:4092-len(reply1)-len(reply3)]+"...\n" + reply3 # shortening to 4096 characters
                    else:
                        reply = reply1+reply2+reply3
                except wikipedia.DisambiguationError as e:
                    related_entries = str(e).split(":",1)[1].split("\n")[1:]
                    reply = "This was too inspecific. Choose one from these:\n- {}".format("\n- ".join(related_entries))
                except:
                    reply = "No matches returned for this request."
                if reply:
                    self.bot.sendMessage(msg.get_chat_id(), reply, parse_mode="Markdown")
commands.py (project: AceBot, author: Run1e)
def wikirandom(self, ctx):
        """Get a random wikipedia page."""

        await ctx.trigger_typing()
        try:
            page_name = wikipedia.random(1)
        except:
            return await ctx.invoke(self.wikirandom)

        try:
            wiki = wikipedia.page(page_name)
            for attr in ('summary', 'url', 'title'):
                if not hasattr(wiki, attr):
                    return await ctx.invoke(self.wikirandom)
        except wikipedia.exceptions.DisambiguationError:
            return await ctx.invoke(self.wikirandom)
        await self.embedwiki(ctx, wiki)
bot_utils.py (project: chalice-linebot, author: c-bata)
def wikipedia_search(word):
    """Search a word meaning on wikipedia."""
    wikipedia.set_lang('ja')
    results = wikipedia.search(word)

    # get first result
    if results:
        page = wikipedia.page(results[0])
        msg = page.title + "\n" + page.url
    else:
        msg = '`{}` に一致するページはありません'.format(word)
    return msg
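A quick usage sketch (the query is illustrative; assumes the wikipedia package is importable):

# Prints the page title on one line and its URL on the next.
print(wikipedia_search('東京タワー'))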


# ====================================
# Google News
# ====================================
simplewikipedia.py (project: hangoutsbot, author: das7pad)
def wiki(bot, event, *args):
    """lookup a term on Wikipedia"""

    term = " ".join(args)
    if not term:
        return

    try:
        page = wikipedia.page(term, auto_suggest=False)

        summary = page.summary.strip()
        summary = summary.replace('\r\n', '\n').replace('\r', '\n')
        summary = re.sub('\n+', "\n", summary).replace('\n', '\n\n')
        source = _('<i>source: <a href="{}">{}</a></i>').format(page.url, page.url)

        html_text = '<b>"{}"</b>\n\n{}\n\n{}'.format(term, summary, source)
    except wikipedia.exceptions.PageError:
        html_text = _("<i>no entry found for {}</i>").format(term)
    except wikipedia.exceptions.DisambiguationError as e:
        exception_text = str(e).strip().replace("\n", "\n\n")  # double newlines to match the summary formatting
        html_text = "<i>{}</i>".format(exception_text)

    return html_text
wikipedia.py (project: apex-sigma-core, author: lu-ci)
def wikipedia(cmd, message, args):
    if args:
        try:
            summary_task = functools.partial(wiki.page, ' '.join(args).lower())
            with ThreadPoolExecutor() as threads:
                page = await cmd.bot.loop.run_in_executor(threads, summary_task)

            response = discord.Embed(color=0xF9F9F9)
            response.set_author(
                name=f'Wikipedia: {page.title}',
                url=page.url,
                icon_url='https://upload.wikimedia.org/wikipedia/commons/6/6e/Wikipedia_logo_silver.png'
            )
            response.description = f'{page.summary[:800]}...'
        except wiki.PageError:
            response = discord.Embed(color=0x696969, title='🔍 No results.')
        except wiki.DisambiguationError:
            response = discord.Embed(color=0xBE1931, title='❗ Search too broad, please be more specific.')
    else:
        response = discord.Embed(color=0xBE1931, title='❗ Nothing inputted.')
    await message.channel.send(None, embed=response)
search.py (project: tuxbot-bot, author: outout14)
def search_aur(self, ctx, args):
        attends = await ctx.send("_Je te cherche ça {} !_".format(ctx.message.author.mention))
        erreur = 0
        try:
            html = urllib.request.urlopen("https://aur.archlinux.org/packages/" + args).read()
        except:
            erreur = 1

        if erreur == 1:
            await attends.delete()
            embed = discord.Embed(description=":sob: Je n'ai pas trouvé le packet mais j'ai lancé une petite recherche, tu y trouveras peut être ton bonheur ? https://aur.archlinux.org/packages/?K=" + args,url='https://aur.archlinux.org/')
            embed.set_author(name="Aur.archlinux", url='https://aur.archlinux.org/', icon_url='http://outout.tech/tuxbot/arch.png')
            embed.set_thumbnail(url='http://outout.tech/tuxbot/arch.png')
            embed.set_footer(text="Pff même pas trouvé !")
            await ctx.send(embed=embed)

        else:
            await attends.delete()
            embed = discord.Embed(description="Et voila, j'ai trouvé la page sur le packet : https://aur.archlinux.org/packages/{0} ! \n Ca te dit un petit ``pacaur -S {0}`` ?".format(args), url='https://aur.archlinux.org/')
            embed.set_author(name="Aur.archlinux", url='https://aur.archlinux.org/', icon_url='http://outout.tech/tuxbot/arch.png')
            embed.set_thumbnail(url='http://outout.tech/tuxbot/arch.png')
            embed.set_footer(text="C'est vrai que pacman et pacaur sont mieux qu'APT ^^")
            await ctx.send(embed=embed)
wikipedia.py (project: Mash-Cogs, author: Canule)
def wikipedia(self, ctx, *text):
        """Wikipedia search."""     

        if text == ():
            await send_cmd_help(ctx)
            return
        else:
            search = "_".join(text)
            user = ctx.message.author
            wikiLang = 'en'  # Wikipedia language code; most ISO 639-1 codes are supported
            ws = None
            wikipedia.set_lang(wikiLang)
            try:
                ws = wikipedia.page(search)
                wikiUrl = (ws.url.encode('ascii', 'xmlcharrefreplace'))
                await self.bot.say(wikiUrl.decode("utf8"))
            except:
                await self.bot.say( 'Sorry {}, no wiki hit, try to rephrase'.format(user))
wikify.py (project: cdata, author: cnschema)
def wikipedia_search_slow(query, lang="en", max_result=1):
    import wikipedia
    #wikification
    query = any2unicode(query)
    items = []
    ret = {"query":query, "itemList":items}
    wikipedia.set_lang(lang)
    wikiterm = wikipedia.search(query)
    #logging.info(wikiterm)
    for idx, term in enumerate(wikiterm[0:max_result]):
        wikipage = wikipedia.page(term)
        item = {
            "name": wikipage.title,
            "description": wikipedia.summary(term, sentences=1),
            "url": wikipage.url,
        }
        items.append(item)

    return ret
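For reference, a sketch of the returned structure (values are illustrative):

result = wikipedia_search_slow("alan turing")
# {
#     "query": "alan turing",
#     "itemList": [
#         {
#             "name": "Alan Turing",
#             "description": "Alan Mathison Turing was an English mathematician ...",
#             "url": "https://en.wikipedia.org/wiki/Alan_Turing",
#         }
#     ]
# }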
api_adapters.py (project: wikipedia_parser, author: ojones)
def fetch_wobj(id):
    # TODO: isdigit is not robust enough; a title could be a number rather than an id
    wobj = None

    try:
        if str(id).isdigit():
            wobj = wikipedia.page(pageid=id, auto_suggest=False)
        else:
            wobj = wikipedia.page(title=id, auto_suggest=False)
    except:
        # error in 3rd party python-wikipedia package
        pass

    return wobj
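A short sketch exercising both lookup paths (the numeric pageid is hypothetical):

by_title = fetch_wobj('Python (programming language)')
by_id = fetch_wobj('12345')  # hypothetical pageid; digit strings are routed to pageid=
for wobj in (by_title, by_id):
    if wobj is not None:
        print(wobj.title, wobj.url)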


# wobj
api_adapters.py (project: wikipedia_parser, author: ojones)
def fetch_api_categories(id, wobj):

    categories = []

    try:
        if str(id).isdigit() and wobj:
            categories = wobj.categories
        else:
            page = fetch_mwclient(id)
            for category in list(page.categories()):
                categories.append(category.name)
            return categories
    except:
        pass

    return categories
bot_feature.py (project: eatradish_haoyebot, author: eatradish)
def wikipedia_summary(msg, lang='en'):
    try:
        wikipedia.set_lang(lang)
        url = wikipedia.page(msg).url
        summary = wikipedia.summary(msg)
        # Keep only the first paragraph of the summary.
        summary = summary.split('\n', 1)[0]
        return summary + '\n' + url
    except:
        return "Page or language not found"
minbo.py (project: minbo, author: minka-it)
def wiki(message):
    chat_id = message.chat.id
    param = message.text.split(' ', 1)  # split the command from its parameters
    if len(param) == 1 or param[1]=="help":
        bot.send_message(chat_id,text_messages['help_wiki'])
    else:
        bot.send_message(chat_id, "Consultando en Wikipedia...")
        try:
            wiki = wikipedia.page(param[1])
            bot.send_message(chat_id, wiki.summary)
            bot.send_message(chat_id, "Consulta mas en:\n"+wiki.url)
        except wikipedia.exceptions.DisambiguationError as e:
            bot.send_message(chat_id, "'"+param[1]+"'"+" puede referirse a:")
            bot.send_message(chat_id, '\n'.join(e.options))
        except wikipedia.exceptions.PageError as e:
            bot.send_message(chat_id, "No se encontro ninguna pagina, intenta con otra consulta!")
        except Exception as e:
            print(e)
            bot.send_message(chat_id,"Tengo un bug en mi estomago!")
websearch.py (project: jarvis, author: anqxyr)
def wikipedia(inp, *, query):
    """Get wikipedia page about the topic."""
    try:
        page = wiki.page(query)
    except wiki.exceptions.PageError:
        return lex.wikipedia.not_found
    except wiki.exceptions.DisambiguationError as e:
        tools.save_results(inp, e.options, lambda x: wikipedia(inp, query=x))
        return lex.unclear(options=e.options)

    return lex.wikipedia.result(
        title=page.title, url=page.url, text=page.content)
commands.py (project: AceBot, author: Run1e)
def wikipedia(self, ctx, *, query):
        """Preview a Wikipedia article."""

        await ctx.trigger_typing()

        try:
            wiki = wikipedia.page(query)
        except:
            return await ctx.send('No results.')

        await self.embedwiki(ctx, wiki)
generate_response.py (project: Alfred, author: JohnGiorgi)
def wikipediaAction(message):
    """Makes the appropriate calls to the wikipedia API to answer wiki queries.

    Args:
        message: An incoming text message

    Returns:
        A message indicating what action was taken with the wikipedia API
    """
    # tokenize input
    tokens = tokenize.wordpunct_tokenize(message)
    # filter stopwords, additionally, remove 'wiki' or 'wikipedia'
    tokens_filtered = remove_stopwords(tokens)
    tokens_filtered = [token for token in tokens_filtered if token != 'wiki' and token != 'wikipedia']
    # join filtered message
    message = ' '.join(tokens_filtered)

    # for debugging/testing
    print("(Highly) processed input: ", message)

    # Get the wikipedia summary for the request
    try:
        summary = wikipedia.summary(message, sentences = 1)
        url = wikipedia.page(message).url
        answer = summary + "\nSee more here: " + url
        if len(answer) > 500:
            answer = answer[0:500] + "\nSee wikipedia for more..."
    except:
        # handle all errors
        answer = "Request was not found using Wikipedia. Be more specific?"

    return answer
download_data.py (project: LDA-REST, author: valentinarho)
def download_single(wiki_page_name, only_summary=False, language='en'):
    """
    Download the content of a wikipedia page

    :param wiki_page_name: the name of the page
    :param only_summary: if True, download only the page summary
    :param language: the Wikipedia language code
    :return: the page text
    """

    wikipedia.set_lang(language)
    if only_summary:
        return wikipedia.summary(wiki_page_name)
    else:
        page = wikipedia.page(wiki_page_name)
        return page.content
download_data.py (project: LDA-REST, author: valentinarho)
def download_all(wiki_page_names, only_summary=False, language='en'):
    contents = {}
    for pn in wiki_page_names:
        contents[pn] = download_single(pn, only_summary=only_summary, language=language)

    return contents


# TODO if starts with http or www get only the page name
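A minimal sketch combining the two helpers above (page names are illustrative):

pages = ['Alan Turing', 'CERN']
summaries = download_all(pages, only_summary=True, language='en')
for name, text in summaries.items():
    print(name, '->', len(text), 'characters')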
wiki.py (project: Jarvis, author: sukeesh)
def summary(query, sentences=0, chars=0):
    """Returns a plain text summary from the query's page."""
    try:
        return wikipedia.summary(query, sentences, chars)
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        return error.options[:5]
wiki.py (project: Jarvis, author: sukeesh)
def content(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Returns plain text content of query's page, excluding images, tables and other data."""
    try:
        # Forward all arguments so pageid/auto_suggest/redirect/preload are honoured.
        page = wikipedia.page(title=title, pageid=pageid, auto_suggest=auto_suggest,
                              redirect=redirect, preload=preload)
        return page.content
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        return error.options[:5]
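A usage sketch for these two wrappers (the query is illustrative; on a disambiguation hit each returns up to five candidate titles instead of text):

print(summary('Python (programming language)', sentences=2))
print(content(title='Python (programming language)')[:300])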
__init__.py (project: wikiphilosophy, author: adtac)
def next_link(cur, done):
    try:
        g = wikipedia.page(cur).html()
    except wikipedia.exceptions.DisambiguationError as e:
        for op in e.options:
            if op not in done:
                g = wikipedia.page(op).html()
                break
        else:
            return None  # every disambiguation option was already visited
    # Strip parenthesised text so links inside parentheses are ignored.
    soup = BeautifulSoup(re.sub(r'\([^)]*\)', '', g), "html.parser")
    for para in soup.findAll("p"):
        for link in para.findAll("a"):
            if link.get("href").startswith("/wiki/") and link.get("title") not in done and link.contents[0].islower():
                return link.get("title")
wikipedia_quiz.py (project: WikipediaQuiz, author: NicholasMoser)
def retrieve_random_passage(page, length):
    """Given a wikipedia page and length, retrieves a random passage of text from
    the content of the wikipedia page with the given length.
    """
    content = page.content
    content_length = len(content)
    if length > content_length:
        length = content_length - 1
    start = random.randrange(len(content) - length)
    end = start + length
    return content[start:end]
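A usage sketch (assumes the wikipedia and random modules are imported as in this file; the page is illustrative):

page = wikipedia.page('CERN')
print(retrieve_random_passage(page, 200))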
search.py (project: tuxbot-bot, author: outout14)
def search_docubuntu(self, ctx, args):
        attends = await ctx.send("_Je te cherche ça {} !_".format(ctx.message.author.mention))
        html = urllib.request.urlopen("https://doc.ubuntu-fr.org/" + args).read()
        if "avez suivi un lien" in str(html):
           await attends.edit(content=":sob: Nooooon ! Cette page n'existe pas, mais tu peux toujours la créer : https://doc.ubuntu-fr.org/"+ args)
        else:
           await attends.delete()
           embed = discord.Embed(description="Voila j'ai trouvé ! Voici la page ramenant à votre recherche, toujours aussi bien rédigée :wink: : https://doc.ubuntu-fr.org/" + args, url='http://doc.ubuntu-fr.org/')
           embed.set_author(name="DocUbuntu-Fr", url='http://doc.ubuntu-fr.org/', icon_url='http://outout.tech/tuxbot/ubuntu.png')
           embed.set_thumbnail(url='http://outout.tech/tuxbot/ubuntu.png')
           embed.set_footer(text="Merci à ceux qui ont pris le temps d'écrire cette documentation")
           await ctx.send(embed=embed)
admin.py (project: falco, author: nathan0)
def randwiki(irc, source, msgtarget, args):
    rand = wikipedia.random(pages=1)
    url = wikipedia.page(rand).url
    irc.msg(msgtarget, "Random Article: {} - \x1d{}\x1d".format(rand, url))
    irc.msg(msgtarget, wikipedia.summary(rand, sentences=2, chars=250, auto_suggest=True))
admin.py (project: falco, author: nathan0)
def wiki(irc, source, msgtarget, args):
    try:
        url = wikipedia.page(args).url
        page = wikipedia.summary(wikipedia.search(args)[0], sentences=2, auto_suggest=True)
        irc.msg(msgtarget, page)
        irc.msg(msgtarget, "More at \x1d"+url)
    except wikipedia.exceptions.DisambiguationError as e:
        bot_commands["wiki"](irc, source, msgtarget, e.options[0])
    except wikipedia.exceptions.PageError:
        irc.msg(msgtarget, "No page could be found")
define.py (project: jessy, author: jessy-project)
def ask_wikipedia(self, definition):
        '''
        Ask Wikipedia for the definition.

        :param definition:
        :return:
        '''
        # TODO: this method should run in a separate process, asynchronously

        is_exact = False
        out = []
        if not wikipedia:
            return is_exact, out

        page_titles = wikipedia.search(definition)
        page = None
        if page_titles:
            for page_title in page_titles:
                if page_title.lower() == definition:
                    try:
                        page = wikipedia.page(page_title)
                        is_exact = True
                    except DisambiguationError as ex:
                        out.append(Phrase().text('This can refer to many things, such as {0}'.format(self.join_for_more(ex.options, limit=None))))
                        return is_exact, out

            if not page and 'disambiguation' not in page_titles[0]:
                try:
                    page = wikipedia.page(page_titles[0])
                except Exception as ex:
                    out.append(Phrase().text(str(ex)))

        if page and not out:
            out.append(Phrase().text(page.content.split('==')[0]
                                     .split('\n')[0]
                                     .encode('utf-8', 'ignore')).pause(1))
        return is_exact, out
wikipedia.py (project: pyconjpbot, author: pyconjp)
def wikipedia_page(message, option, query):
    """
    Search Wikipedia for the given query and post the first result.
    """
    if query == 'help':
        return

    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)

    try:
        # search with query
        results = wikipedia.search(query)
    except:
        botsend(message, '指定された言語 `{}` は存在しません'.format(lang))
        return

    # get first result
    if results:
        page = wikipedia.page(results[0])

        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        botsend(message, '`{}` に一致するページはありません'.format(query))
classifier.py (project: quantulum, author: marcolagi)
def download_wiki():
    """Download WikiPedia pages of ambiguous units."""
    ambiguous = [i for i in l.UNITS.items() if len(i[1]) > 1]
    ambiguous += [i for i in l.DERIVED_ENT.items() if len(i[1]) > 1]
    pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])

    print
    objs = []
    for num, page in enumerate(pages):

        obj = {'url': page[1]}
        obj['_id'] = obj['url'].replace('https://en.wikipedia.org/wiki/', '')
        obj['clean'] = obj['_id'].replace('_', ' ')

        print '---> Downloading %s (%d of %d)' % \
              (obj['clean'], num + 1, len(pages))

        obj['text'] = wikipedia.page(obj['clean']).content
        obj['unit'] = page[0]
        objs.append(obj)

    path = os.path.join(l.TOPDIR, 'wiki.json')
    os.remove(path)
    json.dump(objs, open(path, 'w'), indent=4, sort_keys=True)

    print '\n---> All done.\n'


###############################################################################
tests.py (project: quantulum, author: marcolagi)
def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.

    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory
    """
    content = wikipedia.page(page).content
    parsed = p.parse(content)
    parts = int(round(len(content) * 1.0 / 1000))

    print
    end_char = 0
    for num, chunk in enumerate(range(parts)):
        _ = os.system('clear')
        print
        quants = [j for j in parsed if chunk * 1000 < j.span[0] < (chunk + 1) *
                  1000]
        beg_char = max(chunk * 1000, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print COLOR2 % text
        print
        try:
            _ = raw_input('--------- End part %d of %d\n' % (num + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return


###############################################################################

