def get_random_articles_v1(number_of_articles_wanted):
    """Print summaries of randomly chosen Wikipedia articles.

    :param number_of_articles_wanted: how many article summaries to print.

    Disambiguation pages are skipped and replaced with a fresh random
    title so the caller still gets the requested number of summaries.
    Fixes over the original: no mutation of the list while iterating,
    ``wikipedia.random()`` is called without a title argument (it takes a
    page *count*), and the DisambiguationError handler no longer references
    a possibly-unbound variable.
    """
    if number_of_articles_wanted == 1:
        print(wikipedia.summary(wikipedia.random()))
        return
    # wikipedia.random(n) returns a list of n random page titles.
    pending = wikipedia.random(number_of_articles_wanted)
    printed = 0
    while pending:
        title = pending.pop(0)
        try:
            page_title = wikipedia.page(title).title
            if ('disambiguation' in page_title) or ('it may refer to' in page_title):
                # Draw a replacement for the disambiguation page.
                pending.append(wikipedia.random())
                continue
            printed += 1
            print(printed, " - " + wikipedia.summary(title))
            print()
        except wikipedia.exceptions.DisambiguationError:
            # Ambiguous title: replace it with a fresh random one.
            pending.append(wikipedia.random())
# --- Collected examples of wikipedia.page() usage ---
def tvtropes(inp, *, query):
    """Show laconic description of the trope, and a link to the full page."""
    # TV Tropes page names are CamelCase with no spaces (e.g. "BigBad").
    query = query.title().replace(' ', '')
    baseurl = 'http://tvtropes.org/{}/' + query
    url = baseurl.format('Laconic')
    soup = bs4.BeautifulSoup(requests.get(url).text, 'lxml')
    # The laconic blurb is everything in page-content before the first <hr>.
    text = soup.find(class_='page-content').find('hr')
    if text is None:
        return lex.tvtropes.not_found
    text = reversed(list(text.previous_siblings))
    # Siblings may be tags (have .text) or bare NavigableStrings.
    text = [i.text if hasattr(i, 'text') else i for i in text]
    text = [str(i).strip() for i in text]
    # Append a link to the full ("Main") article.
    return '{} {}'.format(' '.join(text), baseurl.format('Main'))
###############################################################################
# Kaktuskast
###############################################################################
def run(self):
    """Worker loop: answer '/wiki <query>' (or '!wiki') messages from the queue."""
    while True:
        msg = self.queue_in.get()  # get() is blocking
        match = re.search(r'^(?:/|!)wiki (.*)$', msg.get_text().lower())
        if match:
            reply = ""
            try:
                related_entries = wikipedia.search(match.group(1))
                w = wikipedia.page(match.group(1))
                reply1 = "*{}*\n".format(w.title)
                reply2 = "{}\n".format(w.summary)
                reply3 = "\n*related topics*:\n- {}".format("\n- ".join(related_entries))
                # Telegram messages are capped at 4096 characters; trim the
                # summary (reply2) so title + summary + topics still fit.
                if len(reply1+reply2+reply3)>4096:
                    reply = reply1 + reply2[:4092-len(reply1)-len(reply3)]+"...\n" + reply3 # shortening to 4096 characters
                else:
                    reply = reply1+reply2+reply3
            except wikipedia.DisambiguationError as e:
                # The exception text lists candidate titles, one per line,
                # after the first "...may refer to:" line.
                related_entries = str(e).split(":",1)[1].split("\n")[1:]
                reply = "This was too inspecific. Choose one from these:\n- {}".format("\n- ".join(related_entries))
            except:
                # NOTE(review): bare except also hides programming errors —
                # consider narrowing to Exception.
                reply = "No matches returned for this request."
            if reply:
                self.bot.sendMessage(msg.get_chat_id(), reply, parse_mode="Markdown")
async def wikirandom(self, ctx):
    """Get a random wikipedia page.

    Restored ``async def`` (the body awaits, so a plain ``def`` is a
    SyntaxError); narrowed the bare except and dropped the unused ``as e``.
    Retries itself whenever the random draw fails, the page is incomplete,
    or a disambiguation page is hit.
    """
    await ctx.trigger_typing()
    try:
        page_name = wikipedia.random(1)
    except Exception:
        # The random endpoint occasionally fails; just re-roll.
        return await ctx.invoke(self.wikirandom)
    try:
        wiki = wikipedia.page(page_name)
        # Re-roll when the page object lacks anything the embed needs.
        for attr in ('summary', 'url', 'title'):
            if not hasattr(wiki, attr):
                return await ctx.invoke(self.wikirandom)
    except wikipedia.exceptions.DisambiguationError:
        return await ctx.invoke(self.wikirandom)
    await self.embedwiki(ctx, wiki)
def wikipedia_search(word):
    """Search a word meaning on wikipedia."""
    wikipedia.set_lang('ja')
    hits = wikipedia.search(word)
    if not hits:
        # No search hits: return the "not found" message.
        return '`{}` ??????????????'.format(word)
    # Resolve the top hit to a full page and report title + URL.
    top_page = wikipedia.page(hits[0])
    return top_page.title + "\n" + top_page.url
# ====================================
# Google News
# ====================================
def wiki(bot, event, *args):
    """lookup a term on Wikipedia

    Returns an HTML snippet with the page summary and a source link, or an
    error message.  Fix: removed the no-op ``.replace("\\n", "\\n")`` in the
    DisambiguationError handler.
    """
    term = " ".join(args)
    if not term:
        return
    try:
        page = wikipedia.page(term, auto_suggest=False)
        summary = page.summary.strip()
        # Normalise line endings, collapse blank runs, then double-space
        # paragraphs for chat display.
        summary = summary.replace('\r\n', '\n').replace('\r', '\n')
        summary = re.sub('\n+', "\n", summary).replace('\n', '\n\n')
        source = _('<i>source: <a href="{}">{}</a></i>').format(page.url, page.url)
        html_text = '<b>"{}"</b>\n\n{}\n\n{}'.format(term, summary, source)
    except wikipedia.exceptions.PageError:
        html_text = _("<i>no entry found for {}</i>").format(term)
    except wikipedia.exceptions.DisambiguationError as e:
        # The exception text already lists the candidate pages.
        exception_text = str(e).strip()
        html_text = "<i>{}</i>".format(exception_text)
    return html_text
async def wikipedia(cmd, message, args):
    """Reply with an embed previewing the Wikipedia article for *args*.

    Restored ``async def`` — the body awaits, so a plain ``def`` is a
    SyntaxError.
    """
    if args:
        try:
            # wiki.page() is blocking; run it in a thread so the event loop
            # stays responsive.
            summary_task = functools.partial(wiki.page, ' '.join(args).lower())
            with ThreadPoolExecutor() as threads:
                page = await cmd.bot.loop.run_in_executor(threads, summary_task)
            response = discord.Embed(color=0xF9F9F9)
            response.set_author(
                name=f'Wikipedia: {page.title}',
                url=page.url,
                icon_url='https://upload.wikimedia.org/wikipedia/commons/6/6e/Wikipedia_logo_silver.png'
            )
            # Truncate long summaries for the embed description.
            response.description = f'{page.summary[:800]}...'
        except wiki.PageError:
            response = discord.Embed(color=0x696969, title='?? No results.')
        except wiki.DisambiguationError:
            response = discord.Embed(color=0xBE1931, title='? Search too broad, please be more specific.')
    else:
        response = discord.Embed(color=0xBE1931, title='? Nothing inputted.')
    await message.channel.send(None, embed=response)
async def search_aur(self, ctx, args):
    """Look up an AUR package page and report the result (messages in French).

    Restored ``async def`` (the body awaits); replaced the bare except plus
    ``erreur`` flag with a try/except/else around the single failing call.
    """
    attends = await ctx.send("_Je te cherche ça {} !_".format(ctx.message.author.mention))
    try:
        # Only used as an existence probe; the body is discarded.
        urllib.request.urlopen("https://aur.archlinux.org/packages/" + args).read()
    except Exception:
        # Package page unreachable: suggest a keyword search instead.
        await attends.delete()
        embed = discord.Embed(description=":sob: Je n'ai pas trouvé le packet mais j'ai lancé une petite recherche, tu y trouveras peut être ton bonheur ? https://aur.archlinux.org/packages/?K=" + args,url='https://aur.archlinux.org/')
        embed.set_author(name="Aur.archlinux", url='https://aur.archlinux.org/', icon_url='http://outout.tech/tuxbot/arch.png')
        embed.set_thumbnail(url='http://outout.tech/tuxbot/arch.png')
        embed.set_footer(text="Pff même pas trouvé !")
        await ctx.send(embed=embed)
    else:
        await attends.delete()
        embed = discord.Embed(description="Et voila, j'ai trouvé la page sur le packet : https://aur.archlinux.org/packages/{0} ! \n Ca te dit un petit ``pacaur -S {0}`` ?".format(args), url='https://aur.archlinux.org/')
        embed.set_author(name="Aur.archlinux", url='https://aur.archlinux.org/', icon_url='http://outout.tech/tuxbot/arch.png')
        embed.set_thumbnail(url='http://outout.tech/tuxbot/arch.png')
        embed.set_footer(text="C'est vrai que pacman et pacaur sont mieux qu'APT ^^")
        await ctx.send(embed=embed)
async def wikipedia(self, ctx, *text):
    """Wikipedia search.

    Restored ``async def`` (the body awaits); replaced ``text == ()`` with
    truthiness, removed the dead ``s``/``ws`` locals, and narrowed the bare
    except to Exception.
    """
    if not text:
        await send_cmd_help(ctx)
        return
    # Join the words with underscores, as in wiki page names.
    search = "_".join(text)
    user = ctx.message.author
    wikiLang = 'en'  # Define the Wikipedia language / Most of these are supported » https://nl.wikipedia.org/wiki/ISO_3166-1
    wikipedia.set_lang(wikiLang)  # Set the Wikipedia language.
    try:
        page = wikipedia.page(search)
        # Encode non-ASCII characters as XML references so the URL is safe
        # to send as plain text.
        wikiUrl = (page.url.encode('ascii', 'xmlcharrefreplace'))
        await self.bot.say(wikiUrl.decode("utf8"))
    except Exception:
        await self.bot.say( 'Sorry {}, no wiki hit, try to rephrase'.format(user))
def wikipedia_search_slow(query, lang="en", max_result=1):
    """Wikify *query*: return the top Wikipedia hits as a result dict."""
    import wikipedia
    #wikification
    query = any2unicode(query)
    items = []
    ret = {"query":query, "itemList":items}
    wikipedia.set_lang(lang)
    terms = wikipedia.search(query)
    #logging.info(terms)
    # Resolve up to max_result search hits into name/description/url items.
    for term in terms[:max_result]:
        page = wikipedia.page(term)
        items.append({
            "name": page.title,
            "description": wikipedia.summary(term, sentences=1),
            "url": page.url,
        })
    return ret
def fetch_wobj(id):
    """Fetch a wikipedia page object by numeric pageid or by title.

    :param id: a page id (digits) or a page title.
    :return: the page object, or None when the lookup fails.
    """
    # TODO: isdigit is not robust enough, a title could be number instead of an id
    wobj = None
    try:
        if str(id).isdigit():
            wobj = wikipedia.page(pageid=id, auto_suggest=False)
        else:
            wobj = wikipedia.page(title=id, auto_suggest=False)
    except Exception:
        # error in 3rd party python-wikipedia package — deliberately
        # best-effort, but no longer a bare except (was swallowing
        # KeyboardInterrupt/SystemExit too).
        pass
    return wobj
# wobj
def fetch_api_categories(id, wobj):
    """Return the category names for a page.

    Uses the cached page object *wobj* when *id* is a numeric pageid,
    otherwise falls back to an mwclient lookup by title.  Returns an empty
    list on any failure.  Fix: narrowed the bare except to Exception.
    """
    categories = []
    try:
        if id.isdigit() and wobj:
            categories = wobj.categories
        else:
            page = fetch_mwclient(id)
            for category in list(page.categories()):
                categories.append(category.name)
        return categories
    except Exception:
        # Best effort: swallow lookup errors and return what we have.
        pass
    return categories
def wikipedia_summary(msg, lang = 'en'):
    """Return the first paragraph of the article summary plus its URL.

    :param msg: search term / page title.
    :param lang: wikipedia language code (default 'en').
    :return: "<first paragraph>\\n<url>", or an error string on failure.

    Fixes: the redundant ``if lang == 'en'`` branch collapsed into a single
    set_lang call; the character-by-character truncation loop replaced with
    a split on the first newline; bare except narrowed to Exception.
    """
    try:
        wikipedia.set_lang(lang)
        url = wikipedia.page(msg).url
        msg = wikipedia.summary(msg)
        # Keep only the text before the first newline (first paragraph).
        msg = msg.split('\n', 1)[0]
        return msg + '\n' + url
    except Exception:
        return "Not Found Page or LANG"
def wiki(message):
    """Handle a Telegram /wiki command: reply with summary and URL (Spanish UI).

    Fix: the Python-2-only ``except Exception, e:`` / ``print e`` are
    SyntaxErrors under Python 3; rewritten with py3 syntax (also valid on
    py2.6+).
    """
    chat_id = message.chat.id
    param = message.text.split(' ',1) #separa el comando de los parametros
    if len(param) == 1 or param[1]=="help":
        bot.send_message(chat_id,text_messages['help_wiki'])
    else:
        bot.send_message(chat_id, "Consultando en Wikipedia...")
        try:
            wiki = wikipedia.page(param[1])
            bot.send_message(chat_id, wiki.summary)
            bot.send_message(chat_id, "Consulta mas en:\n"+wiki.url)
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous query: list the candidate titles.
            bot.send_message(chat_id, "'"+param[1]+"'"+" puede referirse a:")
            bot.send_message(chat_id, '\n'.join(e.options))
        except wikipedia.exceptions.PageError:
            bot.send_message(chat_id, "No se encontro ninguna pagina, intenta con otra consulta!")
        except Exception as e:
            print(e)
            bot.send_message(chat_id,"Tengo un bug en mi estomago!")
def wikipedia(inp, *, query):
    """Get wikipedia page about the topic."""
    try:
        page = wiki.page(query)
    except wiki.exceptions.PageError:
        return lex.wikipedia.not_found
    except wiki.exceptions.DisambiguationError as e:
        # Save the options so a follow-up command can pick one; the lambda
        # re-runs this handler with the chosen title.
        tools.save_results(inp, e.options, lambda x: wikipedia(inp, query=x))
        return lex.unclear(options=e.options)
    return lex.wikipedia.result(
        title=page.title, url=page.url, text=page.content)
async def wikipedia(self, ctx, *, query):
    """Preview a Wikipedia article.

    Restored ``async def`` (the body awaits); narrowed the bare except.
    """
    await ctx.trigger_typing()
    try:
        wiki = wikipedia.page(query)
    except Exception:
        # Covers PageError, DisambiguationError and network failures alike.
        return await ctx.send('No results.')
    await self.embedwiki(ctx, wiki)
def wikipediaAction(message):
    """Makes the appropriate calls to the wikipedia API for answer wiki queries.
    Args:
        message: An incoming text message
    Returns:
        A message indicating what action was taking with the wikipedia API

    Fix: narrowed the bare except to Exception (a bare except also swallows
    KeyboardInterrupt/SystemExit).
    """
    # tokenize input
    tokens = tokenize.wordpunct_tokenize(message)
    # filter stopwords, additionally, remove 'wiki' or 'wikipedia'
    tokens_filtered = remove_stopwords(tokens)
    tokens_filtered = [token for token in tokens_filtered if token not in ('wiki', 'wikipedia')]
    # join filtered message
    message = ' '.join(tokens_filtered)
    # for debugging/testing
    print("(Highly) processed input: ", message)
    # Get the wikipedia summary for the request
    try:
        summary = wikipedia.summary(message, sentences = 1)
        url = wikipedia.page(message).url
        answer = summary + "\nSee more here: " + url
        # Keep chat replies short; truncate and point at the full article.
        if len(answer) > 500:
            answer = answer[0:500] + "\nSee wikipedia for more..."
    except Exception:
        # handle all lookup errors (missing page, disambiguation, network)
        answer = "Request was not found using Wikipedia. Be more specific?"
    return answer
def download_single(wiki_page_name, only_summary=False, language='en'):
    """
    Download the content of a wikipedia page.
    :param wiki_page_name: the page name
    :param only_summary: when True return only the summary, otherwise the
        full page content
    :param language: wikipedia language code
    :return: the page text

    BUG FIX: the branches were inverted — only_summary=True used to return
    the FULL content while only_summary=False returned the summary.
    """
    wikipedia.set_lang(language)
    if only_summary:
        return wikipedia.summary(wiki_page_name)
    page = wikipedia.page(wiki_page_name)
    return page.content
def download_all(wiki_page_names, only_summary=False, language='en'):
    """Download several wikipedia pages; return a {page_name: text} mapping."""
    return {
        name: download_single(name, only_summary=only_summary, language=language)
        for name in wiki_page_names
    }
# TODO if starts with http or www get only the page name
def summary(query, sentences=0, chars=0):
    """Returns a plain text summary from the query's page.

    On a missing page returns an error string; on an ambiguous query
    returns up to five candidate titles instead of text.
    """
    try:
        return wikipedia.summary(query, sentences, chars)
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        # NOTE: returns a list here, not a string — callers must handle both.
        return error.options[:5]
def content(title=None, pageid=None, auto_suggest=True, redirect=True, preload=False):
    """Returns plain text content of query's page, excluding images, tables and other data.

    All parameters mirror wikipedia.page(); previously only *title* was
    forwarded and pageid/auto_suggest/redirect/preload were silently
    ignored.  On a missing page returns an error string; on an ambiguous
    query returns up to five candidate titles.
    """
    try:
        page = wikipedia.page(title=title, pageid=pageid, auto_suggest=auto_suggest,
                              redirect=redirect, preload=preload)
        return page.content
    except wikipedia.exceptions.PageError:
        return "No page matches, try another item."
    except wikipedia.exceptions.DisambiguationError as error:
        return error.options[:5]
def next_link(cur, done):
    """Return the title of the first suitable wiki link on page *cur*.

    A link qualifies when its href starts with /wiki/, its title has not
    been visited (not in *done*), and its anchor text starts lowercase.
    Fixes: *g* could be unbound when every disambiguation option was
    already visited (now returns None); removed the dead ``flag`` variable;
    added an explicit None return when no link qualifies.
    """
    g = None
    try:
        g = wikipedia.page(cur).html()
    except wikipedia.exceptions.DisambiguationError as e:
        # Fall back to the first unvisited disambiguation option.
        for op in e.options:
            if op not in done:
                g = wikipedia.page(op).html()
                break
    if g is None:
        return None
    # Strip parenthesised asides (pronunciations etc.) before parsing.
    soup = BeautifulSoup(re.sub(r'\([^)]*\)', '', g), "html.parser")
    for para in soup.findAll("p"):
        for link in para.findAll("a"):
            if link.get("href").startswith("/wiki/") and link.get("title") not in done and link.contents[0].islower():
                return link.get("title")
    return None
def retrieve_random_passage(page, length):
    """Given a wikipedia page and length, retrieves a random passage of text from
    the content of the wikipedia page with the given length.

    :param page: object with a ``content`` string attribute.
    :param length: desired passage length; clamped to len(content) - 1 when
        it is not strictly smaller than the content length.

    BUG FIX: the original used ``length > content_length``, so
    length == content_length called random.randrange(0) and raised
    ValueError; the comparison is now ``>=``.
    """
    content = page.content
    content_length = len(content)
    if length >= content_length:
        length = content_length - 1
    start = random.randrange(len(content) - length)
    end = start + length
    return content[start:end]
async def search_docubuntu(self, ctx, args):
    """Look up a page on doc.ubuntu-fr.org and report the result (French UI).

    Restored ``async def`` — the body awaits, so a plain ``def`` is a
    SyntaxError.
    """
    attends = await ctx.send("_Je te cherche ça {} !_".format(ctx.message.author.mention))
    html = urllib.request.urlopen("https://doc.ubuntu-fr.org/" + args).read()
    # The wiki serves a "vous avez suivi un lien..." page when the article
    # does not exist.
    if "avez suivi un lien" in str(html):
        await attends.edit(content=":sob: Nooooon ! Cette page n'existe pas, mais tu peux toujours la créer : https://doc.ubuntu-fr.org/"+ args)
    else:
        await attends.delete()
        embed = discord.Embed(description="Voila j'ai trouvé ! Voici la page ramenant à votre recherche, toujours aussi bien rédigée :wink: : https://doc.ubuntu-fr.org/" + args, url='http://doc.ubuntu-fr.org/')
        embed.set_author(name="DocUbuntu-Fr", url='http://doc.ubuntu-fr.org/', icon_url='http://outout.tech/tuxbot/ubuntu.png')
        embed.set_thumbnail(url='http://outout.tech/tuxbot/ubuntu.png')
        embed.set_footer(text="Merci à ceux qui ont pris le temps d'écrire cette documentation")
        await ctx.send(embed=embed)
def randwiki(irc, source, msgtarget, args):
    """Post a random wikipedia article: title, URL and a short summary."""
    title = wikipedia.random(pages=1)
    page_url = wikipedia.page(title).url
    # \x1d toggles italics on IRC clients.
    irc.msg(msgtarget, "Random Article: {} - \x1d{}\x1d".format(title, page_url))
    irc.msg(msgtarget, wikipedia.summary(title, sentences=2, chars=250, auto_suggest=True))
def wiki(irc, source, msgtarget, args):
    """Answer an IRC wiki command with a two-sentence summary and a link."""
    try:
        url = wikipedia.page(args).url
        # Summarise the top search hit rather than the raw query string.
        page = wikipedia.summary(wikipedia.search(args)[0], sentences=2, auto_suggest=True)
        irc.msg(msgtarget, page)
        irc.msg(msgtarget, "More at \x1d"+url)
    except wikipedia.exceptions.DisambiguationError as e:
        # Re-dispatch through the command registry with the first option.
        bot_commands["wiki"](irc, source, msgtarget, e.options[0])
    except wikipedia.exceptions.PageError:
        irc.msg(msgtarget, "No page could be found")
def ask_wikipedia(self, definition):
    '''
    Ask Wikipedia for the definition.
    :param definition: lowercase term to look up.
    :return: (is_exact, out) — is_exact is True when a page title matched
        the definition exactly; out is a list of Phrase objects.
    '''
    # TODO: this method should run in a separate process, asynchronously
    is_exact = False
    out = []
    if not wikipedia:
        # Optional dependency: wikipedia module not available.
        return is_exact, out
    page_titles = wikipedia.search(definition)
    page = None
    if page_titles:
        # First pass: look for a title that matches the query exactly.
        for page_title in page_titles:
            if page_title.lower() == definition:
                try:
                    page = wikipedia.page(page_title)
                    is_exact = True
                except DisambiguationError as ex:
                    out.append(Phrase().text('This can refer to a many things, such as {0}'.format(self.join_for_more(ex.options, limit=None))))
                # Exact-title branch returns immediately, matched or not.
                return is_exact, out
        # Fallback: take the top hit unless it looks like a disambiguation page.
        if not page and 'disambiguation' not in page_titles[0]:
            try:
                page = wikipedia.page(page_titles[0])
            except Exception as ex:
                out.append(Phrase().text(str(ex)))
    if page and not out:
        # First sentence chunk: content before any "==" section heading,
        # first line only.
        out.append(Phrase().text(page.content.split('==')[0]
                                 .split('\n')[0]
                                 .encode('utf-8', 'ignore')).pause(1))
    return is_exact, out
def wikipedia_page(message, option, query):
    """Reply (Slack-style) with the Wikipedia summary for *query*.

    :param option: optional '<cmd>-<lang>' suffix selecting the language
        (defaults to Japanese).

    Fixes: bare except narrowed to Exception; the mojibake docstring
    replaced.  Runtime message strings are kept byte-identical.
    """
    if query == 'help':
        return
    # set language
    lang = 'ja'
    if option:
        _, lang = option.split('-')
    wikipedia.set_lang(lang)
    try:
        # search with query
        results = wikipedia.search(query)
    except Exception:
        # Most likely an unsupported language code.
        botsend(message, '??????? `{}` ???????'.format(lang))
        return
    # get first result
    if results:
        page = wikipedia.page(results[0])
        attachments = [{
            'fallback': 'Wikipedia: {}'.format(page.title),
            'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
            'text': page.summary,
        }]
        botwebapi(message, attachments)
    else:
        botsend(message, '`{}` ??????????????'.format(query))
def download_wiki():
    """Download WikiPedia pages of ambiguous units.

    Writes the downloaded texts to <TOPDIR>/wiki.json, replacing any
    existing file.  NOTE(review): this is Python 2 code (print statements).
    """
    # Units/entities with more than one candidate meaning are "ambiguous".
    ambiguous = [i for i in l.UNITS.items() if len(i[1]) > 1]
    ambiguous += [i for i in l.DERIVED_ENT.items() if len(i[1]) > 1]
    # One (name, uri) pair per candidate entity, deduplicated.
    pages = set([(j.name, j.uri) for i in ambiguous for j in i[1]])
    print
    objs = []
    for num, page in enumerate(pages):
        obj = {'url': page[1]}
        # Page id is the URL tail; "clean" is the human-readable title.
        obj['_id'] = obj['url'].replace('https://en.wikipedia.org/wiki/', '')
        obj['clean'] = obj['_id'].replace('_', ' ')
        print '---> Downloading %s (%d of %d)' % \
            (obj['clean'], num + 1, len(pages))
        obj['text'] = wikipedia.page(obj['clean']).content
        obj['unit'] = page[0]
        objs.append(obj)
    path = os.path.join(l.TOPDIR, 'wiki.json')
    # Overwrite the previous dump (raises if it doesn't exist).
    os.remove(path)
    json.dump(objs, open(path, 'w'), indent=4, sort_keys=True)
    print '\n---> All done.\n'
###############################################################################
def wiki_test(page='CERN'):
    """Download a wikipedia page and test the parser on its content.
    Pages full of units:
        CERN
        Hubble_Space_Telescope,
        Herschel_Space_Observatory

    Interactive: shows one ~1000-character chunk at a time with parsed
    quantities highlighted; press Enter for the next chunk.
    NOTE(review): this is Python 2 code (print statements, raw_input).
    """
    content = wikipedia.page(page).content
    parsed = p.parse(content)
    parts = int(round(len(content) * 1.0 / 1000))
    print
    end_char = 0
    for num, chunk in enumerate(range(parts)):
        _ = os.system('clear')
        print
        # Quantities whose span starts inside the current 1000-char window.
        quants = [j for j in parsed if chunk * 1000 < j.span[0] < (chunk + 1) *
                  1000]
        # Never step back before the end of the previous embedded chunk.
        beg_char = max(chunk * 1000, end_char)
        text, end_char = embed_text(quants, beg_char, chunk, content)
        print COLOR2 % text
        print
        try:
            _ = raw_input('--------- End part %d of %d\n' % (num + 1, parts))
        except (KeyboardInterrupt, EOFError):
            return