def get_new_article(self):
    """Pull the next title off the stream, fetch that Wikipedia article,
    and record a word-count tally for it."""
    # NOTE(review): `.next()` suggests a Python-2-style iterator — confirm.
    content = wikipedia.page(self.title_stream.next()).content
    # Naive space-split tokenization, tallied per article.
    self.word_counts.append(Counter(content.split(" ")))
    self.num_articles_processed += 1
# Example source code for Python wikipedia.page() usage
def wikify2(phrase, description=None):
    """Wikify `phrase`: look it up on Wikipedia and return a dict mapping
    "wikipedia_<i>_url" / "wikipedia_<i>_desc" keys to the URL and one-sentence
    summary of each of the top MAX_RESULT search hits.

    `description` is accepted for interface compatibility but unused.
    """
    results = {}
    candidates = wikipedia.search(phrase)
    for position, candidate in enumerate(candidates[:MAX_RESULT]):
        hit_page = wikipedia.page(candidate)
        results["wikipedia_{}_url".format(position)] = hit_page.url
        results["wikipedia_{}_desc".format(position)] = wikipedia.summary(candidate, sentences=1)
    return results
async def wikipedia(self, context, lang: str = None, query: str = None):
    '''Get a page from wikipedia and reply with an embed.

    `lang` may arrive as a parenthesised prefix such as "(fr)"; when present
    it is stripped from the raw message text and used as the wiki language,
    otherwise the language falls back to English.
    '''
    # FIX: declared `async` — the body awaits coroutines (self.bot.say,
    # self.bot.doubt, ...); `await` inside a plain `def` is a SyntaxError.
    query = self.bot.get_text(context)
    if lang is not None:
        if lang.startswith('(') and lang.endswith(')'):
            # Drop the "(lang) " prefix from the text and unwrap the code.
            query = query[len(lang) + 1:]
            lang = lang[1:-1]
        else:
            # First word was not a language marker; treat it as part of the query.
            lang = None
    if query in [None, '', ' ']:
        await self.bot.doubt(context)
        return
    try:
        # Local import: the method name shadows the module at class scope.
        import wikipedia
        if lang is not None and lang in wikipedia.languages().keys():
            wikipedia.set_lang(lang)
        else:
            wikipedia.set_lang('en')
        page = wikipedia.page(query)
        summary = page.summary
        if len(summary) > 1222: # totally arbitrary chosen number
            summary = summary[:1220] + '...'
        embed = discord.Embed(title=page.title, description=summary, url=page.url)
        embed.set_footer(text=page.url)
        if self.bot.config['bleeding']:
            # Only attach an image on "bleeding edge" deployments.
            if len(page.images) > 0:
                embed.set_image(url=page.images[0])
        await self.bot.say(embed=embed)
        await self.bot.replied(context)
    except wikipedia.PageError as e:
        await self.bot.reply('{}\nMake sure you search for page titles in the language that you have set.'.format(e))
        await self.bot.doubt(context)
    except KeyError:
        # Missing config key ('bleeding'); reply already depends on it, so bail quietly.
        pass
    except wikipedia.DisambiguationError as e:
        # Show the disambiguation options verbatim in a code block.
        msg = '```\n{}\n```'.format(e)
        await self.bot.doubt(context)
        await self.bot.say(msg)
def resolve(pageID):
    """Fetch the Wikipedia page for `pageID` (preloaded) and expose its
    url, summary and image list as a plain dict."""
    entry = wikipedia.page(pageID, preload=True)
    resolved = {}
    resolved["url"] = entry.url
    resolved["summary"] = entry.summary
    resolved["images"] = entry.images
    return resolved
# --- SEARCH PARAMETERS --- #
# Run `pytest` when changing these parameters to ensure they still work.
def search(coord, place_name):
    """Finds the Wikipedia page corresponding to the given place.
    The current implementation requires Wikipedia geotags, meaning it'll miss:
    - Chains (Starbucks)
    - Corporate pages (Lagunitas, as opposed to the brewery site)
    - Area-based things (49-mile drive in SF)
    :param coord: is (latitude, longitude)
    :return: A wikipedia page title.
    """
    # The title arg of `wikipedia.geosearch` is deliberately unused: it returns
    # exact title matches even when the location differs, so "Boulevard" would
    # resolve to a street rather than "Boulevard (restaurant)".
    latitude, longitude = coord
    nearby_titles = wikipedia.geosearch(latitude, longitude)
    return _match_place_name_to_wiki_page(place_name, nearby_titles)
def get_article(self):
    """Fetch the Wikipedia page for `self.search` in language `self.lang`.

    Raises:
        WikiException: wrapping the original error message on any failure.
    """
    try:
        wikipedia.set_lang(self.lang)
        return wikipedia.page(self.search)
    except Exception as e:
        logger.error('Getting wiki article error : ' + str(e))
        raise WikiException(str(e))
def getwikitext(title):
    """Return the plain-text content of the Wikipedia article `title`,
    or the sentinel string 'PageError' when no such page exists."""
    print('Fetching data from Wikipedia...')
    try:
        return wikipedia.page(title).content
    except wikipedia.exceptions.PageError:
        return 'PageError'
def wikipedia_query(query, simple_result=False):
    """Search Wikipedia for `query`.

    Returns the page URL when `simple_result` is truthy, otherwise the page
    title plus a summary cut at the first section heading and capped at ~500
    characters.

    Raises:
        bot_exception: with suggestions when the page is missing or ambiguous.
    """
    if not query:
        return "Try searching for *something* next time, knucklehead."
    try:
        page = wikipedia.page(query, auto_suggest=True)
        if simple_result:  # Just return the url of the found page
            return page.url
        else:  # Return the first ~500 characters of the summary
            title = page.title
            summary = page.summary
            # Cut at the first section heading ("==") within the first 500
            # chars (replaces the original hand-rolled character scan).
            heading = summary.find('==', 0, 500)
            if heading != -1:
                summary = summary[0:heading]
            if len(summary) >= 500:
                summary = summary[0:500]
                summary += ' ...*`[truncated]`*'
            return "***```{title}```***\n{summary}".format(title=title, summary=summary)
    except wikipedia.exceptions.PageError:
        # FIX: call suggest() once instead of twice — each call is a network request.
        suggestion = wikipedia.suggest(query)
        raise bot_exception(WIKIPEDIA_EXCEPTION,
            "Page doesn't exist. Trying for some suggestions...",
            '```{}```'.format(suggestion if suggestion is not None else "None"))
    except wikipedia.exceptions.DisambiguationError:  # Try to get list of suggestions
        suggestions = wikipedia.search(query, results=5)
        if len(suggestions) > 0:
            formatted_suggestions = '```\n'
            for suggestion in suggestions:
                formatted_suggestions += '{}\n'.format(suggestion)
            formatted_suggestions += '```'
            raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. Here are some suggestions:", formatted_suggestions)
        else:
            raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. No suggestions found.")
def get_wiki_page(req):
    """Look up `req` on the Russian Wikipedia and return the page object."""
    wikipedia.set_lang("ru")
    return wikipedia.page(req)
def save_images(page):
    """Download up to IMAGES_COUNT of `page`'s images into ./files/ and
    return the list of local paths.

    Only URLs ending in IMAGES_EXT are kept; filenames containing quote/logo
    boilerplate ("Aquote", "Commons-logo", "Wiktionary") are skipped.
    """
    # FIX: ported from Python 2 (`print fname`, `urllib.urlretrieve`) — the
    # rest of this file requires Python 3, where both forms fail.
    import urllib.request
    local_images = []
    counter = 0
    for img in page.images:
        fname = img.split("/")[-1]
        if img.endswith(IMAGES_EXT) and "Aquote" not in fname and "Commons-logo" not in fname and "Wiktionary" not in fname:
            print(fname)
            urllib.request.urlretrieve(img, "./files/" + fname)
            local_images.append("./files/" + fname)
            counter += 1
            if counter >= IMAGES_COUNT:
                break
    return local_images
def pixiv():
    """Pick a random work from today's Pixiv daily ranking.

    Returns a dict with the large image URL ('photo') and the matching
    artwork page URL ('pixiv').
    """
    ranking_url = 'https://public-api.secure.pixiv.net/v1/ranking/all?image_sizes=px_128x128%2Cpx_480mw%2Clarge&include_stats=true&page=1&profile_image_sizes=px_170x170%2Cpx_50x50&mode=daily&include_sanity_level=true&per_page=50'
    request_headers = {"Host": "public-api.secure.pixiv.net", "Authorization": "Bearer WHDWCGnwWA2C8PRfQSdXJxjXp0G6ULRaRkkd6t5B6h8", "Accept-Encoding": "gzip, deflate", "Accept": "*/*", "Accept-Language": "zh-cn", "Connection": "keep-alive", "Proxy-ConnectAion": "keep-alive", "User-Agent": "PixivIOSApp/5.6.0", "Referer": "http://spapi.pixiv.net/"}
    response = requests.get(ranking_url, headers=request_headers)
    payload = json.loads(response.text)
    pick = random.randint(0, 49)
    photo_url = payload['response'][0]['works'][pick]['work']['image_urls']['large']
    # The illust id is the image filename minus its "_p0" page suffix.
    illust_id = urllib.parse.urlsplit(photo_url).path.split('/')[-1].replace('_p0.jpg', '').replace('_p0.png', '')
    pixiv_url = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id={}'.format(illust_id)
    return {'photo': photo_url, 'pixiv': pixiv_url}
def wikipedia_quiz(number_of_articles, passage_length):
    """Generates a multiple choice quiz to identify the correct wikipedia article that
    a random passage is pulled from. The number of articles determines how many choices
    you must pick from. The passage length determines the number of characters that the
    random passage will be.

    Returns False when a fair passage could not be produced within
    RETRY_AMOUNT_MAX attempts; True once the quiz completes (right or wrong).
    """
    print('*** Wikipedia Quiz ***')
    logging.info('Quiz is starting')
    # Candidate article titles; exactly one (picked at random below) is the answer.
    random_articles = wikipedia.random(number_of_articles)
    logging.debug('Random articles: %s', str(random_articles).encode('utf-8'))
    correct_article_index = random.randrange(number_of_articles)
    page_retrieved = False
    # Keep rerolling until the chosen title resolves to an unambiguous page.
    while not page_retrieved:
        try:
            correct_article = random_articles[correct_article_index]
            correct_page = wikipedia.page(correct_article)
            page_retrieved = True
        except wikipedia.exceptions.DisambiguationError:
            # Wikipedia provides options to choose from, but if we pick one, the title will be
            # much more descriptive (particularly by using parenthesis like so). This usually
            # ends up making the guessing too easy. Let's just reroll and put the new random
            # article in the place of the old one.
            new_random_article = wikipedia.random()
            random_articles[correct_article_index] = new_random_article
    # Try to obtain a good passage
    random_passage = retrieve_random_passage(correct_page, passage_length)
    retry = 0
    # Regenerate while the passage gives the answer away.
    # NOTE(review): the fairness criteria live in `is_passage_unfair`, not visible here.
    while is_passage_unfair(random_passage, correct_article) and retry < RETRY_AMOUNT_MAX:
        logging.info('Passage is unfair, generating a new one...')
        random_passage = retrieve_random_passage(correct_page, passage_length)
        retry += 1
    if retry >= RETRY_AMOUNT_MAX:
        print('Too many retries for the passage...')
        logging.error('Too many retries for the passage...')
        return False
    # Print info to user
    print('...%s...' % random_passage)
    encode_utf8 = sys.version_info.major == 2 # Hack support for Python 2
    for index, random_article in enumerate(random_articles):
        if encode_utf8:
            random_article = random_article.encode('utf-8')
        print('%d: %s' % (index, random_article))
    # Handle answer
    answer = request_answer(number_of_articles)
    if answer == str(correct_article_index):
        print('Correct!')
        logging.info('Correct, answer was %d', correct_article_index)
    else:
        print('Incorrect, answer was: %d' % correct_article_index)
        logging.info('Incorrect, answer was: %d', correct_article_index)
    logging.info('Quiz is ending')
    return True
async def search_wikipedia(self, ctx: commands.Context, args):
    """Search Wikipedia for `args`, list the results in an embed, and let the
    command author pick one via reactions; then post that page's summary."""
    # FIX: declared `async` — the body awaits coroutines (ctx.send,
    # msg.delete, ...); `await` inside a plain `def` is a SyntaxError.
    wait = await ctx.send("_Je cherche..._")
    results = wikipedia.search(args)
    nbmr = 0
    mmssgg = ""
    # Build a numbered list of the search hits.
    for value in results:
        nbmr = nbmr + 1
        mmssgg = mmssgg + "**{}**: {} \n".format(str(nbmr), value)
    em = discord.Embed(title='Résultats de : ' + args, description = mmssgg, colour=0x4ECDC4)
    em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
    await wait.delete()
    sending = ["1?", "2?", "3?", "4?", "5?", "6?", "7?", "8?", "9?", "??"]
    # Only reactions from the command author on our own message count.
    def check(reaction, user):
        return user == ctx.author and reaction.emoji in sending and reaction.message.id == msg.id
    async def waiter(future: asyncio.Future):
        reaction, user = await self.bot.wait_for('reaction_add', check=check)
        future.set_result(reaction.emoji)
    emoji = asyncio.Future()
    self.bot.loop.create_task(waiter(emoji))
    msg = await ctx.send(embed=em)
    for e in sending:
        await msg.add_reaction(e)
        if emoji.done():
            # User already reacted; stop adding the remaining choices.
            break
    # Poll until the waiter task resolves the future with the chosen emoji.
    while not emoji.done():
        await asyncio.sleep(0.1)
    sPage = int(sending.index(emoji.result()))
    args_ = results[sPage]
    try:
        await msg.delete()
        await ctx.trigger_typing()
        wait = await ctx.send(ctx.message.author.mention + " ah ok sympa cette recherche, je l'effectue de suite !")
        wp = wikipedia.page(args_)
        wp_contenu = wp.summary[:200] + "..."
        em = discord.Embed(title='Wikipedia : ' + wp.title, description = "{} \n_Lien_ : {} ".format(wp_contenu, wp.url), colour=0x9B59B6)
        em.set_author(name="Wikipedia", url='http://wikipedia.org', icon_url='https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png')
        em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
        em.set_footer(text="Merci à eux de nous fournir une encyclopédie libre !")
        await wait.delete()
        await ctx.send(embed=em)
    except wikipedia.exceptions.PageError:  # TODO: move this into the on_error event handler
        await ctx.send(":open_mouth: Une **erreur interne** est survenue, si cela ce reproduit contactez votre administrateur ou faites une Issue sur ``github`` !")
def _extract_from_wiki(self):
    """Fetch the Wikipedia page named in the title field and emit the
    requested facet (content/summary/images/link lists) from a worker thread.
    """
    title = self.title_line_edit.text()
    if title:
        # Which facet of the article the user asked for (combo box text).
        page = self.page_combo_box.currentText()
        wikipedia.set_lang(self.lang_combo_box.currentText())
        # min == max == 0 puts the progress bar into indeterminate "busy" mode.
        self.load_progressbar.setMinimum(0)
        self.load_progressbar.setMaximum(0)
        class ProgressThread(QThread, QWidget):
            # Signals carrying the fetched data back to the GUI thread.
            content_link_arrived = pyqtSignal([list])
            content_text_arrived = pyqtSignal(['QString'])
            content_image_arrived = pyqtSignal([list, 'QString'])
            error_occurred = pyqtSignal()
            valid_images = []
            def run(self):
                try:
                    wiki = wikipedia.page(title=title)
                    # NOTE(review): this handle is never used or closed —
                    # looks like leftover code; candidate for removal.
                    f = open('templates/template.html')
                    if page == 'Content':
                        self.content_text_arrived.emit(wiki.content)
                    elif page == 'Images':
                        print(wiki.images)
                        # Download every image with an accepted extension into
                        # a per-title directory under the configured output path.
                        self.des_dir = Preferences.output_path + '/' + title
                        self.valid_images = []
                        if not os.path.exists(self.des_dir):
                            print(self.des_dir)
                            os.mkdir(self.des_dir)
                        for i in wiki.images:
                            if PurePath(i).suffix in Preferences.valid_image_formats:
                                print(i)
                                print(self.des_dir)
                                wget.download(i, out=self.des_dir)
                                self.valid_images.append(i)
                        self.content_image_arrived.emit(self.valid_images, self.des_dir)
                    elif page == 'Summary':
                        self.content_text_arrived.emit(wiki.summary)
                    elif page == 'Images Links':
                        self.content_link_arrived.emit(wiki.images)
                    elif page == 'References Links':
                        self.content_link_arrived.emit(wiki.references)
                except:
                    # Any failure (network, missing page, I/O) is reported to
                    # the GUI through the error signal.
                    self.error_occurred.emit()
        self.progress_thread = ProgressThread()
        self.progress_thread.content_link_arrived.connect(self.set_content_link)
        self.progress_thread.content_text_arrived.connect(self.set_content_text)
        self.progress_thread.content_image_arrived.connect(self.set_content_image)
        self.progress_thread.error_occurred.connect(self.handle_error_occurred)
        self.progress_thread.start()
    else:
        # No title entered: clear and disable the output view.
        self.content_text_browser.clear()
        self.content_text_browser.setEnabled(False)
def process(input, entities):
    """Handle a 'wiki' intent: build a Messenger reply for `entities['wiki']`.

    On success, replies with the page summary plus a link button.  On a
    DisambiguationError, replies with a generic template listing each
    unambiguous option.  Any other failure yields a help message.

    Returns a dict with 'input', 'output'/'error_msg' and 'success' keys.
    """
    output = {}
    try:
        query = entities['wiki'][0]['value']
        data = wikipedia.page(query)
        output['input'] = input
        template = TextTemplate('Wikipedia summary of ' + data.title + ':\n' + data.summary)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Wikipedia Link', data.url)
        output['output'] = template.get_message()
        output['success'] = True
    except wikipedia.exceptions.DisambiguationError as e:
        import json  # for serializing the postback payload
        template = GenericTemplate()
        template.set_image_aspect_ratio_to_square()
        image_url = 'https://en.wikipedia.org/static/images/project-logos/enwiki-2x.png'
        pageids = set()  # dedupe options that resolve to the same page
        for option in e.options:
            try:
                data = wikipedia.page(option)
                if data.pageid in pageids:
                    continue
                pageids.add(data.pageid)
                buttons = ButtonTemplate()
                buttons.add_web_url('Wikipedia Link', data.url)
                # Postback payload re-triggers this intent with the chosen option.
                payload = {
                    'intent': 'wiki',
                    'entities': {
                        'wiki': [
                            {
                                'value': option
                            }
                        ]
                    }
                }
                # FIX: serialize the payload — the parallel implementation in this
                # file does json.dumps(payload); a raw dict is not a valid postback.
                buttons.add_postback('Wikipedia Summary', json.dumps(payload))
                template.add_element(title=data.title, item_url=data.url, image_url=image_url,
                                     buttons=buttons.get_buttons())
            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                pass # Some suggestions don't map to a page; skipping them..
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
        error_message = 'I couldn\'t find any wikipedia results matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - wikipedia barack'
        error_message += '\n - html wiki'
        error_message += '\n - who is sachin tendulkar'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output
def process(input, entities):
    """Handle a 'wiki' intent: build a Messenger reply for `entities['wiki']`.

    On success, replies with the page summary plus a link button.  On a
    DisambiguationError, replies with a generic template listing each
    unambiguous option.  Any other failure yields a help message.

    Returns a dict with 'input', 'output'/'error_msg' and 'success' keys.
    """
    output = {}
    try:
        query = entities['wiki'][0]['value']
        data = wikipedia.page(query)
        output['input'] = input
        template = TextTemplate('Wikipedia summary of ' + data.title + ':\n' + data.summary)
        text = template.get_text()
        template = ButtonTemplate(text)
        template.add_web_url('Wikipedia Link', data.url)
        output['output'] = template.get_message()
        output['success'] = True
    except wikipedia.exceptions.DisambiguationError as e:
        template = GenericTemplate()
        image_url = 'https://en.wikipedia.org/static/images/project-logos/enwiki-2x.png'
        pageids = set()  # dedupe options that resolve to the same page
        for option in e.options:
            try:
                data = wikipedia.page(option)
                if data.pageid in pageids:
                    continue
                pageids.add(data.pageid)
                buttons = ButtonTemplate()
                buttons.add_web_url('Wikipedia Link', data.url)
                # Postback payload re-triggers this intent with the chosen option.
                payload = {
                    'intent': 'wiki',
                    'entities': {
                        'wiki': [
                            {
                                'value': option
                            }
                        ]
                    }
                }
                buttons.add_postback('Wikipedia Summary', json.dumps(payload))
                template.add_element(title=data.title, item_url=data.url, image_url=image_url, buttons=buttons.get_buttons())
            except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
                pass # Some suggestions don't map to a page; skipping them..
        output['input'] = input
        output['output'] = template.get_message()
        output['success'] = True
    except:
        # Bare except keeps the bot responsive on any unexpected failure;
        # it converts everything into a friendly help message.
        error_message = 'I couldn\'t find any wikipedia results matching your query.'
        error_message += '\nPlease ask me something else, like:'
        error_message += '\n - wikipedia barack'
        error_message += '\n - html wiki'
        error_message += '\n - who is sachin tendulkar'
        output['error_msg'] = TextTemplate(error_message).get_message()
        output['success'] = False
    return output