def run(self):
while True:
msg = self.queue_in.get() # get() is blocking
match = re.search(r'^(?:/|!)wiki (.*)$', msg.get_text().lower())
if match:
reply = ""
try:
related_entries = wikipedia.search(match.group(1))
w = wikipedia.page(match.group(1))
reply1 = "*{}*\n".format(w.title)
reply2 = "{}\n".format(w.summary)
reply3 = "\n*related topics*:\n- {}".format("\n- ".join(related_entries))
                if len(reply1 + reply2 + reply3) > 4096:
                    reply = reply1 + reply2[:4092 - len(reply1) - len(reply3)] + "...\n" + reply3  # trim the summary so the total stays within Telegram's 4096-character message limit
else:
reply = reply1+reply2+reply3
except wikipedia.DisambiguationError as e:
related_entries = str(e).split(":",1)[1].split("\n")[1:]
reply = "This was too inspecific. Choose one from these:\n- {}".format("\n- ".join(related_entries))
            except Exception:
                reply = "No matches were returned for this request."
if reply:
self.bot.sendMessage(msg.get_chat_id(), reply, parse_mode="Markdown")
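# The 4096 above is Telegram's maximum message length; the slicing arithmetic
# trims only the summary so the title and the related-topics list always fit
# (4092 leaves room for the four characters of "...\n"). The same logic as a
# standalone helper -- a sketch, not part of the original bot:
TELEGRAM_MAX = 4096

def fit_message(title, summary, related, limit=TELEGRAM_MAX):
    # Trim only the middle part so the head and tail survive intact.
    if len(title) + len(summary) + len(related) <= limit:
        return title + summary + related
    room = limit - len(title) - len(related) - len("...\n")
    return title + summary[:room] + "...\n" + related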
def label_topic_by_probability(cls, topic_description, min_word_probability=0.010, max_words=6):
"""
Try to disambiguate a topic considering all words with a weight greater than min_word_probability
:param max_words:
:param topic_description: is a list of pairs (word, word_probability)
:param min_word_probability: is the minimum probability for words
:return: list of strings, possible wikipedia pages
"""
words = [w for w, p in topic_description if p >= min_word_probability]
words = words[:max_words]
if len(words) == 0:
# if no words are over the threshold return empty
res = []
else:
res = wikipedia.search(' '.join(words))
return res
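# A minimal usage sketch for label_topic_by_probability; TopicLabeller is a
# hypothetical name for the class this classmethod belongs to, and the topic
# below is illustrative LDA-style output, not data from the original project:
topic = [("neural", 0.12), ("network", 0.10), ("deep", 0.03), ("learning", 0.008)]
# "learning" (0.008) falls below the 0.010 threshold, so the search query
# becomes "neural network deep".
print(TopicLabeller.label_topic_by_probability(topic))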
def wikipedia_search(word):
"""Search a word meaning on wikipedia."""
wikipedia.set_lang('ja')
results = wikipedia.search(word)
# get first result
if results:
page = wikipedia.page(results[0])
msg = page.title + "\n" + page.url
else:
        msg = 'No Wikipedia page found for `{}`'.format(word)
return msg
def get_pages(query):
pages = list()
    if not query.strip():
        raise ValueError("query must be a non-empty string")
    response = requests.get(SEARCH_URL + str(query))
    soup = BeautifulSoup(markup=response.text, features="lxml")
    if soup is None:
        raise Exception("failed to parse the search results page")
if "search" in str(soup.title).lower():
result_ul = soup.find(name="ul", attrs={"class": "mw-search-results"})
results_list = result_ul.find_all("li")
for li in results_list:
li_div = li.find(name="div", attrs={"class": "mw-search-result-heading"})
a = li_div.find("a")
link = "https://en.wikipedia.org" + a["href"]
heading = str(a.text)
pages.append((link, heading))
return pages
else:
return wikipedia.summary(query)
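# get_pages relies on module-level setup the snippet does not show; the names
# below are assumptions (SEARCH_URL in particular is a guess consistent with
# the markup being parsed). Note the mixed return type: a list of
# (link, heading) pairs for a results page, but a plain summary string when
# Wikipedia redirects straight to an article.
import requests
import wikipedia
from bs4 import BeautifulSoup

SEARCH_URL = "https://en.wikipedia.org/w/index.php?search="  # assumed value

for link, heading in get_pages("list of sorting algorithms"):
    print(heading, "->", link)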
def about(query,qtype=None):
service_url = 'https://kgsearch.googleapis.com/v1/entities:search'
params = {
'query': query,
'limit': 10,
'indent': True,
'key': api_key,
}
url = service_url + '?' + urllib.urlencode(params)
response = json.loads(urllib.urlopen(url).read())
if not len(response['itemListElement']):
return "sorry, I don't know about "+query +"\nIf you know about "+query+" please tell me."
result = ""
if len(response['itemListElement'])==1:
if "detailedDescription" in response['itemListElement'][0]['result']:
return response['itemListElement'][0]['result']['detailedDescription']["articleBody"]
else:
return response['itemListElement'][0]['result']['name'] +" is a " +\
response['itemListElement'][0]['result']["description"]
for element in response['itemListElement']:
        try:
            result += element['result']['name'] + "->" + element['result']["description"] + "\n"
        except KeyError:
            # some entities lack a description; skip them
            pass
return result
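# Hypothetical usage of about(); api_key must be a module-level Google
# Knowledge Graph Search API key, and the urllib calls above are Python 2
# style (urllib.parse.urlencode / urllib.request.urlopen in Python 3):
api_key = "YOUR_KG_API_KEY"  # placeholder, not a real key
print(about("Taj Mahal"))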
def wikipedia_search_slow(query, lang="en", max_result=1):
import wikipedia
#wikification
query = any2unicode(query)
items = []
ret = {"query":query, "itemList":items}
wikipedia.set_lang(lang)
wikiterm = wikipedia.search(query)
#logging.info(wikiterm)
for idx, term in enumerate(wikiterm[0:max_result]):
wikipage = wikipedia.page(term)
item = {
"name": wikipage.title,
"description": wikipedia.summary(term, sentences=1),
"url": wikipage.url,
}
items.append(item)
return ret
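# Example call; besides the wikipedia package this needs the project's
# any2unicode helper. Each hit costs two extra HTTP round-trips (page plus
# summary), hence the _slow suffix:
result = wikipedia_search_slow("alan turing", lang="en", max_result=2)
for item in result["itemList"]:
    print(item["name"], "-", item["url"])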
def wikify3(phrase, description=None):
urlBase = "https://en.wikipedia.org/w/api.php?action=opensearch&format=json&formatversion=2&search={}&namespace=0&limit=10&suggest=true"
    url = urlBase.format(re.sub(r"\s+", "%20", phrase))
r = requests.get(url)
jsonData = json.loads(r.content)
#logging.info(items)
ret = {}
for idx, label in enumerate(jsonData[1][0:MAX_RESULT]):
description = jsonData[2][idx]
url = jsonData[3][idx]
#if "refer to:" in description:
# continue
if idx > 0:
prefix = "wikipedia{}".format(idx+1)
else:
prefix = "wikipedia"
ret["{}Label".format(prefix)] = label
ret["{}Description".format(prefix)] = description
ret["{}Url".format(prefix)] = url
return ret
def label_topic_by_number_of_words(cls, topic_description, n_words=5):
"""
Try to disambiguate a topic considering top k words in its description
:param n_words:
:param topic_description: is a list of pairs (word, word_probability)
:return: list of strings, possible wikipedia pages
"""
    words = [w for w, _ in topic_description[:n_words]]
    if len(words) == 0:
        # no words selected (e.g. n_words == 0); fall back to the first word
        words = [topic_description[0][0]]
res = wikipedia.search(' '.join(words))
return res
def whatIs(query,sessionID="general"):
try:
return wikipedia.summary(query)
except:
for newquery in wikipedia.search(query):
try:
return wikipedia.summary(newquery)
except:
pass
return about(query)
def whoIs(query,sessionID="general"):
try:
return wikipedia.summary(query)
except:
for newquery in wikipedia.search(query):
try:
return wikipedia.summary(newquery)
except:
pass
return "I don't know about "+query
def _search(self, ctx):
"""Rechercher sur le world wide web"""
if ctx.invoked_subcommand is None:
text = open('texts/search.md').read()
em = discord.Embed(title='Commandes de search TuxBot', description=text, colour=0x89C4F9)
await ctx.send(embed=em)
def ask_wikipedia(self, definition):
    '''
    Ask Wikipedia for the definition.
    :param definition: the term to look up
    :return: (is_exact, out) - whether an exact page title matched, and a list of Phrase replies
    '''
# TODO: this method should run in a separate process, asynchronously
is_exact = False
out = []
if not wikipedia:
return is_exact, out
page_titles = wikipedia.search(definition)
page = None
    if page_titles:
        for page_title in page_titles:
            if page_title.lower() == definition.lower():
                try:
                    page = wikipedia.page(page_title)
                    is_exact = True
                except DisambiguationError as ex:
                    out.append(Phrase().text('This can refer to many things, such as {0}'.format(self.join_for_more(ex.options, limit=None))))
                return is_exact, out
    if page_titles and not page and 'disambiguation' not in page_titles[0]:
try:
page = wikipedia.page(page_titles[0])
except Exception as ex:
out.append(Phrase().text(str(ex)))
if page and not out:
out.append(Phrase().text(page.content.split('==')[0]
.split('\n')[0]
.encode('utf-8', 'ignore')).pause(1))
return is_exact, out
def wikipedia_page(message, option, query):
"""
    Search Wikipedia for the given query and post the first result
"""
if query == 'help':
return
# set language
lang = 'ja'
if option:
_, lang = option.split('-')
wikipedia.set_lang(lang)
try:
# search with query
results = wikipedia.search(query)
    except Exception:
        botsend(message, 'The language code `{}` is not valid'.format(lang))
return
# get first result
if results:
page = wikipedia.page(results[0])
attachments = [{
'fallback': 'Wikipedia: {}'.format(page.title),
'pretext': 'Wikipedia: <{}|{}>'.format(page.url, page.title),
'text': page.summary,
}]
botwebapi(message, attachments)
else:
        botsend(message, 'No pages found for `{}`'.format(query))
def wikipedia_search(query, lang="en", max_result=1):
"""
https://www.mediawiki.org/wiki/API:Opensearch
"""
query = any2unicode(query)
params = {
"action":"opensearch",
"search": query,
"format":"json",
#"formatversion":2,
#"namespace":0,
"suggest":"true",
"limit": 10
}
urlBase = "https://{}.wikipedia.org/w/api.php?".format(lang)
url = urlBase + urllib.urlencode(any2utf8(params))
#logging.info(url)
r = requests.get(url)
jsonData = json.loads(r.content)
#logging.info(jsonData)
items = []
ret = {"query":query, "itemList":items}
for idx, label in enumerate(jsonData[1][0:max_result]):
description = jsonData[2][idx]
url = jsonData[3][idx]
item = {
"name": label,
"description":description,
"url": url,
}
items.append(item)
return ret
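# The indexing into jsonData[1], jsonData[2] and jsonData[3] above follows the
# OpenSearch response shape: a four-element array [query, titles,
# descriptions, urls]. A minimal Python 3 port of the same call, assuming only
# requests (newer MediaWiki versions return empty description strings):
import requests

def opensearch(query, lang="en", limit=10):
    url = "https://{}.wikipedia.org/w/api.php".format(lang)
    params = {"action": "opensearch", "search": query,
              "format": "json", "limit": limit}
    data = requests.get(url, params=params).json()
    # zip titles, descriptions and urls into (title, description, url) triples
    return list(zip(data[1], data[2], data[3]))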
def whoIs(query,sessionID="general"):
try:
return wikipedia.summary(query)
except:
for newquery in wikipedia.search(query):
try:
return wikipedia.summary(newquery)
except:
pass
return "I don't know about "+query
def whoIs(query,sessionID="general"):
try:
return wikipedia.summary(query)
except:
for newquery in wikipedia.search(query):
try:
return wikipedia.summary(newquery)
except:
pass
return "I don't know about "+query
def get_wanted_article(search_term):
"""Given a search term, find the associated article"""
search_term = " ".join(search_term)
try:
list_of_associated_articles = wikipedia.search(search_term)
wanted_article = list_of_associated_articles[0]
print(wikipedia.summary(wanted_article))
except wikipedia.exceptions.DisambiguationError as disambiguation:
sys.exit("Unfortunately your request has led to a disambiguation, "
"please refine your search further:\n{}".format(disambiguation))
def wikify1(phrase, description=None):
#wikification
"""
    {
      "searchinfo": {
        "search": "birthday"
      },
      "search": [
        {
          "repository": "",
          "id": "P3150",
          "concepturi": "http://www.wikidata.org/entity/P3150",
          "url": "//www.wikidata.org/wiki/Property:P3150",
          "title": "Property:P3150",
          "pageid": 28754653,
          "datatype": "wikibase-item",
          "label": "birthday",
          "description": "item for day and month on which the subject was born. Used when full 'date of birth' (P569) isn't known.",
          "match": {
            "type": "label",
            "language": "en",
            "text": "birthday"
          }
        }
      ]
    }
    """
urlBase = "https://www.wikidata.org/w/api.php?action=wbsearchentities&search={}&format=json&language=en&uselang=en&type=property"
    url = urlBase.format(re.sub(r"\s+", "%20", phrase))
r = requests.get(url)
items = json.loads(r.content).get("search",[])
#logging.info(items)
ret = {}
for idx, item in enumerate(items[0:MAX_RESULT]):
if idx > 0:
prefix = "wikidata{}".format(idx+1)
else:
prefix = "wikidata"
ret["{}Id".format(prefix)] = item["id"]
ret["{}Name".format(prefix)] = item.get("label","")
ret["{}Description".format(prefix)] = item.get("description","")
ret["{}Url".format(prefix)] = item["concepturi"]
return ret
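# For reference, the flat dictionary wikify1 builds looks like this (values
# taken from the docstring sample above; a second hit would use the
# "wikidata2" prefix, and so on up to MAX_RESULT):
# {
#     "wikidataId": "P3150",
#     "wikidataName": "birthday",
#     "wikidataDescription": "item for day and month on which the subject was born. ...",
#     "wikidataUrl": "http://www.wikidata.org/entity/P3150",
# }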
def wikify2(phrase, description=None):
#wikification
ret = {}
wikiterm = wikipedia.search(phrase)
for idx, term in enumerate(wikiterm[0:MAX_RESULT]):
wikipage = wikipedia.page(term)
ret["wikipedia_{}_url".format(idx)] = wikipage.url
ret["wikipedia_{}_desc".format(idx)] = wikipedia.summary(term, sentences=1)
return ret
def wikipedia_query(query, simple_result=False):
if not query:
return "Try searching for *something* next time, knucklehead."
try:
page = wikipedia.page(query, auto_suggest=True)
if simple_result: # Just return the url of the found page
return page.url
else: # Return the first ~500 characters of the summary
title = page.title
summary = page.summary
            # cut the summary at the first section marker ("==") within ~500 chars
            for i in range(0, (len(summary) if len(summary) < 500 else 500) - 1):
                if summary[i] == '=' and summary[i+1] == '=':
                    summary = summary[0:i]
                    break
if len(summary) >= 500:
summary = summary[0:500]
summary += ' ...*`[truncated]`*'
return "***```{title}```***\n{summary}".format(title=title, summary=summary)
    except wikipedia.exceptions.PageError:
        suggestion = wikipedia.suggest(query)  # avoid calling suggest() twice
        raise bot_exception(WIKIPEDIA_EXCEPTION,
            "Page doesn't exist. Trying for some suggestions...",
            '```{}```'.format(suggestion if suggestion is not None else "None"))
    except wikipedia.exceptions.DisambiguationError:  # try to get a list of suggestions
suggestions = wikipedia.search(query, results=5)
if len(suggestions) > 0:
formatted_suggestions = '```\n'
for suggestion in suggestions:
formatted_suggestions += '{}\n'.format(suggestion)
formatted_suggestions += '```'
raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. Here are some suggestions:", formatted_suggestions)
else:
raise bot_exception(WIKIPEDIA_EXCEPTION, "Query is too ambiguous. No suggestions found.")
def search(query, count=10, suggestion=False):
"""Do a Wikipedia search for a query, returns a list of 10 related items."""
items = wikipedia.search(query, count, suggestion)
if isinstance(items, list) and len(items) > 0:
return items
return "No articles with that name, try another item."
def search_wikipedia(self, ctx: commands.Context, args):
"""Fait une recherche sur wikipd"""
wait = await ctx.send("_Je cherche..._")
results = wikipedia.search(args)
nbmr = 0
mmssgg = ""
for value in results:
nbmr = nbmr + 1
mmssgg = mmssgg + "**{}**: {} \n".format(str(nbmr), value)
em = discord.Embed(title='Résultats de : ' + args, description = mmssgg, colour=0x4ECDC4)
em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
await wait.delete()
sending = ["1?", "2?", "3?", "4?", "5?", "6?", "7?", "8?", "9?", "??"]
def check(reaction, user):
return user == ctx.author and reaction.emoji in sending and reaction.message.id == msg.id
async def waiter(future: asyncio.Future):
reaction, user = await self.bot.wait_for('reaction_add', check=check)
future.set_result(reaction.emoji)
emoji = asyncio.Future()
self.bot.loop.create_task(waiter(emoji))
msg = await ctx.send(embed=em)
for e in sending:
await msg.add_reaction(e)
if emoji.done():
break
while not emoji.done():
await asyncio.sleep(0.1)
sPage = int(sending.index(emoji.result()))
args_ = results[sPage]
try:
await msg.delete()
await ctx.trigger_typing()
        wait = await ctx.send(ctx.message.author.mention + " ok, nice search, I'm running it right now!")
wp = wikipedia.page(args_)
wp_contenu = wp.summary[:200] + "..."
        em = discord.Embed(title='Wikipedia: ' + wp.title, description="{} \n_Link_: {} ".format(wp_contenu, wp.url), colour=0x9B59B6)
em.set_author(name="Wikipedia", url='http://wikipedia.org', icon_url='https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png')
em.set_thumbnail(url = "https://upload.wikimedia.org/wikipedia/commons/2/26/Paullusmagnus-logo_%28large%29.png")
        em.set_footer(text="Thanks to them for providing a free encyclopedia!")
await wait.delete()
await ctx.send(embed=em)
    except wikipedia.exceptions.PageError:  # TODO: move this into the on_error event
        await ctx.send(":open_mouth: An **internal error** occurred; if this happens again, contact your administrator or open an issue on ``github``!")
def wikidata_search(query, lang="zh", output_lang="en", searchtype="item", max_result=1):
"""
    wikification: search wikidata entities matching the given query
https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities
result format
    {
      "searchinfo": {
        "search": "birthday"
      },
      "search": [
        {
          "repository": "",
          "id": "P3150",
          "concepturi": "http://www.wikidata.org/entity/P3150",
          "url": "//www.wikidata.org/wiki/Property:P3150",
          "title": "Property:P3150",
          "pageid": 28754653,
          "datatype": "wikibase-item",
          "label": "birthday",
          "description": "item for day and month on which the subject was born. Used when full 'date of birth' (P569) isn't known.",
          "match": {
            "type": "label",
            "language": "en",
            "text": "birthday"
          }
        }
      ]
    }
"""
query = any2unicode(query)
params = {
"action":"wbsearchentities",
"search": query,
"format":"json",
"language":lang,
"uselang":output_lang,
"type":searchtype
}
urlBase = "https://www.wikidata.org/w/api.php?"
url = urlBase + urllib.urlencode(any2utf8(params))
#logging.info(url)
r = requests.get(url)
results = json.loads(r.content).get("search",[])
#logging.info(items)
property_list = [
{"name":"name", "alternateName":["label"]},
{"name":"url", "alternateName":["concepturi"]},
{"name":"identifier", "alternateName":["id"]},
{"name":"description"},
]
items = []
ret = {"query": query, "itemList":items}
for result in results[0:max_result]:
#logging.info(result)
item = json_dict_copy(result, property_list)
items.append(item)
return ret
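# Example call; wikidata_search assumes the project's any2unicode, any2utf8
# and json_dict_copy helpers, plus Python 2's urllib.urlencode
# (urllib.parse.urlencode in Python 3):
result = wikidata_search(u"birthday", lang="en", searchtype="property")
for item in result["itemList"]:
    print(item["identifier"], item["name"], "-", item["description"])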