interactive bots: Improve googlesearch bot.
This commit is contained in:
parent
1d2d6c9cf7
commit
b157b817ae
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -23,6 +23,7 @@ pip-log.txt
|
||||||
.coverage
|
.coverage
|
||||||
.tox
|
.tox
|
||||||
nosetests.xml
|
nosetests.xml
|
||||||
|
htmlcov/
|
||||||
|
|
||||||
# Translations
|
# Translations
|
||||||
*.mo
|
*.mo
|
||||||
|
|
|
@ -50,6 +50,8 @@ setuptools_info = dict(
|
||||||
'zulip',
|
'zulip',
|
||||||
'mock>=2.0.0',
|
'mock>=2.0.0',
|
||||||
'html2text', # for bots/define
|
'html2text', # for bots/define
|
||||||
|
'BeautifulSoup4', # for bots/googlesearch
|
||||||
|
'lxml', # for bots/googlesearch
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"api_url": "http://www.google.com/search",
|
||||||
|
"params": {
|
||||||
|
"q": "no res"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response": "<head></head><body><div id='search'></div></body>",
|
||||||
|
"response-headers": {
|
||||||
|
"status": 200,
|
||||||
|
"content-type": "text/html; charset=utf-8"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"api_url": "http://www.google.com/search",
|
||||||
|
"params": {
|
||||||
|
"q": "zulip"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response": "<head></head><body><div id='search'><a href='/url?url=https%3A%2F%2Fzulipchat.com%2F'>Zulip</a></div></body>",
|
||||||
|
"response-headers": {
|
||||||
|
"status": 200,
|
||||||
|
"content-type": "text/html; charset=utf-8"
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,13 +1,42 @@
|
||||||
# See readme.md for instructions on running this code.
|
# See readme.md for instructions on running this code.
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import logging
|
import logging
|
||||||
from six.moves.urllib import error
|
from six.moves.urllib import parse
|
||||||
from six.moves.urllib.request import urlopen
|
|
||||||
|
|
||||||
# Uses the Google search engine bindings
|
import requests
|
||||||
# pip install --upgrade google
|
|
||||||
from google import search
|
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def google_search(keywords):
    """Scrape the Google results page for ``keywords``.

    Sends a GET request to ``http://www.google.com/search`` with
    ``keywords`` as the ``q`` query parameter, parses the returned HTML
    with the ``lxml`` backend and collects the anchors found inside the
    element whose id is ``search``.

    Args:
        keywords: The search query string.

    Returns:
        A list of dicts in page order.  Each dict has the keys ``'url'``
        (the anchor's ``/url?...`` link prefixed with
        ``https://www.google.com``) and ``'name'`` (the anchor text,
        unstripped).  The list is empty when no anchor qualifies.

    Raises:
        AttributeError: If the page contains no element with id
            ``search`` (``soup.find`` then returns ``None``).
        Exceptions raised by ``requests.get`` are not caught here and
        propagate to the caller.
    """
    query = {'q': keywords}
    # Gets the page
    page = requests.get('http://www.google.com/search', params=query)
    # Parses the page into BeautifulSoup
    soup = BeautifulSoup(page.text, "lxml")

    # Every anchor inside the results container is a candidate result.
    anchors = soup.find(id='search').find_all('a')
    results = []

    for a in anchors:
        # Anchors without an href yield None and are skipped together
        # with links that are not search results: result links always
        # start with '/url?'.
        link = a.get('href')
        if link is None or not link.startswith('/url?'):
            continue

        name = a.text.strip()
        # Anchors with no visible text (e.g. image links) carry no title.
        if name == '':
            continue
        # Makes sure a hidden 'cached' result isn't displayed.  Both
        # conditions are required: the text alone is not enough, because
        # a genuine result may legitimately be titled "Cached".
        if name == 'Cached' and 'webcache.googleusercontent.com' in link:
            continue

        # a.text: The name of the page
        results.append({'url': "https://www.google.com{}".format(link),
                        'name': a.text})
    return results
|
||||||
|
|
||||||
def get_google_result(search_keywords):
|
def get_google_result(search_keywords):
|
||||||
help_message = "To use this bot, start messages with @mentioned-bot, \
|
help_message = "To use this bot, start messages with @mentioned-bot, \
|
||||||
|
@ -18,23 +47,21 @@ def get_google_result(search_keywords):
|
||||||
An example message that could be sent is:\
|
An example message that could be sent is:\
|
||||||
'@mentioned-bot zulip' or \
|
'@mentioned-bot zulip' or \
|
||||||
'@mentioned-bot how to create a chatbot'."
|
'@mentioned-bot how to create a chatbot'."
|
||||||
|
|
||||||
|
search_keywords = search_keywords.strip()
|
||||||
|
|
||||||
if search_keywords == 'help':
|
if search_keywords == 'help':
|
||||||
return help_message
|
return help_message
|
||||||
elif search_keywords == '' or search_keywords is None:
|
elif search_keywords == '' or search_keywords is None:
|
||||||
return help_message
|
return help_message
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
urls = search(search_keywords, stop=20)
|
results = google_search(search_keywords)
|
||||||
urlopen('http://216.58.192.142', timeout=1)
|
if (len(results) == 0):
|
||||||
except error.URLError as er:
|
return "Found no results."
|
||||||
logging.exception(er)
|
return "Found Result: [{}]({})".format(results[0]['name'], results[0]['url'])
|
||||||
return 'Error: No internet connection. {}.'.format(er)
|
except ConnectionError as c_err:
|
||||||
except Exception as e:
|
return "Error: Failed to connect. {}.".format(c_err)
|
||||||
logging.exception(e)
|
|
||||||
return 'Error: Search failed. {}.'.format(e)
|
|
||||||
|
|
||||||
try:
|
|
||||||
url = next(urls)
|
|
||||||
except AttributeError as a_err:
|
except AttributeError as a_err:
|
||||||
# google.search query failed and urls is of object
|
# google.search query failed and urls is of object
|
||||||
# 'NoneType'
|
# 'NoneType'
|
||||||
|
@ -47,12 +74,8 @@ def get_google_result(search_keywords):
|
||||||
logging.exception(t_err)
|
logging.exception(t_err)
|
||||||
return "Error: Google search function failed. {}.".format(t_err)
|
return "Error: Google search function failed. {}.".format(t_err)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.exception(e)
|
|
||||||
return 'Error: Search failed. {}.'.format(e)
|
return 'Error: Search failed. {}.'.format(e)
|
||||||
|
|
||||||
return 'Success: {}'.format(url)
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleSearchHandler(object):
|
class GoogleSearchHandler(object):
|
||||||
'''
|
'''
|
||||||
This plugin allows users to enter a search
|
This plugin allows users to enter a search
|
||||||
|
@ -78,16 +101,3 @@ class GoogleSearchHandler(object):
|
||||||
bot_handler.send_reply(message, result)
|
bot_handler.send_reply(message, result)
|
||||||
|
|
||||||
handler_class = GoogleSearchHandler
|
handler_class = GoogleSearchHandler
|
||||||
|
|
||||||
|
|
||||||
def test():
|
|
||||||
try:
|
|
||||||
urlopen('http://216.58.192.142', timeout=1)
|
|
||||||
print('Success')
|
|
||||||
return True
|
|
||||||
except error.URLError as e:
|
|
||||||
print('Error: {}'.format(e))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
test()
|
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
46
zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py
Normal file
46
zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
from six.moves.urllib import error
|
||||||
|
|
||||||
|
from zulip_bots.test_lib import BotTestCase
|
||||||
|
|
||||||
|
class TestGoogleSearchBot(BotTestCase):
# Test cases for the 'googlesearch' bot (see bot_name), built on
# BotTestCase from zulip_bots.test_lib.
|
||||||
|
bot_name = 'googlesearch'
|
||||||
|
|
||||||
|
# Simple query: inside the mocked 'test_normal' HTTP conversation, the
# message 'zulip' must be answered with a 'Found Result' reply that formats
# the first hit as a Markdown link.
|
||||||
|
def test_normal(self):
|
||||||
|
with self.mock_http_conversation('test_normal'):
|
||||||
|
self.assert_bot_response({'content': 'zulip'}, {'content': 'Found Result: [Zulip](https://www.google.com/url?url=https%3A%2F%2Fzulipchat.com%2F)'}, 'send_reply')
|
||||||
|
|
||||||
|
# Help without typing anything: an empty message must be answered with the
# help text.
# NOTE(review): the expected string uses backslash line continuations, so
# the leading whitespace of every continued line is part of the string and
# presumably has to match the bot's own help_message exactly -- confirm
# before re-indenting.
|
||||||
|
def test_bot_help_none(self):
|
||||||
|
help_message = "To use this bot, start messages with @mentioned-bot, \
|
||||||
|
followed by what you want to search for. If \
|
||||||
|
found, Zulip will return the first search result \
|
||||||
|
on Google.\
|
||||||
|
\
|
||||||
|
An example message that could be sent is:\
|
||||||
|
'@mentioned-bot zulip' or \
|
||||||
|
'@mentioned-bot how to create a chatbot'."
|
||||||
|
self.assert_bot_response({'content': ''}, {'content': help_message}, 'send_reply')
|
||||||
|
|
||||||
|
# Help from typing 'help': the literal message 'help' must be answered with
# the same help text as an empty message.
# NOTE(review): same whitespace caveat as above for the continued string.
|
||||||
|
def test_bot_help(self):
|
||||||
|
help_message = "To use this bot, start messages with @mentioned-bot, \
|
||||||
|
followed by what you want to search for. If \
|
||||||
|
found, Zulip will return the first search result \
|
||||||
|
on Google.\
|
||||||
|
\
|
||||||
|
An example message that could be sent is:\
|
||||||
|
'@mentioned-bot zulip' or \
|
||||||
|
'@mentioned-bot how to create a chatbot'."
|
||||||
|
self.assert_bot_response({'content': 'help'}, {'content': help_message}, 'send_reply')
|
||||||
|
|
||||||
|
# No results: inside the mocked 'test_no_result' HTTP conversation, the
# message 'no res' must be answered with 'Found no results.'.
def test_bot_no_results(self):
|
||||||
|
with self.mock_http_conversation('test_no_result'):
|
||||||
|
self.assert_bot_response({'content': 'no res'}, {'content': 'Found no results.'}, 'send_reply')
|
Loading…
Reference in a new issue