From b157b817ae5ae0032a55c397633a0ed169c9f29a Mon Sep 17 00:00:00 2001 From: fredfishgames Date: Thu, 30 Nov 2017 16:52:45 +0000 Subject: [PATCH] interactive bots: Improve googlesearch bot. --- .gitignore | 1 + zulip_bots/setup.py | 2 + .../googlesearch/__init__.py | 0 .../googlesearch/doc.md | 0 .../googlesearch/fixtures/test_no_result.json | 13 +++ .../googlesearch/fixtures/test_normal.json | 13 +++ .../googlesearch/googlesearch.py | 76 ++++++++++-------- .../googlesearch/logo.png | Bin .../bots/googlesearch/test_googlesearch.py | 46 +++++++++++ 9 files changed, 118 insertions(+), 33 deletions(-) rename zulip_bots/zulip_bots/{bots_unmaintained => bots}/googlesearch/__init__.py (100%) rename zulip_bots/zulip_bots/{bots_unmaintained => bots}/googlesearch/doc.md (100%) create mode 100644 zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_no_result.json create mode 100644 zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_normal.json rename zulip_bots/zulip_bots/{bots_unmaintained => bots}/googlesearch/googlesearch.py (60%) rename zulip_bots/zulip_bots/{bots_unmaintained => bots}/googlesearch/logo.png (100%) create mode 100644 zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py diff --git a/.gitignore b/.gitignore index ec49d56..56f65af 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ pip-log.txt .coverage .tox nosetests.xml +htmlcov/ # Translations *.mo diff --git a/zulip_bots/setup.py b/zulip_bots/setup.py index 503aa58..2c7b8dc 100755 --- a/zulip_bots/setup.py +++ b/zulip_bots/setup.py @@ -50,6 +50,8 @@ setuptools_info = dict( 'zulip', 'mock>=2.0.0', 'html2text', # for bots/define + 'BeautifulSoup4', # for bots/googlesearch + 'lxml', # for bots/googlesearch ], ) diff --git a/zulip_bots/zulip_bots/bots_unmaintained/googlesearch/__init__.py b/zulip_bots/zulip_bots/bots/googlesearch/__init__.py similarity index 100% rename from zulip_bots/zulip_bots/bots_unmaintained/googlesearch/__init__.py rename to 
zulip_bots/zulip_bots/bots/googlesearch/__init__.py diff --git a/zulip_bots/zulip_bots/bots_unmaintained/googlesearch/doc.md b/zulip_bots/zulip_bots/bots/googlesearch/doc.md similarity index 100% rename from zulip_bots/zulip_bots/bots_unmaintained/googlesearch/doc.md rename to zulip_bots/zulip_bots/bots/googlesearch/doc.md diff --git a/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_no_result.json b/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_no_result.json new file mode 100644 index 0000000..d604afc --- /dev/null +++ b/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_no_result.json @@ -0,0 +1,13 @@ +{ + "request": { + "api_url": "http://www.google.com/search", + "params": { + "q": "no res" + } + }, + "response": "", + "response-headers": { + "status": 200, + "content-type": "text/html; charset=utf-8" + } + } diff --git a/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_normal.json b/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_normal.json new file mode 100644 index 0000000..180bc7f --- /dev/null +++ b/zulip_bots/zulip_bots/bots/googlesearch/fixtures/test_normal.json @@ -0,0 +1,13 @@ +{ + "request": { + "api_url": "http://www.google.com/search", + "params": { + "q": "zulip" + } + }, + "response": "", + "response-headers": { + "status": 200, + "content-type": "text/html; charset=utf-8" + } +} diff --git a/zulip_bots/zulip_bots/bots_unmaintained/googlesearch/googlesearch.py b/zulip_bots/zulip_bots/bots/googlesearch/googlesearch.py similarity index 60% rename from zulip_bots/zulip_bots/bots_unmaintained/googlesearch/googlesearch.py rename to zulip_bots/zulip_bots/bots/googlesearch/googlesearch.py index e28d384..14ac74b 100644 --- a/zulip_bots/zulip_bots/bots_unmaintained/googlesearch/googlesearch.py +++ b/zulip_bots/zulip_bots/bots/googlesearch/googlesearch.py @@ -1,13 +1,42 @@ # See readme.md for instructions on running this code. 
def google_search(keywords):
    """Search Google for ``keywords`` and return the result links found.

    The results page is fetched with ``requests`` and parsed with
    BeautifulSoup using the ``lxml`` parser.

    :param keywords: The search query, sent as the ``q`` request parameter.
    :returns: A list of dicts, one per accepted result link, each with a
        ``'url'`` key (``https://www.google.com`` followed by the link's
        ``/url?...`` href) and a ``'name'`` key (the anchor text). The list
        is empty when the page has no ``id="search"`` element or no link
        passes the filters below.
    """
    query = {'q': keywords}
    page = requests.get('http://www.google.com/search', params=query)
    soup = BeautifulSoup(page.text, "lxml")

    # find() yields None when the element is absent (for example an empty
    # response body). Report that as "no results" instead of letting an
    # AttributeError escape from the chained call.
    container = soup.find(id='search')
    if container is None:
        return []

    results = []
    for a in container.find_all('a'):
        # Anchors without an href give None here and are skipped.
        link = a.get('href')
        # Only hrefs starting with '/url?' are search result links.
        if not link or not link.startswith('/url?'):
            continue
        title = a.text.strip()
        if not title:
            continue
        # Hide the 'Cached' companion link. The link itself must point at
        # Google's web cache; a bare string operand here would always be
        # truthy and would drop any result whose title is just "Cached".
        if title == 'Cached' and 'webcache.googleusercontent.com' in link:
            continue
        results.append({
            'url': "https://www.google.com{}".format(link),
            # a.text: the name of the page, exactly as it appears.
            'name': a.text,
        })
    return results
{}.'.format(e) - - try: - url = next(urls) + results = google_search(search_keywords) + if (len(results) == 0): + return "Found no results." + return "Found Result: [{}]({})".format(results[0]['name'], results[0]['url']) + except ConnectionError as c_err: + return "Error: Failed to connect. {}.".format(c_err) except AttributeError as a_err: # google.search query failed and urls is of object # 'NoneType' @@ -47,12 +74,8 @@ def get_google_result(search_keywords): logging.exception(t_err) return "Error: Google search function failed. {}.".format(t_err) except Exception as e: - logging.exception(e) return 'Error: Search failed. {}.'.format(e) - return 'Success: {}'.format(url) - - class GoogleSearchHandler(object): ''' This plugin allows users to enter a search @@ -78,16 +101,3 @@ class GoogleSearchHandler(object): bot_handler.send_reply(message, result) handler_class = GoogleSearchHandler - - -def test(): - try: - urlopen('http://216.58.192.142', timeout=1) - print('Success') - return True - except error.URLError as e: - print('Error: {}'.format(e)) - return False - -if __name__ == '__main__': - test() diff --git a/zulip_bots/zulip_bots/bots_unmaintained/googlesearch/logo.png b/zulip_bots/zulip_bots/bots/googlesearch/logo.png similarity index 100% rename from zulip_bots/zulip_bots/bots_unmaintained/googlesearch/logo.png rename to zulip_bots/zulip_bots/bots/googlesearch/logo.png diff --git a/zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py b/zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py new file mode 100644 index 0000000..680abfa --- /dev/null +++ b/zulip_bots/zulip_bots/bots/googlesearch/test_googlesearch.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import print_function + +import random + +from six.moves.urllib import error + +from zulip_bots.test_lib import BotTestCase + +class TestGoogleSearchBot(BotTestCase): + bot_name = 'googlesearch' + + # Simple query + def test_normal(self): + 
with self.mock_http_conversation('test_normal'): + self.assert_bot_response({'content': 'zulip'}, {'content': 'Found Result: [Zulip](https://www.google.com/url?url=https%3A%2F%2Fzulipchat.com%2F)'}, 'send_reply') + + # Help without typing anything + def test_bot_help_none(self): + help_message = "To use this bot, start messages with @mentioned-bot, \ + followed by what you want to search for. If \ + found, Zulip will return the first search result \ + on Google.\ + \ + An example message that could be sent is:\ + '@mentioned-bot zulip' or \ + '@mentioned-bot how to create a chatbot'." + self.assert_bot_response({'content': ''}, {'content': help_message}, 'send_reply') + + # Help from typing 'help' + def test_bot_help(self): + help_message = "To use this bot, start messages with @mentioned-bot, \ + followed by what you want to search for. If \ + found, Zulip will return the first search result \ + on Google.\ + \ + An example message that could be sent is:\ + '@mentioned-bot zulip' or \ + '@mentioned-bot how to create a chatbot'." + self.assert_bot_response({'content': 'help'}, {'content': help_message}, 'send_reply') + + def test_bot_no_results(self): + with self.mock_http_conversation('test_no_result'): + self.assert_bot_response({'content': 'no res'}, {'content': 'Found no results.'}, 'send_reply')