python-zulip-api/bots/summarize_stream.py

from __future__ import print_function
from typing import *
# This is hacky code to analyze data on our support stream.  The main
# reusable bits are get_recent_messages and get_words.

import zulip
import re
import collections

def get_recent_messages(client, narrow, count=100):
    """Fetch recent messages matching a narrow.

    Args:
        client: Object exposing ``do_api_query(request, url, method=...)``;
            ``generate_support_stats`` passes a ``zulip.Client``.
        narrow: Whitespace-separated ``operator:operand`` terms, for
            example ``'stream:support'``.
        count: Value sent as ``num_before``, i.e. how many messages before
            the anchor are requested.

    Returns:
        The value stored under ``'messages'`` in the response, or an empty
        list when the response has no such key.
    """
    # Split each term on its first colon only, so an operand that itself
    # contains a colon stays intact as [operator, operand].
    terms = [term.split(':', 1) for term in narrow.split()]
    request = {
        'narrow': terms,
        'num_before': count,
        'num_after': 0,
        # NOTE(review): presumably chosen to exceed any real message id, so
        # that with num_after=0 the newest messages come back -- confirm.
        'anchor': 1000000000,
        'apply_markdown': False,
    }
    response = client.do_api_query(request, zulip.API_VERSTRING + 'messages', method='GET')
    return response.get('messages', [])

# Alternatives are tried in order at each position:
#   1. a run of two or more capitals not followed by a lowercase letter
#      ("HTTP" in "HTTPServer"),
#   2. a capitalized word directly followed by another capital
#      ("Camel" in "CamelCase"),
#   3. any run of word characters, apostrophes and hyphens.
# Written as a raw string so the backslashes reach the regex engine without
# relying on Python passing unknown string escapes through unchanged.
_WORD_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|['\w\-]+")


def get_words(content):
    """Split text into lower-cased words.

    Camel-case identifiers and leading acronyms are broken into their
    parts; apostrophes and hyphens are kept inside a word.  No stemming is
    applied, so plural forms stay distinct from singular ones.

    Args:
        content: The text to tokenize.

    Returns:
        A list of lower-cased words in order of appearance (empty for
        text containing no word characters).
    """
    return [word.lower() for word in _WORD_RE.findall(content)]

# Matches "ticket Z" plus four more characters, later "email: " followed by
# a run of non-space characters, later "~~~" followed by the rest of the
# text.  re.S lets '.' cross newlines, so the parts may be on separate lines.
_TICKET_RE = re.compile(r'ticket (Z....).*email: (\S+).*~~~(.*)', re.S)


def analyze_messages(msgs, word_count, email_count, show_acks=False, dump_fields=False):
    """Tally request words and requester emails found in ticket messages.

    Messages whose content does not match the ticket pattern contribute
    nothing to the tallies.

    Args:
        msgs: Iterable of message dicts; each must have a ``'content'``
            string.
        word_count: Mapping updated in place with one increment per word
            of each matched request text.  It must yield 0 for unseen keys
            (e.g. ``collections.defaultdict(int)``).
        email_count: Mapping updated in place with one increment per
            matched message, keyed by the captured email; same
            requirement as ``word_count``.
        show_acks: Debug aid.  When true, print ``ACK <first name>`` for
            each message whose content contains ``' ack'``; this reads
            the message's ``'sender_full_name'``.
        dump_fields: Debug aid.  When true, print a blank line and then
            every key/value pair of each message.

    Returns:
        None.
    """
    for msg in msgs:
        content = msg['content']

        if show_acks and ' ack' in content:
            print('ACK', msg['sender_full_name'].split()[0])

        match = _TICKET_RE.search(content)
        if match:
            # Group 1 (the ticket id) is matched but not used in the tallies.
            _, email, request_text = match.groups()
            for word in get_words(request_text):
                word_count[word] += 1
            email_count[email] += 1

        if dump_fields:
            print()
            for key, value in msg.items():
                print('%-20s: %s' % (key, value))

def generate_support_stats(narrow='stream:support', count=2000, min_count=10,
                           min_length=5, show_emails=False):
    """Print word-frequency statistics for recent ticket messages.

    Creates a ``zulip.Client``, fetches messages with
    ``get_recent_messages``, tallies them with ``analyze_messages`` and
    prints ``<word> <count>`` lines, most frequent first.

    Args:
        narrow: Narrow string passed to ``get_recent_messages``.
        count: Number of messages to request.
        min_count: A word is printed only if it occurred at least this
            many times.
        min_length: A word is printed only if it has at least this many
            characters.
        show_emails: When true, also print ``<email> <count>`` lines for
            every captured email, most frequent first.

    Returns:
        None.
    """
    client = zulip.Client()
    msgs = get_recent_messages(client, narrow, count)

    word_count = collections.defaultdict(int) # type: Dict[str, int]
    email_count = collections.defaultdict(int) # type: Dict[str, int]
    analyze_messages(msgs, word_count, email_count)

    frequent_words = [w for w in word_count
                      if word_count[w] >= min_count and len(w) >= min_length]
    frequent_words.sort(key=lambda w: word_count[w], reverse=True)
    for word in frequent_words:
        print(word, word_count[word])

    if show_emails:
        for email in sorted(email_count, key=lambda e: email_count[e], reverse=True):
            print(email, email_count[email])

if __name__ == '__main__':
    # Run the report only when executed as a script, so that importing this
    # module for its helper functions does not create a client and query
    # the server as a side effect.
    generate_support_stats()
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`from __future__ import print_function`
Add PEP-484 annotations to bots/. 2016-02-05 14:27:19 -05:00			`from typing import *`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`# This is hacky code to analyze data on our support stream. The main`
			`# reusable bits are get_recent_messages and get_words.`

			`import zulip`
			`import re`
			`import collections`

			`def get_recent_messages(client, narrow, count=100):`
			`narrow = [word.split(':') for word in narrow.split()]`
			`req = {`
			`'narrow': narrow,`
			`'num_before': count,`
			`'num_after': 0,`
			`'anchor': 1000000000,`
			`'apply_markdown': False`
			`}`
			`old_messages = client.do_api_query(req, zulip.API_VERSTRING + 'messages', method='GET')`
			`if 'messages' not in old_messages:`
			`return []`
			`return old_messages['messages']`

			`def get_words(content):`
			`regex = "[A-Z]{2,}(?![a-z])\|[A-Z][a-z]+(?=[A-Z])\|[\'\w\-]+"`
			`words = re.findall(regex, content, re.M)`
			`words = [w.lower() for w in words]`
			`# words = [w.rstrip('s') for w in words]`
			`return words`

			`def analyze_messages(msgs, word_count, email_count):`
			`for msg in msgs:`
			`if False:`
			`if ' ack' in msg['content']:`
			`name = msg['sender_full_name'].split()[0]`
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`print('ACK', name)`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`m = re.search('ticket (Z....).email: (\S+).~~~(.*)', msg['content'], re.M \| re.S)`
			`if m:`
			`ticket, email, req = m.groups()`
			`words = get_words(req)`
			`for word in words:`
			`word_count[word] += 1`
			`email_count[email] += 1`
			`if False:`
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`print()`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`for k, v in msg.items():`
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`print('%-20s: %s' % (k, v))`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00
			`def generate_support_stats():`
			`client = zulip.Client()`
			`narrow = 'stream:support'`
			`count = 2000`
			`msgs = get_recent_messages(client, narrow, count)`
Add PEP-484 annotations to bots/. 2016-02-05 14:27:19 -05:00			`msgs_by_topic = collections.defaultdict(list) # type: Dict[str, List[Dict[str, Any]]]`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`for msg in msgs:`
			`topic = msg['subject']`
			`msgs_by_topic[topic].append(msg)`

Add PEP-484 annotations to bots/. 2016-02-05 14:27:19 -05:00			`word_count = collections.defaultdict(int) # type: Dict[str, int]`
			`email_count = collections.defaultdict(int) # type: Dict[str, int]`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00
			`if False:`
			`for topic in msgs_by_topic:`
			`msgs = msgs_by_topic[topic]`
			`analyze_messages(msgs, word_count, email_count)`

			`if True:`
python3: Fix usage of .keys()/.values() to handle iterators. This fixes the places where we use the result of .keys(), .items(), and .values() that wouldn't work with an iterator to wrap them with list(). 2016-01-24 19:27:18 -05:00			`words = [w for w in word_count.keys() if word_count[w] >= 10 and len(w) >= 5]`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`words = sorted(words, key=lambda w: word_count[w], reverse=True)`
			`for word in words:`
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`print(word, word_count[word])`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00
			`if False:`
python3: Fix usage of .keys()/.values() to handle iterators. This fixes the places where we use the result of .keys(), .items(), and .values() that wouldn't work with an iterator to wrap them with list(). 2016-01-24 19:27:18 -05:00			`emails = sorted(list(email_count.keys()),`
			`key=lambda w: email_count[w], reverse=True)`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00			`for email in emails:`
Apply Python 3 futurize transform libfuturize.fixes.fix_print_with_import. 2015-11-01 11:11:06 -05:00			`print(email, email_count[email])`
Add summarize_stream.py. (imported from commit 061b1ccc6649acb9a9fc40370282fa34c645afed) 2013-12-26 13:20:35 -05:00
			`generate_support_stats()`