2015-11-01 11:11:06 -05:00
|
|
|
from __future__ import print_function
|
2016-02-05 14:27:19 -05:00
|
|
|
from typing import *
|
2013-12-26 13:20:35 -05:00
|
|
|
# This is hacky code to analyze data on our support stream. The main
|
|
|
|
# reusable bits are get_recent_messages and get_words.
|
|
|
|
|
|
|
|
import zulip
|
|
|
|
import re
|
|
|
|
import collections
|
|
|
|
|
|
|
|
def get_recent_messages(client, narrow, count=100):
    """Fetch the most recent messages matching a narrow.

    Args:
        client: Object exposing ``do_api_query(request, url, method=...)``.
        narrow: Whitespace-separated ``operator:operand`` terms, for
            example ``'stream:support'``.
        count: Value sent as ``num_before``, i.e. how many messages to
            request before the anchor.

    Returns:
        The value stored under ``'messages'`` in the response, or an empty
        list when the response has no such key.
    """
    # Split on the first colon only, so an operand that itself contains a
    # colon is kept in one piece instead of producing a three-element term.
    terms = [term.split(':', 1) for term in narrow.split()]
    request = {
        'narrow': terms,
        'num_before': count,
        'num_after': 0,
        # NOTE(review): presumably larger than any real message id, so the
        # window ends at the newest message -- confirm against the API.
        'anchor': 1000000000,
        'apply_markdown': False,
    }
    response = client.do_api_query(request, zulip.API_VERSTRING + 'messages', method='GET')
    return response.get('messages', [])
|
|
|
|
|
|
|
|
# Tokenizer pattern, tried left to right at each position:
#   1. a run of two or more capitals not followed by a lowercase letter,
#   2. a capitalized word immediately followed by another capital
#      (splits CamelCase),
#   3. any run of word characters, apostrophes and hyphens.
# Written as a raw string so the regex escapes are not treated as (invalid)
# string escapes.
_WORD_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|['\w\-]+")


def get_words(content):
    """Split text into lower-cased word tokens.

    Args:
        content: The text to tokenize.

    Returns:
        A list of the tokens found in ``content``, in order of appearance,
        each converted to lower case. Empty when nothing matches.
    """
    return [token.lower() for token in _WORD_RE.findall(content)]
|
|
|
|
|
|
|
|
# Matches a message containing "ticket Z....", then "email: <address>", then
# a "~~~" marker; the groups are the ticket id, the address and everything
# after the marker. DOTALL lets ".*" run across line breaks.
_TICKET_RE = re.compile(r'ticket (Z....).*email: (\S+).*~~~(.*)', re.S)


def analyze_messages(msgs, word_count, email_count, show_acks=False, dump_messages=False):
    """Accumulate word and e-mail frequencies from ticket messages.

    Args:
        msgs: Iterable of message dicts; each must have a ``'content'`` key.
        word_count: Mapping updated in place with ``word_count[word] += 1``
            for every word in the text after the ``~~~`` marker. Must
            supply a default for missing keys (e.g. ``defaultdict(int)``).
        email_count: Mapping updated in place the same way, once per
            matching message, keyed by the captured e-mail address.
        show_acks: When true, print ``ACK <first name>`` for each message
            whose content contains ``' ack'`` (reads ``'sender_full_name'``).
        dump_messages: When true, print every key/value pair of each message.

    Returns:
        None. Results are delivered through the two mappings.
    """
    for msg in msgs:
        content = msg['content']

        if show_acks and ' ack' in content:
            print('ACK', msg['sender_full_name'].split()[0])

        match = _TICKET_RE.search(content)
        if match:
            # The ticket id (group 1) is captured by the pattern but not used.
            email, request_text = match.group(2, 3)
            for word in get_words(request_text):
                word_count[word] += 1
            email_count[email] += 1

        if dump_messages:
            print()
            for key, value in msg.items():
                print('%-20s: %s' % (key, value))
|
2013-12-26 13:20:35 -05:00
|
|
|
|
|
|
|
def generate_support_stats(narrow='stream:support', count=2000, min_count=10,
                           min_length=5, show_emails=False):
    """Print word-frequency statistics for recent messages in a narrow.

    Args:
        narrow: Narrow string handed to ``get_recent_messages``.
        count: How many recent messages to request.
        min_count: A word is reported only if it was seen at least this
            many times.
        min_length: A word is reported only if it has at least this many
            characters.
        show_emails: When true, also print each captured e-mail address
            with its message count, most frequent first.

    Returns:
        None. The report is written to standard output.
    """
    client = zulip.Client()
    msgs = get_recent_messages(client, narrow, count)

    word_count = collections.defaultdict(int)  # type: Dict[str, int]
    email_count = collections.defaultdict(int)  # type: Dict[str, int]

    # NOTE(review): assumes the analysis is meant to run unconditionally over
    # every fetched message -- confirm against the intended behaviour.
    analyze_messages(msgs, word_count, email_count)

    frequent_words = [
        w for w in word_count
        if word_count[w] >= min_count and len(w) >= min_length
    ]
    # sorted() is stable, so words with equal counts keep first-seen order.
    for word in sorted(frequent_words, key=word_count.get, reverse=True):
        print(word, word_count[word])

    if show_emails:
        for email in sorted(email_count, key=email_count.get, reverse=True):
            print(email, email_count[email])
|
2013-12-26 13:20:35 -05:00
|
|
|
|
|
|
|
# Run the report with its default settings. NOTE(review): there is no
# __main__ guard, so importing this file also creates a client and queries
# the API.
generate_support_stats()
|