# This is hacky code to analyze data on our support stream.  The main
# reusable bits are get_recent_messages and get_words.
#
# NOTE: every print() call below takes exactly one argument, so the file
# behaves the same under Python 2's print statement and Python 3's print
# function.

import collections
import re

# Tokenizer used by get_words.  Alternatives, in priority order:
#   1. a run of two or more capitals not followed by a lowercase letter,
#   2. a capitalized word that is directly followed by another capital,
#   3. a run of word characters, apostrophes and hyphens.
_WORD_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|['\w\-]+")

# Captures (ticket id, requester email, request text) from a message body.
# DOTALL lets the three parts sit on different lines of the message.
_TICKET_RE = re.compile(r'ticket (Z....).*email: (\S+).*~~~(.*)', re.S)


def get_recent_messages(client, narrow, count=100):
    """Return up to `count` of the most recent messages matching `narrow`.

    Args:
        client: object exposing do_api_query(request, url, method=...);
            generate_support_stats passes a zulip.Client.
        narrow: space-separated "operator:operand" terms, for example
            'stream:support'.
        count: maximum number of messages to request.

    Returns:
        The list stored under 'messages' in the API response, or an empty
        list when the response has no such key.
    """
    # Imported here rather than at module level so that get_words and
    # analyze_messages stay importable without the zulip package.
    import zulip

    # Split on the first colon only, so an operand that itself contains a
    # colon stays in one piece.
    terms = [term.split(':', 1) for term in narrow.split()]
    req = {
        'narrow': terms,
        'num_before': count,
        'num_after': 0,
        # NOTE(review): presumably chosen to be larger than any real message
        # id, so the window ends at the newest message -- confirm.
        'anchor': 1000000000,
        'apply_markdown': False,
    }
    response = client.do_api_query(req, zulip.API_VERSTRING + 'messages',
                                   method='GET')
    if 'messages' not in response:
        return []
    return response['messages']


def get_words(content):
    """Split `content` into a list of lowercased word tokens."""
    return [word.lower() for word in _WORD_RE.findall(content)]


def analyze_messages(msgs, word_count, email_count,
                     show_acks=False, dump_fields=False):
    """Accumulate word and requester counts from support-ticket messages.

    Only messages whose 'content' matches the ticket pattern
    ("ticket Z.... / email: <addr> / ~~~ <request text>") contribute.

    Args:
        msgs: iterable of message dicts; each must have a 'content' key.
        word_count: mapping updated in place with one increment per word of
            the request text.  Missing keys must default to 0 (for example
            collections.defaultdict(int)).
        email_count: mapping updated in place with one increment per
            matching message, keyed by requester email.  Same default
            requirement as word_count.
        show_acks: debugging aid; print "ACK <first name>" for every message
            whose content contains ' ack' (needs 'sender_full_name').
        dump_fields: debugging aid; print every field of every message.
    """
    for msg in msgs:
        content = msg['content']

        if show_acks and ' ack' in content:
            first_name = msg['sender_full_name'].split()[0]
            print('ACK %s' % (first_name,))

        match = _TICKET_RE.search(content)
        if match:
            email, request_text = match.group(2, 3)
            for word in get_words(request_text):
                word_count[word] += 1
            email_count[email] += 1

        if dump_fields:
            print('')
            for key, value in msg.items():
                print('%-20s: %s' % (key, value))


def _keys_by_count(counts):
    """Return the keys of `counts` ordered from highest to lowest count."""
    return sorted(counts, key=lambda key: counts[key], reverse=True)


def generate_support_stats(narrow='stream:support', count=2000,
                           show_words=True, show_emails=False):
    """Fetch recent support messages and print frequency reports.

    Args:
        narrow: narrow string handed to get_recent_messages.
        count: number of recent messages to fetch.
        show_words: print "<word> <count>" for every word of at least five
            characters seen at least ten times, most frequent first.
        show_emails: print "<email> <count>" for every requester, most
            frequent first.
    """
    # Imported here for the same reason as in get_recent_messages.
    import zulip

    client = zulip.Client()
    msgs = get_recent_messages(client, narrow, count)

    word_count = collections.defaultdict(int)
    email_count = collections.defaultdict(int)
    # The reports below read these counters, so analysis always runs.
    analyze_messages(msgs, word_count, email_count)

    if show_words:
        frequent = [word for word in _keys_by_count(word_count)
                    if word_count[word] >= 10 and len(word) >= 5]
        for word in frequent:
            print('%s %s' % (word, word_count[word]))

    if show_emails:
        for email in _keys_by_count(email_count):
            print('%s %s' % (email, email_count[email]))


if __name__ == '__main__':
    generate_support_stats()