slack: Sanitize the message text and fix Travis test.

This commit is contained in:
Rhea Parekh 2017-10-06 01:21:04 +05:30 committed by showell
parent 7db4e36752
commit ce51d99c6a
2 changed files with 47 additions and 11 deletions

View file

@ -22,10 +22,12 @@ matrix:
- mv zulip_data.zip $HOME - mv zulip_data.zip $HOME
- cd - cd
- ls - ls
- git clone https://github.com/zulip/zulip.git - git clone --depth=50 https://github.com/zulip/zulip.git
- cd zulip - cd zulip
- ./tools/travis/setup-backend - ./tools/travis/setup-backend
- ./manage.py import ../zulip_data.zip - source tools/travis/activate-venv
- ./manage.py --help
- ./manage.py import --destroy-rebuild-database ../zulip_data.zip
addons: addons:
postgresql: "9.3" postgresql: "9.3"
install: install:

View file

@ -6,6 +6,7 @@ import sys
import argparse import argparse
import shutil import shutil
import subprocess import subprocess
import re
from typing import Any, Dict, List from typing import Any, Dict, List
# stubs # stubs
@ -153,9 +154,9 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
# construct the recipient object and append it zerver_recipient # construct the recipient object and append it zerver_recipient
recipient = dict( recipient = dict(
type_id=stream_id_count, type_id=stream_id_count,
id=stream_id_count, id=stream_id_count,
type=2) type=2)
zerver_recipient.append(recipient) zerver_recipient.append(recipient)
# TODO add recipients for private message and huddles # TODO add recipients for private message and huddles
@ -210,12 +211,41 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
def channelmessage2zerver_message(slack_dir, channel, added_users, added_channels): def channelmessage2zerver_message(slack_dir, channel, added_users, added_channels):
json_names = os.listdir(slack_dir + '/' + channel) json_names = os.listdir(slack_dir + '/' + channel)
users = json.load(open(slack_dir + '/users.json'))
zerver_message = [] zerver_message = []
msg_id_count = 1 msg_id_count = 1
# Sanitize the message text
def sanitize_text(text):
    """Return *text* with every space-separated word passed through sanitize_token.

    The message is split on single spaces, each piece is rewritten
    independently, and the pieces are re-joined with single spaces.
    """
    return ' '.join(map(sanitize_token, text.split(' ')))
def sanitize_token(token):
    """Convert Slack user mentions inside *token* to Zulip ``@**name**`` syntax.

    Slack writes mentions as ``<@USERID>`` or ``<@USERID|short_name>``.
    Each mention found in the token is replaced; any text attached to it
    (e.g. trailing punctuation) is kept. Tokens that contain no mention are
    returned unchanged, so ordinary words containing ``>`` or ``|`` are not
    mangled.

    The replacement name is looked up in ``users`` (the list loaded from
    users.json by the enclosing function): the user's ``real_name`` if
    present, otherwise ``name``. When a short name is given in the mention it
    must equal the user's ``name`` for the lookup to succeed. If no user
    matches, the raw Slack id is used as the name.
    """
    def mention_to_zulip(match):
        slack_id = match.group(1)
        short_name = match.group(2)  # None when the mention has no "|name" part
        for user in users:
            if user['id'] != slack_id:
                continue
            if short_name is None or user['name'] == short_name:
                return "@**" + user.get('real_name', user['name']) + "**"
        # Unknown id (or mismatching short name): fall back to the raw id.
        return "@**" + slack_id + "**"

    # Cheap guard: most words are not mentions, skip the regex for them.
    if '<@' not in token:
        return token
    # No trailing space is emitted; the caller re-joins tokens with spaces.
    return re.sub(r"<@([^|>\s]+)(?:\|([^>\s]*))?>", mention_to_zulip, token)
for json_name in json_names: for json_name in json_names:
msgs = json.load(open(slack_dir + '/%s/%s' % (channel, json_name))) msgs = json.load(open(slack_dir + '/%s/%s' % (channel, json_name)))
for msg in msgs: for msg in msgs:
text = msg['text'] text = msg['text']
if "has joined the channel" in text:
continue
try: try:
user = msg.get('user', msg['file']['user']) user = msg.get('user', msg['file']['user'])
except KeyError: except KeyError:
@ -228,10 +258,10 @@ def channelmessage2zerver_message(slack_dir, channel, added_users, added_channel
subject=channel, # This is Zulip-specific subject=channel, # This is Zulip-specific
pub_date=msg['ts'], pub_date=msg['ts'],
id=msg_id_count, id=msg_id_count,
has_attachment=False, # attachment will be posted in the subsequent message; this is how Slack does it, less like email has_attachment=False, # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
edit_history=None, edit_history=None,
sender=added_users[user], # map slack id to zulip id sender=added_users[user], # map slack id to zulip id
content=text, # TODO sanitize slack text, which contains <@msg['user']|short_name> content=sanitize_text(text),
rendered_content=text, # slack doesn't cache this rendered_content=text, # slack doesn't cache this
recipient=added_channels[channel], recipient=added_channels[channel],
last_edit_time=None, last_edit_time=None,
@ -293,7 +323,8 @@ def main(slack_zip_file: str) -> None:
realm['zerver_subscription'] = zerver_subscription realm['zerver_subscription'] = zerver_subscription
realm['zerver_recipient'] = zerver_recipient realm['zerver_recipient'] = zerver_recipient
# IO # IO
json.dump(realm, open(output_dir + '/realm.json', 'w')) realm_file = output_dir + '/realm.json'
json.dump(realm, open(realm_file, 'w'))
# now for message.json # now for message.json
message_json = {} message_json = {}
@ -304,7 +335,10 @@ def main(slack_zip_file: str) -> None:
added_users, added_channels)) added_users, added_channels))
message_json['zerver_message'] = zerver_message message_json['zerver_message'] = zerver_message
# IO # IO
json.dump(message_json, open(output_dir + '/message.json', 'w')) message_file = output_dir + '/message.json'
json.dump(message_json, open(message_file, 'w'))
print('ls', os.listdir())
print('pwd', os.getcwd())
# TODO # TODO
# attachments # attachments
@ -313,7 +347,7 @@ def main(slack_zip_file: str) -> None:
rm_tree(slack_dir) rm_tree(slack_dir)
# compress the folder # compress the folder
subprocess.check_call(['zip', '-r', output_dir + '.zip', output_dir]) subprocess.check_call(['zip', '-jpr', output_dir + '.zip', realm_file, message_file])
# remove zulip dir # remove zulip dir
rm_tree(output_dir) rm_tree(output_dir)