slack: Add attachment.json skeleton.
Map the attachments, but disable them for now.
This commit is contained in:
parent
b7ab8b4257
commit
cb1cafe194
|
@ -8,6 +8,8 @@ import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
# stubs
|
# stubs
|
||||||
user_profile_stub = Dict[str, Any]
|
user_profile_stub = Dict[str, Any]
|
||||||
|
@ -166,7 +168,10 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
|
||||||
zerver_stream.append(stream)
|
zerver_stream.append(stream)
|
||||||
added_channels[stream['name']] = stream_id_count
|
added_channels[stream['name']] = stream_id_count
|
||||||
|
|
||||||
# construct the recipient object and append it zerver_recipient
|
# construct the recipient object and append it to zerver_recipient
|
||||||
|
# type 1: private
|
||||||
|
# type 2: stream
|
||||||
|
# type 3: huddle
|
||||||
recipient = dict(
|
recipient = dict(
|
||||||
type_id=stream_id_count,
|
type_id=stream_id_count,
|
||||||
id=stream_id_count,
|
id=stream_id_count,
|
||||||
|
@ -224,14 +229,17 @@ def channels2zerver_stream(slack_dir, realm_id, added_users):
|
||||||
print('######### IMPORTING STREAMS FINISHED #########\n')
|
print('######### IMPORTING STREAMS FINISHED #########\n')
|
||||||
return zerver_defaultstream, zerver_stream, added_channels, zerver_subscription, zerver_recipient
|
return zerver_defaultstream, zerver_stream, added_channels, zerver_subscription, zerver_recipient
|
||||||
|
|
||||||
def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
|
def channelmessage2zerver_message_one_stream(constants, channel, added_users,
|
||||||
zerver_userprofile,
|
zerver_userprofile,
|
||||||
added_channels, msg_id_count,
|
added_channels, ids,
|
||||||
usermessage_id, zerver_subscription):
|
zerver_subscription):
|
||||||
|
slack_dir, REALM_ID, upload_dir = constants
|
||||||
|
message_id, usermessage_id, attachment_id = ids
|
||||||
json_names = os.listdir(slack_dir + '/' + channel)
|
json_names = os.listdir(slack_dir + '/' + channel)
|
||||||
users = json.load(open(slack_dir + '/users.json'))
|
users = json.load(open(slack_dir + '/users.json'))
|
||||||
zerver_message = []
|
zerver_message = []
|
||||||
zerver_usermessage = []
|
zerver_usermessage = []
|
||||||
|
zerver_attachment = []
|
||||||
|
|
||||||
# Sanitize the message text
|
# Sanitize the message text
|
||||||
def sanitize_text(text):
|
def sanitize_text(text):
|
||||||
|
@ -260,6 +268,7 @@ def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
|
||||||
return token
|
return token
|
||||||
|
|
||||||
# check if a user has been mentioned in a message
|
# check if a user has been mentioned in a message
|
||||||
|
# for zerver_usermessage
|
||||||
def check_user_mention(text):
|
def check_user_mention(text):
|
||||||
# Zulip's at mention
|
# Zulip's at mention
|
||||||
mentions = re.findall(r'(@(?:\*\*([^\*]+)\*\*|(\w+)))', text)
|
mentions = re.findall(r'(@(?:\*\*([^\*]+)\*\*|(\w+)))', text)
|
||||||
|
@ -270,12 +279,57 @@ def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
|
||||||
mentioned_users_id.append(userprofile['id'])
|
mentioned_users_id.append(userprofile['id'])
|
||||||
return mentioned_users_id
|
return mentioned_users_id
|
||||||
|
|
||||||
|
# check if the text contains a URL
def check_has_link(msg):
    """Return whether the message dict ``msg`` should be flagged as having a link.

    If ``msg`` carries an explicit 'has_link' entry, that value is returned
    unchanged.  Otherwise the message text is scanned for an ``http://`` or
    ``https://`` scheme.
    """
    if 'has_link' in msg:
        return msg['has_link']
    # TODO map msg['attachments']['from_url']
    # A missing or empty 'text' means there is nothing to scan; report
    # "no link" instead of raising KeyError/TypeError.
    text = msg.get('text') or ''
    return 'http://' in text or 'https://' in text
|
||||||
|
|
||||||
|
def parse_url(url):
    """Return ``url`` with every backslash-escaped slash (``\\/``) turned into ``/``."""
    # The backslash is escaped explicitly: a bare "\/" in a normal string
    # literal is an invalid escape sequence and triggers a warning on
    # current Python versions.  One replace also covers the "\/\/" case.
    return url.replace('\\/', '/')
|
||||||
|
|
||||||
|
def save_attachment(url, _id, name):
    """Download ``url`` and write it to ``<upload_dir>/<_id>/<name>``.

    ``upload_dir`` is read from the enclosing scope.  The per-attachment
    directory is created if it does not exist yet.
    """
    url = parse_url(url)
    response = requests.get(url, stream=True)
    try:
        # Use the _id argument here: the builtin id() would put every file
        # in a directory literally named "<built-in function id>".
        target_dir = upload_dir + '/' + str(_id)
        os.makedirs(target_dir, exist_ok=True)
        # NOTE(review): `name` is used unsanitized as a path component --
        # confirm it cannot contain path separators.
        with open(target_dir + '/' + name, 'wb') as output_file:
            shutil.copyfileobj(response.raw, output_file)
    finally:
        # Release the streamed connection even if writing the file fails.
        response.close()
|
||||||
|
|
||||||
for json_name in json_names:
|
for json_name in json_names:
|
||||||
msgs = json.load(open(slack_dir + '/%s/%s' % (channel, json_name)))
|
msgs = json.load(open(slack_dir + '/%s/%s' % (channel, json_name)))
|
||||||
for msg in msgs:
|
for msg in msgs:
|
||||||
text = msg['text']
|
text = msg['text']
|
||||||
if "has joined the channel" in text:
|
has_attachment = False
|
||||||
continue
|
|
||||||
|
if 'subtype' in msg.keys():
|
||||||
|
st = msg['subtype']
|
||||||
|
if st in ["channel_join", "channel_leave", "channel_name"]:
|
||||||
|
# Ignore noisy messages
|
||||||
|
continue
|
||||||
|
elif st == "file_share":
|
||||||
|
has_attachment = True
|
||||||
|
_file = msg['file']
|
||||||
|
slack_user_id = _file['user']
|
||||||
|
zulip_user_id = added_users[slack_user_id]
|
||||||
|
save_attachment(_file['url_private'], attachment_id, _file['name'])
|
||||||
|
path_id = "%d\/%d\/%s" % (REALM_ID, attachment_id, _file['name'])
|
||||||
|
# construct attachments object and append it to zerver_attachment
|
||||||
|
attachments = dict(
|
||||||
|
id=attachment_id,
|
||||||
|
is_realm_public=True, # TOODOO map for private messages and huddles, where is_realm_public = False
|
||||||
|
file_name=_file['name'],
|
||||||
|
create_time=_file['created'],
|
||||||
|
size=_file['size'],
|
||||||
|
path_id=path_id,
|
||||||
|
realm=REALM_ID,
|
||||||
|
owner=zulip_user_id,
|
||||||
|
messages=[message_id])
|
||||||
|
attachment_id += 1
|
||||||
|
zerver_attachment.append(attachments)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
user = msg.get('user', msg['file']['user'])
|
user = msg.get('user', msg['file']['user'])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -288,15 +342,15 @@ def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
|
||||||
has_image=msg.get('has_image', False),
|
has_image=msg.get('has_image', False),
|
||||||
subject=channel, # This is Zulip-specific
|
subject=channel, # This is Zulip-specific
|
||||||
pub_date=msg['ts'],
|
pub_date=msg['ts'],
|
||||||
id=msg_id_count,
|
id=message_id,
|
||||||
has_attachment=False, # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
|
has_attachment=has_attachment, # attachment will be posted in the subsequent message; this is how Slack does it, i.e. less like email
|
||||||
edit_history=None,
|
edit_history=None,
|
||||||
sender=added_users[user], # map slack id to zulip id
|
sender=added_users[user], # map slack id to zulip id
|
||||||
content=sanitize_text(text),
|
content=sanitize_text(text),
|
||||||
rendered_content=text, # slack doesn't cache this
|
rendered_content=text, # slack doesn't cache this
|
||||||
recipient=added_channels[channel],
|
recipient=added_channels[channel],
|
||||||
last_edit_time=None,
|
last_edit_time=None,
|
||||||
has_link=msg.get('has_link', False))
|
has_link=check_has_link(msg))
|
||||||
zerver_message.append(zulip_message)
|
zerver_message.append(zulip_message)
|
||||||
|
|
||||||
# construct usermessages
|
# construct usermessages
|
||||||
|
@ -315,8 +369,8 @@ def channelmessage2zerver_message_one_stream(slack_dir, channel, added_users,
|
||||||
usermessage_id += 1
|
usermessage_id += 1
|
||||||
zerver_usermessage.append(usermessage)
|
zerver_usermessage.append(usermessage)
|
||||||
|
|
||||||
msg_id_count += 1
|
message_id += 1
|
||||||
return zerver_message, zerver_usermessage
|
return zerver_message, zerver_usermessage, zerver_attachment
|
||||||
|
|
||||||
def main(slack_zip_file: str) -> None:
|
def main(slack_zip_file: str) -> None:
|
||||||
slack_dir = slack_zip_file.replace('.zip', '')
|
slack_dir = slack_zip_file.replace('.zip', '')
|
||||||
|
@ -343,7 +397,7 @@ def main(slack_zip_file: str) -> None:
|
||||||
# Make sure the directory output is clean
|
# Make sure the directory output is clean
|
||||||
output_dir = 'zulip_data'
|
output_dir = 'zulip_data'
|
||||||
rm_tree(output_dir)
|
rm_tree(output_dir)
|
||||||
os.makedirs(output_dir)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
realm = dict(zerver_client=[{"name": "populate_db", "id": 1},
|
realm = dict(zerver_client=[{"name": "populate_db", "id": 1},
|
||||||
{"name": "website", "id": 2},
|
{"name": "website", "id": 2},
|
||||||
|
@ -383,19 +437,22 @@ def main(slack_zip_file: str) -> None:
|
||||||
message_json = {}
|
message_json = {}
|
||||||
zerver_message = []
|
zerver_message = []
|
||||||
zerver_usermessage = []
|
zerver_usermessage = []
|
||||||
|
zerver_attachment = []
|
||||||
|
|
||||||
|
upload_dir = output_dir + '/uploads/' + str(REALM_ID)
|
||||||
|
constants = [slack_dir, REALM_ID, upload_dir]
|
||||||
for channel in added_channels.keys():
|
for channel in added_channels.keys():
|
||||||
msg_id_count = len(zerver_message) + 1 # For the id of the messages
|
message_id = len(zerver_message) + 1 # For the id of the messages
|
||||||
usermessage_id = len(zerver_usermessage) + 1
|
usermessage_id = len(zerver_usermessage) + 1
|
||||||
zm_one_stream, zum_one_stream = channelmessage2zerver_message_one_stream(slack_dir, channel,
|
attachment_id = len(zerver_attachment) + 1
|
||||||
added_users,
|
ids = [message_id, usermessage_id, attachment_id]
|
||||||
zerver_userprofile,
|
zm, zum, za = channelmessage2zerver_message_one_stream(constants, channel,
|
||||||
added_channels,
|
added_users, zerver_userprofile,
|
||||||
msg_id_count,
|
added_channels, ids,
|
||||||
usermessage_id,
|
zerver_subscription)
|
||||||
zerver_subscription)
|
zerver_message += zm
|
||||||
zerver_message += zm_one_stream
|
zerver_usermessage += zum
|
||||||
zerver_usermessage += zum_one_stream
|
zerver_attachment += za
|
||||||
# TOODOO add zerver_usermessage corresponding to the
|
# TOODOO add zerver_usermessage corresponding to the
|
||||||
# private messages and huddles type recipients
|
# private messages and huddles type recipients
|
||||||
|
|
||||||
|
@ -407,16 +464,19 @@ def main(slack_zip_file: str) -> None:
|
||||||
|
|
||||||
# IO avatar records
|
# IO avatar records
|
||||||
avatar_records_file = output_dir + '/avatars/records.json'
|
avatar_records_file = output_dir + '/avatars/records.json'
|
||||||
os.makedirs(output_dir + '/avatars')
|
os.makedirs(output_dir + '/avatars', exist_ok=True)
|
||||||
json.dump([], open(avatar_records_file, 'w'))
|
json.dump([], open(avatar_records_file, 'w'))
|
||||||
|
|
||||||
# IO uploads TODO
|
# IO uploads TODO
|
||||||
uploads_records_file = output_dir + '/uploads/records.json'
|
uploads_records_file = output_dir + '/uploads/records.json'
|
||||||
os.makedirs(output_dir + '/uploads')
|
os.makedirs(output_dir + '/uploads', exist_ok=True)
|
||||||
json.dump([], open(uploads_records_file, 'w'))
|
json.dump([], open(uploads_records_file, 'w'))
|
||||||
|
|
||||||
# TODO
|
# IO attachments
|
||||||
# attachments
|
attachment_file = output_dir + '/attachment.json'
|
||||||
|
# attachment = {"zerver_attachment": zerver_attachment}
|
||||||
|
attachment = {"zerver_attachment": []}
|
||||||
|
json.dump(attachment, open(attachment_file, 'w'))
|
||||||
|
|
||||||
print('ls', os.listdir())
|
print('ls', os.listdir())
|
||||||
print('pwd', os.getcwd())
|
print('pwd', os.getcwd())
|
||||||
|
|
Loading…
Reference in a new issue