zephyr_mirror_backend: Fix thread safety problems.

As of commit 5eaac7bfba (#18),
zulip.Client is not thread-safe and especially not fork-safe due to
connections held open by requests.Session.

Delay construction of the Client until after forking off
zulip_to_zephyr.  Replace the fork for each message sent by
zephyr_to_zulip with a threaded queue worker.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
This commit is contained in:
Anders Kaseorg 2022-08-22 19:19:30 -07:00 committed by Anders Kaseorg
parent 63c259b2bc
commit 56f805a5d7

View file

@ -13,18 +13,21 @@ import sys
import tempfile import tempfile
import textwrap import textwrap
import time import time
from queue import Queue
from threading import Thread
from types import FrameType from types import FrameType
from typing import IO, Any, Dict, List, NoReturn, Optional, Set, Tuple, Union from typing import IO, Any, Dict, List, NoReturn, Optional, Set, Tuple, Union
from typing_extensions import Literal, TypedDict from typing_extensions import Literal, TypedDict
import zulip
from zulip import RandomExponentialBackoff from zulip import RandomExponentialBackoff
DEFAULT_SITE = "https://api.zulip.com" DEFAULT_SITE = "https://api.zulip.com"
class States: class States:
Startup, ZulipToZephyr, ZephyrToZulip, ChildSending = list(range(4)) Startup, ZulipToZephyr, ZephyrToZulip = list(range(3))
CURRENT_STATE = States.Startup CURRENT_STATE = States.Startup
@ -32,6 +35,16 @@ CURRENT_STATE = States.Startup
logger: logging.Logger logger: logging.Logger
def make_zulip_client() -> zulip.Client:
return zulip.Client(
email=zulip_account_email,
api_key=api_key,
verbose=True,
client="zephyr_mirror",
site=options.site,
)
def to_zulip_username(zephyr_username: str) -> str: def to_zulip_username(zephyr_username: str) -> str:
if "@" in zephyr_username: if "@" in zephyr_username:
(user, realm) = zephyr_username.split("@") (user, realm) = zephyr_username.split("@")
@ -117,7 +130,7 @@ class ZephyrDict(TypedDict, total=False):
zsig: str zsig: str
def send_zulip(zeph: ZephyrDict) -> Dict[str, Any]: def send_zulip(zulip_client: zulip.Client, zeph: ZephyrDict) -> Dict[str, Any]:
message: Dict[str, Any] message: Dict[str, Any]
message = {} message = {}
if options.forward_class_messages: if options.forward_class_messages:
@ -151,7 +164,7 @@ def send_zulip(zeph: ZephyrDict) -> Dict[str, Any]:
return zulip_client.send_message(message) return zulip_client.send_message(message)
def send_error_zulip(error_msg: str) -> None: def send_error_zulip(zulip_client: zulip.Client, error_msg: str) -> None:
message = { message = {
"type": "private", "type": "private",
"sender": zulip_account_email, "sender": zulip_account_email,
@ -263,7 +276,7 @@ def maybe_restart_mirroring_script() -> None:
raise Exception("Failed to reload too many times, aborting!") raise Exception("Failed to reload too many times, aborting!")
def process_loop(log: Optional[IO[str]]) -> NoReturn: def process_loop(zulip_queue: "Queue[ZephyrDict]", log: Optional[IO[str]]) -> NoReturn:
restart_check_count = 0 restart_check_count = 0
last_check_time = time.time() last_check_time = time.time()
recieve_backoff = RandomExponentialBackoff() recieve_backoff = RandomExponentialBackoff()
@ -278,7 +291,7 @@ def process_loop(log: Optional[IO[str]]) -> NoReturn:
if notice is None: if notice is None:
break break
try: try:
process_notice(notice, log) process_notice(notice, zulip_queue, log)
process_backoff.succeed() process_backoff.succeed()
except Exception: except Exception:
logger.exception("Error relaying zephyr:") logger.exception("Error relaying zephyr:")
@ -395,7 +408,9 @@ def decrypt_zephyr(zephyr_class: str, instance: str, body: str) -> str:
return decrypted return decrypted
def process_notice(notice: "zephyr.ZNotice", log: Optional[IO[str]]) -> None: def process_notice(
notice: "zephyr.ZNotice", zulip_queue: "Queue[ZephyrDict]", log: Optional[IO[str]]
) -> None:
assert notice.sender is not None assert notice.sender is not None
(zsig, body) = parse_zephyr_body(notice.message, notice.format) (zsig, body) = parse_zephyr_body(notice.message, notice.format)
is_personal = False is_personal = False
@ -490,18 +505,19 @@ def process_notice(notice: "zephyr.ZNotice", log: Optional[IO[str]]) -> None:
log.write(json.dumps(zeph) + "\n") log.write(json.dumps(zeph) + "\n")
log.flush() log.flush()
if os.fork() == 0: zulip_queue.put(zeph)
global CURRENT_STATE
CURRENT_STATE = States.ChildSending
# Actually send the message in a child process, to avoid blocking. def send_zulip_worker(zulip_queue: "Queue[ZephyrDict]", zulip_client: zulip.Client) -> None:
while True:
zeph = zulip_queue.get()
try: try:
res = send_zulip(zeph) res = send_zulip(zulip_client, zeph)
if res.get("result") != "success": if res.get("result") != "success":
logger.error(f"Error relaying zephyr:\n{zeph}\n{res}") logger.error(f"Error relaying zephyr:\n{zeph}\n{res}")
except Exception: except Exception:
logger.exception("Error relaying zephyr:") logger.exception("Error relaying zephyr:")
finally: zulip_queue.task_done()
os._exit(0)
def quit_failed_initialization(message: str) -> str: def quit_failed_initialization(message: str) -> str:
@ -560,6 +576,8 @@ def zephyr_subscribe_autoretry(sub: Tuple[str, str, str]) -> None:
def zephyr_to_zulip(options: optparse.Values) -> None: def zephyr_to_zulip(options: optparse.Values) -> None:
zulip_client = make_zulip_client()
if options.use_sessions and os.path.exists(options.session_path): if options.use_sessions and os.path.exists(options.session_path):
logger.info("Loading old session") logger.info("Loading old session")
zephyr_load_session_autoretry(options.session_path) zephyr_load_session_autoretry(options.session_path)
@ -593,18 +611,22 @@ def zephyr_to_zulip(options: optparse.Values) -> None:
"sending saved message to %s from %s..." "sending saved message to %s from %s..."
% (zeph.get("stream", zeph.get("recipient")), zeph["sender"]) % (zeph.get("stream", zeph.get("recipient")), zeph["sender"])
) )
send_zulip(zeph) send_zulip(zulip_client, zeph)
except Exception: except Exception:
logger.exception("Could not send saved zephyr:") logger.exception("Could not send saved zephyr:")
time.sleep(2) time.sleep(2)
logger.info("Successfully initialized; Starting receive loop.") logger.info("Successfully initialized; Starting receive loop.")
# Actually send the messages in a thread, to avoid blocking.
zulip_queue: "Queue[ZephyrDict]" = Queue()
Thread(target=lambda: send_zulip_worker(zulip_queue, zulip_client)).start()
if options.resend_log_path is not None: if options.resend_log_path is not None:
with open(options.resend_log_path, "a") as log: with open(options.resend_log_path, "a") as log:
process_loop(log) process_loop(zulip_queue, log)
else: else:
process_loop(None) process_loop(zulip_queue, None)
def send_zephyr(zwrite_args: List[str], content: str) -> Tuple[int, str]: def send_zephyr(zwrite_args: List[str], content: str) -> Tuple[int, str]:
@ -675,7 +697,7 @@ def zcrypt_encrypt_content(zephyr_class: str, instance: str, content: str) -> Op
return encrypted return encrypted
def forward_to_zephyr(message: Dict[str, Any]) -> None: def forward_to_zephyr(message: Dict[str, Any], zulip_client: zulip.Client) -> None:
# 'Any' can be of any type of text # 'Any' can be of any type of text
support_heading = "Hi there! This is an automated message from Zulip." support_heading = "Hi there! This is an automated message from Zulip."
support_closing = """If you have any questions, please be in touch through the \ support_closing = """If you have any questions, please be in touch through the \
@ -749,6 +771,7 @@ Feedback button or at support@zulip.com."""
result = zcrypt_encrypt_content(zephyr_class, instance, wrapped_content) result = zcrypt_encrypt_content(zephyr_class, instance, wrapped_content)
if result is None: if result is None:
send_error_zulip( send_error_zulip(
zulip_client,
"""%s """%s
Your Zulip-Zephyr mirror bot was unable to forward that last message \ Your Zulip-Zephyr mirror bot was unable to forward that last message \
@ -758,7 +781,7 @@ key (perhaps because your AFS tokens expired). That means that while \
Zulip users (like you) received it, Zephyr users did not. Zulip users (like you) received it, Zephyr users did not.
%s""" %s"""
% (support_heading, support_closing) % (support_heading, support_closing),
) )
return return
@ -775,6 +798,7 @@ Zulip users (like you) received it, Zephyr users did not.
return return
elif code == 0: elif code == 0:
send_error_zulip( send_error_zulip(
zulip_client,
"""%s """%s
Your last message was successfully mirrored to zephyr, but zwrite \ Your last message was successfully mirrored to zephyr, but zwrite \
@ -783,7 +807,7 @@ returned the following warning:
%s %s
%s""" %s"""
% (support_heading, stderr, support_closing) % (support_heading, stderr, support_closing),
) )
return return
elif code != 0 and ( elif code != 0 and (
@ -797,6 +821,7 @@ returned the following warning:
if options.ignore_expired_tickets: if options.ignore_expired_tickets:
return return
send_error_zulip( send_error_zulip(
zulip_client,
"""%s """%s
Your last message was forwarded from Zulip to Zephyr unauthenticated, \ Your last message was forwarded from Zulip to Zephyr unauthenticated, \
@ -806,7 +831,7 @@ are running the Zulip-Zephyr mirroring bot, so we can send \
authenticated Zephyr messages for you again. authenticated Zephyr messages for you again.
%s""" %s"""
% (support_heading, support_closing) % (support_heading, support_closing),
) )
return return
@ -814,6 +839,7 @@ authenticated Zephyr messages for you again.
# probably because the recipient isn't subscribed to personals, # probably because the recipient isn't subscribed to personals,
# but regardless, we should just notify the user. # but regardless, we should just notify the user.
send_error_zulip( send_error_zulip(
zulip_client,
"""%s """%s
Your Zulip-Zephyr mirror bot was unable to forward that last message \ Your Zulip-Zephyr mirror bot was unable to forward that last message \
@ -823,12 +849,12 @@ received it, Zephyr users did not. The error message from zwrite was:
%s %s
%s""" %s"""
% (support_heading, stderr, support_closing) % (support_heading, stderr, support_closing),
) )
return return
def maybe_forward_to_zephyr(message: Dict[str, Any]) -> None: def maybe_forward_to_zephyr(message: Dict[str, Any], zulip_client: zulip.Client) -> None:
# The key string can be used to direct any type of text. # The key string can be used to direct any type of text.
if message["sender_email"] == zulip_account_email: if message["sender_email"] == zulip_account_email:
if not ( if not (
@ -851,7 +877,7 @@ def maybe_forward_to_zephyr(message: Dict[str, Any]) -> None:
) )
return return
try: try:
forward_to_zephyr(message) forward_to_zephyr(message, zulip_client)
except Exception: except Exception:
# Don't let an exception forwarding one message crash the # Don't let an exception forwarding one message crash the
# whole process # whole process
@ -859,12 +885,16 @@ def maybe_forward_to_zephyr(message: Dict[str, Any]) -> None:
def zulip_to_zephyr(options: optparse.Values) -> NoReturn: def zulip_to_zephyr(options: optparse.Values) -> NoReturn:
zulip_client = make_zulip_client()
# Sync messages from zulip to zephyr # Sync messages from zulip to zephyr
logger.info("Starting syncing messages.") logger.info("Starting syncing messages.")
backoff = RandomExponentialBackoff(timeout_success_equivalent=120) backoff = RandomExponentialBackoff(timeout_success_equivalent=120)
while True: while True:
try: try:
zulip_client.call_on_each_message(maybe_forward_to_zephyr) zulip_client.call_on_each_message(
lambda message: maybe_forward_to_zephyr(message, zulip_client)
)
except Exception: except Exception:
logger.exception("Error syncing messages:") logger.exception("Error syncing messages:")
backoff.fail() backoff.fail()
@ -886,6 +916,8 @@ def subscribed_to_mail_messages() -> bool:
def add_zulip_subscriptions(verbose: bool) -> None: def add_zulip_subscriptions(verbose: bool) -> None:
zulip_client = make_zulip_client()
zephyr_subscriptions = set() zephyr_subscriptions = set()
skipped = set() skipped = set()
for (cls, instance, recipient) in parse_zephyr_subs(verbose=verbose): for (cls, instance, recipient) in parse_zephyr_subs(verbose=verbose):
@ -1146,7 +1178,7 @@ def parse_args() -> Tuple[optparse.Values, List[str]]:
def die_gracefully(signal: int, frame: FrameType) -> None: def die_gracefully(signal: int, frame: FrameType) -> None:
if CURRENT_STATE == States.ZulipToZephyr or CURRENT_STATE == States.ChildSending: if CURRENT_STATE == States.ZulipToZephyr:
# this is a child process, so we want os._exit (no clean-up necessary) # this is a child process, so we want os._exit (no clean-up necessary)
os._exit(1) os._exit(1)
@ -1207,15 +1239,6 @@ or specify the --api-key-file option."""
sys.exit(1) sys.exit(1)
zulip_account_email = options.user + "@mit.edu" zulip_account_email = options.user + "@mit.edu"
import zulip
zulip_client = zulip.Client(
email=zulip_account_email,
api_key=api_key,
verbose=True,
client="zephyr_mirror",
site=options.site,
)
start_time = time.time() start_time = time.time()