From 25bb86d2d0fdb428012d006d95265ad04d04a8aa Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Fri, 21 Dec 2012 11:26:36 -0500 Subject: [PATCH] zephyr_mirror: Fix killing duplicate zephyr_mirror processes. The refactoring that we did a couple weeks ago to make the zephyr mirror script restart itself automatically (by splitting it into zephyr_mirror.py and zephyr_mirror_backend.py) had a poor interaction with our code for killing old zephyr_mirror processes (to prevent double-mirroring). If you manually ran two copies of the outer mirroring script (zephyr_mirror.py), then the second one would on startup kill the first one's zephyr_mirror_backend.py children (for being duplicate zephyr_mirror_backend.py processes that would result in double-mirroring). However, importantly, it did not kill the first mirroring script's zephyr_mirror.py parent process, so the first mirroring script would then proceed to start up new children. The process then repeats, with the two scripts' roles reversed. This issue didn't affect the sharded mirroring case, where I had been doing the testing of the kill code with that refactoring, because we don't have a version of the outer zephyr_mirror.py loop for that situation (a consequence of it being hard to restart the threads properly with the run_parallel API that we're using to spawn all the children). 
(imported from commit d4886ac77312a6b0ebd0d612f6fb084970bf23a2) --- bots/zephyr_mirror_backend.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/bots/zephyr_mirror_backend.py b/bots/zephyr_mirror_backend.py index b7d65ac..58e66e8 100755 --- a/bots/zephyr_mirror_backend.py +++ b/bots/zephyr_mirror_backend.py @@ -831,25 +831,26 @@ or specify the --api-key-file option.""" % (options.api_key_file,))) add_humbug_subscriptions(True) sys.exit(0) - # First check that there are no other bots running - bot_name = "zephyr_mirror_backend.py" + # Kill all zephyr_mirror processes other than this one and its parent. if not options.test_mode: - pgrep_query = bot_name + pgrep_query = "/usr/bin/python.*zephyr_mirror" if options.shard is not None: - pgrep_query = "%s.*--shard=%s" % (bot_name, options.shard) + pgrep_query = "%s.*--shard=%s" % (pgrep_query, options.shard) proc = subprocess.Popen(['pgrep', '-U', os.environ["USER"], "-f", pgrep_query], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, _err_unused = proc.communicate() - for pid in out.split(): - if int(pid.strip()) != os.getpid(): - # Another copy of zephyr_mirror.py! Kill it. - print "Killing duplicate zephyr_mirror process %s" % (pid,) - try: - os.kill(int(pid), signal.SIGKILL) - except OSError: - # We don't care if the child process no longer exists, so just print the error - traceback.print_exc() + for pid in map(int, out.split()): + if pid == os.getpid() or pid == os.getppid(): + continue + + # Another copy of zephyr_mirror.py! Kill it. + print "Killing duplicate zephyr_mirror process %s" % (pid,) + try: + os.kill(pid, signal.SIGKILL) + except OSError: + # We don't care if the target process no longer exists, so just print the error + traceback.print_exc() if options.shard is not None and set(options.shard) != set("a"): # The shard that is all "a"s is the one that handles personals