Skip to content
This repository has been archived by the owner on Aug 22, 2022. It is now read-only.

Commit

Permalink
Implement kill_zombies periodic task, see opencraft/settings.py for c…
Browse files Browse the repository at this point in the history
…onfs (#721)

Co-authored-by: Daniel Francis <daniel.francis@opencraft.com>
  • Loading branch information
spokerman12 and Daniel Francis committed Mar 8, 2021
1 parent c39faec commit 6094088
Show file tree
Hide file tree
Showing 4 changed files with 305 additions and 4 deletions.
8 changes: 5 additions & 3 deletions instance/management/commands/kill_zombies.py
Expand Up @@ -71,16 +71,18 @@ def handle(self, *args, **options):

if not nova_servers:
self.log('No servers found in region {}.'.format(self.region))
return
return None
self.log('Found {} unterminated servers in region {}.'.format(len(nova_servers), self.region))

# Scan each server for the zombieness.
death_count = sum(1 for srv in nova_servers if self.not_zombie_or_die(srv))

if self.dry_run:
self.log("Would have terminated {} zombies if this weren't a dry run.".format(death_count))
result = "Would have terminated {} zombies if this weren't a dry run.".format(death_count)
else:
self.log("Terminated {} zombies.".format(death_count))
result = "Terminated {} zombies.".format(death_count)
self.log(result)
return result

def not_zombie_or_die(self, nova_server):
"""
Expand Down
119 changes: 118 additions & 1 deletion instance/tasks.py
Expand Up @@ -23,11 +23,13 @@
# Imports #####################################################################

from datetime import datetime
import logging
from typing import Optional, List
import logging

from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.management import call_command, CommandError
from django.core.mail import EmailMessage
from django.db import connection
from django.db.models import Q
from django.utils import timezone
Expand All @@ -47,6 +49,9 @@

logger = logging.getLogger(__name__)

# Constants ##################################################################

KILL_ZOMBIES_CRON_SCHEDULE = settings.KILL_ZOMBIES_SCHEDULE.split()

# Tasks #######################################################################

Expand Down Expand Up @@ -339,3 +344,115 @@ def cleanup_old_betatest_users():
users_to_delete = queryset.filter(is_active=False, profile__modified__lte=delete_cutoff)
logger.info('Deleting %s users.', users_to_delete.count())
users_to_delete.delete()


class KillZombiesRunner:
"""
Helper class to run `kill_zombies_periodically`
"""

def __init__(self):
self.region: str = settings.OPENSTACK_REGION
self.recipient: list = [email for _, email in settings.ADMINS]
self.threshold: int = settings.KILL_ZOMBIES_WARNING_THRESHOLD

def send_email(self, subject: str, body: str) -> int:
"""
Utility method for sending emails
"""
if self.recipient:
email = EmailMessage(
subject, body, settings.DEFAULT_FROM_EMAIL, self.recipient
)
email.send()
else:
logging.info("Email recipient is undefined. Skipping email.")

def trigger_warning(self, num_zombies):
"""
Warns when more than `threshold` VMs will be terminated
"""
subject = "Zombie instances are over the current threshold ({})".format(
self.threshold
)
body = (
"The number of zombie OpenStack VMs ({}) in the {} region "
"is over the KILL_ZOMBIES_WARNING_THRESHOLD ({}).\n"
"These instances will be terminated using the `kill_zombies` command."
).format(num_zombies, self.region, self.threshold)
logging.info(
"%s\nSending an email notification to: %s",
body, self.recipient
)
self.send_email(subject, body)

def get_zombie_servers_count(self, stdout: str) -> int:
"""
If there are servers to terminate, a `kill_zombies` dry run will warn:
"Would have terminated {} zombies" to stdout.
"""
num_instances = 0
if "No servers found in region" in stdout:
return num_instances
try:
num_instances = int(stdout.split("Would have terminated")[1].split()[0])
except (IndexError, ValueError):
logger.info("Received unexpected input from dry run. Defaulting to zero")
return num_instances

def run(self):
"""
Main method for running the task
"""
try:
dry_run_output, output = "", ""
dry_run_output = call_command(
"kill_zombies",
region=self.region,
dry_run=True,
)
num_of_zombies = self.get_zombie_servers_count(dry_run_output)
if num_of_zombies > self.threshold:
self.trigger_warning(num_of_zombies)
if num_of_zombies != 0:
output = call_command("kill_zombies", region=self.region)
logging.info(
"Task `kill_zombies_periodically` ran successfully "
"and terminated %s zombies", num_of_zombies
)
else:
logging.info(
"Found zero zombies to terminate. "
"Task `kill_zombies_periodically` ran successfully."
)
except CommandError:
logger.error(
"Task `kill_zombies` command failed. Sending notification email"
)
subject = "Terminate zombie instances command failed"
body = (
"Scheduled execution of `kill_zombies` command failed. "
"Log entries are displayed below:"
"%s\n%s"
) % (dry_run_output, output)
self.send_email(subject, body)
return False
return True


if settings.KILL_ZOMBIES_ENABLED:
@db_periodic_task(crontab(
minute=KILL_ZOMBIES_CRON_SCHEDULE[0],
hour=KILL_ZOMBIES_CRON_SCHEDULE[1],
day=KILL_ZOMBIES_CRON_SCHEDULE[2],
month=KILL_ZOMBIES_CRON_SCHEDULE[3],
day_of_week=KILL_ZOMBIES_CRON_SCHEDULE[4],
))
def kill_zombies_task():
"""
A wrapper for `kill_zombies_periodically` so it only
exists when KILL_ZOMBIES_ENABLED is true
"""
task_runner = KillZombiesRunner()
logging.info("Executing periodic task `kill_zombies_periodically`")
task_runner.run()
176 changes: 176 additions & 0 deletions instance/tests/management/test_kill_zombies.py
@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-
#
# OpenCraft -- tools to aid developing and hosting free software projects
# Copyright (C) 2015-2019 OpenCraft <contact@opencraft.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
Tests for the 'kill_zombies' management command.
"""
# Imports #####################################################################

from unittest.mock import patch, call
from django.core import mail
from django.core.management import CommandError
from django.test import TestCase, override_settings
from instance.tasks import KillZombiesRunner

# Tests #######################################################################

# pylint: disable=bad-continuation


@override_settings(
KILL_ZOMBIES_ENABLED=True,
ADMINS=(
("admin1", "admin1@localhost"),
("admin2", "admin2@localhost"),
),
OPENSTACK_REGION="some-region"
)
class KillZombiesPeriodicallyTestCase(TestCase):
"""
Test cases for the `kill_zombies_task` periodic task.
"""
@override_settings(KILL_ZOMBIES_ENABLED=False)
@patch("instance.tasks.call_command")
def test_toggle_task_false(self, mock_command):
"""
Tests that the task can't be run if disabled
through env variables
"""
with self.assertRaises(ImportError):
from instance.tasks import kill_zombies_task
kill_zombies_task()
self.assertFalse(mock_command.called)

@patch("instance.tasks.call_command")
def test_set_env_variables(self, mock_command):
"""
Tests that the task passes env variables to
the kill_zombies command call
"""
test_runner = KillZombiesRunner()
test_runner.run()
first_call = call.mock_command("kill_zombies", region="some-region", dry_run=True)
self.assertEqual(mock_command.mock_calls[0], first_call)
mock_command.assert_called_with("kill_zombies", region="some-region")

@patch.object(KillZombiesRunner, "trigger_warning")
def test_get_zombie_servers_count(self, mock_trigger):
"""
Tests that KillZombiesRunner.get_zombie_servers_count
works as expected
"""
test_runner = KillZombiesRunner()
no_instances = """
2021-02-26 14:16:12 | Starting kill_zombies
2021-02-26 14:16:12 | No servers found in region some-region.
"""
some_instances = """
2021-02-26 14:16:12 | Starting kill_zombies
2021-02-26 14:16:12 | Found 32 unterminated servers in region some-region.
2021-02-26 14:16:12 | Would have terminated 32 zombies if this weren't a dry run.
"""
bad_output = "qwerty"
self.assertEqual(test_runner.get_zombie_servers_count(no_instances), 0)
self.assertEqual(test_runner.get_zombie_servers_count(some_instances), 32)
self.assertEqual(test_runner.get_zombie_servers_count(bad_output), 0)
self.assertEqual(test_runner.get_zombie_servers_count(""), 0)

@override_settings(KILL_ZOMBIES_WARNING_THRESHOLD=3)
@patch.object(KillZombiesRunner, "get_zombie_servers_count", return_value=4)
@patch.object(KillZombiesRunner, "trigger_warning")
@patch("instance.tasks.call_command")
def test_trigger_warning_if_over_threshold(
self,
mock_get_zombie_servers_count,
mock_trigger_warning,
mock_command
):
"""
Tests that trigger_warning is called when the number
of zombies to delete is over the threshold
"""
test_runner = KillZombiesRunner()
test_runner.run()
self.assertTrue(mock_get_zombie_servers_count.called)
self.assertTrue(mock_trigger_warning.called)

@patch.object(KillZombiesRunner, "get_zombie_servers_count", return_value=0)
@patch("instance.tasks.call_command")
def test_do_nothing_if_zero(
self,
mock_get_zombie_servers_count,
mock_command
):
"""
Tests that only a dry_run is executed when
the number of zombies to terminate is zero.
"""
test_runner = KillZombiesRunner()
test_runner.run()
mock_get_zombie_servers_count.assert_called_with(
"kill_zombies",
region="some-region",
dry_run=True
)

@override_settings(
DEFAULT_FROM_EMAIL="from@site.com",
ADMINS=(("admin3", "admin3@localhost"), ("admin4", "admin4@localhost"))
)
@patch("instance.tasks.call_command")
def test_send_warning_email(self, mock_command):
"""
Tests that trigger_warning sends an email when called
"""
test_runner = KillZombiesRunner()
test_runner.trigger_warning(5)
self.assertEqual(len(mail.outbox), 1)
self.assertIn(
"is over the KILL_ZOMBIES_WARNING_THRESHOLD",
mail.outbox[0].body
)
self.assertEqual(mail.outbox[0].from_email, "from@site.com")
self.assertEqual(mail.outbox[0].to, ["admin3@localhost", "admin4@localhost"])

@override_settings(
DEFAULT_FROM_EMAIL="from@site.com",
ADMINS=(("admin3", "admin3@localhost"),)
)
@patch.object(KillZombiesRunner, "get_zombie_servers_count", return_value=4)
@patch.object(KillZombiesRunner, "trigger_warning")
@patch("instance.tasks.call_command")
def test_send_email_on_failure(
self,
mock_get_zombie_servers_count,
mock_trigger_warning,
mock_command
):
"""
When call_command fails, it raises a CommandError.
Tests that an email is sent when CommandError is raised
"""
mock_command.side_effect = CommandError()
test_runner = KillZombiesRunner()
test_runner.run()
self.assertEqual(len(mail.outbox), 1)
self.assertIn(
"Scheduled execution of `kill_zombies` command failed",
mail.outbox[0].body
)
self.assertEqual(mail.outbox[0].from_email, "from@site.com")
self.assertEqual(mail.outbox[0].to, ["admin3@localhost"])
6 changes: 6 additions & 0 deletions opencraft/settings.py
Expand Up @@ -910,6 +910,12 @@
# Format is '<minute> <hour> <day> <month> <day_of_week>' like normal crontabs
TRIAL_INSTANCES_REPORT_SCHEDULE = env('TRIAL_INSTANCES_REPORT_SCHEDULE', default='0 2 1 * *')

# Kill Zombies Periodic task #################################################

KILL_ZOMBIES_ENABLED = env("KILL_ZOMBIES_ENABLED", default=False)
KILL_ZOMBIES_SCHEDULE = env("KILL_ZOMBIES_SCHEDULE", default="0 0 * * *")
KILL_ZOMBIES_WARNING_THRESHOLD = env("KILL_ZOMBIES_WARNING_THRESHOLD", default=10)

# User inactive and delete ####################################################

CLEANUP_OLD_BETATEST_USERS = env('CLEANUP_OLD_BETATEST_USERS', default=True)
Expand Down

0 comments on commit 6094088

Please sign in to comment.