Skip to content
Snippets Groups Projects
Verified Commit 2341380f authored by Karel Koci's avatar Karel Koci :metal:
Browse files

nsfarm/lxd: improve cleanup to remove stale containers

This improves cleanup to remove not only old images but also stale
containers.

There are two types of containers nsfarm spawns: bootstrap containers
and regular containers. Bootstrap containers are spawned with the
intention of creating an image, while regular containers are spawned
only to be removed later. The difference is that regular containers are
always tied to one specific process (the instance of nsfarm), whereas a
bootstrap container is intentionally global so that multiple instances
are serialized on the creation of a single image.
The cleanup algorithm can remove any regular container whose parent
process is no longer running. Bootstrap containers do not carry any info
about the parent process, so we use their age to remove only old ones.

Warning: This changes the template for regular container names. The
change is needed for reliable parsing of the PID from the container name.
With the old template the name was ambiguous when the image name itself
ended with a dash and a number.
parent 79af6d5f
Branches
Tags
1 merge request!28Draft: Improve cleanup to remove stale containers
......@@ -11,6 +11,16 @@ def parser(parser):
clean = subparsers.add_parser('clean', help='Remove old and unused containers')
clean.set_defaults(lxd_op='clean')
clean.add_argument(
'-i', '--images',
action='store_true',
help='Clean no longer used images.'
)
clean.add_argument(
'-c', '--containers',
action='store_true',
help='Clean no longer used containers.'
)
clean.add_argument(
'DELTA',
nargs='?',
......@@ -18,7 +28,7 @@ def parser(parser):
type=parse_deltatime,
help="""Time delta for how long image should not be used to be cleaned (removed). In default if not specified
'1w' is used. Format is expect to be a number with suffix. Supported suffixes are m(inute), h(our), d(ay) and
w(eek).
w(eek). This applies only to images thus it has no effect if used with --containers.
"""
)
clean.add_argument(
......@@ -101,7 +111,11 @@ def parse_deltatime(spec):
def op_clean(args, _):
"""Handler for command line operation clean
"""
removed = utils.clean(args.DELTA, dry_run=args.dry_run)
removed = []
if args.images or not args.containers:
removed += utils.clean_images(args.DELTA, dry_run=args.dry_run)
if args.containers or not args.images:
removed += utils.clean_containers(dry_run=args.dry_run)
if removed:
print('\n'.join(removed))
sys.exit(0)
......
......@@ -77,9 +77,9 @@ class Container:
name = f"{prefix}-{self._image.name}-{os.getpid()}"
if self._lxd.local.containers.exists(name):
i = 1
while self._lxd.local.containers.exists(f"{name}-{i}"):
while self._lxd.local.containers.exists(f"{name}.{i}"):
i += 1
name = f"{name}-{i}"
name = f"{name}.{i}"
return name
def cleanup(self):
......
......@@ -10,19 +10,21 @@ from .image import Image
logger = logging.getLogger(__package__)
BOOTSTRAP_LIMIT = dateutil.relativedelta.relativedelta(hours=1)
def clean(delta, dry_run=False):
def clean_images(delta: dateutil.relativedelta.relativedelta, dry_run: bool = False):
"""Remove all images that were not used for longer then given delta.
delta: this should be instance of datetime.relativedelta
dry_run: do not remove anything only report alias of those to be removed on stdout
dry_run: do not remove anything only return aliases of those to be removed
Returns list of (to be) removed images.
"""
connection = LXDConnection()
since = datetime.today() - delta
removed = list()
removed = []
for img in connection.local.images.all():
if not any(alias["name"].startswith("nsfarm/") for alias in img.aliases):
continue
......@@ -38,6 +40,51 @@ def clean(delta, dry_run=False):
return removed
def _delete_container(cont):
ephemeral = cont.ephemeral
if cont.status == "Running":
cont.stop(wait=True)
if not ephemeral:
cont.delete()
def clean_containers(dry_run=False):
    """Remove abandoned containers created by nsfarm.

    Only containers with name starting with "nsfarm-" are considered. Containers named "nsfarm-bootstrap-*" are
    removed when they are older than BOOTSTRAP_LIMIT. Any other container is removed when the process whose PID is
    encoded in its name no longer exists.

    dry_run: do not remove anything, only return list of containers names to be removed.

    Returns list of (to be) removed containers.
    """
    # Function-scope import so this function does not depend on what the module imports from datetime.
    from datetime import timezone

    connection = LXDConnection()
    # Timezone-aware "now" so it can be compared with the creation timestamp no matter what UTC offset that uses.
    since = datetime.now(timezone.utc) - BOOTSTRAP_LIMIT
    removed = []
    for cont in connection.local.instances.all():
        if not cont.name.startswith("nsfarm-"):
            continue
        if cont.name.startswith("nsfarm-bootstrap-"):
            # We can't simply identify owner of bootstrap container but we can set limit on how long bootstrap should
            # take at most and remove any older containers.
            # astimezone() converts instead of just dropping the offset. Timestamp without offset is presumed to be
            # in the local time.
            created_at = dateutil.parser.parse(cont.created_at).astimezone(timezone.utc)
            stale = created_at < since
        else:
            # Container have PID of process they are spawned by in the name. We can't safely remove any container
            # without being sure that the owner process is gone.
            stale = _container_owner_is_gone(cont.name)
        if not stale:
            continue
        removed.append(cont.name)
        if not dry_run:
            _delete_container(cont)
    return removed


def _container_owner_is_gone(name):
    """Tell if process that spawned regular container of given name no longer exists.

    name: container name in format "PREFIX-IMAGE-PID" optionally followed by ".N".

    Returns True only if PID was parsed from the name and there is no such process. False is returned if process
    exists or if owner can't be identified (such container is rather kept).
    """
    pid_text = name.rpartition("-")[2].partition(".")[0]
    try:
        pid = int(pid_text)
    except ValueError:
        # Name does not follow the template thus we do not know the owner.
        return False
    if pid <= 0:
        # Such values address process groups in os.kill and not a single process.
        return False
    try:
        os.kill(pid, 0)  # Signal 0 sends nothing, it only checks if process can be signaled.
    except ProcessLookupError:
        return True
    except PermissionError:
        # Process exists but it is owned by someone else.
        return False
    except OverflowError:
        # Number is too big to be passed as PID thus owner can't be identified.
        return False
    return False
def all_images():
"""Returns iterator over all known images for NSFarm.
......
......@@ -2,4 +2,5 @@ pytest>=5.0
pexpect
pyserial
pylxd
dateutil
selenium
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment