Commit eb7c6380 authored by Swann Perarnau

[refactor] extract out basic container management

Refactor the container creation code to isolate the building/retrieving
of a container namedtuple out of the core container creation code.

This considerably simplifies the different branches of this code, and
leaves the core create method almost entirely dedicated to launching
a command.
parent c7bdd5b2
Pipeline #5089 failed with stages
in 5 minutes and 17 seconds
......@@ -32,17 +32,59 @@ class ContainerManager(object):
self.chrt = ChrtClient()
self.pmpi_lib = pmpi_lib
def _get_container_tuple(self, container_name, manifest):
    """Fetch the container tuple for a name, building one when missing.

    When `container_name` is already a key of `self.containers`, the
    stored tuple is handed back untouched. Otherwise the manifest is used
    to request resources from the resource manager and to fill in the
    power settings and hardware bindings of a fresh `Container` tuple.
    The new tuple is NOT stored in `self.containers` by this method.

    Returns a pair `(creation_needed, container)`; `creation_needed` is
    True only when a new tuple had to be built.
    """
    already_known = container_name in self.containers
    if already_known:
        return (False, self.containers[container_name])

    # Request an allocation sized by the manifest's cpus/mems values.
    isolator = manifest.app.isolators.container
    ncpus = int(isolator.cpus.value)
    nmems = int(isolator.mems.value)
    allocation = self.resourcemanager.schedule(container_name,
                                               resources(ncpus, nmems))
    logger.info("create: allocation: %r", allocation)
    allocated_cpus, allocated_mems = allocation
    resource_map = {'cpus': allocated_cpus, 'mems': allocated_mems}

    # Every power setting defaults to None and is only overridden when
    # the manifest enables the 'power' feature.
    power_map = dict.fromkeys(
        ('profile', 'policy', 'damper', 'slowdown', 'manager'))
    if manifest.is_feature_enabled('power'):
        power_spec = manifest.app.isolators.power
        if power_spec.profile in ("1", "True"):
            power_map['profile'] = {'start': {}, 'end': {}}
        if power_spec.policy != "NONE":
            power_map.update(policy=power_spec.policy,
                             damper=power_spec.damper,
                             slowdown=power_spec.slowdown)

    # Hardware bindings stay empty unless the 'hwbind' feature is on;
    # the distribution is ordered by each entry's `cpus` attribute.
    binding_map = {}
    if manifest.is_feature_enabled('hwbind'):
        distribution = self.hwloc.distrib(ncpus, allocation)
        binding_map['distrib'] = sorted(distribution,
                                        key=operator.attrgetter('cpus'))

    new_container = Container(container_name, manifest, resource_map,
                              power_map, {}, {}, binding_map)
    return (True, new_container)
def create(self, request):
"""Create a container according to the request.
Returns the pid of the container or a negative number for errors."""
container = None
containerexistsflag = False
processes = None
clientids = None
pp = None
hwbindings = None
bind_index = 0
manifestfile = request['manifest']
command = request['file']
......@@ -54,64 +96,19 @@ class ContainerManager(object):
logger.info("create: args: %r", args)
logger.info("create: container name: %s", container_name)
apppreloadlibrary = self.pmpi_lib
manifest = ImageManifest()
if not manifest.load(manifestfile):
logger.error("Manifest is invalid")
return None
# Check if container exists else create it
if container_name in self.containers:
container = self.containers[container_name]
containerexistsflag = True
processes = container.processes
clientids = container.clientids
hwbindings = container.hwbindings
bind_index = len(processes)
else:
processes = dict()
clientids = dict()
hwbindings = dict()
# ask the resource manager for resources
ncpus = int(manifest.app.isolators.container.cpus.value)
nmems = int(manifest.app.isolators.container.mems.value)
req = resources(ncpus, nmems)
alloc = self.resourcemanager.schedule(container_name, req)
logger.info("run: allocation: %r", alloc)
# create container
logger.info("creating container %s", container_name)
self.nodeos.create(container_name, alloc)
container_resources = dict()
container_resources['cpus'], container_resources['mems'] = alloc
# Container power settings
container_power = dict()
container_power['profile'] = None
container_power['policy'] = None
container_power['damper'] = None
container_power['slowdown'] = None
container_power['manager'] = None
if manifest.is_feature_enabled('power'):
pp = manifest.app.isolators.power
if pp.profile in ["1", "True"]:
container_power['profile'] = dict()
container_power['profile']['start'] = dict()
container_power['profile']['end'] = dict()
if pp.policy != "NONE":
container_power['policy'] = pp.policy
container_power['damper'] = pp.damper
container_power['slowdown'] = pp.slowdown
# Compute hardware bindings
if manifest.is_feature_enabled('hwbind'):
hwbindings['enabled'] = True
hwbindings['distrib'] = sorted(self.hwloc.distrib(
ncpus, alloc), key=operator.
attrgetter('cpus'))
creation_needed, container = self._get_container_tuple(container_name,
manifest)
if creation_needed:
logger.info("Creating container %s", container_name)
req = resources(container.resources['cpus'],
container.resources['mems'])
self.nodeos.create(container_name, req)
self.containers[container_name] = container
# build context to execute
# environ['PATH'] = ("/usr/local/sbin:"
......@@ -120,14 +117,13 @@ class ContainerManager(object):
environ['PERF'] = self.linuxperf
environ['AC_APP_NAME'] = manifest.name
environ['AC_METADATA_URL'] = "localhost"
if (containerexistsflag and container.power['policy'] is not None) or (
pp is not None and pp.policy != "NONE"):
environ['LD_PRELOAD'] = apppreloadlibrary
# power profiling uses LD_PRELOAD, we use get to ensure that it
# doesn't crash if the policy doesn't exist.
if container.power.get('policy'):
environ['LD_PRELOAD'] = self.pmpi_lib
environ['NRM_TRANSMIT'] = '1'
if containerexistsflag:
environ['NRM_DAMPER'] = container.power['damper']
else:
environ['NRM_DAMPER'] = pp.damper
environ['NRM_DAMPER'] = container.power['damper']
# build prefix to the entire command based on enabled features
argv = []
......@@ -139,12 +135,17 @@ class ContainerManager(object):
# it as an argument to the command line, if enabled in manifest.
# The hardware binding computed using hwloc-distrib is used here
# --single
if bool(hwbindings) and hwbindings['enabled']:
if container.hwbindings:
# round robin over the cpu bindings available
bind_index = len(container.processes) % \
len(container.hwbindings['distrib'])
argv.append('hwloc-bind')
# argv.append('--single')
argv.append('core:'+str(hwbindings['distrib'][bind_index].cpus[0]))
cpumask = container.hwbindings['distrib'][bind_index].cpus[0]
memmask = container.hwbindings['distrib'][bind_index].mems[0]
argv.append("core:{}".format(cpumask))
argv.append('--membind')
argv.append('numa:'+str(hwbindings['distrib'][bind_index].mems[0]))
argv.append("numa:{}".format(memmask))
# It would've been better if argo-perf-wrapper wrapped around
# argo-nodeos-config and not the final command -- that way it would
......@@ -158,23 +159,13 @@ class ContainerManager(object):
# run my command
process = self.nodeos.execute(container_name, argv, environ)
processes[process.pid] = process
clientids[process.pid] = request['clientid']
if containerexistsflag:
container.processes[process.pid] = process
self.pids[process.pid] = container
logger.info("Created process %s in container %s", process.pid,
container_name)
else:
container = Container(container_name, manifest,
container_resources, container_power,
processes, clientids, hwbindings)
self.pids[process.pid] = container
self.containers[container_name] = container
logger.info("Container %s created and running : %r",
container_name, container)
# register the process
container.processes[process.pid] = process
container.clientids[process.pid] = request['clientid']
self.pids[process.pid] = container
logger.info("Created process %s in container %s", process.pid,
container_name)
return process.pid, container
def delete(self, uuid):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment