containers.py 8.06 KB
Newer Older
1 2 3
from __future__ import print_function

from aci import ImageManifest
4
from collections import namedtuple
5
import logging
6
from subprograms import ChrtClient, NodeOSClient, resources
7
import operator
8

9
# Module-wide logger; every message from this module goes to the 'nrm' logger.
logger = logging.getLogger('nrm')

# Immutable record describing one container tracked by the manager.
# Fields, in positional order:
#   uuid       - name/identifier the container was created under
#   manifest   - manifest object loaded when the container was created
#   resources  - dict with the 'cpus' and 'mems' allocated to the container
#   power      - dict of power settings ('profile', 'policy', 'damper',
#                'slowdown', 'manager')
#   processes  - dict mapping pid -> process object running in the container
#   clientids  - dict mapping pid -> id of the client that requested it
#   hwbindings - dict of hardware binding data (empty when not enabled)
Container = namedtuple('Container', [
    'uuid',
    'manifest',
    'resources',
    'power',
    'processes',
    'clientids',
    'hwbindings',
])
13

14 15 16 17 18 19

class ContainerManager(object):

    """Manages the creation, listing and deletion of containers, using a
    container runtime underneath.

    Attributes:
        containers: dict mapping container uuid -> Container record.
        pids: dict mapping process pid -> Container record owning it.
        nodeos: NodeOSClient used to create/delete containers and to execute
            commands inside them.
        chrt: ChrtClient used to build the scheduler wrapper command.
        resourcemanager: the resource manager passed at construction.
        hwloc: taken from the resource manager (``rm.hwloc``).
    """

    def __init__(self, rm,
                 perfwrapper="argo-perf-wrapper",
                 linuxperf="perf",
                 argo_nodeos_config="argo_nodeos_config",
                 pmpi_lib="/usr/lib/libnrm-pmpi.so"):
        """Set up the manager.

        Args:
            rm: resource manager; must expose ``schedule``, ``update`` and a
                ``hwloc`` attribute.
            perfwrapper: command prepended to the launched command when the
                manifest enables the 'perfwrapper' feature.
            linuxperf: value exported to processes as the ``PERF``
                environment variable.
            argo_nodeos_config: forwarded to ``NodeOSClient``.
            pmpi_lib: library path exported as ``LD_PRELOAD`` when a power
                policy is active.
        """
        self.linuxperf = linuxperf
        self.perfwrapper = perfwrapper
        self.nodeos = NodeOSClient(argo_nodeos_config=argo_nodeos_config)
        self.containers = dict()
        self.pids = dict()
        self.resourcemanager = rm
        self.hwloc = rm.hwloc
        self.chrt = ChrtClient()
        self.pmpi_lib = pmpi_lib

    def create(self, request):
        """Launch a command in a container, creating the container if needed.

        If ``request['uuid']`` names a container already known to this
        manager, the command is started as an additional process in it;
        otherwise resources are requested from the resource manager and a new
        container is created first.

        Args:
            request: mapping with the keys 'manifest' (manifest file),
                'file' (command), 'args' (list of arguments), 'environ'
                (dict, modified in place with the variables set below),
                'uuid' (container name) and 'clientid'.

        Returns:
            A ``(pid, container)`` tuple for the launched process, or
            ``None`` if the manifest could not be loaded.
        """
        container = None
        containerexistsflag = False
        pp = None
        bind_index = 0

        manifestfile = request['manifest']
        command = request['file']
        args = request['args']
        environ = request['environ']
        container_name = request['uuid']
        logger.info("run: manifest file:  %s", manifestfile)
        logger.info("run: command:        %s", command)
        logger.info("run: args:           %r", args)
        logger.info("run: container name: %s", container_name)

        # TODO: Application library to load must be set during configuration
        apppreloadlibrary = self.pmpi_lib

        manifest = ImageManifest()
        if not manifest.load(manifestfile):
            logger.error("Manifest is invalid")
            return None

        # The command line is built front to back: optional scheduler
        # wrapper, optional perf wrapper, optional hwloc-bind, then the
        # actual command and its arguments.
        if manifest.is_feature_enabled('scheduler'):
            sched = manifest.app.isolators.scheduler
            argv = self.chrt.getwrappedcmd(sched)
        else:
            argv = []

        # Check if container exists else create it
        if container_name in self.containers:
            container = self.containers[container_name]
            containerexistsflag = True
            # These are the container's own dicts, so updating them below
            # updates the container record as well.
            processes = container.processes
            clientids = container.clientids
            hwbindings = container.hwbindings
            # The n-th process of a container uses the n-th binding slot.
            bind_index = len(processes)
        else:
            processes = dict()
            clientids = dict()
            hwbindings = dict()

            # ask the resource manager for resources
            ncpus = int(manifest.app.isolators.container.cpus.value)
            nmems = int(manifest.app.isolators.container.mems.value)
            req = resources(ncpus, nmems)
            alloc = self.resourcemanager.schedule(container_name, req)
            logger.info("run: allocation: %r", alloc)

            # create container
            logger.info("creating container %s", container_name)
            self.nodeos.create(container_name, alloc)
            container_resources = dict()
            container_resources['cpus'], container_resources['mems'] = alloc

            # Container power settings; everything stays None unless the
            # manifest enables the 'power' feature.
            container_power = {
                'profile': None,
                'policy': None,
                'damper': None,
                'slowdown': None,
                'manager': None,
            }

            if manifest.is_feature_enabled('power'):
                pp = manifest.app.isolators.power
                if pp.profile in ["1", "True"]:
                    container_power['profile'] = {
                        'start': dict(),
                        'end': dict(),
                    }
                if pp.policy != "NONE":
                    container_power['policy'] = pp.policy
                    container_power['damper'] = pp.damper
                    container_power['slowdown'] = pp.slowdown

            # Compute hardware bindings
            if manifest.is_feature_enabled('hwbind'):
                hwbindings['enabled'] = True
                hwbindings['distrib'] = sorted(
                    self.hwloc.distrib(ncpus, alloc),
                    key=operator.attrgetter('cpus'))

        # build context to execute
        # environ['PATH'] = ("/usr/local/sbin:"
        #                   "/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
        environ['ARGO_CONTAINER_UUID'] = container_name
        environ['PERF'] = self.linuxperf
        environ['AC_APP_NAME'] = manifest.name
        environ['AC_METADATA_URL'] = "localhost"

        # A power policy is active either because the existing container was
        # created with one, or because the manifest of the new container
        # requests one (pp is only set on the new-container path).
        if containerexistsflag:
            policy_active = container.power['policy'] is not None
        else:
            policy_active = pp is not None and pp.policy != "NONE"
        if policy_active:
            environ['LD_PRELOAD'] = apppreloadlibrary
            environ['NRM_TRANSMIT'] = '1'
            if containerexistsflag:
                environ['NRM_DAMPER'] = container.power['damper']
            else:
                environ['NRM_DAMPER'] = pp.damper

        # It would've been better if argo-perf-wrapper wrapped around
        # argo-nodeos-config and not the final command -- that way it would
        # be running outside of the container.  However, because
        # argo-nodeos-config is suid root, perf can't monitor it.
        if manifest.is_feature_enabled('perfwrapper'):
            argv.append(self.perfwrapper)

        # Use hwloc-bind to launch each process in the container by
        # prepending it as an argument to the command line, if enabled in
        # manifest.  The hardware binding computed using hwloc-distrib is
        # used here.
        # NOTE(review): bind_index is the current process count; if more
        # processes are launched than there are entries in 'distrib' this
        # raises IndexError -- confirm whether that is intended.
        if hwbindings and hwbindings['enabled']:
            binding = hwbindings['distrib'][bind_index]
            argv.append('hwloc-bind')
            # argv.append('--single')
            argv.append('core:' + str(binding.cpus[0]))
            argv.append('--membind')
            argv.append('numa:' + str(binding.mems[0]))

        argv.append(command)
        argv.extend(args)

        # run my command
        process = self.nodeos.execute(container_name, argv, environ)
        processes[process.pid] = process
        clientids[process.pid] = request['clientid']

        if containerexistsflag:
            self.pids[process.pid] = container
            logger.info("Created process %s in container %s", process.pid,
                        container_name)
        else:
            container = Container(container_name, manifest,
                                  container_resources, container_power,
                                  processes, clientids, hwbindings)
            self.pids[process.pid] = container
            self.containers[container_name] = container
            logger.info("Container %s created and running : %r",
                        container_name, container)

        return process.pid, container

    def delete(self, uuid):
        """Delete a container and kill all related processes.

        Asks the container runtime to delete the container (with kill),
        notifies the resource manager, then forgets the container and the
        pids of all its processes.

        Raises:
            KeyError: if ``uuid`` is not a container known to this manager
                (raised after the runtime and resource manager were called).
        """
        self.nodeos.delete(uuid, kill=True)
        self.resourcemanager.update(uuid)
        c = self.containers.pop(uuid)
        # Explicit loop: map() is lazy on Python 3, so using it for its side
        # effects would leave stale entries in self.pids.
        for process in c.processes.values():
            self.pids.pop(process.pid, None)

    def kill(self, uuid):
        """Kill all the processes of a container.

        Unknown uuids are ignored.  A process that cannot be terminated is
        logged and the remaining processes are still attempted.
        """
        if uuid in self.containers:
            c = self.containers[uuid]
            logger.debug("killing %r:", c)
            for p in c.processes.values():
                try:
                    p.proc.terminate()
                except OSError:
                    logger.error("OS error: could not terminate process.")

    def list(self):
        """List the containers in the system.

        Returns:
            A list with one dict per container: 'uuid' is the container
            uuid and 'pid' is a list of the pids of its processes.
        """
        # Materialize the pids: on Python 3 dict.keys() is a live view, not
        # a list.  (Inside a method body ``list`` is still the builtin.)
        return [{'uuid': c.uuid, 'pid': list(c.processes)}
                for c in self.containers.values()]