# containers.py
from __future__ import print_function

from aci import ImageManifest
4
from collections import namedtuple
5
import logging
6
from subprograms import ChrtClient, NodeOSClient, resources
7
import operator
8

9
# Logger registered under the name 'nrm'; used for all messages in this file.
logger = logging.getLogger('nrm')

# Immutable record describing one container tracked by ContainerManager.
# As populated by ContainerManager.create():
#   uuid       -- container name taken from the request
#   manifest   -- the loaded ImageManifest
#   resources  -- dict with 'cpus' and 'mems' from the resource allocation
#   power      -- dict with 'profile', 'policy', 'damper', 'slowdown',
#                 'manager' (each None unless set from the manifest)
#   processes  -- dict mapping pid -> process object
#   clientids  -- dict mapping pid -> client id of the requester
#   hwbindings -- dict, empty unless hardware binding is enabled, in which
#                 case it holds 'enabled' and 'distrib'
Container = namedtuple('Container', ['uuid', 'manifest', 'resources',
                                     'power', 'processes', 'clientids',
                                     'hwbindings'])


class ContainerManager(object):

    """Manages the creation, listing and deletion of containers, using a
    container runtime underneath.

    State kept by the manager:
      containers -- dict mapping container uuid -> Container record
      pids       -- dict mapping process pid -> Container record
    """

    # Manifest string values that are treated as "enabled".
    _TRUTHY = ("1", "True")

    def __init__(self, rm,
                 perfwrapper="argo-perf-wrapper",
                 linuxperf="perf",
                 argo_nodeos_config="argo_nodeos_config",
                 pmpi_lib="/usr/lib/libnrm-pmpi.so"):
        """Set up the manager.

        rm                 -- resource manager; must expose `hwloc`,
                              `schedule()` and `update()`
        perfwrapper        -- command prepended to the application when the
                              manifest enables the perf wrapper
        linuxperf          -- value exported to processes as $PERF
        argo_nodeos_config -- passed through to NodeOSClient
        pmpi_lib           -- library exported as $LD_PRELOAD when a power
                              policy is active
        """
        self.linuxperf = linuxperf
        self.perfwrapper = perfwrapper
        self.nodeos = NodeOSClient(argo_nodeos_config=argo_nodeos_config)
        self.containers = dict()
        self.pids = dict()
        self.resourcemanager = rm
        self.hwloc = rm.hwloc
        self.chrt = ChrtClient()
        self.pmpi_lib = pmpi_lib

    def create(self, request):
        """Start a process in a container, creating the container if needed.

        `request` is a mapping from which the keys 'manifest', 'file',
        'args', 'environ', 'uuid' and 'clientid' are read.  The 'environ'
        mapping is updated in place with the variables handed to the process.

        If a container named request['uuid'] already exists, the new process
        is added to it; otherwise resources are requested from the resource
        manager and a new container is created first.

        Returns a `(pid, container)` tuple on success, or None when the
        manifest cannot be loaded.
        """
        container = None
        containerexistsflag = False
        pp = None
        bind_index = 0

        manifestfile = request['manifest']
        command = request['file']
        args = request['args']
        environ = request['environ']
        container_name = request['uuid']
        logger.info("run: manifest file:  %s", manifestfile)
        logger.info("run: command:        %s", command)
        logger.info("run: args:           %r", args)
        logger.info("run: container name: %s", container_name)

        # TODO: Application library to load must be set during configuration
        apppreloadlibrary = self.pmpi_lib

        manifest = ImageManifest()
        if not manifest.load(manifestfile):
            logger.error("Manifest is invalid")
            return None

        # Read before any side effect so that a malformed request cannot
        # leave behind a running process that is not tracked anywhere.
        clientid = request['clientid']

        isolators = manifest.app.isolators

        if hasattr(isolators, 'scheduler'):
            sched = isolators.scheduler
            argv = self.chrt.getwrappedcmd(sched)
        else:
            argv = []

        # Check if container exists else create it
        if container_name in self.containers:
            container = self.containers[container_name]
            containerexistsflag = True
            processes = container.processes
            clientids = container.clientids
            hwbindings = container.hwbindings
            # Each additional process takes the next precomputed binding.
            # NOTE(review): nothing here bounds this index by the number of
            # entries in hwbindings['distrib'] -- confirm callers never start
            # more processes than were distributed.
            bind_index = len(processes)
        else:
            processes = dict()
            clientids = dict()
            hwbindings = dict()

            # ask the resource manager for resources
            ncpus = int(isolators.container.cpus.value)
            nmems = int(isolators.container.mems.value)
            req = resources(ncpus, nmems)
            alloc = self.resourcemanager.schedule(container_name, req)
            logger.info("run: allocation: %r", alloc)

            # create container
            logger.info("creating container %s", container_name)
            self.nodeos.create(container_name, alloc)
            container_resources = dict()
            container_resources['cpus'], container_resources['mems'] = alloc

            # Container power settings
            container_power = {
                'profile': None,
                'policy': None,
                'damper': None,
                'slowdown': None,
                'manager': None,
            }

            # It would've been better if argo-perf-wrapper wrapped around
            # argo-nodeos-config and not the final command -- that way it would
            # be running outside of the container.  However, because
            # argo-nodeos-config is suid root, perf can't monitor it.
            if hasattr(isolators, 'perfwrapper'):
                manifest_perfwrapper = isolators.perfwrapper
                if hasattr(manifest_perfwrapper, 'enabled'):
                    if manifest_perfwrapper.enabled in self._TRUTHY:
                        argv.append(self.perfwrapper)

            if hasattr(isolators, 'power'):
                if hasattr(isolators.power, 'enabled'):
                    pp = isolators.power
                    if pp.enabled in self._TRUTHY:
                        if pp.profile in self._TRUTHY:
                            container_power['profile'] = dict()
                            container_power['profile']['start'] = dict()
                            container_power['profile']['end'] = dict()
                        if pp.policy != "NONE":
                            container_power['policy'] = pp.policy
                            container_power['damper'] = pp.damper
                            container_power['slowdown'] = pp.slowdown

            # Compute hardware bindings
            if hasattr(isolators, 'hwbind'):
                manifest_hwbind = isolators.hwbind
                if hasattr(manifest_hwbind, 'enabled'):
                    if manifest_hwbind.enabled in self._TRUTHY:
                        hwbindings['enabled'] = True
                        hwbindings['distrib'] = sorted(
                            self.hwloc.distrib(ncpus, alloc),
                            key=operator.attrgetter('cpus'))

        # build context to execute
        # environ['PATH'] = ("/usr/local/sbin:"
        #                   "/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
        environ['ARGO_CONTAINER_UUID'] = container_name
        environ['PERF'] = self.linuxperf
        environ['AC_APP_NAME'] = manifest.name
        environ['AC_METADATA_URL'] = "localhost"
        # Preload the application library when a power policy applies: taken
        # from the stored settings for an existing container, or from the
        # manifest for a new one.
        if (containerexistsflag and container.power['policy'] is not None) or (
                pp is not None and pp.policy != "NONE"):
            environ['LD_PRELOAD'] = apppreloadlibrary
            environ['NRM_TRANSMIT'] = '1'
            if containerexistsflag:
                environ['NRM_DAMPER'] = container.power['damper']
            else:
                environ['NRM_DAMPER'] = pp.damper

        # Use hwloc-bind to launch each process in the container by prepending
        # it as an argument to the command line, if enabled in manifest.
        # The hardware binding computed using hwloc-distrib is used here
        # --single
        if hwbindings and hwbindings['enabled']:
            binding = hwbindings['distrib'][bind_index]
            argv.append('hwloc-bind')
            # argv.append('--single')
            argv.append('core:'+str(binding.cpus[0]))
            argv.append('--membind')
            argv.append('numa:'+str(binding.mems[0]))

        argv.append(command)
        argv.extend(args)

        # run my command
        process = self.nodeos.execute(container_name, argv, environ)
        # For an existing container these are the container's own dicts, so
        # the record is updated in place.
        processes[process.pid] = process
        clientids[process.pid] = clientid

        if containerexistsflag:
            self.pids[process.pid] = container
            logger.info("Created process %s in container %s", process.pid,
                        container_name)
        else:
            container = Container(container_name, manifest,
                                  container_resources, container_power,
                                  processes, clientids, hwbindings)
            self.pids[process.pid] = container
            self.containers[container_name] = container
            logger.info("Container %s created and running : %r",
                        container_name, container)

        return process.pid, container

    def delete(self, uuid):
        """Delete a container and kill all related processes.

        Also releases the container's resources through the resource manager
        and forgets every pid that belonged to it.  Raises KeyError if `uuid`
        is not a known container.
        """
        self.nodeos.delete(uuid, kill=True)
        self.resourcemanager.update(uuid)
        c = self.containers.pop(uuid)
        # Explicit loop: map() is lazy on Python 3, so using it for its side
        # effects would leave stale entries in self.pids.
        for process in c.processes.values():
            self.pids.pop(process.pid, None)

    def kill(self, uuid):
        """Kill all the processes of a container.

        Unknown uuids are ignored.  A failure to terminate one process is
        logged and does not stop the remaining ones from being terminated.
        """
        if uuid in self.containers:
            c = self.containers[uuid]
            logger.debug("killing %r:", c)
            for p in c.processes.values():
                try:
                    p.proc.terminate()
                except OSError:
                    logger.error("OS error: could not terminate process.")

    def list(self):
        """List the containers in the system.

        Returns a list with one dict per container, holding its 'uuid' and
        the list of its process ids under 'pid'.
        """
        # Snapshot the pids into a real list rather than returning a live
        # view of the internal dict.
        return [{'uuid': c.uuid, 'pid': list(c.processes)}
                for c in self.containers.values()]