Commit 848a9755 authored by Swann Perarnau

Merge branch 'containers-launch' into 'master'

Container launching implementation

See merge request !3
parents e9848601 1c4645cc
......@@ -15,8 +15,9 @@ class CommandLineInterface(object):
def __init__(self):
self.logger = logging.getLogger(__name__)
def do_signal(self):
pass
def do_signal(self, signum, stackframe):
self.logger.info("received signal %d, exiting", signum)
exit(1)
def setup(self):
# SUB port to the upstream API (connected to its PUB port)
......@@ -50,7 +51,32 @@ class CommandLineInterface(object):
self.logger.info("client uuid: %r", self.uuid)
def do_run(self, argv):
pass
""" Connect to the NRM and ask to spawn a container and run a command
in it.
The NRM should notify us on the pub socket of the container
creation."""
# build the command as a JSON dict containing enough info. We add a
# container uuid to the command so that resending it stays idempotent.
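# Illustrative exchange, with made-up values; the reply shape matches
# the 'container' update sent by the daemon in this merge request:
#   sent: {'command': 'run', 'manifest': 'basic.json', 'file': 'a.out',
#          'args': [], 'uuid': '<containerid>'}
#   recv: {'type': 'container', 'uuid': '<containerid>', 'errno': 0,
#          'pid': 12345}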
containerid = str(uuid.uuid4())
command = {'command': 'run',
'manifest': argv.manifest,
'file': argv.command,
'args': argv.args,
'uuid': containerid,
}
while True:
self.upstream_pub_socket.send_json(command)
msg = self.upstream_sub_socket.recv_json()
self.logger.info("new message: %r", msg)
# ignore other messages
if isinstance(msg, dict) and msg.get('type') == 'container':
if msg['uuid'] == containerid:
self.logger.info("container response: %r", msg)
break
def do_setpower(self, argv):
""" Connect to the NRM and ask to change the power limit.
......@@ -85,7 +111,10 @@ class CommandLineInterface(object):
# run container
parser_run = subparsers.add_parser("run")
parser_run.add_argument("container")
parser_run.add_argument("manifest", help="manifest file to apply")
parser_run.add_argument("command", help="command to execute")
parser_run.add_argument("args", help="command arguments",
nargs=argparse.REMAINDER)
parser_run.set_defaults(func=self.do_run)
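# illustrative invocation (the program name depends on how this client
# is installed): <cli> run basic.json /bin/sleep 10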
# setpowerlimit
......
{
"acKind": "ImageManifest",
"acVersion": "0.6.0",
"name": "test",
"app": {
"isolators": [
{
"name": "argo/scheduler",
"value": {
"policy": "SCHED_OTHER",
"priority": "0"
}
},
{
"name": "argo/container",
"value": {
"cpus": "4",
"mems": "1"
}
}
]
}
}
"""Parse and Represent the APPC ACI specification."""
import collections
import logging
import json
logger = logging.getLogger('argus')
spec = collections.namedtuple('Field', ['cls', 'required'])
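# a field spec pairs the class used to load/validate a key with a flag
# saying whether the key is mandatory, e.g. spec(unicode, True) means
# "a required unicode string"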
class SpecField(object):
"""Object part of the ACI Image Manifest fields."""
fields = {}
def __init__(self):
"""Create empty field."""
pass
def load(self, data):
"""Load fields."""
for key in self.fields:
spec = self.fields[key]
if key not in data:
if spec.required:
logger.error("Missing key from manifest: %s", key)
return False
else:
ok, v = self.loadfield(data[key], spec.cls)
if not ok:
logger.error("Error for key %s in %s", key, self.__class__)
return False
setattr(self, key, v)
return True
def loadfield(self, data, cls):
"""load data as if from a field of the provided cls.
Make sure the basic types are also respected.
"""
ret = cls()
if not hasattr(ret, 'load'):
if not isinstance(data, cls):
logger.error("Wrong data type %s, expected: %s", cls,
data.__class__)
return (False, None)
else:
return (True, data)
else:
return (ret.load(data), ret)
class Scheduler(SpecField):
"""Scheduler information for a container."""
classes = ['SCHED_FIFO', 'SCHED_HPC', 'SCHED_OTHER']
fields = {"policy": spec(unicode, True),
"priority": spec(unicode, False)
}
def __init__(self):
"""Create scheduler object."""
pass
def load(self, data):
"""Load configuration from json text."""
ret = super(Scheduler, self).load(data)
if not ret:
return ret
# check scheduler class & prio
if self.policy not in self.classes:
logger.error("Wrong scheduling class %s, not any of %r", data,
Scheduler.classes)
return False
if self.policy != "SCHED_OTHER":
logger.warning("scheduler priority forced as 0 " +
"for non default policies")
self.priority = "0"
return True
class CPUSet(SpecField):
"""Represent a CPUSet field."""
def __init__(self):
"""Create an empty set."""
pass
def load(self, data):
"""Load from json object."""
self.value = data
return True
class MemSet(SpecField):
"""Represent a MemSet field."""
def __init__(self):
"""Create an empty set."""
pass
def load(self, data):
"""Load from json object."""
self.value = data
return True
class Container(SpecField):
"""Container Information."""
fields = {"cpus": spec(CPUSet, True),
"mems": spec(MemSet, True)
}
def __init__(self):
"""Create empty container."""
pass
def load(self, data):
"""Load container information."""
return super(Container, self).load(data)
class IsolatorList(SpecField):
"""Represent the list of isolator in a Manifest."""
types = {"argo/scheduler": spec(Scheduler, False),
"argo/container": spec(Container, True)
}
def __init__(self):
"""Create empty list."""
pass
def load(self, data):
"""Load from json struct."""
for e in data:
name = e['name']
if name in self.types:
t = self.types[name]
ok, v = super(IsolatorList, self).loadfield(e['value'], t.cls)
if not ok:
logger.error("Error with %s in %s", name, self.__class__)
return False
setattr(self, name[len("argo/"):], v)  # strip the exact "argo/" prefix
for k in self.types:
if self.types[k].required:
assert k[len("argo/"):] in self.__dict__
return True
class App(SpecField):
"""Represent the App part of an Image Manifest."""
# attribute, subclass, required
fields = {"environment": spec(list, False),
"isolators": spec(IsolatorList, True),
}
def __init__(self):
"""Create empty container."""
pass
def load(self, data):
"""Load from json dict."""
return super(App, self).load(data)
class ImageManifest(SpecField):
"""Represent an ACI Image Manifest."""
fields = {"acKind": spec(unicode, True),
"acVersion": spec(unicode, True),
"name": spec(unicode, True),
"app": spec(App, True)
}
def __init__(self):
"""Create empty manifest."""
pass
def load(self, filename):
"""Load a manifest from JSON file."""
with open(filename, 'r') as f:
data = json.load(f)
return super(ImageManifest, self).load(data)
from __future__ import print_function
from aci import ImageManifest
from collections import namedtuple
import logging
import os
from subprograms import ChrtClient, NodeOSClient, resources
import sys
Container = namedtuple('Container', ['uuid', 'manifest', 'pid'])
class ContainerManager(object):
"""Manages the creation, listing and deletion of containers, using a
container runtime underneath."""
def __init__(self, rm):
self.containers = dict()
self.pids = dict()
self.logger = logging.getLogger(__name__)
self.resourcemanager = rm
self.nodeos = NodeOSClient()
self.chrt = ChrtClient()
def create(self, request):
"""Create a container according to the request.
Returns the pid of the container or a negative number for errors."""
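# the request mirrors the client's 'run' command; illustrative shape:
# {'manifest': 'basic.json', 'file': 'a.out', 'args': [],
#  'uuid': '<container uuid>'}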
manifestfile = request['manifest']
command = request['file']
args = request['args']
self.logger.info("run: manifest file: %s", manifestfile)
self.logger.info("run: command: %s", command)
self.logger.info("run: args: %r", args)
manifest = ImageManifest()
if not manifest.load(manifestfile):
self.logger.error("Manifest is invalid")
return -1
# ask the resource manager for resources
req = resources(int(manifest.app.isolators.container.cpus.value),
int(manifest.app.isolators.container.mems.value))
allocation = self.resourcemanager.schedule(request['uuid'], req)
self.logger.info("run: allocation: %r", allocation)
# build context to execute
# copy the environment so we do not mutate the daemon's own
environ = dict(os.environ)
environ['PATH'] = ("/usr/local/sbin:"
"/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
environ['AC_APP_NAME'] = manifest.name
environ['AC_METADATA_URL'] = "localhost"
environ['container'] = 'argo'
self.logger.info("run: environ: %r", environ)
# create container
container_name = request['uuid']
self.logger.info("creating container %s", container_name)
self.nodeos.create(container_name, allocation)
self.logger.info("created container %s", container_name)
newpid = os.fork()
self.logger.info("forked: new pid: %s", newpid)
if newpid == 0:
# move myself to that container
mypid = os.getpid()
self.nodeos.attach(container_name, mypid)
self.logger.info("child: attached to container %s", container_name)
# run my command
if hasattr(manifest.app.isolators, 'scheduler'):
sched = manifest.app.isolators.scheduler
argv = self.chrt.getwrappedcmd(sched)
else:
argv = []
argv.append(command)
argv.extend(args)
self.logger.debug("execvpe %r", argv)
os.execvpe(argv[0], argv, environ)
# only reached if execvpe itself failed
sys.exit(1)
else:
c = Container(container_name, manifest, newpid)
self.pids[newpid] = c
self.containers[container_name] = c
return newpid
def delete(self, uuid):
"""Delete a container and kill all related processes."""
self.nodeos.delete(uuid, kill=True)
c = self.containers[uuid]
del self.containers[uuid]
del self.pids[c.pid]
from __future__ import print_function
from containers import ContainerManager
from resources import ResourceManager
import json
import logging
import os
import re
import sensor
import signal
import zmq
from zmq.eventloop import ioloop, zmqstream
import sensor
application_fsm_table = {'stable': {'i': 's_ask_i', 'd': 's_ask_d'},
's_ask_i': {'done': 'stable', 'max': 'max'},
......@@ -69,6 +73,7 @@ class Application(object):
class Daemon(object):
def __init__(self):
self.applications = {}
self.containerpids = {}
self.buf = ''
self.logger = logging.getLogger(__name__)
self.target = 1.0
......@@ -99,12 +104,38 @@ class Daemon(object):
def do_upstream_receive(self, parts):
self.logger.info("receiving upstream message: %r", parts)
if len(parts) != 1:
self.logger.error("unexpected msg length, droping it: %r", parts)
self.logger.error("unexpected msg length, dropping it: %r", parts)
return
msg = json.loads(parts[0])
if isinstance(msg, dict) and msg.get('command') == 'setpower':
self.target = float(msg['limit'])
self.logger.info("new target measure: %g", self.target)
if isinstance(msg, dict):
command = msg.get('command')
# TODO: switch to a dispatch dictionary
if command is None:
self.logger.error("missing command in message: %r", msg)
return
if command == 'setpower':
self.target = float(msg['limit'])
self.logger.info("new target measure: %g", self.target)
elif command == 'run':
self.logger.info("new container required: %r", msg)
pid = self.container_manager.create(msg)
if pid > 0:
self.containerpids[pid] = msg['uuid']
# TODO: obviously we need to send more info than that
update = {'type': 'container',
'uuid': msg['uuid'],
'errno': 0,
'pid': pid,
}
self.upstream_pub.send_json(update)
else:
update = {'type': 'container',
'uuid': msg['uuid'],
'errno': pid,
}
self.upstream_pub.send_json(update)
else:
self.logger.error("invalid command: %r", command)
def do_sensor(self):
self.machine_info = self.sensor.do_update()
......@@ -134,7 +165,39 @@ class Daemon(object):
self.logger.info("application now in state: %s", application.state)
def do_signal(self, signum, frame):
ioloop.IOLoop.current().add_callback_from_signal(self.do_shutdown)
if signum == signal.SIGINT:
ioloop.IOLoop.current().add_callback_from_signal(self.do_shutdown)
elif signum == signal.SIGCHLD:
ioloop.IOLoop.current().add_callback_from_signal(self.do_children)
else:
self.logger.error("wrong signal: %d", signum)
def do_children(self):
# find out if children have terminated
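# os.wait3(os.WNOHANG) returns (0, 0, <rusage>) when no child has
# changed state, and raises OSError once there is no child left to reap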
while True:
try:
pid, status, rusage = os.wait3(os.WNOHANG)
if pid == 0 and status == 0:
break
except OSError:
break
self.logger.info("child update %d: %r", pid, status)
# check if it's a pid we care about
if pid in self.containerpids:
# check if this is an exit
if os.WIFEXITED(status):
uuid = self.containerpids[pid]
self.container_manager.delete(uuid)
msg = {'type': 'container',
'event': 'exit',
'status': status,
'uuid': uuid,
}
self.upstream_pub.send_json(msg)
else:
self.logger.debug("child update ignored")
pass
def do_shutdown(self):
self.sensor.stop()
......@@ -181,6 +244,10 @@ class Daemon(object):
# create a stream to let ioloop deal with blocking calls on HWM
self.upstream_pub = zmqstream.ZMQStream(upstream_pub_socket)
# create resource and container manager
self.resource_manager = ResourceManager()
self.container_manager = ContainerManager(self.resource_manager)
# create sensor manager and make first measurement
self.sensor = sensor.SensorManager()
self.sensor.start()
......@@ -195,6 +262,7 @@ class Daemon(object):
# take care of signals
signal.signal(signal.SIGINT, self.do_signal)
signal.signal(signal.SIGCHLD, self.do_signal)
ioloop.IOLoop.current().start()
......
from __future__ import print_function
import logging
from subprograms import HwlocClient, resources
class ResourceManager(object):
"""Manages the query of node resources, the tracking of their use and
the scheduling of new containers according to partitioning rules."""
def __init__(self):
self.logger = logging.getLogger(__name__)
self.hwloc = HwlocClient()
# query the node topo, keep track of the critical resources
self.allresources = self.hwloc.info()
self.logger.debug("resource info: %r", self.allresources)
self.available = self.allresources
def schedule(self, uuid, request):
"""Schedule a resource request on the available resources.
Request is a resources namedtuple giving the number of cpus and mems
asked for."""
# dumb scheduling, just give the first resources available:
# - cpus are exclusive
# - memories exclusive if more than one left
if len(self.available.cpus) >= request.cpus:
retcpus = self.available.cpus[:request.cpus]
availcpus = self.available.cpus[request.cpus:]
else:
retcpus = []
availcpus = self.available.cpus
if len(self.available.mems) > 1:
retmems = self.available.mems[:request.mems]
availmems = self.available.mems[request.mems:]
else:
retmems = self.available.mems
availmems = self.available.mems
self.available = resources(availcpus, availmems)
return resources(retcpus, retmems)
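# example: with cpus [0..7] and mems [0, 1] available, a request for
# 4 cpus and 1 mem returns resources([0, 1, 2, 3], [0]) and leaves
# cpus [4, 5, 6, 7] and mems [1] available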
def remove(self, uuid):
"""Free the resources associated with request uuid."""
pass
# def oldcode(self):
# numcpus = int(manifest.app.isolators.container.cpus.value)
#
# allresources = hwloc.info()
# self.logger.debug("resource info: %r", allresources)
# ncontainers = len(allresources.cpus) // numcpus
# self.logger.debug("will support %s containers", ncontainers)
# cur = nodeos.getavailable()
# self.logger.debug("%r are available", cur)
# sets = hwloc.distrib(ncontainers, restrict=cur, fake=allresources)
# self.logger.info("asking for %s cores", numcpus)
# self.logger.debug("will search in one of these: %r", sets)
# # find a free set
# avail = set(cur.cpus)
# for s in sets:
# cpuset = set(s.cpus)
# if cpuset.issubset(avail):
# alloc = s
# break
# else:
# self.logger.error("no exclusive cpuset found among %r", avail)
# return -2
#
"""Various clients for system utilities."""
import subprocess
import collections
import logging
import xml.etree.ElementTree
resources = collections.namedtuple("Resources", ["cpus", "mems"])
def logpopen(p, args, stdout, stderr):
"""log popen cmd."""
logging.debug("popen cmd: %r", args)
logging.debug("popen return code: %s", p.returncode)
logging.debug("popen stdout: %r", stdout)
logging.debug("popen, stderr: %r", stderr)
def bitmask2list(mask):
"""Convert a bitmask to the list of power of 2 set to 1."""
i = int(mask or '0x0', base=16)
ret = []
for j in range(i.bit_length()):
m = 1 << j
if (i & m):
ret.append(j)
return ret
def list2bitmask(l):
"""Convert a list into a bitmask."""
m = 0
for e in l:
m |= 1 << e
return hex(m)
class NodeOSClient(object):
"""Client to argo_nodeos_config."""
def __init__(self):
"""Load client configuration."""
self.prefix = "argo_nodeos_config"
def getavailable(self):
"""Gather available resources."""
args = [self.prefix, "--show_available_resources=shared:false"]
p = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
logpopen(p, args, stdout, stderr)
# parse the output: a header line, then the cpu ids (possibly spread
# over several lines), then the memory-nodes separator and the memory
# node ids in the same layout
cpus = []
mems = []
lines = stdout.splitlines()
splitindex = lines.index('------------Memory nodes------------')
cpuslines = lines[1:splitindex]
memlines = lines[splitindex+1:]
for l in cpuslines:
cpus.extend(l.split())
for l in memlines:
mems.extend(l.split())
return resources([int(x) for x in cpus], [int(x) for x in mems])
def create(self, name, params):
"""Create container, according to params."""
args = [self.prefix]
cmd = "--create_container="
cmd += 'name:{0}'.format(name)
cmd += ' cpus:[{0}]'.format(",".join([str(x) for x in params.cpus]))
cmd += ' mems:[{0}]'.format(",".join([str(x) for x in params.mems]))
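# e.g. cmd == "--create_container=name:<uuid> cpus:[0,1,2,3] mems:[0]"
# (values shown are illustrative)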
args.append(cmd)
p = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
logpopen(p, args, stdout, stderr)
def attach(self, name, pid):
"""Attach a pid to a container."""
args = [self.prefix]
cmd = '--attach_to_container='
cmd += 'name:{0}'.format(name)
cmd += ' pids:[{0}]'.format(pid)
args.append(cmd)
p = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
logpopen(p, args, stdout, stderr)
def delete(self, name, kill=False):
"""Destroy container."""
# destroy container
args = [self.prefix]
cmd = '--delete_container='
cmd += 'name:{0}'.format(name)
if kill:
cmd += ' kill_content:true'
args.append(cmd)
p = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
logpopen(p, args, stdout, stderr)
class ChrtClient(object):
"""Client to chrt command line wrapper."""
flags = {'SCHED_OTHER': '--other',
'SCHED_BATCH': '--batch',
'SCHED_FIFO': '--fifo',
'SCHED_IDLE': '--idle',
'SCHED_RR': '--rr',
'SCHED_HPC': '--hpc'
}
def __init__(self):
"""Load configuration."""
self.prefix = "chrt"
def getwrappedcmd(self, params):
"""Return a list of args to prepend to a popen call."""
args = [self.prefix]
args.append(self.flags[params.policy])
args.append(params.priority)
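# e.g. for policy 'SCHED_FIFO' and priority '1' this yields
# ['chrt', '--fifo', '1']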
return args
class HwlocClient(object):
"""Client to hwloc binaries."""
def __init__(self):
"""Load configuration."""
self.prefix = "hwloc"
def info(self):
"""Return list of all cpus and mems."""
cmd = self.prefix + "-ls"
args = [cmd, '--whole-system', '--output-format', 'xml']
p = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
logpopen(p, args, stdout, stderr)
xmlroot = xml.etree.ElementTree.fromstring(stdout)
ret = resources([], [])
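# hwloc XML describes the topology as nested <object> elements; we only
# collect the os_index of PU (logical cpu) and NUMANode objects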
for obj in xmlroot.iter('object'):
if obj.attrib['type'] == "NUMANode":
ret.mems.append(int(obj.attrib['os_index']))
if obj.attrib['type'] == "PU":
ret.cpus.append(int(obj.attrib['os_index']))
# if there's only one memory node, hwloc doesn't list it
if not ret.mems:
ret.mems.append(0)
return ret
def all2fake(self, resources):
"""Convert resource description of the system into fake topology.
We need that because hwloc barfs on fake numa nodes.
"""
# easy version: spread the cpus evenly across the numa nodes
mems = len(resources.mems)
cpus = len(resources.cpus)
assert cpus % mems == 0
pu = cpus // mems
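# e.g. 8 cpus spread over 2 numa nodes gives "numa: 2 pu:4"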
return "numa: %s pu:%s".format(mems, pu)
def distrib(self, numprocs, restrict=None, fake=None):
"""Distribute numprocs across the hierarchy."""
# The original command only reports back cpusets. We do better, by
# reporting the mems that go with it. This requires some magic, using