GitLab maintenance scheduled for Today, 2019-12-05, from 17:00 to 18:00 CT - Services will be unavailable during this time.

Commit 402a2524 authored by Sridutt Bhalachandra's avatar Sridutt Bhalachandra

[feature] Support for Process/Task pinning

Added support for pinning a process/task to a core. This is important for
power policies that take contextual information from one application phase and
use it to compute frequency levels for the next phase. In the absence of
process/task pinning, the contextual information obtained has no value: it is
not representative of application phase behavior on a core, because processes
and tasks can migrate during the next phase.

See Issue #20
parent 442d31f6
......@@ -14,14 +14,14 @@
{
"name": "argo/container",
"value": {
"cpus": "4",
"mems": "1"
"cpus": "24",
"mems": "2"
}
},
{
"name": "argo/perfwrapper",
"value": {
"enabled": "1"
"enabled": "0"
}
},
{
......@@ -30,9 +30,15 @@
"enabled": "1",
"profile": "1",
"policy": "NONE",
"damper": "0.1",
"damper": "1000000000",
"slowdown": "1.1"
}
},
{
"name": "argo/hwbind",
"value": {
"enabled": "1"
}
}
]
}
......
......@@ -202,6 +202,29 @@ class Power(SpecField):
return True
class HwBind(SpecField):
    """Hardware bindings for a container.

    Describes the "argo/hwbind" isolator of a manifest. The only declared
    field is ``enabled``, a unicode string.
    """

    fields = {
        "enabled": spec(unicode, False),
    }

    def __init__(self):
        """Create empty hardware bindings settings object."""

    def load(self, data):
        """Load hardware bindings settings and validate ``enabled``.

        The parent ``load`` runs first; when it reports a falsy result that
        result is handed back unchanged. Otherwise ``enabled`` must be one
        of "0", "False", "1" or "True".

        Returns True when the settings are accepted, False (after logging
        an error) when ``enabled`` holds any other value.
        """
        loaded = super(HwBind, self).load(data)
        if loaded:
            # NOTE(review): ``enabled`` is declared with spec(unicode, False);
            # presumably that marks it optional -- confirm the parent load
            # always sets self.enabled before it is read here.
            accepted = ("0", "False", "1", "True")
            if self.enabled in accepted:
                return True
            logger.error("Invalid value for hardware bindings enabled: %s",
                         self.enabled)
            return False
        return loaded
class IsolatorList(SpecField):
"""Represent the list of isolator in a Manifest."""
......@@ -210,6 +233,7 @@ class IsolatorList(SpecField):
"argo/container": spec(Container, True),
"argo/perfwrapper": spec(PerfWrapper, False),
"argo/power": spec(Power, False),
"argo/hwbind": spec(HwBind, False),
}
def __init__(self):
......
......@@ -4,10 +4,12 @@ from aci import ImageManifest
from collections import namedtuple
import logging
from subprograms import ChrtClient, NodeOSClient, resources
import operator
logger = logging.getLogger('nrm')
Container = namedtuple('Container', ['uuid', 'manifest', 'resources',
'power', 'processes', 'clientids'])
'power', 'processes', 'clientids',
'hwbindings'])
class ContainerManager(object):
......@@ -26,6 +28,8 @@ class ContainerManager(object):
self.containers = dict()
self.pids = dict()
self.resourcemanager = rm
self.hwloc = rm.hwloc
self.nodeos = NodeOSClient()
self.chrt = ChrtClient()
self.pmpi_lib = pmpi_lib
......@@ -38,6 +42,8 @@ class ContainerManager(object):
processes = None
clientids = None
pp = None
hwbindings = None
bind_index = 0
manifestfile = request['manifest']
command = request['file']
......@@ -69,13 +75,17 @@ class ContainerManager(object):
containerexistsflag = True
processes = container.processes
clientids = container.clientids
hwbindings = container.hwbindings
bind_index = len(processes)
else:
processes = dict()
clientids = dict()
hwbindings = dict()
# ask the resource manager for resources
req = resources(int(manifest.app.isolators.container.cpus.value),
int(manifest.app.isolators.container.mems.value))
ncpus = int(manifest.app.isolators.container.cpus.value)
nmems = int(manifest.app.isolators.container.mems.value)
req = resources(ncpus, nmems)
alloc = self.resourcemanager.schedule(container_name, req)
logger.info("run: allocation: %r", alloc)
......@@ -116,6 +126,16 @@ class ContainerManager(object):
container_power['damper'] = pp.damper
container_power['slowdown'] = pp.slowdown
# Compute hardware bindings
if hasattr(manifest.app.isolators, 'hwbind'):
manifest_hwbind = manifest.app.isolators.hwbind
if hasattr(manifest_hwbind, 'enabled'):
if manifest_hwbind.enabled in ["1", "True"]:
hwbindings['enabled'] = True
hwbindings['distrib'] = sorted(self.hwloc.distrib(
ncpus, alloc), key=operator.
attrgetter('cpus'))
# build context to execute
# environ['PATH'] = ("/usr/local/sbin:"
# "/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
......@@ -132,6 +152,17 @@ class ContainerManager(object):
else:
environ['NRM_DAMPER'] = pp.damper
# Use hwloc-bind to launch each process in the container by prepending
# it as an argument to the command line, if enabled in manifest.
# The hardware binding computed using hwloc-distrib is used here
# --single
if bool(hwbindings) and hwbindings['enabled']:
argv.append('hwloc-bind')
# argv.append('--single')
argv.append('core:'+str(hwbindings['distrib'][bind_index].cpus[0]))
argv.append('--membind')
argv.append('numa:'+str(hwbindings['distrib'][bind_index].mems[0]))
argv.append(command)
argv.extend(args)
......@@ -148,7 +179,7 @@ class ContainerManager(object):
else:
container = Container(container_name, manifest,
container_resources, container_power,
processes, clientids)
processes, clientids, hwbindings)
self.pids[process.pid] = container
self.containers[container_name] = container
logger.info("Container %s created and running : %r",
......
......@@ -32,7 +32,7 @@ def list2bitmask(l):
"""Convert a list into a bitmask."""
m = 0
for e in l:
m |= 1 << e
m |= 1 << int(e)
return hex(m)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment