Commit d0499a02 authored by Swann Perarnau

Merge branch 'wip_12-add-application-level-power-profiling-support' into 'master'

Add application level power profiling support

Closes #12

See merge request !24
parents a8516291 d859a647
Pipeline #3799 passed with stages
in 2 minutes and 22 seconds
......@@ -25,9 +25,10 @@
}
},
{
"name": "argo/powerpolicy",
"name": "argo/power",
"value": {
"enabled": "0",
"enabled": "1",
"profile": "1",
"policy": "NONE",
"damper": "0.1",
"slowdown": "1.1"
......
......@@ -143,47 +143,52 @@ class PerfWrapper(SpecField):
if not ret:
return ret
if self.enabled not in ["0", "False", "1", "True"]:
logger.error("Invalid value of perfwrapper enabled: %s",
logger.error("Invalid value for perfwrapper enabled: %s",
self.enabled)
return False
return True
class Power(SpecField):

    """Power settings for a container."""

    # Power policies accepted in the "policy" field.
    policies = ['NONE', 'DDCM', 'DVFS', 'COMBINED']

    # Every field is carried as a string in the manifest; numeric and boolean
    # interpretation is done during validation in load().
    fields = {"enabled": spec(unicode, False),
              "profile": spec(unicode, False),
              "policy": spec(unicode, False),
              "damper": spec(unicode, False),
              "slowdown": spec(unicode, False)
              }

    def __init__(self):
        """Create empty power settings object."""
        pass

    def load(self, data):
        """Load power settings and validate every field.

        Returns the (falsy) result of the parent loader if it fails, False if
        any field holds an invalid value (an error is logged naming the
        offending value), and True otherwise.
        """
        ret = super(Power, self).load(data)
        if not ret:
            return ret

        switch_values = ["0", "False", "1", "True"]
        if self.enabled not in switch_values:
            logger.error("Invalid value for power enabled: %s",
                         self.enabled)
            return False
        if self.profile not in switch_values:
            # Report the profile value itself, not the enabled flag.
            logger.error("Invalid value for power profile: %s",
                         self.profile)
            return False
        if self.policy not in self.policies:
            logger.error("Invalid value for power policy: %s",
                         self.policy)
            return False

        # damper and slowdown are strings: convert before the range check,
        # since comparing a string with a float never yields a meaningful
        # result. Unparsable values are rejected like out-of-range ones.
        try:
            damper_ok = float(self.damper) >= 0.0
        except (TypeError, ValueError):
            damper_ok = False
        if not damper_ok:
            logger.error("Invalid value for power policy damper: %s",
                         self.damper)
            return False

        try:
            slowdown_ok = float(self.slowdown) >= 1.0
        except (TypeError, ValueError):
            slowdown_ok = False
        if not slowdown_ok:
            logger.error("Invalid value for power policy slowdown: %s",
                         self.slowdown)
            return False

        return True
......@@ -196,7 +201,7 @@ class IsolatorList(SpecField):
types = {"argo/scheduler": spec(Scheduler, False),
"argo/container": spec(Container, True),
"argo/perfwrapper": spec(PerfWrapper, False),
"argo/powerpolicy": spec(PowerPolicy, False)
"argo/power": spec(Power, False),
}
def __init__(self):
......
......@@ -84,7 +84,7 @@ class ApplicationManager(object):
phase_contexts = dict()
phase_context_keys = ['set', 'startcompute', 'endcompute',
'startbarrier', 'endbarrier']
if container.powerpolicy['policy']:
if container.power['policy']:
ids = container.resources['cpus']
for id in ids:
phase_contexts[id] = dict.fromkeys(phase_context_keys)
......
......@@ -8,7 +8,7 @@ from subprograms import ChrtClient, NodeOSClient, resources
logger = logging.getLogger('nrm')
Container = namedtuple('Container', ['uuid', 'manifest', 'resources',
'powerpolicy', 'process'])
'power', 'process'])
class ContainerManager(object):
......@@ -60,12 +60,13 @@ class ContainerManager(object):
container_resources = dict()
container_resources['cpus'], container_resources['mems'] = allocation
# Container power policy information
container_powerpolicy = dict()
container_powerpolicy['policy'] = None
container_powerpolicy['damper'] = None
container_powerpolicy['slowdown'] = None
container_powerpolicy['manager'] = None
# Container power settings
container_power = dict()
container_power['profile'] = None
container_power['policy'] = None
container_power['damper'] = None
container_power['slowdown'] = None
container_power['manager'] = None
# TODO: Application library to load must be set during configuration
applicationpreloadlibrary = ''
......@@ -85,21 +86,25 @@ class ContainerManager(object):
if manifest.app.isolators.perfwrapper.enabled in ["1", "True"]:
argv.append('argo-perf-wrapper')
if hasattr(manifest.app.isolators, 'powerpolicy'):
if hasattr(manifest.app.isolators.powerpolicy, 'enabled'):
pp = manifest.app.isolators.powerpolicy
if hasattr(manifest.app.isolators, 'power'):
if hasattr(manifest.app.isolators.power, 'enabled'):
pp = manifest.app.isolators.power
if pp.enabled in ["1", "True"]:
if pp.profile in ["1", "True"]:
container_power['profile'] = dict()
container_power['profile']['start'] = dict()
container_power['profile']['end'] = dict()
if pp.policy != "NONE":
container_powerpolicy['policy'] = pp.policy
container_powerpolicy['damper'] = pp.damper
container_powerpolicy['slowdown'] = pp.slowdown
container_power['policy'] = pp.policy
container_power['damper'] = pp.damper
container_power['slowdown'] = pp.slowdown
environ['LD_PRELOAD'] = applicationpreloadlibrary
argv.append(command)
argv.extend(args)
process = self.nodeos.execute(container_name, argv, environ)
c = Container(container_name, manifest, container_resources,
container_powerpolicy, process)
container_power, process)
self.pids[process.pid] = c
self.containers[container_name] = c
logger.info("Container %s created and running : %r", container_name, c)
......
......@@ -121,7 +121,7 @@ class Controller(object):
def run_policy(self, containers):
"""Run policies on containers with policies set."""
for container in containers:
pp = containers[container].powerpolicy
pp = containers[container].power
if pp['policy']:
apps = self.actuators[0].application_manager.applications
if apps:
......
......@@ -142,10 +142,12 @@ class rapl_reader:
ret[k] = dvals
return ret
def diffenergy(self,e1,e2): # e1 is prev and e2 is not
def diffenergy(self,e1,e2,shortenFlag=False): # e1 is prev and e2 is not
ret = {}
ret['time'] = e2['time'] - e1['time']
for k in self.max_energy_range_uj_d:
if shortenFlag:
k = self.shortenkey(k)
if e2[k]>=e1[k]:
ret[k] = e2[k] - e1[k]
else:
......@@ -155,7 +157,7 @@ class rapl_reader:
# calculate the average power from two energy values
# e1 and e2 are the value returned from readenergy()
# e1 should be sampled before e2
def calcpower(self,e1,e2):
def calcpower(self,e1,e2,shortenFlag=False):
ret = {}
delta = e2['time'] - e1['time'] # assume 'time' never wrap around
ret['delta'] = delta
......@@ -166,6 +168,8 @@ class rapl_reader:
return ret
for k in self.max_energy_range_uj_d:
if shortenFlag:
k = self.shortenkey(k)
if e2[k]>=e1[k]:
ret[k] = e2[k] - e1[k]
else:
......
......@@ -53,7 +53,7 @@ class Daemon(object):
if uuid in self.application_manager.applications:
app = self.application_manager.applications[uuid]
c = self.container_manager.containers[app.container_uuid]
if c.powerpolicy['policy']:
if c.power['policy']:
app.update_phase_context(msg)
elif event == 'exit':
uuid = msg['uuid']
......@@ -87,19 +87,23 @@ class Daemon(object):
logger.info("new container required: %r", msg)
container = self.container_manager.create(msg)
if container.powerpolicy['policy']:
container.powerpolicy['manager'] = PowerPolicyManager(
if container.power['policy']:
container.power['manager'] = PowerPolicyManager(
container.resources['cpus'],
container.powerpolicy['policy'],
float(container.powerpolicy['damper']),
float(container.powerpolicy['slowdown']))
container.power['policy'],
float(container.power['damper']),
float(container.power['slowdown']))
if container.power['profile']:
p = container.power['profile']
p['start'] = self.machine_info['energy']['energy']
p['start']['time'] = self.machine_info['time']
# TODO: obviously we need to send more info than that
update = {'type': 'container',
'event': 'start',
'uuid': container_uuid,
'errno': 0 if container else -1,
'pid': container.process.pid,
'powerpolicy': container.powerpolicy['policy']
'power': container.power['policy']
}
self.upstream_pub.send_json(update)
# setup io callbacks
......@@ -179,14 +183,29 @@ class Daemon(object):
# check if this is an exit
if os.WIFEXITED(status) or os.WIFSIGNALED(status):
container = self.container_manager.pids[pid]
if container.powerpolicy['policy']:
container.powerpolicy['manager'].reset_all()
self.container_manager.delete(container.uuid)
pp = container.power
if pp['policy']:
pp['manager'].reset_all()
msg = {'type': 'container',
'event': 'exit',
'status': status,
'uuid': container.uuid,
}
if pp['profile']:
e = pp['profile']['end']
self.machine_info = self.sensor_manager.do_update()
e = self.machine_info['energy']['energy']
e['time'] = self.machine_info['time']
s = pp['profile']['start']
# Calculate difference between the values
diff = self.sensor_manager.calc_difference(s, e)
# Get final package temperature
temp = self.machine_info['temperature']
diff['temp'] = map(lambda k: temp[k]['pkg'], temp)
logger.info("Container %r profile data: %r",
container.uuid, diff)
msg['profile_data'] = diff
self.container_manager.delete(container.uuid)
self.upstream_pub.send_json(msg)
else:
logger.debug("child update ignored")
......
......@@ -6,6 +6,7 @@
This module should be the only one interfacing with coolr.
"""
from __future__ import print_function
import time
import coolr
import coolr.clr_rapl
import coolr.clr_hwmon
......@@ -33,6 +34,7 @@ class SensorManager:
machine_info = dict()
machine_info['energy'] = self.rapl.sample(accflag=True)
machine_info['temperature'] = self.coretemp.sample()
machine_info['time'] = time.time()
return machine_info
def get_powerlimits(self):
......@@ -42,3 +44,22 @@ class SensorManager:
def set_powerlimit(self, domain, value):
self.rapl.set_powerlimit(value, domain)
def calc_difference(self, start, end):
    """Summarize what changed between two energy samples.

    Both samples are handed to the RAPL reader's diffenergy() and
    calcpower() with shortened keys. The result holds three entries:
    'energy' (per-key energy difference converted from uJ to J), 'time'
    (the elapsed time reported by diffenergy()) and 'power' (the
    calcpower() result without its 'delta' entry).
    """
    # Energy difference; its 'time' entry is reported separately.
    energy_uj = self.rapl.diffenergy(start, end, shortenFlag=True)
    elapsed = energy_uj.pop('time')
    # Convert uJ to J
    energy_j = dict((key, value / (1000000.0))
                    for key, value in energy_uj.items())
    # Power over the same interval, minus the bookkeeping 'delta' entry.
    power = self.rapl.calcpower(start, end, shortenFlag=True)
    power.pop('delta')
    return {'energy': energy_j, 'time': elapsed, 'power': power}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment