Commit 0c07c4dd authored by Sridutt Bhalachandra's avatar Sridutt Bhalachandra
Browse files

[Feature] NRM response to phase_context event

Made changes in NRM to respond to phase_context event from [previously
called power_policy event (874a6a4d)] from the application. The NRM can now
store the informaton received on the event and call DDCM power policy through
interfaces developed (Issue #11) in the control loop

See Issue #10
parent 40f4361c
...@@ -18,12 +18,13 @@ class Application(object): ...@@ -18,12 +18,13 @@ class Application(object):
'min_ask_i': {'done': 'stable', 'noop': 'noop'}, 'min_ask_i': {'done': 'stable', 'noop': 'noop'},
'noop': {}} 'noop': {}}
def __init__(self, uuid, container, progress, threads): def __init__(self, uuid, container, progress, threads, phase_contexts):
self.uuid = uuid self.uuid = uuid
self.container_uuid = container self.container_uuid = container
self.progress = progress self.progress = progress
self.threads = threads self.threads = threads
self.thread_state = 'stable' self.thread_state = 'stable'
self.phase_contexts = phase_contexts
def do_thread_transition(self, event): def do_thread_transition(self, event):
"""Update the thread fsm state.""" """Update the thread fsm state."""
...@@ -58,6 +59,13 @@ class Application(object): ...@@ -58,6 +59,13 @@ class Application(object):
"""Update the progress tracking.""" """Update the progress tracking."""
assert self.progress assert self.progress
def update_phase_context(self, msg):
"""Update the phase contextual information."""
id = int(msg['cpu'])
self.phase_contexts[id] = {k: int(msg[k]) for k in ('startcompute',
'endcompute', 'startbarrier', 'endbarrier')}
self.phase_contexts[id]['set'] = True
class ApplicationManager(object): class ApplicationManager(object):
...@@ -66,15 +74,25 @@ class ApplicationManager(object): ...@@ -66,15 +74,25 @@ class ApplicationManager(object):
def __init__(self): def __init__(self):
self.applications = dict() self.applications = dict()
def register(self, msg): def register(self, msg, container):
"""Register a new downstream application.""" """Register a new downstream application."""
uuid = msg['uuid'] uuid = msg['uuid']
container = msg['container'] container_uuid = msg['container']
progress = msg['progress'] progress = msg['progress']
threads = msg['threads'] threads = msg['threads']
self.applications[uuid] = Application(uuid, container, progress, phase_contexts = dict()
threads) phase_context_keys = ['set', 'startcompute', 'endcompute',
'startbarrier', 'endbarrier']
if container.powerpolicy['policy']:
ids = container.resources['cpus']
for id in ids:
phase_contexts[id] = dict.fromkeys(phase_context_keys)
phase_contexts[id]['set'] = False
else:
phase_contexts = None
self.applications[uuid] = Application(uuid, container_uuid, progress,
threads, phase_contexts)
def delete(self, uuid): def delete(self, uuid):
"""Delete an application from the register.""" """Delete an application from the register."""
......
...@@ -117,3 +117,20 @@ class Controller(object): ...@@ -117,3 +117,20 @@ class Controller(object):
def update(self, action, actuator): def update(self, action, actuator):
"""Update tracking across the board to reflect the last action.""" """Update tracking across the board to reflect the last action."""
actuator.update(action) actuator.update(action)
def run_policy(self, containers):
"""Run policies on containers with policies set."""
for container in containers:
pp = containers[container].powerpolicy
if pp['policy']:
apps = self.actuators[0].application_manager.applications
if apps:
app = next(apps[a] for a in apps if apps[a].container_uuid
== container)
ids = containers[container].resources['cpus']
# Run policy only if all phase contexts have been received
if not filter(lambda i: not app.phase_contexts[i]['set'],
ids):
pp['manager'].run_policy(app.phase_contexts)
if filter(lambda i: app.phase_contexts[i]['set'], ids):
logger.debug("Phase context not reset %r", app)
...@@ -35,7 +35,9 @@ class Daemon(object): ...@@ -35,7 +35,9 @@ class Daemon(object):
logger.error("wrong message format: %r", msg) logger.error("wrong message format: %r", msg)
return return
if event == 'start': if event == 'start':
self.application_manager.register(msg) container_uuid = msg['container']
container = self.container_manager.containers[container_uuid]
self.application_manager.register(msg, container)
elif event == 'threads': elif event == 'threads':
uuid = msg['uuid'] uuid = msg['uuid']
if uuid in self.application_manager.applications: if uuid in self.application_manager.applications:
...@@ -50,9 +52,13 @@ class Daemon(object): ...@@ -50,9 +52,13 @@ class Daemon(object):
uuid = msg['uuid'] uuid = msg['uuid']
if uuid in self.application_manager.applications: if uuid in self.application_manager.applications:
app = self.application_manager.applications[uuid] app = self.application_manager.applications[uuid]
# TODO: Take appropriate action c = self.container_manager.containers[app.container_uuid]
if c.powerpolicy['policy']:
app.update_phase_context(msg)
elif event == 'exit': elif event == 'exit':
self.application_manager.delete(msg['uuid']) uuid = msg['uuid']
if uuid in self.application_manager.applications:
self.application_manager.delete(msg['uuid'])
else: else:
logger.error("unknown event: %r", event) logger.error("unknown event: %r", event)
return return
...@@ -85,8 +91,8 @@ class Daemon(object): ...@@ -85,8 +91,8 @@ class Daemon(object):
container.powerpolicy['manager'] = PowerPolicyManager( container.powerpolicy['manager'] = PowerPolicyManager(
container.resources['cpus'], container.resources['cpus'],
container.powerpolicy['policy'], container.powerpolicy['policy'],
container.powerpolicy['damper'], float(container.powerpolicy['damper']),
container.powerpolicy['slowdown']) float(container.powerpolicy['slowdown']))
# TODO: obviously we need to send more info than that # TODO: obviously we need to send more info than that
update = {'type': 'container', update = {'type': 'container',
'event': 'start', 'event': 'start',
...@@ -145,6 +151,9 @@ class Daemon(object): ...@@ -145,6 +151,9 @@ class Daemon(object):
if action: if action:
self.controller.execute(action, actuator) self.controller.execute(action, actuator)
self.controller.update(action, actuator) self.controller.update(action, actuator)
# Call policy only if there are containers
if self.container_manager.containers:
self.controller.run_policy(self.container_manager.containers)
def do_signal(self, signum, frame): def do_signal(self, signum, frame):
if signum == signal.SIGINT: if signum == signal.SIGINT:
...@@ -170,6 +179,8 @@ class Daemon(object): ...@@ -170,6 +179,8 @@ class Daemon(object):
# check if this is an exit # check if this is an exit
if os.WIFEXITED(status) or os.WIFSIGNALED(status): if os.WIFEXITED(status) or os.WIFSIGNALED(status):
container = self.container_manager.pids[pid] container = self.container_manager.pids[pid]
if container.powerpolicy['policy']:
container.powerpolicy['manager'].reset_all()
self.container_manager.delete(container.uuid) self.container_manager.delete(container.uuid)
msg = {'type': 'container', msg = {'type': 'container',
'event': 'exit', 'event': 'exit',
......
...@@ -58,31 +58,27 @@ class PowerPolicyManager: ...@@ -58,31 +58,27 @@ class PowerPolicyManager:
self.slowdownexits = 0 self.slowdownexits = 0
self.prevtolalphasetime = 10000.0 # Any large value self.prevtolalphasetime = 10000.0 # Any large value
def run_policy(self, cpu, startcompute, endcompute, startbarrier, def run_policy(self, phase_contexts):
endbarrier):
# Run only if policy is specified # Run only if policy is specified
if self.policy: if self.policy:
if cpu not in self.cpus: for id in phase_contexts:
logger.info("""Attempt to change power of cpu not in container if id not in self.cpus:
: %r""", cpu) logger.info("""Attempt to change power of cpu not in container
return : %r""", id)
# Select and invoke appropriate power policy return
# TODO: Need to add a better policy selection logic in addition to # Select and invoke appropriate power policy
# user specified using manifest file # TODO: Need to add a better policy selection logic in addition
ret, value = self.invoke_policy(cpu, self.policy, # to user specified using manifest file
self.dclevel[cpu], ret, value = self.invoke_policy(id, **phase_contexts[id])
self.freqlevel[cpu], startcompute, if self.policy == 'DDCM' and ret in ['DDCM', 'SLOWDOWN']:
endcompute, startbarrier, self.dclevel[id] = value
endbarrier) phase_contexts[id]['set'] = False
if self.policy == 'DDCM' and ret in ['DDCM', 'SLOWDOWN']:
self.dclevel[cpu] = value def invoke_policy(self, cpu, **kwargs):
def invoke_policy(self, cpu, policy, dclevel, freqlevel, startcompute,
endcompute, startbarrier, endbarrier):
# Calculate time spent in computation, barrier in current phase along # Calculate time spent in computation, barrier in current phase along
# with total phase time # with total phase time
computetime = endcompute - startcompute computetime = kwargs['endcompute'] - kwargs['startcompute']
barriertime = endbarrier - startbarrier barriertime = kwargs['endbarrier'] - kwargs['startbarrier']
totalphasetime = computetime + barriertime totalphasetime = computetime + barriertime
# If the current phase length is less than the damper value, then do # If the current phase length is less than the damper value, then do
...@@ -98,7 +94,7 @@ class PowerPolicyManager: ...@@ -98,7 +94,7 @@ class PowerPolicyManager:
# If the current phase has slowed down beyond the threshold set, then # If the current phase has slowed down beyond the threshold set, then
# reset power. This helps correct error in policy application or acts # reset power. This helps correct error in policy application or acts
# as a rudimentary way to detect phase change # as a rudimentary way to detect phase change
if(dclevel < self.ddcmpolicy.maxdclevel and totalphasetime > if(self.dclevel[cpu] < self.ddcmpolicy.maxdclevel and totalphasetime >
self.slowdown * self.prevtolalphasetime): self.slowdown * self.prevtolalphasetime):
self.ddcmpolicy.dc.reset(cpu) self.ddcmpolicy.dc.reset(cpu)
newdclevel = self.ddcmpolicy.maxdclevel newdclevel = self.ddcmpolicy.maxdclevel
...@@ -106,9 +102,9 @@ class PowerPolicyManager: ...@@ -106,9 +102,9 @@ class PowerPolicyManager:
return 'SLOWDOWN', newdclevel return 'SLOWDOWN', newdclevel
# Invoke the correct policy based on operation module # Invoke the correct policy based on operation module
if policy == "DDCM": if self.policy == "DDCM":
newdclevel = self.ddcmpolicy.execute(cpu, dclevel, computetime, newdclevel = self.ddcmpolicy.execute(cpu, self.dclevel[cpu],
totalphasetime) computetime, totalphasetime)
# TODO: Add DVFS and Combined policies # TODO: Add DVFS and Combined policies
...@@ -135,3 +131,8 @@ class PowerPolicyManager: ...@@ -135,3 +131,8 @@ class PowerPolicyManager:
def power_check(self, cpu): def power_check(self, cpu):
# Check status of all power controls # Check status of all power controls
return self.ddcmpolicy.dc.check(cpu) return self.ddcmpolicy.dc.check(cpu)
def reset_all(self):
# Reset all cpus
for cpu in self.cpus:
self.power_reset(cpu)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment