Commit 9b2d4452 authored by Valentin Reis

Merge branch 'nrm-gen' into 'master'

Use json schemas for message formats

See merge request !90
parents cb3132e2 cd1d86b3
Pipeline #7316 passed with stages
in 7 minutes and 1 second
eval "$(lorri direnv)"
---
variables:
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/master/argopkgs-master.tar.gz"
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/nrm-gen/argopkgs-nrm-gen.tar.gz"
EXTRA: "--nrm ./."
stages:
......@@ -11,11 +11,11 @@ stages:
- quality
include:
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/repoquality.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/repoquality.yml
py.test:
stage: test
......@@ -28,6 +28,6 @@ py.test:
flake8:
stage: style
script:
- nix run -f "$ARGOPKGS" pythonPackages.flake8 --command flake8 nrm/* bin/*
- nix run -f "$ARGOPKGS" pythonPackages.flake8 --command flake8 nrm/*.py bin/*
tags:
- integration
include Makefile
include tox.ini
include nrm/schemas/*.json
......@@ -9,6 +9,8 @@ six = "==1.11.0"
pyzmq = "*"
tornado = "*"
numpy = "*"
warlock = "*"
scipy = "*"
argparse = "*"
[dev-packages]
......
This diff is collapsed.
......@@ -19,11 +19,8 @@ import nrm.messaging
import uuid
import sys
import time
import collections
RPC_MSG = nrm.messaging.MSGTYPES['up_rpc_req']
logger = logging.getLogger('nrm')
KillArgs = collections.namedtuple("Kill", ["uuid"])
class CommandLineInterface(object):
......@@ -37,12 +34,9 @@ class CommandLineInterface(object):
if uuid:
logger.info("received signal %d, killing the application..",
signum)
command = {'api': 'up_rpc_req',
'type': 'kill',
'container_uuid': uuid
}
msg = RPC_MSG['kill'](**command)
self.client.sendmsg(msg)
self.client.send(
"Kill",
container_uuid=uuid)
logger.info("killed the application, exiting.")
else:
logger.info("received signal %d, exiting", signum)
......@@ -70,33 +64,26 @@ class CommandLineInterface(object):
self.pub_client.connect()
while(True):
msg = self.pub_client.recvmsg()
msg = self.pub_client.recv()
logger.debug("pub message: %s", msg)
def print_if_filter():
if argv.filter:
if argv.filter == msg.type:
if (msg.type == "performance" or
msg.type == "progress"):
print("%s, %s, %s" % (msg.type, time.time(),
if argv.filter == msg.tag:
if (msg.tag == "performance" or
msg.tag == "progress"):
print("%s, %s, %s" % (msg.tag, time.time(),
msg.payload))
if msg.type == "power":
print("%s, %s, %s" % (msg.type, time.time(),
if msg.tag == "power":
print("%s, %s, %s" % (msg.tag, time.time(),
msg.total))
if msg.type == "container_exit":
print("%s, %s, %s" % (msg.type, time.time(),
if msg.tag == "exit":
print("%s, %s, %s" % (msg.tag, time.time(),
msg.profile_data))
else:
print("%s, %s" % (msg.type, time.time()))
print("%s, %s" % (msg.tag, time.time()))
sys.stdout.flush()
print_if_filter()
# if argv.uuid:
# uuid = getattr(msg, 'container_uuid', None)
# if argv.uuid == uuid or msg.type == "power":
# print_if_filter()
# else:
# print_if_filter()
def do_run(self, argv):
""" Connect to the NRM and ask to spawn a container and run a command
......@@ -108,6 +95,7 @@ class CommandLineInterface(object):
# the command a container uuid as a way to make sure that we can make
# the command idempotent.
environ = os.environ
# environ = []
container_uuid = argv.ucontainername or str(uuid.uuid4())
# simple check + error msg + non-zero return code
......@@ -118,26 +106,22 @@ class CommandLineInterface(object):
logger.error("Manifest file not found: %s", path)
sys.exit(1)
command = {'api': 'up_rpc_req',
'type': 'run',
'manifest': sanitize_manifest(argv.manifest),
'path': argv.command,
'args': argv.args,
'environ': dict(environ),
'container_uuid': container_uuid,
}
msg = RPC_MSG['run'](**command)
# command fsm
state = 'init'
outeof = False
erreof = False
exitmsg = None
self.client.sendmsg(msg)
self.client.send(
tag="run",
manifest=sanitize_manifest(argv.manifest),
path=argv.command,
args=argv.args,
environ=dict(environ),
container_uuid=container_uuid)
# the first message tells us if we started a container or not
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'process_start'
msg = self.client.recv()
assert msg.tag == 'start'
def handler(signum, frame):
self.do_signal(msg.container_uuid, signum, frame)
......@@ -145,25 +129,24 @@ class CommandLineInterface(object):
state = 'started'
while(True):
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type in ['stdout', 'stderr', 'exit', 'process_exit']
msg = self.client.recv()
assert msg.tag in ['stdout', 'stderr', 'exit']
if msg.type == 'stdout':
if msg.tag == 'stdout':
logger.debug("container msg: %r", msg)
if msg.payload == 'eof':
outeof = True
else:
print(msg.payload)
print(msg.payload, file=sys.stdout)
sys.stdout.flush()
elif msg.type == 'stderr':
elif msg.tag == 'stderr':
logger.debug("container msg: %r", msg)
if msg.payload == 'eof':
erreof = True
else:
print(msg.payload, file=sys.stderr)
sys.stdout.flush()
elif msg.type == 'process_exit':
sys.stderr.flush()
elif msg.tag == 'exit':
logger.info("process ended: %r", msg)
state = 'exiting'
exitmsg = msg
......@@ -197,13 +180,9 @@ class CommandLineInterface(object):
The NRM should respond to us with one message listing all
containers."""
command = {'api': 'up_rpc_req',
'type': 'list'}
msg = RPC_MSG['list'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'list'
self.client.send(tag="list")
msg = self.client.recv()
assert msg.tag == 'list'
logger.info("list response: %r", msg)
def do_kill(self, argv):
......@@ -212,15 +191,9 @@ class CommandLineInterface(object):
The NRM should respond to us with a message containing the exit status
of the top process of the container."""
command = {'api': 'up_rpc_req',
'type': 'kill',
'container_uuid': argv.uuid
}
msg = RPC_MSG['kill'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'exit'
self.client.send(tag="kill", container_uuid=argv.uuid)
msg = self.client.recv()
assert msg.tag == 'exit'
logger.info("container exit: %r", msg)
def do_setpower(self, argv):
......@@ -233,15 +206,9 @@ class CommandLineInterface(object):
# timely answer.
# TODO: check that the level makes a little bit of sense in the first
# place
command = {'api': 'up_rpc_req',
'type': 'setpower',
'limit': str(argv.limit),
}
msg = RPC_MSG['setpower'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'getpower'
self.client.send(tag="setPower", limit=str(argv.limit))
msg = self.client.recv()
assert msg.tag == 'getPower'
logger.info("command received by the daemon: %r", msg)
def main(self):
......
......@@ -20,7 +20,6 @@ import subprocess
import uuid
from nrm import messaging
PUB_MSG = messaging.MSGTYPES['down_event']
logger = logging.getLogger('perf-wrapper')
......@@ -33,26 +32,19 @@ class PerfWrapper(object):
pass
def shutdown(self):
update = {'api': 'down_event',
'type': 'application_exit',
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['application_exit'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(tag="exit", application_uuid=self.app_uuid)
def performance_report(self, performance):
update = {'api': 'down_event',
'type': 'performance',
'payload': performance,
'container_uuid': self.container_uuid,
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['performance'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(
tag="performance",
payload=performance,
container_uuid=self.container_uuid,
application_uuid=self.app_uuid)
def setup(self):
downstream_url = "ipc:///tmp/nrm-downstream-event"
self.downstream_event = messaging.DownstreamEventClient(downstream_url)
logger.info("connecting downstream pub")
self.downstream_event.connect()
logger.info("downstream pub socket connected to: %s", downstream_url)
......@@ -64,13 +56,10 @@ class PerfWrapper(object):
self.app_uuid = str(uuid.uuid4())
logger.info("client uuid: %r", self.app_uuid)
# send a hello to the daemon
update = {'api': 'down_event',
'type': 'application_start',
'container_uuid': self.container_uuid,
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['application_start'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(
tag="start",
container_uuid=self.container_uuid,
application_uuid=self.app_uuid)
def main(self):
parser = argparse.ArgumentParser()
......
......@@ -91,8 +91,8 @@ class ApplicationManager(object):
def register(self, msg, container):
"""Register a new downstream application."""
uuid = msg.application_uuid
container_uuid = msg.container_uuid
uuid = msg['application_uuid']
container_uuid = msg['container_uuid']
progress = 0
threads = False
phase_contexts = dict()
......
......@@ -25,41 +25,41 @@ class Action(object):
self.delta = delta
class ApplicationActuator(object):
"""Actuator in charge of application thread control."""
def __init__(self, am, pubstream):
self.application_manager = am
self.pubstream = pubstream
def available_actions(self, target):
ret = []
for identity, application in \
self.application_manager.applications.iteritems():
if target in application.get_allowed_thread_requests():
delta = application.get_thread_request_impact(target)
ret.append(Action(application, target, delta))
return ret
def execute(self, action):
target_threads = action.target.threads
update = {'type': 'application',
'command': 'threads',
'uuid': action.target.uuid,
'event': 'threads',
}
if action.command == 'i':
payload = target_threads['cur'] + 1
elif action.command == 'd':
payload = target_threads['cur'] - 1
else:
assert False, "impossible command"
update['payload'] = payload
self.pubstream.send_json(update)
def update(self, action):
action.target.do_thread_transition(action.command)
# class ApplicationActuator(object):
#
# """Actuator in charge of application thread control."""
#
# def __init__(self, am, pubstream):
# self.application_manager = am
# self.pubstream = pubstream
#
# def available_actions(self, target):
# ret = []
# for identity, application in \
# self.application_manager.applications.iteritems():
# if target in application.get_allowed_thread_requests():
# delta = application.get_thread_request_impact(target)
# ret.append(Action(application, target, delta))
# return ret
#
# def execute(self, action):
# target_threads = action.target.threads
# update = {'type': 'application',
# 'command': 'threads',
# 'uuid': action.target.uuid,
# 'event': 'threads',
# }
# if action.command == 'i':
# payload = target_threads['cur'] + 1
# elif action.command == 'd':
# payload = target_threads['cur'] - 1
# else:
# assert False, "impossible command"
# update['payload'] = payload
# self.pubstream.send_json()
#
# def update(self, action):
# action.target.do_thread_transition(action.command)
class PowerActuator(object):
......
This diff is collapsed.
This diff is collapsed.
{
"oneOf": [
{
"required": [
"tag",
"container_uuid",
"application_uuid"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"start"
]
},
"container_uuid": {
"type": "string"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"exit"
]
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"performance"
]
},
"payload": {
"type": "number"
},
"container_uuid": {
"type": "string"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"progress"
]
},
"payload": {
"type": "number"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"cpu",
"startcompute",
"endcompute",
"startbarrier",
"endbarrier"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"phasecontext"
]
},
"endcompute": {
"type": "number"
},
"endbarrier": {
"type": "number"
},
"startbarrier": {
"type": "number"
},
"startcompute": {
"type": "number"
},
"cpu": {
"type": "number"
}
}
}
]
}
{
"oneOf": [
{
"required": [
"tag",
"total",
"limit"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"power"
]
},
"total": {
"type": "number"
},
"limit": {
"type": "number"
}
}
},
{
"required": [
"tag",
"container_uuid",
"errno",
"power"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"start"
]
},
"errno": {
"type": "number"
},
"power": {
"type": "string"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"profile_data"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"exit"
]
},
"profile_data": {
"additionalProperties": {
"type": "string"
},
"type": "object"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"performance"
]
},
"payload": {
"type": "number"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"progress"
]
},
"payload": {
"type": "number"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"powercap",
"energy",
"performance",
"control_time",
"feedback_time"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"control"
]
},
"energy": {
"type": "number"
},
"control_time": {
"type": "number"
},
"powercap": {
"type": "number"
},
"performance": {
"type": "number"
},
"feedback_time": {
"type": "number"
}
}
}
]
}
{
"oneOf": [
{
"required": [
"tag",
"containers"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"list"
]
},
"containers": {
"uniqueItems": false,
"items": {
"type": "string"
},
"type": "array"
}
}
},
{
"required": [
"tag",
"container_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"stdout"
]
},
"payload": {
"type": "string"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",