Commit cd1d86b3 authored by Valentin Reis's avatar Valentin Reis

[feature] moves the message formats to json schema.

Adds the nrm/schemas repository which defines the communication schemas
for the upstream and downstream APIs. The messaging.py file now uses
decorators and two added python dependencies (jsonschema and warlock).
This commit also adds the .envrc direnv configuration file for
nix-based development.
parent cb3132e2
Pipeline #7313 passed with stages
in 5 minutes and 25 seconds
eval "$(lorri direnv)"
---
variables:
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/master/argopkgs-master.tar.gz"
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/nrm-gen/argopkgs-nrm-gen.tar.gz"
EXTRA: "--nrm ./."
stages:
......@@ -11,11 +11,11 @@ stages:
- quality
include:
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/repoquality.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-gen/gitlab-ci/repoquality.yml
py.test:
stage: test
......@@ -28,6 +28,6 @@ py.test:
flake8:
stage: style
script:
- nix run -f "$ARGOPKGS" pythonPackages.flake8 --command flake8 nrm/* bin/*
- nix run -f "$ARGOPKGS" pythonPackages.flake8 --command flake8 nrm/*.py bin/*
tags:
- integration
include Makefile
include tox.ini
include nrm/schemas/*.json
......@@ -9,6 +9,8 @@ six = "==1.11.0"
pyzmq = "*"
tornado = "*"
numpy = "*"
warlock = "*"
scipy = "*"
argparse = "*"
[dev-packages]
......
This diff is collapsed.
......@@ -19,11 +19,8 @@ import nrm.messaging
import uuid
import sys
import time
import collections
RPC_MSG = nrm.messaging.MSGTYPES['up_rpc_req']
logger = logging.getLogger('nrm')
KillArgs = collections.namedtuple("Kill", ["uuid"])
class CommandLineInterface(object):
......@@ -37,12 +34,9 @@ class CommandLineInterface(object):
if uuid:
logger.info("received signal %d, killing the application..",
signum)
command = {'api': 'up_rpc_req',
'type': 'kill',
'container_uuid': uuid
}
msg = RPC_MSG['kill'](**command)
self.client.sendmsg(msg)
self.client.send(
"Kill",
container_uuid=uuid)
logger.info("killed the application, exiting.")
else:
logger.info("received signal %d, exiting", signum)
......@@ -70,33 +64,26 @@ class CommandLineInterface(object):
self.pub_client.connect()
while(True):
msg = self.pub_client.recvmsg()
msg = self.pub_client.recv()
logger.debug("pub message: %s", msg)
def print_if_filter():
if argv.filter:
if argv.filter == msg.type:
if (msg.type == "performance" or
msg.type == "progress"):
print("%s, %s, %s" % (msg.type, time.time(),
if argv.filter == msg.tag:
if (msg.tag == "performance" or
msg.tag == "progress"):
print("%s, %s, %s" % (msg.tag, time.time(),
msg.payload))
if msg.type == "power":
print("%s, %s, %s" % (msg.type, time.time(),
if msg.tag == "power":
print("%s, %s, %s" % (msg.tag, time.time(),
msg.total))
if msg.type == "container_exit":
print("%s, %s, %s" % (msg.type, time.time(),
if msg.tag == "exit":
print("%s, %s, %s" % (msg.tag, time.time(),
msg.profile_data))
else:
print("%s, %s" % (msg.type, time.time()))
print("%s, %s" % (msg.tag, time.time()))
sys.stdout.flush()
print_if_filter()
# if argv.uuid:
# uuid = getattr(msg, 'container_uuid', None)
# if argv.uuid == uuid or msg.type == "power":
# print_if_filter()
# else:
# print_if_filter()
def do_run(self, argv):
""" Connect to the NRM and ask to spawn a container and run a command
......@@ -108,6 +95,7 @@ class CommandLineInterface(object):
# the command a container uuid as a way to make sure that we can make
# the command idempotent.
environ = os.environ
# environ = []
container_uuid = argv.ucontainername or str(uuid.uuid4())
# simple check + error msg + non-zero return code
......@@ -118,26 +106,22 @@ class CommandLineInterface(object):
logger.error("Manifest file not found: %s", path)
sys.exit(1)
command = {'api': 'up_rpc_req',
'type': 'run',
'manifest': sanitize_manifest(argv.manifest),
'path': argv.command,
'args': argv.args,
'environ': dict(environ),
'container_uuid': container_uuid,
}
msg = RPC_MSG['run'](**command)
# command fsm
state = 'init'
outeof = False
erreof = False
exitmsg = None
self.client.sendmsg(msg)
self.client.send(
tag="run",
manifest=sanitize_manifest(argv.manifest),
path=argv.command,
args=argv.args,
environ=dict(environ),
container_uuid=container_uuid)
# the first message tells us if we started a container or not
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'process_start'
msg = self.client.recv()
assert msg.tag == 'start'
def handler(signum, frame):
self.do_signal(msg.container_uuid, signum, frame)
......@@ -145,25 +129,24 @@ class CommandLineInterface(object):
state = 'started'
while(True):
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type in ['stdout', 'stderr', 'exit', 'process_exit']
msg = self.client.recv()
assert msg.tag in ['stdout', 'stderr', 'exit']
if msg.type == 'stdout':
if msg.tag == 'stdout':
logger.debug("container msg: %r", msg)
if msg.payload == 'eof':
outeof = True
else:
print(msg.payload)
print(msg.payload, file=sys.stdout)
sys.stdout.flush()
elif msg.type == 'stderr':
elif msg.tag == 'stderr':
logger.debug("container msg: %r", msg)
if msg.payload == 'eof':
erreof = True
else:
print(msg.payload, file=sys.stderr)
sys.stdout.flush()
elif msg.type == 'process_exit':
sys.stderr.flush()
elif msg.tag == 'exit':
logger.info("process ended: %r", msg)
state = 'exiting'
exitmsg = msg
......@@ -197,13 +180,9 @@ class CommandLineInterface(object):
The NRM should respond to us with one message listing all
containers."""
command = {'api': 'up_rpc_req',
'type': 'list'}
msg = RPC_MSG['list'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'list'
self.client.send(tag="list")
msg = self.client.recv()
assert msg.tag == 'list'
logger.info("list response: %r", msg)
def do_kill(self, argv):
......@@ -212,15 +191,9 @@ class CommandLineInterface(object):
The NRM should respond to us with a message containing the exit status
of the top process of the container."""
command = {'api': 'up_rpc_req',
'type': 'kill',
'container_uuid': argv.uuid
}
msg = RPC_MSG['kill'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'exit'
self.client.send(tag="kill", container_uuid=argv.uuid)
msg = self.client.recv()
assert msg.tag == 'exit'
logger.info("container exit: %r", msg)
def do_setpower(self, argv):
......@@ -233,15 +206,9 @@ class CommandLineInterface(object):
# timely answer.
# TODO: check that the level makes a little bit of sense in the first
# place
command = {'api': 'up_rpc_req',
'type': 'setpower',
'limit': str(argv.limit),
}
msg = RPC_MSG['setpower'](**command)
self.client.sendmsg(msg)
msg = self.client.recvmsg()
assert msg.api == 'up_rpc_rep'
assert msg.type == 'getpower'
self.client.send(tag="setPower", limit=str(argv.limit))
msg = self.client.recv()
assert msg.tag == 'getPower'
logger.info("command received by the daemon: %r", msg)
def main(self):
......
......@@ -20,7 +20,6 @@ import subprocess
import uuid
from nrm import messaging
PUB_MSG = messaging.MSGTYPES['down_event']
logger = logging.getLogger('perf-wrapper')
......@@ -33,26 +32,19 @@ class PerfWrapper(object):
pass
def shutdown(self):
update = {'api': 'down_event',
'type': 'application_exit',
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['application_exit'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(tag="exit", application_uuid=self.app_uuid)
def performance_report(self, performance):
update = {'api': 'down_event',
'type': 'performance',
'payload': performance,
'container_uuid': self.container_uuid,
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['performance'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(
tag="performance",
payload=performance,
container_uuid=self.container_uuid,
application_uuid=self.app_uuid)
def setup(self):
downstream_url = "ipc:///tmp/nrm-downstream-event"
self.downstream_event = messaging.DownstreamEventClient(downstream_url)
logger.info("connecting downstream pub")
self.downstream_event.connect()
logger.info("downstream pub socket connected to: %s", downstream_url)
......@@ -64,13 +56,10 @@ class PerfWrapper(object):
self.app_uuid = str(uuid.uuid4())
logger.info("client uuid: %r", self.app_uuid)
# send a hello to the daemon
update = {'api': 'down_event',
'type': 'application_start',
'container_uuid': self.container_uuid,
'application_uuid': self.app_uuid,
}
msg = PUB_MSG['application_start'](**update)
self.downstream_event.sendmsg(msg)
self.downstream_event.send(
tag="start",
container_uuid=self.container_uuid,
application_uuid=self.app_uuid)
def main(self):
parser = argparse.ArgumentParser()
......
......@@ -91,8 +91,8 @@ class ApplicationManager(object):
def register(self, msg, container):
"""Register a new downstream application."""
uuid = msg.application_uuid
container_uuid = msg.container_uuid
uuid = msg['application_uuid']
container_uuid = msg['container_uuid']
progress = 0
threads = False
phase_contexts = dict()
......
......@@ -25,41 +25,41 @@ class Action(object):
self.delta = delta
class ApplicationActuator(object):
"""Actuator in charge of application thread control."""
def __init__(self, am, pubstream):
self.application_manager = am
self.pubstream = pubstream
def available_actions(self, target):
ret = []
for identity, application in \
self.application_manager.applications.iteritems():
if target in application.get_allowed_thread_requests():
delta = application.get_thread_request_impact(target)
ret.append(Action(application, target, delta))
return ret
def execute(self, action):
target_threads = action.target.threads
update = {'type': 'application',
'command': 'threads',
'uuid': action.target.uuid,
'event': 'threads',
}
if action.command == 'i':
payload = target_threads['cur'] + 1
elif action.command == 'd':
payload = target_threads['cur'] - 1
else:
assert False, "impossible command"
update['payload'] = payload
self.pubstream.send_json(update)
def update(self, action):
action.target.do_thread_transition(action.command)
# class ApplicationActuator(object):
#
# """Actuator in charge of application thread control."""
#
# def __init__(self, am, pubstream):
# self.application_manager = am
# self.pubstream = pubstream
#
# def available_actions(self, target):
# ret = []
# for identity, application in \
# self.application_manager.applications.iteritems():
# if target in application.get_allowed_thread_requests():
# delta = application.get_thread_request_impact(target)
# ret.append(Action(application, target, delta))
# return ret
#
# def execute(self, action):
# target_threads = action.target.threads
# update = {'type': 'application',
# 'command': 'threads',
# 'uuid': action.target.uuid,
# 'event': 'threads',
# }
# if action.command == 'i':
# payload = target_threads['cur'] + 1
# elif action.command == 'd':
# payload = target_threads['cur'] - 1
# else:
# assert False, "impossible command"
# update['payload'] = payload
# self.pubstream.send_json()
#
# def update(self, action):
# action.target.do_thread_transition(action.command)
class PowerActuator(object):
......
This diff is collapsed.
This diff is collapsed.
{
"oneOf": [
{
"required": [
"tag",
"container_uuid",
"application_uuid"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"start"
]
},
"container_uuid": {
"type": "string"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"exit"
]
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"performance"
]
},
"payload": {
"type": "number"
},
"container_uuid": {
"type": "string"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"progress"
]
},
"payload": {
"type": "number"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"cpu",
"startcompute",
"endcompute",
"startbarrier",
"endbarrier"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"phasecontext"
]
},
"endcompute": {
"type": "number"
},
"endbarrier": {
"type": "number"
},
"startbarrier": {
"type": "number"
},
"startcompute": {
"type": "number"
},
"cpu": {
"type": "number"
}
}
}
]
}
{
"oneOf": [
{
"required": [
"tag",
"total",
"limit"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"power"
]
},
"total": {
"type": "number"
},
"limit": {
"type": "number"
}
}
},
{
"required": [
"tag",
"container_uuid",
"errno",
"power"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"start"
]
},
"errno": {
"type": "number"
},
"power": {
"type": "string"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"profile_data"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"exit"
]
},
"profile_data": {
"additionalProperties": {
"type": "string"
},
"type": "object"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"performance"
]
},
"payload": {
"type": "number"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"application_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"progress"
]
},
"payload": {
"type": "number"
},
"application_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"powercap",
"energy",
"performance",
"control_time",
"feedback_time"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"control"
]
},
"energy": {
"type": "number"
},
"control_time": {
"type": "number"
},
"powercap": {
"type": "number"
},
"performance": {
"type": "number"
},
"feedback_time": {
"type": "number"
}
}
}
]
}
{
"oneOf": [
{
"required": [
"tag",
"containers"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"list"
]
},
"containers": {
"uniqueItems": false,
"items": {
"type": "string"
},
"type": "array"
}
}
},
{
"required": [
"tag",
"container_uuid",
"payload"
],
"type": "object",
"properties": {
"tag": {
"type": "string",
"enum": [
"stdout"
]
},
"payload": {
"type": "string"
},
"container_uuid": {
"type": "string"
}
}
},
{
"required": [
"tag",
"container_uuid",