Commit 3dd86585 authored by Valentin Reis's avatar Valentin Reis
Browse files

Merge branch 'wip/config-rebased' into 'master'

[feature] commandline arguments, config file management.

See merge request !37
parents d7f57ef8 aedf9396
Pipeline #4634 passed with stages
in 2 minutes and 10 seconds
...@@ -46,3 +46,6 @@ venv/ ...@@ -46,3 +46,6 @@ venv/
*.log *.log
*.nav *.nav
*.out *.out
# frequent clones
argotest
...@@ -22,3 +22,25 @@ flake8: ...@@ -22,3 +22,25 @@ flake8:
except: except:
- /^wip\/.*/ - /^wip\/.*/
- /^WIP\/.*/ - /^WIP\/.*/
# Integration test job: clones argotest and runs its "helloworld" scenario,
# passing the parent directory (this checkout) as the nrm-src argument.
integration.test:
  stage: test
  script:
    - git clone https://xgitlab.cels.anl.gov/argo/argotest.git
    - cd argotest
    - nix-shell -A test --run "./argotk.hs helloworld" --arg nrm-src ../.
  artifacts:
    paths:
      - argotest/cmd_err.log
      - argotest/cmd_out.log
      - argotest/daemon_out.log
      # NOTE(review): daemon_out.log used to be listed twice here; the second
      # entry was presumably meant to be daemon_err.log (mirroring the
      # cmd_err/cmd_out pair) -- confirm against what argotest writes.
      - argotest/daemon_err.log
      - argotest/nrm.log
      - argotest/time.log
    expire_in: 1 week
  # Skip work-in-progress branches.
  except:
    - /^wip\/.*/
    - /^WIP\/.*/
  tags:
    - integration
...@@ -9,6 +9,7 @@ six = "==1.11.0" ...@@ -9,6 +9,7 @@ six = "==1.11.0"
pyzmq = "==16.0.4" pyzmq = "==16.0.4"
tornado = "==4.5.3" tornado = "==4.5.3"
numpy = "*" numpy = "*"
argparse = "*"
[dev-packages] [dev-packages]
pytest = "*" pytest = "*"
......
...@@ -18,6 +18,14 @@ And entering the resulting virtual environment with `pipenv shell`. ...@@ -18,6 +18,14 @@ And entering the resulting virtual environment with `pipenv shell`.
The NRM code only supports _argo-containers_ for now, so you need to install The NRM code only supports _argo-containers_ for now, so you need to install
our container piece on the system for now. our container piece on the system for now.
### Alternative - Nix usage.
These dependencies can be obtained using the nix package manager.
```
nix-shell https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/master/argopkgs-master.tar.gz -A nodelevel.nrm
```
## Basic Usage ## Basic Usage
Launch the `daemon`, and use `cmd` to interact with it. Launch the `daemon`, and use `cmd` to interact with it.
......
...@@ -92,7 +92,8 @@ class PerfWrapper(object): ...@@ -92,7 +92,8 @@ class PerfWrapper(object):
logger.info("fifoname: %r", fifoname) logger.info("fifoname: %r", fifoname)
os.mkfifo(fifoname, 0o600) os.mkfifo(fifoname, 0o600)
argv = ['perf', 'stat', '-e', 'instructions', '-x', ',', perf_tool_path = os.environ.get('PERF', 'perf')
argv = [perf_tool_path, 'stat', '-e', 'instructions', '-x', ',',
'-I', str(args.frequency), '-o', fifoname, '--'] '-I', str(args.frequency), '-o', fifoname, '--']
argv.extend(args.cmd) argv.extend(args.cmd)
logger.info("argv: %r", argv) logger.info("argv: %r", argv)
......
...@@ -26,11 +26,16 @@ class CommandLineInterface(object): ...@@ -26,11 +26,16 @@ class CommandLineInterface(object):
if uuid: if uuid:
logger.info("received signal %d, killing the application..", logger.info("received signal %d, killing the application..",
signum) signum)
self.do_kill(KillArgs(uuid)) command = {'api': 'up_rpc_req',
logger.info("killed the application.", signum) 'type': 'kill',
'container_uuid': uuid
}
msg = RPC_MSG['kill'](**command)
self.client.sendmsg(msg)
logger.info("killed the application, exiting.")
else: else:
logger.info("received signal %d, exiting", signum) logger.info("received signal %d, exiting", signum)
exit(1) exit(130)
def setup(self): def setup(self):
# upstream RPC port # upstream RPC port
......
#!/usr/bin/env python2 #!/usr/bin/env python2
import argparse
import json
import sys
import nrm import nrm
import nrm.daemon import nrm.daemon
import os
def main(argv=None):
    """Parse the daemon configuration and start the NRM daemon.

    Each setting is resolved with the following precedence, highest first:
    command-line option, value from the JSON file given with
    ``--configuration``, environment variable, built-in default.

    Args:
        argv: full argument vector, program name included at index 0.
            Defaults to ``sys.argv``.

    Returns:
        0, either right after printing the defaults (``--print_defaults``)
        or once ``nrm.daemon.runner`` has returned.
    """
    if argv is None:
        argv = sys.argv

    # First pass: only look for the options that decide where the defaults
    # come from; everything else is left for the full parser below.
    conf_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False
    )
    conf_parser.add_argument(
        "-c",
        "--configuration",
        help="Specify a json-formatted config file to override "
             "any of the available CLI options. If an option is "
             "actually provided on the command-line, it overrides "
             "its corresponding value from the configuration file.",
        metavar="FILE")
    conf_parser.add_argument("-d", "--print_defaults", action='store_true',
                             help="Print the default configuration file.")
    # argv[0] is the program name; without passing argv explicitly argparse
    # would silently fall back to sys.argv and ignore the caller's vector.
    args, remaining_argv = conf_parser.parse_known_args(argv[1:])

    defaults = {"nrm_log": "/tmp/nrm.log",
                "hwloc": "hwloc",
                "perf": "perf",
                "argo_perf_wrapper": "argo-perf-wrapper",
                "argo_nodeos_config": "argo_nodeos_config"}

    if args.print_defaults:
        # Emit JSON (not a dict repr) so the output is directly usable as a
        # file for --configuration.
        print(json.dumps(defaults, indent=4, sort_keys=True))
        return 0

    # Environment variables override the built-in defaults. Each option's
    # variable is the option name upper-cased: NRM_LOG, HWLOC, PERF,
    # ARGO_PERF_WRAPPER and ARGO_NODEOS_CONFIG.
    for option in defaults:
        defaults[option] = os.environ.get(option.upper(), defaults[option])

    # Values from the configuration file override the environment.
    if args.configuration:
        with open(args.configuration) as config_file:
            defaults.update(json.load(config_file))

    parser = argparse.ArgumentParser(parents=[conf_parser])
    parser.add_argument(
        "--nrm_log",
        help="Main log file. Override default with the NRM_LOG "
             "environment variable.")
    parser.add_argument(
        '--hwloc',
        help="Path to the hwloc to use. This path can be "
             "relative and makes use of the $PATH if necessary. "
             "Override default with the HWLOC environment "
             "variable.")
    parser.add_argument(
        '--argo_nodeos_config',
        help="Path to the argo_nodeos_config to use. This path "
             "can be relative and makes use of the $PATH if "
             "necessary. Override default with the "
             "ARGO_NODEOS_CONFIG environment variable.")
    parser.add_argument(
        '--perf',
        help="Path to the linux perf tool to use. This path can be "
             "relative and makes use of the $PATH if necessary. "
             "Override default with the PERF environment "
             "variable.")
    parser.add_argument(
        '--argo_perf_wrapper',
        help="Path to the argo-perf-wrapper to use. This path can "
             "be relative and makes use of the $PATH if "
             "necessary. Override default with the ARGO_PERF_WRAPPER "
             "environment variable.")
    # Must come after the add_argument calls: argparse lets a per-argument
    # default shadow parser-level defaults registered earlier, which would
    # make the configuration file ineffective. Applied last, the merged
    # defaults replace each option's default and remain overridable from
    # the command line.
    parser.set_defaults(**defaults)

    args = parser.parse_args(remaining_argv)
    nrm.daemon.runner(config=args)
    return 0
if __name__ == "__main__": if __name__ == "__main__":
nrm.daemon.runner() sys.exit(main())
{
"log":"/tmp/daemon_log.log",
"log_power":"/tmp/daemon_power.log"
}
...@@ -16,11 +16,16 @@ class ContainerManager(object): ...@@ -16,11 +16,16 @@ class ContainerManager(object):
"""Manages the creation, listing and deletion of containers, using a """Manages the creation, listing and deletion of containers, using a
container runtime underneath.""" container runtime underneath."""
def __init__(self, rm): def __init__(self, rm,
perfwrapper="argo-perf-wrapper",
linuxperf="perf",
argo_nodeos_config="argo_nodeos_config"):
self.linuxperf = linuxperf
self.perfwrapper = perfwrapper
self.nodeos = NodeOSClient(argo_nodeos_config=argo_nodeos_config)
self.containers = dict() self.containers = dict()
self.pids = dict() self.pids = dict()
self.resourcemanager = rm self.resourcemanager = rm
self.nodeos = NodeOSClient()
self.chrt = ChrtClient() self.chrt = ChrtClient()
def create(self, request): def create(self, request):
...@@ -102,7 +107,7 @@ class ContainerManager(object): ...@@ -102,7 +107,7 @@ class ContainerManager(object):
manifest_perfwrapper = manifest.app.isolators.perfwrapper manifest_perfwrapper = manifest.app.isolators.perfwrapper
if hasattr(manifest_perfwrapper, 'enabled'): if hasattr(manifest_perfwrapper, 'enabled'):
if manifest_perfwrapper.enabled in ["1", "True"]: if manifest_perfwrapper.enabled in ["1", "True"]:
argv.append('argo-perf-wrapper') argv.append(self.perfwrapper)
if hasattr(manifest.app.isolators, 'power'): if hasattr(manifest.app.isolators, 'power'):
if hasattr(manifest.app.isolators.power, 'enabled'): if hasattr(manifest.app.isolators.power, 'enabled'):
...@@ -122,6 +127,7 @@ class ContainerManager(object): ...@@ -122,6 +127,7 @@ class ContainerManager(object):
# environ['PATH'] = ("/usr/local/sbin:" # environ['PATH'] = ("/usr/local/sbin:"
# "/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin") # "/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
environ['ARGO_CONTAINER_UUID'] = container_name environ['ARGO_CONTAINER_UUID'] = container_name
environ['PERF'] = self.linuxperf
environ['AC_APP_NAME'] = manifest.name environ['AC_APP_NAME'] = manifest.name
environ['AC_METADATA_URL'] = "localhost" environ['AC_METADATA_URL'] = "localhost"
...@@ -164,7 +170,7 @@ class ContainerManager(object): ...@@ -164,7 +170,7 @@ class ContainerManager(object):
logger.debug("killing %r:", c) logger.debug("killing %r:", c)
for p in c.processes.values(): for p in c.processes.values():
try: try:
p.terminate() p.proc.terminate()
except OSError: except OSError:
logging.error("OS error: could not terminate process.") logging.error("OS error: could not terminate process.")
......
...@@ -23,8 +23,10 @@ logger = logging.getLogger('nrm') ...@@ -23,8 +23,10 @@ logger = logging.getLogger('nrm')
class Daemon(object): class Daemon(object):
def __init__(self):
def __init__(self, config):
self.target = 100.0 self.target = 100.0
self.config = config
def do_downstream_receive(self, parts): def do_downstream_receive(self, parts):
logger.info("receiving downstream message: %r", parts) logger.info("receiving downstream message: %r", parts)
...@@ -293,8 +295,13 @@ class Daemon(object): ...@@ -293,8 +295,13 @@ class Daemon(object):
self.downstream_pub = zmqstream.ZMQStream(downstream_pub_socket) self.downstream_pub = zmqstream.ZMQStream(downstream_pub_socket)
# create managers # create managers
self.resource_manager = ResourceManager() self.resource_manager = ResourceManager(hwloc=self.config.hwloc)
self.container_manager = ContainerManager(self.resource_manager) self.container_manager = ContainerManager(
self.resource_manager,
perfwrapper=self.config.argo_perf_wrapper,
linuxperf=self.config.perf,
argo_nodeos_config=self.config.argo_nodeos_config
)
self.application_manager = ApplicationManager() self.application_manager = ApplicationManager()
self.sensor_manager = SensorManager() self.sensor_manager = SensorManager()
aa = ApplicationActuator(self.application_manager, self.downstream_pub) aa = ApplicationActuator(self.application_manager, self.downstream_pub)
...@@ -318,8 +325,14 @@ class Daemon(object): ...@@ -318,8 +325,14 @@ class Daemon(object):
ioloop.IOLoop.current().start() ioloop.IOLoop.current().start()
def runner(): def runner(config):
ioloop.install() ioloop.install()
logging.basicConfig(level=logging.DEBUG)
daemon = Daemon() logger.setLevel(logging.DEBUG)
if config.nrm_log:
print("Logging to %s" % config.nrm_log)
logger.addHandler(logging.FileHandler(config.nrm_log))
daemon = Daemon(config)
daemon.main() daemon.main()
...@@ -11,8 +11,8 @@ class ResourceManager(object): ...@@ -11,8 +11,8 @@ class ResourceManager(object):
"""Manages the query of node resources, the tracking of their use and """Manages the query of node resources, the tracking of their use and
the scheduling of new containers according to partitioning rules.""" the scheduling of new containers according to partitioning rules."""
def __init__(self): def __init__(self, hwloc):
self.hwloc = HwlocClient() self.hwloc = HwlocClient(hwloc=hwloc)
# query the node topo, keep track of the critical resources # query the node topo, keep track of the critical resources
self.allresources = self.hwloc.info() self.allresources = self.hwloc.info()
......
...@@ -40,9 +40,9 @@ class NodeOSClient(object): ...@@ -40,9 +40,9 @@ class NodeOSClient(object):
"""Client to argo_nodeos_config.""" """Client to argo_nodeos_config."""
def __init__(self): def __init__(self, argo_nodeos_config="argo_nodeos_config"):
"""Load client configuration.""" """Load client configuration."""
self.prefix = "argo_nodeos_config" self.prefix = argo_nodeos_config
def getavailable(self): def getavailable(self):
"""Gather available resources.""" """Gather available resources."""
...@@ -151,9 +151,9 @@ class HwlocClient(object): ...@@ -151,9 +151,9 @@ class HwlocClient(object):
"""Client to hwloc binaries.""" """Client to hwloc binaries."""
def __init__(self): def __init__(self, hwloc="hwloc"):
"""Load configuration.""" """Load configuration."""
self.prefix = "hwloc" self.prefix = hwloc
def info(self): def info(self):
"""Return list of all cpus and mems.""" """Return list of all cpus and mems."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment