...
 
Commits (9)
---
variables:
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/master/argopkgs-master.tar.gz"
ARGOPKGS: "https://xgitlab.cels.anl.gov/argo/argopkgs/-/archive/nrm-py3/argopkgs-nrm-py3.tar.gz"
EXTRA: "--nrm ./."
stages:
......@@ -12,16 +12,9 @@ stages:
- docs
include:
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/nrm.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/master/gitlab-ci/repoquality.yml
flake8:
stage: style
script:
- nix run -f "$ARGOPKGS" pythonPackages.flake8 --command flake8 nrm/*.py bin/*
tags:
- integration
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/argonix.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/nrm.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/components.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/integration.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/applications.yml
- https://xgitlab.cels.anl.gov/argo/argopkgs/raw/nrm-py3/gitlab-ci/repoquality.yml
PYTHON:= $(shell which python2)
PYTHON:= $(shell which python3)
source:
$(PYTHON) setup.py sdist
......
#!/usr/bin/env python2
#!/usr/bin/env python3
from __future__ import print_function
import argparse
......
#!/usr/bin/env python2
#!/usr/bin/env python3
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
......
#!/usr/bin/env python2
#!/usr/bin/env python3
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
......
#!/usr/bin/env python2
#!/usr/bin/env python3
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
......
......@@ -5,7 +5,11 @@ rec {
nrm = pkgs.nrm;
hack = nrm.overrideAttrs (old:{
buildInputs = old.buildInputs ++ [
pkgs.pythonPackages.flake8
pkgs.pythonPackages.sphinx ];
pkgs.python3Packages.flake8
pkgs.python3Packages.autopep8
pkgs.python3Packages.black
pkgs.python3Packages.mypy
pkgs.pytype
pkgs.python3Packages.sphinx ];
});
}
......@@ -84,8 +84,8 @@ class ContainerManager(object):
hwbindings = dict()
if manifest.is_feature_enabled('hwbind'):
hwbindings['distrib'] = sorted(self.hwloc.distrib(
ncpus, allocated), key=operator.
attrgetter('cpus'))
ncpus, allocated), key=operator.
attrgetter('cpus'))
return (True, Container(container_name, manifest, allocated,
container_power, {}, {}, hwbindings))
......@@ -136,12 +136,12 @@ class ContainerManager(object):
# monitoring section involves libnrm
if manifest.is_feature_enabled('monitoring'):
environ['ARGO_NRM_RATELIMIT'] = \
manifest.app['monitoring']['ratelimit']
manifest.app['monitoring']['ratelimit']
if container.power.get('policy') or \
manifest.is_feature_enabled('monitoring'):
environ['ARGO_NRM_DOWNSTREAM_EVENT_URI'] = \
self.downstream_event_uri
self.downstream_event_uri
# build prefix to the entire command based on enabled features
argv = []
......@@ -156,7 +156,7 @@ class ContainerManager(object):
if container.hwbindings:
# round robin over the cpu bindings available
bind_index = len(container.processes) % \
len(container.hwbindings['distrib'])
len(container.hwbindings['distrib'])
argv.append('hwloc-bind')
# argv.append('--single')
cpumask = container.hwbindings['distrib'][bind_index].cpus[0]
......@@ -297,7 +297,7 @@ class DummyRuntime(ContainerRuntime):
pass
def execute(self, container_uuid, args, environ):
import tornado.process as process
import tornado.process as process # type: ignore
return process.Subprocess(args,
stdout=process.Subprocess.STREAM,
stderr=process.Subprocess.STREAM,
......
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
#!/usr/bin/env python
#
# coolr cpufreq related codes
......@@ -20,24 +10,36 @@
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
import os, sys, time
import struct
import copy
import clr_nodeinfo
import numpy as np
#an example the content of cpustat
#id 0
#aperf 4926926121023
#mperf 4582847073452
#perf_bias 8
#ucc 281462841145699
#urc 0
#perf_target 8448
#perf_status 8448
#pstate 33
#turbo_disengage 0
#tsc 1117245755950154
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
import os
import sys
import time
import coolr.clr_nodeinfo # type: ignore
import numpy as np # type: ignore
# an example the content of cpustat
# id 0
# aperf 4926926121023
# mperf 4582847073452
# perf_bias 8
# ucc 281462841145699
# urc 0
# perf_target 8448
# perf_status 8448
# pstate 33
# turbo_disengage 0
# tsc 1117245755950154
class cpustatvals:
def cpustatfn(self, cpuid):
......@@ -49,7 +51,7 @@ class cpustatvals:
self.cpuid = cpuid
def parse(self):
self.d = {} # clear d's contents
self.d = {} # clear d's contents
self.d['time'] = time.time()
with open(self.cpustatfn(self.cpuid)) as f:
while True:
......@@ -62,22 +64,22 @@ class cpustatvals:
def pr(self):
for k in ('id', 'aperf', 'mperf', 'pstate', 'tsc'):
print '%s=%d ' % (k, self.d[k]),
print
def diff_u64(self, v1, v2): # v1 - v2
print('%s=%d ' % (k, self.d[k]))
print()
def diff_u64(self, v1, v2): # v1 - v2
if v1 >= v2:
return v1 - v2
return (self.u64max -v2) + v1
return (self.u64max - v2) + v1
def calc_cpufreq(self,prev): # prev is an object of cpustatvals
def calc_cpufreq(self, prev): # prev is an object of cpustatvals
if not (prev.d.has_key('tsc') and self.d.has_key('tsc')):
return 0.0
return 0.0
tmp = {}
for k in ('tsc', 'aperf', 'mperf'):
tmp[k] = float(self.diff_u64(self.d[k], prev.d[k]))
dt = self.d['time'] - prev.d['time']
dt = self.d['time'] - prev.d['time'] # pytype: disable=key-error
freq = tmp['aperf'] / tmp['mperf']
freq *= tmp['tsc']
freq *= 1e-9 # covert it to GHz
......@@ -85,15 +87,14 @@ class cpustatvals:
return freq
def calc_aperf(self,prev): # prev is an object of cpustatvals
def calc_aperf(self, prev): # prev is an object of cpustatvals
if not (prev.d.has_key('tsc') and self.d.has_key('tsc')):
return 0.0
return 0.0
tmp = {}
k = 'aperf'
tmp[k] = float(self.diff_u64(self.d[k], prev.d[k]))
tmp[k] = float(self.diff_u64(self.d[k], prev.d[k])) # pytype: disable=key-error
dt = self.d['time'] - prev.d['time']
dt = self.d['time'] - prev.d['time'] # pytype: disable=key-error
return tmp['aperf'] * 1e-9 / dt
......@@ -106,12 +107,12 @@ class cpufreq_reader:
# so simply instantiating an object of cputopology again here.
self.ct = clr_nodeinfo.cputopology()
self.cpus = self.ct.onlinecpus # just for convenience
self.cpus = self.ct.onlinecpus # just for convenience
self.init = False
for cpuid in self.cpus:
tmp = cpustatvals(cpuid) # just for cpustatfn
tmp = cpustatvals(cpuid) # just for cpustatfn
statpath = tmp.cpustatfn(cpuid)
if not os.path.exists(statpath):
# print 'Not found', statpath
......@@ -121,7 +122,7 @@ class cpufreq_reader:
self.cnt = 0
self.samples = [
[cpustatvals(i) for i in self.cpus],
[cpustatvals(i) for i in self.cpus] ]
[cpustatvals(i) for i in self.cpus]]
self.sample()
......@@ -134,7 +135,6 @@ class cpufreq_reader:
self.samples[idx][cpuid].parse()
self.cnt = self.cnt + 1
def pstate(self):
ret = [0.0 for i in self.cpus]
if not self.init:
......@@ -142,7 +142,7 @@ class cpufreq_reader:
if self.cnt == 0:
return ret
idx = 0 # if cnt is an odd number
idx = 0 # if cnt is an odd number
if self.cnt % 2 == 0:
idx = 1
for cpuid in self.cpus:
......@@ -188,8 +188,8 @@ class cpufreq_reader:
return ret
def outputpercore(self,flag=True):
self.percore=flag
def outputpercore(self, flag=True):
self.percore = flag
def sample_and_json(self, node=""):
if not self.init:
......@@ -207,7 +207,7 @@ class cpufreq_reader:
freqstd = np.std(tmp)
buf += ',"p%s":{' % p
buf += '"mean":%.3lf,"std":%.3lf' % (freqmean,freqstd)
buf += '"mean":%.3lf,"std":%.3lf' % (freqmean, freqstd)
if self.percore:
for c in self.ct.pkgcpus[p]:
buf += ',"c%d":%.3lf' % (c, f[c])
......@@ -216,20 +216,19 @@ class cpufreq_reader:
return buf
if __name__ == '__main__':
freq = cpufreq_reader()
if not freq.init:
print 'Please check the cpustat module is installed'
print('Please check the cpustat module is installed')
sys.exit(1)
freq.outputpercore(False)
for i in range(0, 20):
j = freq.sample_and_json()
print '[freq json]'
print j
print('[freq json]')
print(j)
time.sleep(1)
sys.exit(0)
......@@ -237,25 +236,24 @@ if __name__ == '__main__':
for i in range(0, 20):
freq.sample()
print '[pstate]',
print('[pstate]')
for p in freq.pstate():
print p,
print
print(p)
print()
print '[aperf]',
print('[aperf]')
for f in freq.aperf():
print '%.2lf ' % f,
print
print('%.2lf ' % f)
print()
print '[freq]',
print('[freq]')
for f in freq.cpufreq():
print '%.2lf ' % f,
print
print('%.2lf ' % f)
print()
j = freq.sample_and_json()
print '[freq json]'
print j
print('[freq json]')
print(j)
print
print()
time.sleep(1)
#!/usr/bin/env python
#
# coolr hwmon related codes
#
# This code requires the coretemp driver for temperature reading
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
......@@ -8,43 +17,38 @@
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
#!/usr/bin/env python
#
# coolr hwmon related codes
#
# This code requires the coretemp driver for temperature reading
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
import re, os, sys
import numpy as np
from clr_nodeinfo import *
import re
import os
import sys
import numpy as np # type: ignore
from coolr.clr_nodeinfo import *
class coretemp_reader :
def parse_pkgtemp(self,fn):
class coretemp_reader:
def parse_pkgtemp(self, fn):
retval = -1
try:
f = open(fn , "r")
f = open(fn, "r")
except:
return retval
l = f.readline()
m = re.search('Physical id ([0-9]+)', l )
m = re.search('Physical id ([0-9]+)', l)
if m:
retval=int(m.group(1))
retval = int(m.group(1))
f.close()
return retval
def parse_coretemp(self,fn):
def parse_coretemp(self, fn):
retval = -1
try:
f = open(fn , "r")
f = open(fn, "r")
except:
return retval
l = f.readline()
m = re.search('Core ([0-9]+)', l )
m = re.search('Core ([0-9]+)', l)
if m:
retval=int(m.group(1))
retval = int(m.group(1))
f.close()
return retval
......@@ -53,16 +57,16 @@ class coretemp_reader :
class coretempinfo:
def __init__(self):
self.dir = ''
self.coretempfns = {} # use coreid as key
self.coretempfns = {} # use coreid as key
self.pkgtempfn = ''
def __init__ (self):
def __init__(self):
self.outputpercore(True)
self.coretemp = {} # use pkgid as key
self.coretemp = {} # use pkgid as key
for d1 in os.listdir(self.hwmondir):
# try to check see if 'name' contains 'coretemp'
tmpdir = "%s%s" % (self.hwmondir,d1)
tmpdir = "%s%s" % (self.hwmondir, d1)
drivername = readbuf("%s/name" % tmpdir).rstrip()
if not drivername == "coretemp":
continue
......@@ -71,27 +75,26 @@ class coretemp_reader :
coretempfns = {}
pkgtempfn = ''
# parse all temp*_label files
for d2 in os.listdir( tmpdir ):
m = re.search( 'temp([0-9]+)_label', d2 )
for d2 in os.listdir(tmpdir):
m = re.search('temp([0-9]+)_label', d2)
if m:
tempid=int(m.group(1))
tempid = int(m.group(1))
coreid = self.parse_coretemp("%s/%s" % (tmpdir, d2))
if coreid >= 0 :
coretempfns[coreid] = "%s/temp%d_input" % (tmpdir, tempid)
else: # possibly pkgid
if coreid >= 0:
coretempfns[coreid] = "%s/temp%d_input" % (
tmpdir, tempid)
else: # possibly pkgid
pkgtempfn = "%s/temp%d_input" % (tmpdir, tempid)
pkgid = self.parse_pkgtemp("%s/%s" % (tmpdir, d2))
if pkgid < 0 :
print 'unlikely: ', pkgtempfn
if pkgid < 0:
print('unlikely: ', pkgtempfn)
cti = self.coretempinfo()
cti.dir = tmpdir
cti.coretempfns = coretempfns
cti.pkgtempfn = pkgtempfn
if pkgid < 0: # assume a single socket machine
if pkgid < 0: # assume a single socket machine
self.coretemp[0] = cti
else:
self.coretemp[pkgid] = cti
......@@ -111,8 +114,8 @@ class coretemp_reader :
ret[pkgid] = temps
return ret
def outputpercore(self,flag=True):
self.percore=flag
def outputpercore(self, flag=True):
self.percore = flag
def sample(self):
temp = self.readtempall()
......@@ -144,28 +147,28 @@ class coretemp_reader :
for c in temps[pkgid].keys():
vals.append(temps[pkgid][c])
return [np.mean(vals), np.std(vals), np.min(vals), np.max(vals)]
def readpkgtemp(self):
fn = "%s_input" % self.pkgtempfns[pkgid].pkgfn
f = open(fn)
f = open(fn)
v = int(f.readline())/1000.0
f.close()
return v
def readcoretemp(self,pkgid):
def readcoretemp(self, pkgid):
t = []
for fnbase in self.pkgtempfns[pkgid].corefns:
fn = "%s_input" % fnbase
if not os.access( fn, os.R_OK ):
if not os.access(fn, os.R_OK):
continue # cpu may become offline
f = open(fn)
f = open(fn)
v = int(f.readline())/1000.0
f.close()
t.append(v)
return t
class acpi_power_meter_reader :
class acpi_power_meter_reader:
# add a nicer detection routine later
def __init__(self):
self.init = False
......@@ -179,17 +182,17 @@ class acpi_power_meter_reader :
def read(self):
if not self.init:
return -1
retval=-1
retval = -1
fn = '/sys/class/hwmon/hwmon0/device/power1_average'
try:
f = open(fn , "r")
f = open(fn, "r")
except:
return retval
l = f.readline()
retval = int(l) * 1e-6 # uW to W
retval = int(l) * 1e-6 # uW to W
f.close()
return retval
......
#!/usr/bin/env python
#
# misc. classes, functions
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
......@@ -8,19 +15,17 @@
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
#!/usr/bin/env python
#
# misc. classes, functions
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
import os, sys, re, time
import os
import sys
import re
import time
def readbuf(fn):
for retry in range(0,10):
for retry in range(0, 10):
try:
f = open( fn )
f = open(fn)
l = f.readline()
f.close()
return l
......@@ -29,8 +34,9 @@ def readbuf(fn):
continue
return ''
def readuptime():
f = open( '/proc/uptime' )
f = open('/proc/uptime')
l = f.readline()
v = l.split()
return float( v[0] )
return float(v[0])
#!/usr/bin/env python
#
# CPU related codes
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
......@@ -8,17 +15,13 @@
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
#!/usr/bin/env python
#
# CPU related codes
#
# Contact: Kazutomo Yoshii <ky@anl.gov>
#
import os, sys, re, time, socket
import os
import sys
import re
import socket
# local
from clr_misc import *
from coolr.clr_misc import *
#
# Once instantiated, the following values are avaialble
......@@ -29,35 +32,37 @@ from clr_misc import *
#
# limitation: no support for runtime change
#
class cputopology:
cpubasedir = '/sys/devices/system/cpu/'
cpubasedir = '/sys/devices/system/cpu/'
nodebasedir = '/sys/devices/system/node/'
def parserange(self,fn):
tmp = readbuf( fn )
def parserange(self, fn):
tmp = readbuf(fn)
ret = []
for t in tmp.split(','):
ab = re.findall('[0-9]+', t)
if len(ab) == 2 :
ret = ret + range( int(ab[0]), int(ab[1])+1 )
if len(ab) == 2:
ret = ret + range(int(ab[0]), int(ab[1])+1)
elif len(ab) == 1:
ret = ret + [int(ab[0])]
else:
print 'unlikely at cputoplogy.parserange():',ab
print('unlikely at cputoplogy.parserange():', ab)
sys.exit(1)
return ret
def parsemask(self,fn):
tmp = readbuf( fn )
def parsemask(self, fn):
tmp = readbuf(fn)
tmp = tmp.rstrip()
maskstrs = tmp.split(',')
maskstrs.reverse()
shift=0
shift = 0
ret = []
for mstr in maskstrs:
bmint = long(mstr,16)
for i in range(0,32):
if (bmint&1) == 1:
bmint = long(mstr, 16)
for i in range(0, 32):
if (bmint & 1) == 1:
ret.append(i+shift)
bmint = bmint >> 1
shift = shift + 32
......@@ -70,15 +75,16 @@ class cputopology:
self.pkgcpus = {}
for cpuid in self.onlinecpus:
pkgidfn = self.cpubasedir + "cpu%d/topology/physical_package_id" % (cpuid)
pkgidfn = self.cpubasedir + \
"cpu%d/topology/physical_package_id" % (cpuid)
pkgid = int(readbuf(pkgidfn))
if not self.pkgcpus.has_key(pkgid) :
if not self.pkgcpus.has_key(pkgid):
self.pkgcpus[pkgid] = []
self.pkgcpus[pkgid].append(cpuid)
self.cpu2coreid = {}
self.core2cpuid = {}
for pkgid in self.pkgcpus.keys() :
for pkgid in self.pkgcpus.keys():
for cpuid in self.pkgcpus[pkgid]:
coreidfn = self.cpubasedir + "cpu%d/topology/core_id" % (cpuid)
coreid = int(readbuf(coreidfn))
......@@ -88,21 +94,21 @@ class cputopology:
self.onlinenodes = self.parserange(self.nodebasedir + 'online')
self.nodecpus = {}
for n in self.onlinenodes:
self.nodecpus[n] = self.parsemask(self.nodebasedir + "node%d/cpumap" % (n))
self.nodecpus[n] = self.parsemask(
self.nodebasedir + "node%d/cpumap" % (n))
def __init__(self):
self.detect()
class nodeconfig :
class nodeconfig:
def parse(self):
self.hostname = socket.gethostname()
# XXX: not sure this is unique
self.nodename = self.hostname.split('.')[0]
tmp = readbuf( '/proc/version' )
tmp = readbuf('/proc/version')
self.version = tmp.split()[2]
re_model = re.compile("^model\s+:\s+([0-9]+)")
......@@ -128,67 +134,66 @@ class nodeconfig :
if os.path.exists(d):
self.freqdriver = 'acpi_cpufreq'
fn = d + "/scaling_driver"
self.driver = readbuf( fn ).rstrip()
self.driver = readbuf(fn).rstrip()
fn = d + "/scaling_governor"
self.governor = readbuf( fn ).rstrip()
self.governor = readbuf(fn).rstrip()
fn = d + "/scaling_cur_freq"
self.cur_freq = readbuf( fn ).rstrip()
self.cur_freq = readbuf(fn).rstrip()
d = "/sys/devices/system/cpu/intel_pstate"
if os.path.exists(d):
self.freqdriver = 'pstate'
k = 'max_perf_pct'
pmax = readbuf( "%s/%s" % (d,k) ).rstrip()
pmax = readbuf("%s/%s" % (d, k)).rstrip()
k = 'min_perf_pct'
pmin = readbuf( "%s/%s" % (d,k) ).rstrip()
pmin = readbuf("%s/%s" % (d, k)).rstrip()
k = 'no_turbo'
noturbo = readbuf( "%s/%s" % (d,k) ).rstrip()
self.pstate = "%s/%s/%s" % (pmax,pmin,noturbo)
noturbo = readbuf("%s/%s" % (d, k)).rstrip()
self.pstate = "%s/%s/%s" % (pmax, pmin, noturbo)
d = "/sys/devices/system/cpu/turbofreq"
if os.path.exists(d):
self.freqdriver = 'coolrfreq'
self.policy = d + '/pstate_policy'
def __init__ (self):
def __init__(self):
self.parse()
def testnodeconfig():
print '=== ', sys._getframe().f_code.co_name
def testnodeconfig():
print('=== ', sys._getframe().f_code.co_name)
nc = nodeconfig()
print 'node: ', nc.nodename
print 'version: ', nc.version
print 'cpumodel: ', nc.cpumodel
print 'memoryKB: ', nc.memoryKB
print 'freqdriver: ', nc.freqdriver
print
print('node: ', nc.nodename)
print('version: ', nc.version)
print('cpumodel: ', nc.cpumodel)
print('memoryKB: ', nc.memoryKB)
print('freqdriver: ', nc.freqdriver)
print()
def testcputopology():
print '=== ', sys._getframe().f_code.co_name
print('=== ', sys._getframe().f_code.co_name)
ct = cputopology()
print
print 'No. of online cpus: ', len(ct.onlinecpus)
print
print()
print('No. of online cpus: ', len(ct.onlinecpus))
print()
for p in sorted(ct.pkgcpus.keys()):
print 'pkg%d:' % p, len(ct.pkgcpus[p]), ct.pkgcpus[p]
print ' cpuid:',
print('pkg%d:' % p, len(ct.pkgcpus[p]), ct.pkgcpus[p])
print(' cpuid:')
for cpuid in ct.pkgcpus[p]:
print ct.cpu2coreid[cpuid],ct.cpu2coreid[cpuid][1],
print
print
print(ct.cpu2coreid[cpuid], ct.cpu2coreid[cpuid][1])
print()
print()
for n in sorted(ct.nodecpus.keys()):
print 'node%d:' % n, len(ct.nodecpus[n]), ct.nodecpus[n]
print('node%d:' % n, len(ct.nodecpus[n]), ct.nodecpus[n])
print ' cpuid:',
print(' cpuid:')
for cpuid in ct.nodecpus[n]:
print ct.cpu2coreid[cpuid],
print
print
print(ct.cpu2coreid[cpuid])
print()
print()
if __name__ == '__main__':
......@@ -196,6 +201,3 @@ if __name__ == '__main__':
testnodeconfig()
testcputopology()
This diff is collapsed.
......@@ -27,7 +27,7 @@
current value of duty cycle
"""
import msr
import coolr.msr
class DutyCycle:
......
......@@ -20,9 +20,9 @@ import os
from resources import ResourceManager
from sensor import SensorManager
import signal
from zmq.eventloop import ioloop
from nrm.messaging import UpstreamRPCServer, UpstreamPubServer, \
DownstreamEventServer
from zmq.eventloop import ioloop # type: ignore
from messaging import UpstreamRPCServer, UpstreamPubServer, \
DownstreamEventServer
logger = logging.getLogger('nrm')
......@@ -42,22 +42,22 @@ class Daemon(object):
elif event.tag == 'progress':
if event.application_uuid in self.application_manager.applications:
app = self.application_manager.applications[
event.application_uuid]
event.application_uuid]
app.update_performance(event)
# self.upstream_pub_server.send(event) TODO try this.
self.upstream_pub_server.send(
tag='progress',
payload=event.payload,
application_uuid=event.application_uuid)
tag='progress',
payload=event.payload,
application_uuid=event.application_uuid)
elif event.tag == 'performance':
if event.application_uuid in self.application_manager.applications:
app = self.application_manager.applications[
event.application_uuid]
event.application_uuid]
app.update_performance(event)
self.upstream_pub_server.send(
tag='performance',
payload=event.payload,
container_uuid=event.container_uuid)
tag='performance',
payload=event.payload,
container_uuid=event.container_uuid)
elif event.tag == 'phasecontext':
uuid = event.application_uuid
if uuid in self.application_manager.applications:
......@@ -82,9 +82,9 @@ class Daemon(object):
self.target = float(req.limit)
logger.info("new target measure: %g", self.target)
self.upstream_rpc_server.send(
client,
tag='getPower',
limit=str(self.target))
client,
tag='getPower',
limit=str(self.target))
elif req.tag == 'run':
logger.info("asked to run a command in a container: %r", req)
container_uuid = req.container_uuid
......@@ -100,25 +100,25 @@ class Daemon(object):
if len(container.processes) == 1:
if container.power['policy']:
container.power['manager'] = PowerPolicyManager(
container.resources.cpus,
container.power['policy'],
float(container.power['damper']),
float(container.power['slowdown']))
container.resources.cpus,
container.power['policy'],
float(container.power['damper']),
float(container.power['slowdown']))
if container.power['profile']:
p = container.power['profile']
p['start'] = self.machine_info['energy']['energy']
p['start']['time'] = self.machine_info['time']
self.upstream_pub_server.send(
tag='start',
container_uuid=container_uuid,
errno=0 if container else -1,
power=container.power['policy'] or str(None))
tag='start',
container_uuid=container_uuid,
errno=0 if container else -1,
power=container.power['policy'] or str(None))
# now deal with the process itself
self.upstream_rpc_server.send(
client,
tag='start',
pid=pid,
container_uuid=container_uuid)
client,
tag='start',
pid=pid,
container_uuid=container_uuid)
# setup io callbacks
outcb = partial(self.do_children_io, client, container_uuid,
'stdout')
......@@ -134,9 +134,9 @@ class Daemon(object):
logger.info("asked for container list: %r", req)
response = self.container_manager.list()
self.upstream_rpc_server.send(
client,
tag="list",
payload=response)
client,
tag="list",
payload=response)
else:
logger.error("invalid command: %r", req.tag)
......@@ -146,10 +146,10 @@ class Daemon(object):
Meant to be partially defined on a children basis."""
logger.info("%r received %r data: %r", container_uuid, io, data)
self.upstream_rpc_server.send(
client,
tag=io,
container_uuid=container_uuid,
payload=data or 'eof')
client,
tag=io,
container_uuid=container_uuid,
payload=data or 'eof')
def do_sensor(self):
self.machine_info = self.sensor_manager.do_update()
......@@ -161,9 +161,9 @@ class Daemon(object):
"can not report power upstream.")
else:
self.upstream_pub_server.send(
tag="power",
total=total_power,
limit=self.target)
tag="power",
total=total_power,
limit=self.target)
def do_control(self):
plan = self.controller.planify(self.target, self.machine_info)
......@@ -203,10 +203,10 @@ class Daemon(object):
# first, send a process_exit
self.upstream_rpc_server.send(
clientid,
tag="exit",
status=str(status),
container_uuid=container.uuid)
clientid,
tag="exit",
status=str(status),
container_uuid=container.uuid)
# Remove the pid of process that is finished
container.processes.pop(pid, None)
self.container_manager.pids.pop(pid, None)
......@@ -241,9 +241,9 @@ class Daemon(object):
container.uuid, diff)
self.container_manager.delete(container.uuid)
self.upstream_pub_server.send(
tag="exit",
container_uuid=container.uuid,
profile_data=diff)
tag="exit",
container_uuid=container.uuid,
profile_data=diff)
else:
logger.debug("child update ignored")
pass
......@@ -290,12 +290,12 @@ class Daemon(object):
SingularityUserRuntime(self.config.singularity)
assert(container_runtime is not None)
self.container_manager = ContainerManager(
container_runtime,
self.resource_manager,
perfwrapper=self.config.argo_perf_wrapper,
linuxperf=self.config.perf,
pmpi_lib=self.config.pmpi_lib,
downstream_event_uri=downstream_event_param,
container_runtime,
self.resource_manager,
perfwrapper=self.config.argo_perf_wrapper,
linuxperf=self.config.perf,
pmpi_lib=self.config.pmpi_lib,
downstream_event_uri=downstream_event_param,
)
self.application_manager = ApplicationManager()
self.sensor_manager = SensorManager()
......
......@@ -42,6 +42,7 @@ import coolr.dutycycle
class DDCMPolicy:
""" Contains cpu-specific DDCM based power policy """
def __init__(self, maxlevel=16, minlevel=1):
self.maxdclevel = maxlevel
self.mindclevel = minlevel
......
......@@ -11,10 +11,10 @@
import json
import logging
import uuid
import zmq
import zmq.utils
import zmq.utils.monitor
from zmq.eventloop import zmqstream
import zmq # type: ignore
import zmq.utils # type: ignore
import zmq.utils.monitor # type: ignore
from zmq.eventloop import zmqstream # type: ignore
from schema import loadschema
......@@ -29,7 +29,7 @@ def send(apiname):
def send(self, *args, **kwargs):
self.socket.send(
json.dumps(model(dict(*args, **kwargs))))
json.dumps(model(dict(*args, **kwargs))))
setattr(cls, "send", send)
return(cls)
......
......@@ -8,17 +8,17 @@
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
import warlock
import warlock # type: ignore
import json
import yaml
import os
from jsonschema import Draft4Validator
from jsonschema import Draft4Validator # type: ignore
_jsonexts = ["json"]
_yamlexts = ["yml", "yaml"]
def loadschema(ext, api):
def loadschema(ext: str, api: str):
sourcedir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(sourcedir, "schemas", api+"."+ext)) as f:
if ext in _jsonexts:
......
......@@ -12,7 +12,7 @@
import collections
import logging
import xml.etree.ElementTree
import tornado.process as process
import tornado.process as process # type: ignore
import subprocess
logger = logging.getLogger('nrm')
......
......@@ -15,8 +15,7 @@ setup(
classifiers=[
'Development Status :: 3 - Alpha',
'License :: OSI Approved :: BSD License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
],
packages=find_packages(),
......
......@@ -21,6 +21,11 @@ import jsonschema
# import warlock
# import scipy
import os
exists = os.path.isfile('Pipfile')
if exists:
exit(1)
print("#DO NOT MODIFY: this file is auto-generated by requirements.py")
print("six==%s" % six.__version__)
print("zmq==%s" % zmq.__version__)
......
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
"""Tests for the ACI Manifest module."""
import nrm
import nrm.aci
import pytest
import json
@pytest.fixture
def manifest_base_data():
with open("examples/basic.json") as f:
return json.load(f)
def test_manifest_disabled_perfwrapper(manifest_base_data):
"""Ensure we can check if a feature is disabled."""
manifest = nrm.aci.ImageManifest(manifest_base_data)
assert not manifest.is_feature_enabled("perfwrapper")
def test_enabled_feature(manifest_base_data):
"""Ensure we can check if a feature is enabled without enabled in it."""
data = manifest_base_data.copy()
data["app"]["perfwrapper"] = "enabled"
manifest = nrm.aci.ImageManifest(data)
assert manifest.is_feature_enabled("perfwrapper")
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
"""Tests for the Coolr RAPL module."""
import nrm
import nrm.coolr
import nrm.coolr.clr_rapl
import pytest
@pytest.fixture
def rapl_reader():
"""Fixture for a regular rapl reader."""
rr = nrm.coolr.clr_rapl.rapl_reader()
assert rr.initialized(), "no rapl sysfs detected"
return rr
def test_read_powerdomains(rapl_reader):
"""Ensure we can read the power domains."""
assert rapl_reader.get_powerdomains()
def test_get_powerlimits(rapl_reader):
"""Ensure we can read the power limits."""
data = rapl_reader.get_powerlimits()
for k in data:
if data[k]['enabled']:
break
else:
assert False, "No power domain enabled."
def test_set_powerlimits(rapl_reader):
"""Ensure we can set a power limit."""
data = rapl_reader.get_powerlimits()
for k in data:
if data[k]['enabled']:
# compute a new limit in between cur and max
newlim = (data[k]['maxW'] - data[k]['curW'])/2.0
rapl_reader.set_powerlimit_pkg(newlim)
def test_sample(rapl_reader):
"""Ensure we can sample power consumption properly."""
import time
rapl_reader.start_energy_counter()
for i in range(0, 3):
time.sleep(1)
assert rapl_reader.sample(accflag=True)
rapl_reader.stop_energy_counter()
assert rapl_reader.total_energy_json()
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
"""Tests for the Sensor module."""
import nrm
import nrm.messaging
import pytest
@pytest.fixture
def upstream_rpc_client():
"""Fixture for a client handle on the upstream RPC API"""
return nrm.messaging.UpstreamRPCClient("ipc:///tmp/nrm-pytest-rpc")
@pytest.fixture
def upstream_rpc_server():
"""Fixture for a server handle on the upstream RPC API"""
return nrm.messaging.UpstreamRPCServer("ipc:///tmp/nrm-pytest-rpc")
@pytest.fixture
def upstream_pub_server():
"""Fixture for a server handle on the upstream PUB API"""
return nrm.messaging.UpstreamPubServer("ipc:///tmp/nrm-pytest-pub")
@pytest.fixture
def upstream_pub_client():
"""Fixture for a server handle on the upstream PUB API"""
return nrm.messaging.UpstreamPubClient("ipc:///tmp/nrm-pytest-pub")
@pytest.fixture
def downstream_event_server():
"""Fixture for a server handle on the downstream event API"""
return nrm.messaging.DownstreamEventServer("ipc:///tmp/nrm-pytest-down")
@pytest.fixture
def downstream_event_client():
"""Fixture for a client handle on the downstream event API"""
return nrm.messaging.DownstreamEventClient("ipc:///tmp/nrm-pytest-down")
@pytest.fixture
def dummy_msg():
"""Fixture for a dummy valid message."""
d = {'api': 'up_rpc_req', 'type': 'list'}
return nrm.messaging.MSGTYPES['up_rpc_req']['list'](**d)
@pytest.fixture
def dummy_daemon():
class _daemon(object):
def __init__(self):
self.called = False
def callback(self, msg, client):
self.called = True
self.msg = msg
self.client = client
return _daemon()
def test_msg_convertion(dummy_msg):
m = dummy_msg
assert nrm.messaging.wire2msg(nrm.messaging.msg2wire(m)) == dummy_msg
def test_rpc_connection(upstream_rpc_client, upstream_rpc_server):
upstream_rpc_client.connect()
def test_rpc_send_recv(upstream_rpc_client, upstream_rpc_server, dummy_msg):
upstream_rpc_client.send(dummy_msg)
msg, client = upstream_rpc_server.recv()
assert msg == dummy_msg
assert client == upstream_rpc_client.uuid
upstream_rpc_server.send(dummy_msg, client)
msg = upstream_rpc_client.recv()
assert msg == dummy_msg
def test_rpc_server_callback(upstream_rpc_client, upstream_rpc_server,
dummy_msg, dummy_daemon):
upstream_rpc_server.setup_recv_callback(dummy_daemon.callback)
frames = [upstream_rpc_client.uuid, nrm.messaging.msg2wire(dummy_msg)]
upstream_rpc_server.do_recv_callback(frames)
assert dummy_daemon.called
assert dummy_daemon.msg == dummy_msg
assert dummy_daemon.client == upstream_rpc_client.uuid
def test_pub_server_send(upstream_pub_server, dummy_msg):
upstream_pub_server.send(dummy_msg)
def test_pub_connection(upstream_pub_client, upstream_pub_server):
upstream_pub_client.connect()
def test_pub_client_recv(upstream_pub_server, upstream_pub_client, dummy_msg):
upstream_pub_server.send(dummy_msg)
msg = upstream_pub_client.recv()
assert msg == dummy_msg
def test_down_connection(downstream_event_client, downstream_event_server):
downstream_event_client.connect()
def test_down_event_send_recv(downstream_event_client, downstream_event_server,
dummy_msg):
downstream_event_client.send(dummy_msg)
msg, client = downstream_event_server.recv()
assert msg == dummy_msg
assert client == downstream_event_client.uuid
def test_down_event_server_callback(downstream_event_client,
downstream_event_server,
dummy_msg, dummy_daemon):
downstream_event_server.setup_recv_callback(dummy_daemon.callback)
frames = [downstream_event_client.uuid, nrm.messaging.msg2wire(dummy_msg)]
downstream_event_server.do_recv_callback(frames)
assert dummy_daemon.called
assert dummy_daemon.msg == dummy_msg
assert dummy_daemon.client == downstream_event_client.uuid
###############################################################################
# Copyright 2019 UChicago Argonne, LLC.
# (c.f. AUTHORS, LICENSE)
#
# This file is part of the NRM project.
# For more info, see https://xgitlab.cels.anl.gov/argo/nrm
#
# SPDX-License-Identifier: BSD-3-Clause
###############################################################################
"""Tests for the Sensor module."""
import nrm
import nrm.sensor
import pytest
@pytest.fixture
def sensor_manager():
"""Fixture for a regular sensor manager."""
return nrm.sensor.SensorManager()
def test_sensor_update_returns_valid_data(sensor_manager):
sensor_manager.start()
data = sensor_manager.do_update()
assert 'energy' in data
assert 'power' in data['energy']
assert 'total' in data['energy']['power']