Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
argo
nrm
Commits
ffe2d632
Commit
ffe2d632
authored
May 03, 2019
by
Swann Perarnau
Browse files
Merge branch 'container-singularity' into 'master'
Container singularity support Closes
#42
and
#49
See merge request
!85
parents
f73b4415
b3450523
Pipeline
#7121
passed with stages
in 13 minutes and 47 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
AUTHORS
View file @
ffe2d632
...
...
@@ -4,3 +4,4 @@ Kazutomo Yoshii <kazutomo.yoshii@gmail.com>
Sridutt Bhalachandra <sriduttb@anl.gov>
Srinivasan Ramesh <ramesh2@llnl.gov>
Valentin Reis <vreis@anl.gov>
Florence Monna <fmonna@anl.gov>
bin/nrmd
View file @
ffe2d632
...
...
@@ -45,6 +45,8 @@ def main(argv=None):
"argo_perf_wrapper"
:
"nrm-perfwrapper"
,
"argo_nodeos_config"
:
"argo_nodeos_config"
,
"pmpi_lib"
:
"/usr/lib/libnrm-pmpi.so"
,
"singularity"
:
"singularity"
,
"container_runtime"
:
"nodeos"
,
}
if
args
.
print_defaults
:
...
...
@@ -60,15 +62,15 @@ def main(argv=None):
action
=
"store_true"
)
parser
.
add_argument
(
"--nrm_log"
,
help
=
"Main log file. Override default with the NRM_LOG
.
"
help
=
"Main log file. Override default with the NRM_LOG
"
"environment variable"
,
default
=
os
.
environ
.
get
(
'NRM_LOG'
,
'/tmp/nrm.log'
))
parser
.
add_argument
(
'--hwloc'
,
help
=
"Path to the hwloc to use. This path can be "
"relative and makes uses of the $PATH if necessary."
"Override default with the HWLOC environment"
"relative and makes uses of the $PATH if necessary.
"
"Override default with the HWLOC environment
"
"variable."
,
default
=
os
.
environ
.
get
(
'HWLOC'
,
'hwloc'
))
...
...
@@ -82,25 +84,39 @@ def main(argv=None):
'argo_nodeos_config'
))
parser
.
add_argument
(
'--perf'
,
help
=
"Path to the linux perf tool to use. This path can be"
"relative and makes uses of the $PATH if necessary."
"Override default with the PERF environment"
help
=
"Path to the linux perf tool to use. This path can be
"
"relative and makes uses of the $PATH if necessary.
"
"Override default with the PERF environment
"
"variable."
,
default
=
os
.
environ
.
get
(
'PERF'
,
'perf'
))
parser
.
add_argument
(
'--pmpi_lib'
,
help
=
"Path to the libnrm PMPI library used for the power policy"
help
=
"Path to the libnrm PMPI library used for the power policy
.
"
"Override default with the PMPI environment variable."
,
default
=
os
.
environ
.
get
(
'PMPI'
,
defaults
[
'pmpi_lib'
]))
parser
.
add_argument
(
'--argo_perf_wrapper'
,
help
=
"Path to the linux perf tool to use. This path can"
help
=
"Path to the linux perf tool to use. This path can
"
"be relative and makes uses of the $PATH if "
"necessary. Override default with the PERFWRAPPER "
"environment variable."
,
default
=
os
.
environ
.
get
(
'ARGO_PERF_WRAPPER'
,
'nrm-perfwrapper'
))
parser
.
add_argument
(
'--singularity'
,
help
=
"Path to the singularity command. "
"Override default with the SINGULARITY environment variable."
,
default
=
os
.
environ
.
get
(
'SINGULARITY'
,
defaults
[
'singularity'
]))
parser
.
add_argument
(
'--container-runtime'
,
help
=
"Choice of container runtime. "
"Override default with the ARGO_CONTAINER_RUNTIME "
"environment variable."
,
choices
=
[
'nodeos'
,
'singularity'
],
default
=
os
.
environ
.
get
(
'ARGO_CONTAINER_RUNTIME'
,
defaults
[
'container_runtime'
]))
args
=
parser
.
parse_args
(
remaining_argv
)
nrm
.
daemon
.
runner
(
config
=
args
)
return
(
0
)
...
...
nrm/aci.py
View file @
ffe2d632
...
...
@@ -321,6 +321,29 @@ class App(SpecField):
return
super
(
App
,
self
).
load
(
data
)
class Image(SpecField):

    """Describe the container image referenced by a manifest."""

    # Both entries are declared with spec(unicode, True); the second argument
    # is presumably the "required" flag -- confirm against spec()'s definition.
    fields = {
        "path": spec(unicode, True),
        "type": spec(unicode, True),
    }

    def __init__(self):
        """Build an image description with nothing loaded yet."""
        pass

    def load(self, data):
        """Populate the image from a json dict and validate its type.

        The generic SpecField loader runs first; when it reports a falsy
        result, that result is handed back unchanged.  Otherwise the loaded
        ``type`` must be either 'sif' or 'docker': True is returned when it
        is, and an error is logged and False returned when it is not.
        """
        loaded = super(Image, self).load(data)
        if not loaded:
            # Propagate the base loader's own failure value as-is.
            return loaded
        if self.type in ('sif', 'docker'):
            return True
        logger.error("Image type not recognized")
        return False
class
ImageManifest
(
SpecField
):
"""Represent an ACI Image Manifest."""
...
...
@@ -329,6 +352,7 @@ class ImageManifest(SpecField):
"acVersion"
:
spec
(
unicode
,
True
),
"name"
:
spec
(
unicode
,
True
),
"app"
:
spec
(
App
,
True
),
"image"
:
spec
(
Image
,
False
),
}
def
__init__
(
self
):
...
...
nrm/containers.py
View file @
ffe2d632
...
...
@@ -13,7 +13,7 @@ from __future__ import print_function
from
aci
import
ImageManifest
from
collections
import
namedtuple
import
logging
from
subprograms
import
ChrtClient
,
NodeOSClient
,
resources
from
subprograms
import
ChrtClient
,
NodeOSClient
,
resources
,
SingularityClient
import
operator
logger
=
logging
.
getLogger
(
'nrm'
)
...
...
@@ -30,7 +30,8 @@ class ContainerManager(object):
def
__init__
(
self
,
container_runtime
,
rm
,
perfwrapper
=
"nrm-perfwrapper"
,
linuxperf
=
"perf"
,
pmpi_lib
=
"/usr/lib/libnrm-pmpi.so"
):
pmpi_lib
=
"/usr/lib/libnrm-pmpi.so"
,
downstream_event_uri
=
"ipc:///tmp/nrm-downstream-event"
):
self
.
linuxperf
=
linuxperf
self
.
perfwrapper
=
perfwrapper
self
.
runtime
=
container_runtime
...
...
@@ -40,6 +41,7 @@ class ContainerManager(object):
self
.
hwloc
=
rm
.
hwloc
self
.
chrt
=
ChrtClient
()
self
.
pmpi_lib
=
pmpi_lib
self
.
downstream_event_uri
=
downstream_event_uri
def
_get_container_tuple
(
self
,
container_name
,
manifest
):
"""Retrieve a container tuple if the container exists, otherwise use
...
...
@@ -110,7 +112,7 @@ class ContainerManager(object):
manifest
)
if
creation_needed
:
logger
.
info
(
"Creating container %s"
,
container_name
)
self
.
runtime
.
create
(
container
)
self
.
runtime
.
create
(
container
,
self
.
downstream_event_uri
)
self
.
containers
[
container_name
]
=
container
# build context to execute
...
...
@@ -133,6 +135,11 @@ class ContainerManager(object):
environ
[
'ARGO_NRM_RATELIMIT'
]
=
\
manifest
.
app
.
isolators
.
monitoring
.
ratelimit
if
container
.
power
.
get
(
'policy'
)
or
\
manifest
.
is_feature_enabled
(
'monitoring'
):
environ
[
'ARGO_NRM_DOWNSTREAM_EVENT_URI'
]
=
\
self
.
downstream_event_uri
# build prefix to the entire command based on enabled features
argv
=
[]
if
manifest
.
is_feature_enabled
(
'scheduler'
):
...
...
@@ -210,7 +217,7 @@ class ContainerRuntime(object):
def
__init__
(
self
):
pass
def
create
(
self
,
container
):
def
create
(
self
,
container
,
downstream_uri
):
"""Create the container defined by the container namedtuple on the
system."""
raise
NotImplementedError
...
...
@@ -237,7 +244,7 @@ class NodeOSRuntime(ContainerRuntime):
path/command."""
self
.
client
=
NodeOSClient
(
argo_nodeos_config
=
path
)
def
create
(
self
,
container
):
def
create
(
self
,
container
,
downstream_uri
):
"""Uses the container resource allocation to create a container."""
self
.
client
.
create
(
container
.
uuid
,
container
.
resources
)
...
...
@@ -250,6 +257,31 @@ class NodeOSRuntime(ContainerRuntime):
self
.
client
.
delete
(
container_uuid
,
kill
)
class SingularityUserRuntime(ContainerRuntime):

    """Container runtime backed by the singularity subprogram client.

    Every operation is forwarded to a SingularityClient instance stored in
    ``self.client``.
    """

    def __init__(self, path="singularity"):
        """Set up the singularity client.

        path: command or path handed to SingularityClient as its
        ``singularity_path``.
        """
        self.client = SingularityClient(singularity_path=path)

    def create(self, container, downstream_uri):
        """Start a singularity instance for the given container.

        The instance is named after ``container.uuid`` and started from the
        image path found in the container's manifest; ``downstream_uri`` is
        passed as the single entry of the client's bind list.
        """
        # NOTE(review): ImageManifest declares "image" with spec(Image, False);
        # if that flag means "optional", manifest.image may be missing here --
        # confirm how manifests without an image are expected to behave.
        image = container.manifest.image
        binds = [downstream_uri]
        self.client.instance_start(container.uuid, image.path, binds)

    def execute(self, container_uuid, args, environ):
        """Run a command inside the instance and return the client's result."""
        launched = self.client.execute(container_uuid, args, environ)
        return launched

    def delete(self, container_uuid, kill=False):
        """Stop the instance; ``kill`` is forwarded to the client as-is."""
        self.client.instance_stop(container_uuid, kill)
class
DummyRuntime
(
ContainerRuntime
):
"""Implements a dummy runtime that doesn't create any container, but still
...
...
@@ -258,7 +290,7 @@ class DummyRuntime(ContainerRuntime):
def
__init__
(
self
):
pass
def
create
(
self
,
container
):
def
create
(
self
,
container
,
downstream_uri
):
pass
def
execute
(
self
,
container_uuid
,
args
,
environ
):
...
...
nrm/daemon.py
View file @
ffe2d632
...
...
@@ -11,7 +11,7 @@
from
__future__
import
print_function
from
applications
import
ApplicationManager
from
containers
import
ContainerManager
,
NodeOSRuntime
from
containers
import
ContainerManager
,
NodeOSRuntime
,
SingularityUserRuntime
from
controller
import
Controller
,
PowerActuator
from
powerpolicy
import
PowerPolicyManager
from
functools
import
partial
...
...
@@ -307,15 +307,22 @@ class Daemon(object):
# create managers
self
.
resource_manager
=
ResourceManager
(
hwloc
=
self
.
config
.
hwloc
)
container_runtime
=
\
NodeOSRuntime
(
self
.
config
.
argo_nodeos_config
)
container_runtime
=
None
if
self
.
config
.
container_runtime
==
'nodeos'
:
container_runtime
=
\
NodeOSRuntime
(
path
=
self
.
config
.
argo_nodeos_config
)
elif
self
.
config
.
container_runtime
==
'singularity'
:
container_runtime
=
\
SingularityUserRuntime
(
self
.
config
.
singularity
)
assert
(
container_runtime
is
not
None
)
self
.
container_manager
=
ContainerManager
(
container_runtime
,
self
.
resource_manager
,
perfwrapper
=
self
.
config
.
argo_perf_wrapper
,
linuxperf
=
self
.
config
.
perf
,
pmpi_lib
=
self
.
config
.
pmpi_lib
,
)
downstream_event_uri
=
downstream_event_param
,
)
self
.
application_manager
=
ApplicationManager
()
self
.
sensor_manager
=
SensorManager
()
pa
=
PowerActuator
(
self
.
sensor_manager
)
...
...
nrm/subprograms.py
View file @
ffe2d632
...
...
@@ -141,6 +141,57 @@ class NodeOSClient(object):
cwd
=
environ
[
'PWD'
])
class SingularityClient(object):

    """Client to singularity."""

    def __init__(self, singularity_path="singularity"):
        """Load client configuration.

        singularity_path: command or path used as the first element of every
        command line built by this client.
        """
        self.prefix = singularity_path

    def _run_logged(self, args):
        """Run args to completion with captured output, then log the result."""
        p = subprocess.Popen(args, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        logpopen(p, args, stdout, stderr)

    def instance_start(self, instance_name, container_image, bind_list=None):
        """Start a named instance of a container image.

        Note that singularity will also start the startscript if
        defined in the container image, which might be an issue.

        instance_name: name given to the new instance.
        container_image: image argument placed before the instance name.
        bind_list: optional iterable of strings; when non-empty they are
        joined with ',' and passed through ``--bind``.  Defaults to None
        (no binds) rather than a shared mutable list.
        """
        args = [self.prefix, 'instance', 'start']
        if bind_list:
            args.extend(['--bind', ','.join(bind_list)])
        args.extend([container_image, instance_name])
        self._run_logged(args)

    def execute(self, instance_name, argv, environ):
        """Execute argv inside container.

        singularity exec instance://instance_name <command>

        environ is used both as the child environment and as the source of
        the working directory, so it must contain a 'PWD' entry (a KeyError
        is raised otherwise).  Returns the process.Subprocess object created
        for the command (presumably tornado's process module -- confirm
        against the file's imports).
        """
        container_name = "instance://" + instance_name
        args = [self.prefix, 'exec', container_name]
        args.extend(argv)
        return process.Subprocess(args, env=environ,
                                  stdout=process.Subprocess.STREAM,
                                  stderr=process.Subprocess.STREAM,
                                  close_fds=True,
                                  cwd=environ['PWD'])

    def instance_stop(self, instance_name, kill=False):
        """Stop an instance and kill everything in it.

        kill: when true, ``--force`` is added before the instance name.
        """
        args = [self.prefix, 'instance', 'stop']
        if kill:
            args.append("--force")
        args.append(instance_name)
        self._run_logged(args)
class
ChrtClient
(
object
):
"""Client to chrt command line wrapper."""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment