Commit 369b63c8 authored by Paul Rich's avatar Paul Rich
Browse files

Merge branch 'master' into 20-draining-backfilling

Conflicts:
	src/lib/Components/system/CraySystem.py
parents 9fe3c506 1415adfb
......@@ -228,6 +228,14 @@ platforms, all jobs are run as script jobs.
.SH "NOTE"
The only thing printed to STDOUT is the jobid, any other error or informational messages are printed to STDERR.
.SS "Cray Systems"
On Cray systems, the "location" attribute may be specified by a comma-delimited
list of node ids. Runs of node ids may be compacted to a hyphenated, inclusive
pair, i.e. 1-4 would expand to 1, 2, 3, 4. All nodes specified in location must
exist on the system. This list format is compatible with the values returned by
Cray's
.BR cnselect (1)
command.
.SH "SEE ALSO"
.BR qstat (1),
.BR qdel (1),
......
......@@ -503,7 +503,39 @@ command. The default is 300 seconds.
.B bgtype
The type of BlueGene being run on. For BlueGene/Q this should be set to 'bgq'.
.SS "CRAY SECTIONS"
.SS "[alps]"
.TP
.B basil
The path to Cray's apbasil command. The default path is
/opt/cray/default/alps/bin/apbasil
.TP
.B apkill
The path to Cray's apkill command. The default path is
/opt/cray/alps/default/bin/apkill
.TP
.B default_depth
The default processors per node. This should be set to the number of KNL cores
on each node for XC40 systems. The default value is 72.
.SS [alpssystem]
.TP
.B pgroup_startup_timeout
The time to allow for process group startup in seconds. The default is 120
seconds.
.TP
.B save_me_interval
The minimum interval that Cobalt will wait between saving statefiles for this
component, in seconds. By default the interval is 10.0 seconds. Under periods
of high load on the component, the interval between statefiles may be longer.
.TP
.B temp_reservation_time
The default time for the temporary allocation reservation for starting jobs in
seconds. The default is 300 seconds.
.TP
.B update_thread_timeout
The polling interval for state updates from ALPS in seconds. The default is
10 seconds.
.SH "ENVIRONMENT"
.B COBALT_CONFIG_FILES
......
.TH "nodesadm" 8
.TH "nodeadm" 8
.SH "NAME"
nodeadm is the administrative interface for cluster systems.
.SH "SYNOPSIS"
.B nodeadm [-l] [--down part1 part2] [--up part1 part2]
.B nodeadm
.R [options] [list\ of\ nodes]
.SH "DESCRIPTION"
.TP
Allows one to mark resource as being down, or back up if there are schedulable again and it handles queue-resource associations.
Allows one to mark resources as being down, or back up if they are schedulable
again, and it handles queue-resource associations.
.SH "OPTIONS"
.TP
.B \-b\ [list\ of\ nodes]
.B [Cray Only]
Print detailed information for all nodes in the list of node ids.
This will accept hyphenated ranges as well. All ranges are inclusive.
.TP
.B \-d \-\-debug
Turn on communication debugging.
......@@ -22,17 +28,23 @@ Displays the usage and a brief descriptions of the options
.B \-\-version
Displays client revision and Cobalt version
.TP
.B \-\-down
mark nodes as down
.B \-\-down [list\ of\ nodes]
Mark nodes as down
.TP
.B \-\-up
mark nodes as up (even if allocated)
.B \-\-up\ [list\ of\ nodes]
Mark nodes as up. If the node is not in a usable state or is allocated,
this may cause unexpected behavior.
.TP
.B \-\-queue
set queue associations
.B \-\-queue\ [queue1:queue2:...:queueN]\ [list\ of\ nodes]
Set queue associations. The list of queues to set on a node is ':'-delimited.
.TP
.B \-l \-\-list'
list node states
.SH "NOTES"
.SS "Cray Systems"
On Cray systems nodes are referenced by their integer node id. Nodes may be
specified as a comma-delimited list. Ranges of node ids may be compacted with a
hyphen to an inclusive range, i.e. 1-4 will expand to 1,2,3,4.
.SH "SEE ALSO"
.BR nodelist (1)
......
......@@ -11,7 +11,11 @@ nodelist
List resources on the cluster system.
.SH "OPTIONS"
.TP
.B \-b <list of node ids>
.B [Cray Only]
Print detailed information for all nodes in the list of node ids.
This will accept hyphenated ranges as well. All ranges are inclusive.
.TP
.B \-d \-\-debug
Turn on communication debugging.
......
......@@ -4,73 +4,109 @@ setres \- Create or modify a cobalt scheduler reservation
.SH "SYNOPSIS"
.B setres [modify or create reservation options] partition1 [ partition2 ... partitionN ]
.B setres [id changing options]
.B setres [id changing options]
.SH "DESCRIPTION"
.TP
This program creates or modifies a scheduler reservation.
Creates or modifies a scheduler reservation. Reservation and cycle ids may also
be reset.
.SH "OPTIONS TO MODIFY OR CREATE RESERVATION (partition arguments required)"
.TP
.B \-\-debug
Turn on communication debugging.
.TP
.B \-h \-\-help
Displays the usage and a brief descriptions of the options
.TP
.B \-\-version
Displays client revision and Cobalt version
.TP
.B \-A \-\-project
.SS "RESERVATION CREATION AND MODIFICATION"
At a minimum all reservation creation requires use of the
.B -n, -s
and
.B -d
flags. Partitions and nodes must be specified as positional arguments or via the
.B -p
flag.
.TP
.B \-A \-\-project [project\ name]
Set project name
.TP
.B \-D \-\-defer
Defer current (or next) iteration of recurring reservation (must be
used with -m)
.B \-\-allow_passthrough
Allow pass through connection on systems with interconnects that allow
passthrough communication.
.TP
.B \-\-block_passthrough
Block pass through connections on systems with interconnects that allow
passthrough communication.
.TP
.B \-c \-\-cycletime
.B \-c \-\-cycletime [time]
Set the cycle time (in minutes or DD:HH:MM:SS). This is the amount of
time from reservation start until it is automatically renewed. This
can be used to create repeating reservations.
.TP
.B \-d \-\-duration
.B \-d \-\-duration [time]
Set duration (in minutes or HH:MM:SS)
.TP
.B \-D \-\-defer
Defer current (or next) iteration of recurring reservation. This must be used
with the
.B -m
flag.
.TP
.B \-\-debug
Turn on communication debugging.
.TP
.B \-h \-\-help
Displays the usage and a brief descriptions of the options
.TP
.B \-m \-\-modify
Modify an existing reservation, specified with -n.
Modify an existing reservation. The target reservation specified with
.BR -n .
.TP
.B \-n \-\-name
Set reservation name
Set reservation name. Names must be unique for all pending and active
reservations on a system.
.TP
.B \-p \-\-partition
Set use partition. Now optional
Set use partition. Now optional. Partitions/nodes to reserve may be specified as
positional arguments. All nodes/partitions in a reservation must exist and must
be managed by the system component at the time the reservation is set.
.TP
.B \-q \-\-queue
Set the queue name, if something other than the standard reservation naming convention is desired.
.TP
.B \-s \-\-starttime
Set start time (in format YYYY_MM_DD-HH:MM)
Set the queue name, if something other than the standard reservation naming
convention is desired. Queues may already exist and have jobs in them.
Jobs currently running in a target queue will not be affected by applying a
reservation to the queue. Jobs that are queued in the target queue will not
start until the reservation becomes active. Jobs in a reservation against an
existing queue will be permitted to run on all nodes in that queue.
If a queue isn't specified, a queue R.name will be created where name is the
reservation name specified by the
.B -n
argument.
.TP
.B \-u \-\-user
Set user for reservation. Set to "*" for all users.
.B \-s \-\-starttime [date]
Set start time (supported formats include YYYY-MM-DD-HH:MM or YYYY_MM_DD-HH:MM).
.TP
.B \-\-allow_passthrough
Allow pass through connection
.B \-u \-\-user [list\ of\ users]
Set user(s) for reservation. Set to "*" for all users. User names may be
provided as a ':'-delimited list. User names must be valid on the node where
.BR setres (8)
is running.
.TP
.B \-\-block_passthrough
Block pass through connections
.SH "ID CHANGING OPTIONS (no partition arguments)"
.B \-\-version
Displays client revision and Cobalt version
.SS "ID MODIFICATION OPTIONS"
These options must not be used with the other reservation control options.
.TP
.B \-\-cycle_id
set cycle id
.B \-\-cycle_id [cycleid]
Set cycle id
.TP
.B \-\-force_id
only used with \-\-res_id or \-\-cycle_id options
.B \-\-force_id
Only used with \-\-res_id or \-\-cycle_id options. Will force the id generator
to start with the specified value. Improper use of this option may cause
non-unique reservation ids and cycle ids to occur.
.TP
.B \-\-res_id
reservation id (int)
.B \-\-res_id [resid]
set the reservation id
.SH "NOTES"
On Cray systems nodes are referenced by their integer node id. Nodes may be
specified as a comma-delimited list. Ranges of node ids may be compacted with a
hyphen to an inclusive range, i.e. 1-4 will expand to 1,2,3,4.
.SH "SEE ALSO"
.BR showres (1),
.BR releaseres (8)
......
......@@ -24,8 +24,7 @@ _logger = logging.getLogger(__name__.split('.')[-1])
#CONFIG POINT TO ALPS
init_cobalt_config()
BASIL_PATH = get_config_option('alps', 'basil',
'/home/richp/alps-simulator/apbasil.sh')
BASIL_PATH = get_config_option('alps', 'basil', '/opt/cray/alps/default/bin/apbasil')
DEFAULT_DEPTH = int(get_config_option('alps', 'default_depth', 72))
class ALPSScriptChild (PGChild):
......
......@@ -33,6 +33,7 @@ SAVE_ME_INTERVAL = float(get_config_option('alpsssytem', 'save_me_interval', 10.
PENDING_STARTUP_TIMEOUT = float(get_config_option('alpssystem',
'pending_startup_timeout', 1200)) #default 20 minutes to account for boot.
APKILL_CMD = get_config_option('alps', 'apkill', '/opt/cray/alps/default/bin/apkill')
PGROUP_STARTUP_TIMEOUT = float(get_config_option('alpssystem', 'pgroup_startup_timeout', 120.0))
DRAIN_MODE = get_config_option('system', 'drain_mode', 'first-fit')
DRAIN_MODES = ['first-fit', 'drain-only', 'backfill']
......@@ -41,8 +42,11 @@ class ALPSProcessGroup(ProcessGroup):
def __init__(self, spec):
super(ALPSProcessGroup, self).__init__(spec)
self.alps_res_id = spec['alps_res_id']
self.alps_res_id = spec.get('alps_res_id', None)
self.interactive_complete = False
now = time.time()
self.startup_timeout = int(spec.get("pgroup_startup_timeout",
now + PGROUP_STARTUP_TIMEOUT))
#inherit generic getstate and setstate methods from parent
......@@ -1048,7 +1052,6 @@ class CraySystem(BaseSystem):
if alps_res is not None:
spec['alps_res_id'] = alps_res.alps_res_id
new_pgroups = self.process_manager.init_groups(specs)
for pgroup in new_pgroups:
_logger.info('%s: process group %s created to track job status',
pgroup.label, pgroup.id)
......
......@@ -70,7 +70,7 @@ class ProcessGroupManager(object): #degenerate with ProcessMonitor.
return self
def init_groups(self, specs):
'''Add a set of process groups from specs. Generate a unique id.]
'''Add a set of process groups from specs. Generate a unique id.
Input:
specs - a list of dictionaries that specify process groups for a
......@@ -133,6 +133,7 @@ class ProcessGroupManager(object): #degenerate with ProcessMonitor.
self.process_groups[pg_id].label)
else:
started.append(pg_id)
self.process_groups[pg_id].startup_timeout = 0
return started
#make automatic get final status of process group
......@@ -163,6 +164,9 @@ class ProcessGroupManager(object): #degenerate with ProcessMonitor.
#clean up orphaned process groups
for pg in self.process_groups.values():
if now < pg.startup_timeout:
#wait for startup timeout. We don't want any hasty kills
continue
pg_id = pg.id
child_uid = (pg.forker, pg.head_pid)
if child_uid not in children:
......@@ -176,7 +180,7 @@ class ProcessGroupManager(object): #degenerate with ProcessMonitor.
continue
orphaned.append(pg_id)
_logger.warning('%s: orphaned job exited with unknown status', pg.jobid)
pg.exit_status = 1234567 #FIXME: what should this sentinel be?
pg.exit_status = 1234567
completed_pgs.append(pg)
else:
children[child_uid]['found'] = True
......
......@@ -103,6 +103,7 @@ class ProcessGroup(Data):
self.sigkill_timeout = None
#TODO: extract into subclass
self.alps_res_id = spec.get('alps_res_id', None)
self.startup_timeout = spec.get("pgroup_startup_timeout", 0)
def __getstate__(self):
data = {}
......@@ -203,11 +204,6 @@ class ProcessGroup(Data):
else:
core_dump_str = ""
_logger.info("%s: terminated with signal %s%s", self.label, child["signum"], core_dump_str)
# else:
# if self.exit_status is None:
# # the forker has lost the child for our process group
# _logger.info("%s: job exited with unknown status", self.label)
# self.exit_status = 1234567 #FIXME: What should this sentinel be?
......
'''Process Manager for cluster/cray systems tests'''
import time
import logging
import sys
from mock import Mock, MagicMock, patch
import Cobalt.Proxy
from Cobalt.Components.system.base_pg_manager import ProcessGroupManager
# Canned child-status record shaped like a forker's get_children response.
# NOTE(review): appears unused in the visible tests (InspectMock returns its
# own literal) — confirm before removing.
default_child_data = [{'id': 1}]
def fake_forker(*args, **kwargs):
    '''Stand-in forker call that always blows up, to exercise error paths.

    Echoes the received arguments to stdout so failing tests show what the
    forker was handed, then raises RuntimeError unconditionally.
    '''
    # Single-argument print() call form: identical output under Python 2's
    # print statement, and forward-compatible with Python 3.
    print(args)
    print(kwargs)
    raise RuntimeError('boom')
class InspectMock(MagicMock):
    '''allow us to inspect what is going on within a proxy call

    Attribute lookups for the two proxy methods the process-manager tests
    exercise return canned callables; everything else falls back to normal
    MagicMock auto-attribute behavior.
    '''

    def __getattr__(self, name):
        # Canned return values, keyed by the proxy method name requested.
        canned = {'get_children': [{'id': 1}], 'fork': 1}
        if name in canned:
            return MagicMock(return_value=canned[name])
        return super(InspectMock, self).__getattr__(name)
class TestProcessManager(object):
    '''tests for the base project manager'''

    def setup(self):
        '''common setup for process group tests'''
        # Minimal spec accepted by ProcessGroupManager.init_groups; jobid 1
        # becomes the process-group key asserted on below.
        self.base_spec = {'args':['arg1', 'arg2'], 'user':'frodo',
                          'jobid': 1, 'executable': 'job.exe', 'size': 2,
                          'cwd': '/home/frodo', 'location': 'loc1'
                         }
        self.process_manager = ProcessGroupManager()
        # Single forker with a zero task count so selection always lands on
        # 'forker1'.
        self.process_manager.forkers = ['forker1']
        self.process_manager.forker_taskcounts = {'forker1':0}

    def teardown(self):
        '''common teardown for process group tests'''
        del self.base_spec
        del self.process_manager

    def test_process_manager_init_groups_single(self):
        '''ProcessGroupManager.init_groups: create a process group and add to process manager'''
        specs = [self.base_spec]
        self.process_manager.init_groups(specs)
        assert self.process_manager.process_groups.get(1, None) is not None, "process group not created"
        assert self.process_manager.process_groups[1].forker == 'forker1', "forker not set"

    @patch.object(Cobalt.Proxy.DeferredProxyMethod, '__call__', return_value=1)
    def test_process_manager_start_groups_single(self, *args, **kwargs):
        '''ProcessGroupManager.start_groups: start up a single process group'''
        # NOTE(review): ProcessGroup reads the 'pgroup_startup_timeout' spec
        # key elsewhere in this commit; confirm 'startup_timeout' here is the
        # intended key.
        self.base_spec['startup_timeout'] = 120
        self.process_manager.init_groups([self.base_spec])
        started = self.process_manager.start_groups([1])
        assert len(started) == 1, "started %s groups, should have started 1" % len(started)
        assert sorted(started) == [1], "wrong groups started."
        # A successful start must zero the startup timeout so the orphan
        # cleanup path no longer defers on this group.
        assert self.process_manager.process_groups[1].startup_timeout == 0, (
            "startup_timeout not reset")

    @patch('Cobalt.Proxy.DeferredProxy', side_effect=InspectMock)
    def test_process_manager_update_groups_timeout(self, *args, **kwargs):
        '''ProcessGroupManager.update_groups: startup timeout respected.'''
        now = int(time.time())
        pgroups = self.process_manager.process_groups
        self.process_manager.init_groups([self.base_spec])
        # Timeout still in the future: update_groups must leave the group
        # untouched rather than treating it as orphaned.
        pgroups[1].startup_timeout = 120 + now
        self.process_manager.update_groups()
        pgroups = self.process_manager.process_groups
        assert len(pgroups) == 1, "%s groups, should have 1" % len(pgroups)
        assert sorted(pgroups.keys()) == [1], "wrong groups."
        assert pgroups[1].startup_timeout == now + 120, (
            "bad startup timeout: %s" % pgroups[1].startup_timeout)

    @patch('Cobalt.Proxy.DeferredProxy', side_effect=InspectMock)
    def test_process_manager_update_groups_timeout_exceeded(self, *args, **kwargs):
        '''ProcessGroupManager.update_groups: startup timeout exceeded.'''
        now = int(time.time())
        pgroups = self.process_manager.process_groups
        self.process_manager.init_groups([self.base_spec])
        # Timeout already in the past: the group should be cleaned up as an
        # orphan by update_groups.
        pgroups[1].startup_timeout = now - 120
        self.process_manager.update_groups()
        pgroups = self.process_manager.process_groups
        assert len(pgroups) == 0, "%s groups, should have 0" % len(pgroups)
        assert sorted(pgroups.keys()) == [], "groups should be empty"
'''Tests for base ProcessGroup class and actions'''
from nose.tools import raises
from mock import Mock, MagicMock, patch
import Cobalt.Exceptions
from Cobalt.DataTypes.ProcessGroup import ProcessGroup
from Cobalt.Proxy import ComponentProxy
# Module-level proxy stand-in. NOTE(review): appears unused in this file —
# confirm before removing.
mock_proxy = MagicMock()
class TestProcessGroup(object):
    '''Group together process group tests, and apply common setup'''

    def setup(self):
        '''common setup for process group tests'''
        # Minimal spec carrying every field ProcessGroup requires; the
        # missing-fields test below relies on an empty spec failing.
        self.base_spec = {'args':['arg1', 'arg2'], 'user':'frodo',
                          'jobid': 1, 'executable': 'job.exe', 'size': 2,
                          'cwd': '/home/frodo', 'location': 'loc1'
                         }

    def teardown(self):
        '''common teardown for process group tests'''
        del self.base_spec

    def test_process_group_init(self):
        '''ProcessGroup.__init__: basic initialization'''
        pgroup = ProcessGroup(self.base_spec)
        assert pgroup is not None, "process group creation failed"

    @raises(Cobalt.Exceptions.DataCreationError)
    def test_process_group_init_missing_fields(self):
        '''ProcessGroup.__init__: exception on bad spec'''
        # An empty spec lacks required fields; construction must raise
        # DataCreationError (asserted by the @raises decorator).
        pgroup = ProcessGroup({})
        assert False, "Should raise exception"

    @patch.object(Cobalt.Proxy.DeferredProxyMethod, '__call__', return_value=1)
    def test_process_group_start_base(self, proxy):
        '''basic process group startup'''
        pgroup = ProcessGroup(self.base_spec)
        data = pgroup.prefork()
        pgroup.start()
        # start() must forward the full command line plus the bookkeeping
        # data assembled by prefork() to the forker proxy.
        proxy.assert_called_with([pgroup.executable] + pgroup.args, pgroup.tag,
            "Job %s/%s/%s" %(pgroup.jobid, pgroup.user, pgroup.id), pgroup.env,
            data, pgroup.runid)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment