Commit fb33faea authored by Paul Rich's avatar Paul Rich
Browse files

Parameters to bridge now config-configurble, aprun res attributes can be set

This allows a number of constants to be set in the Cobalt config file.
We can also now set attributes like width and depth from qsub using
--attrs.  This also adds the hooks for using attrs=location:xxxyyy for a
resource reservation.
parent 1b4e78d1
......@@ -4,40 +4,57 @@
#parsing XML from stdout. Synchyronous, however, the system script
#forker isn't. These will be able to block for now.
from cray_messaging import InvalidBasilMethodError, BasilRequest
from cray_messaging import parse_response, ALPSError
import logging
from cray_messaging import InvalidBasilMethodError, BasilRequest
from cray_messaging import parse_response, ALPSError
from Cobalt.Proxy import ComponentProxy
from Cobalt.Data import IncrID
from Cobalt.Util import sleep
import os
from Cobalt.Util import init_cobalt_config, get_config_option
from Cobalt.Util import compact_num_list
_logger = logging.getLogger()
init_cobalt_config()
FORKER = 'system_script_forker'
BASIL_PATH = '/home/richp/alps_simulator/apbasil.sh' #fetch this from config
#BASIL_PATH = '/usr/bin/cat' #fetch this from config
FORKER = get_config_option('alps', 'forker', 'system_script_forker')
BASIL_PATH = get_config_option('alps', 'basil_path',
'/home/richp/alps_simulator/apbasil.sh')
_RUNID_GEN = IncrID()
CHILD_SLEEP_TIMEOUT = 1.0
CHILD_SLEEP_TIMEOUT = float(get_config_option('alps', 'child_sleep_timeout',
1.0))
DEFAULT_DEPTH = int(get_config_option('alps', 'default_depth', 8))
class BridgeError(Exception):
'''Exception class so that we may easily recognize bridge-specific errors.'''
pass
def reserve(user, jobid, nodecount, node_id_list=None):
def reserve(user, jobid, nodecount, attributes=None, node_id_list=None):
'''reserve a set of nodes in ALPS'''
if attributes is None:
attributes = {}
params = {}
param_attrs = {}
params['user_name'] = user
params['batch_id'] = jobid
params['width'] = nodecount
params['depth'] = 1 #FIXME fix this. Pass this in from qsub. FIXME
param_attrs['width'] = attributes.get('width', nodecount)
param_attrs['depth'] = attributes.get('depth', DEFAULT_DEPTH)
param_attrs['nppn'] = attributes.get('nnpn', None)
param_attrs['npps'] = attributes.get('nnps', None)
param_attrs['nspn'] = attributes.get('nspn', None)
param_attrs['reservation_mode'] = attributes.get('reservation_mode',
'EXCLUSIVE')
param_attrs['nppcu'] = attributes.get('nppcu', None)
param_attrs['p-state'] = attributes.get('p-state', None)
param_attrs['p-govenor'] = attributes.get('p-govenor', None)
for key, val in param_attrs.items():
if val is not None:
params[key] = val
if node_id_list is not None:
params['node_id_list'] = node_id_list
params['node_id_list'] = compact_num_list(node_id_list)
print str(BasilRequest('RESERVE', params=params))
retval = _call_sys_forker(BASIL_PATH, str(BasilRequest('RESERVE',
params=params)))
......@@ -123,7 +140,7 @@ def _call_sys_forker(basil_path, in_str):
_logger.critical("error communicating with bridge", exc_info=True)
raise
while(True):
while True:
#Is a timeout needed here?
children = ComponentProxy(FORKER).get_children('apbridge', [runid])
complete = False
......@@ -142,6 +159,7 @@ def _call_sys_forker(basil_path, in_str):
return parse_response(resp)
def print_node_names(spec):
'''Debugging utility to print nodes returned by ALPS'''
print spec['reservations']
print spec['nodes'][0]
for node in spec['nodes']:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment