Commit 045dacc7 authored by Paul Rich
Browse files

BACKFILL_EPSILON now supported and used

Backfilling has an epsilon of 2 minutes (120 seconds) by default.  This can be
altered with the backfill_epsilon option in the system section of the cobalt config file.
parent 691c46d8
......@@ -38,6 +38,9 @@ DRAIN_MODE = get_config_option('system', 'drain_mode', 'first-fit')
#cleanup time in seconds
CLEANUP_DRAIN_WINDOW = get_config_option('system', 'cleanup_drain_window', 300)
#Epsilon for backfilling. This system does not do this on a per-node basis.
BACKFILL_EPSILON = int(get_config_option('system', 'backfill_epsilon', 120))
DRAIN_MODES = ['first-fit', 'backfill']
CLEANING_ID = -1
......@@ -251,7 +254,6 @@ class CraySystem(BaseSystem):
else:
self.nodes_by_queue[queue] = set([node.node_id])
@exposed
def get_nodes(self, as_dict=False, node_ids=None, params=None, as_json=False):
'''fetch the node dictionary.
......@@ -283,7 +285,6 @@ class CraySystem(BaseSystem):
return json.dumps(node_info)
return node_info
def _run_update_state(self):
'''automated node update functions on the update timer go here.'''
while True:
......@@ -753,14 +754,14 @@ class CraySystem(BaseSystem):
if self.nodes[str(node_id)].status in
self.nodes[str(node_id)].DOWN_STATUSES]
if drain_time is not None:
print drain_time, BACKFILL_EPSILON, drain_time - BACKFILL_EPSILON
unavailable_nodes.extend([node_id for node_id in node_id_list
if (self.nodes[str(node_id)].draining and
self.nodes[str(node_id)].drain_until < int(drain_time))])
(self.nodes[str(node_id)].drain_until - BACKFILL_EPSILON) < int(drain_time))])
for node_id in set(unavailable_nodes):
node_id_list.remove(node_id)
return sorted(node_id_list, key=lambda nid: int(nid))
def _select_first_nodes(self, job, node_id_list):
'''Given a list of nids, select the first node count nodes fromt the
list. This is the target for alternate allocator replacement.
......
......@@ -94,10 +94,14 @@ class TestCraySystem(object):
'queue':'default', 'nodes': 1, 'walltime': 60,
}
self.fake_reserve_called = False
Cobalt.Components.system.CraySystem.BACKFILL_EPSILON = 120
Cobalt.Components.system.CraySystem.DRAIN_MODE = "first-fit"
def teardown(self):
del self.system
del self.base_job
Cobalt.Components.system.CraySystem.BACKFILL_EPSILON = 120
Cobalt.Components.system.CraySystem.DRAIN_MODE = "first-fit"
self.fake_reserve_called = False
......@@ -315,7 +319,23 @@ class TestCraySystem(object):
self.base_job['attrs'] = {'location':'1-5'}
self.base_job['nodes'] = 2
nodelist = self.system._assemble_queue_data(self.base_job,
idle_only=False, drain_time=650 )
idle_only=False, drain_time=650)
assert nodelist == ['5'], 'Wrong node in list %s' % nodelist
def test_assemble_queue_data_attrs_location_any_not_down_drain_limit_no_ep(self):
    '''CraySystem._assemble_queue_data: attrs location return any not down in drain window no epsilon'''
    # Zero out the module-level epsilon so the drain comparison is made
    # against the raw drain_until values; teardown restores it to 120.
    Cobalt.Components.system.CraySystem.BACKFILL_EPSILON = 0

    # Node 5 is deliberately left untouched.
    node_statuses = (
        ('1', 'busy'),
        ('2', 'cleanup-pending'),
        ('3', 'allocated'),
        ('4', 'ADMINDOWN'),
    )
    for nid, status in node_statuses:
        self.system.nodes[nid].status = status

    # First value is the drain-until time compared against drain_time below.
    # Nodes 1 and 2 drain before 650; node 3 drains at 700, after it.
    # NOTE(review): meaning of set_drain's second argument is not visible
    # here -- confirm against the node class.
    drain_settings = (
        ('1', (500.0, 1)),
        ('2', (600.0, 2)),
        ('3', (700.0, 3)),
    )
    for nid, drain_args in drain_settings:
        self.system.nodes[nid].set_drain(*drain_args)

    self.base_job['attrs'] = {'location':'1-5'}
    self.base_job['nodes'] = 2

    nodelist = self.system._assemble_queue_data(
        self.base_job, idle_only=False, drain_time=650)

    # With epsilon 0, node 3 (700 - 0 >= 650) stays eligible; with the
    # default epsilon of 120 it would be filtered out (700 - 120 < 650).
    assert nodelist == ['3', '5'], 'Wrong node in list %s' % nodelist
def test_assemble_queue_data_non_draining(self):
......@@ -334,7 +354,7 @@ class TestCraySystem(object):
self.system.nodes['1'].status = 'busy'
self.system.nodes['2'].status = 'down'
self.system.nodes['3'].set_drain(50.0, 2)
self.system.nodes['4'].set_drain(100.0, 1)
self.system.nodes['4'].set_drain(220.0, 1) #add in epsilon
nodelist = self.system._assemble_queue_data(self.base_job,
drain_time=90.0)
assert_match(sorted(nodelist), ['4', '5'], "Bad Nodelist")
......@@ -344,7 +364,7 @@ class TestCraySystem(object):
self.system.nodes['1'].status = 'busy'
self.system.nodes['2'].status = 'down'
self.system.nodes['3'].status = 'allocated'
self.system.nodes['4'].set_drain(100.0, 1)
self.system.nodes['4'].set_drain(220.0, 1) #add in epsilon
nodelist = self.system._assemble_queue_data(self.base_job,
drain_time=100.0)
assert_match(sorted(nodelist), ['4', '5'], "Bad Nodelist")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment