Commit 672d68c3 authored by Paul Rich
Browse files

Optimistic backfill now working. Preexisting tests now passing

New tests pending, but the optimistic mode backfiller does appear to be
working properly.  Old behavior is preserved and may be enabled by
setting the mode to pessimistic.
parent db6a3a4d
......@@ -732,10 +732,6 @@ class BGBaseSystem (Component):
def __init__ (self, *args, **kwargs):
Component.__init__(self, *args, **kwargs)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
self._blocks = BlockDict()
self._managed_blocks = set()
self._io_blocks = IOBlockDict()
......@@ -1312,21 +1308,15 @@ class BGBaseSystem (Component):
for block in blocks.itervalues():
if block.name in job_end_times.keys():
#Keep at least minimum_not_idle open for cleanup. Also, job may be running over time.
# Keep at least minimum_not_idle open for cleanup. Also, job may be running over time.
if job_end_times[block.name] > block.backfill_time:
block.backfill_time = job_end_times[block.name]
#Iterate over current jobs. Blocks with running jobs are set to the job's end time (start time + walltime)
#Iterate over parents and set their time to the backfill window as well.
# only set the parent block's time if it is earlier than the block's time
if BACKFILL_MODE == 'PESSIMISTIC':
for parent_block in block._parents:
if block.backfill_time > parent_block.backfill_time:
parent_block.backfill_time = block.backfill_time
elif BACKFILL_MODE == 'OPTIMISTIC':
for parent_block in block._parents:
if parent_block.backfill_time == now or block.backfill_time > parent_block.backfill_time:
parent_block.backfill_time = block.backfill_time
for parent_block in block._parents:
if parent_block.backfill_time < block.backfill_time:
parent_block.backfill_time = block.backfill_time
#Over all blocks, ignore if the time has not been changed, otherwise push
# the backfill time to children. Do so if the child is either immediately available
......
......@@ -41,7 +41,7 @@ from Cobalt.Components.bgq_base_system import A_DIM, B_DIM, C_DIM, D_DIM, E_DIM
from Cobalt.Components.bgq_base_system import get_extents_from_size
from Cobalt.Components.bgq_base_system import Wire
from Cobalt.Components.bgq_base_system import NodeCard, BlockDict, BGProcessGroupDict, BGBaseSystem
from Cobalt.Components.bgq_base_system import BACKFILL_MODE
#try:
##compatibility for older pythons. Check to see if this even matters for >= 2.6
#from elementtree import ElementTree
......@@ -168,6 +168,12 @@ class BGSystem (BGBaseSystem):
self.kernel = 'default'
BGBaseSystem.__init__(self, *args, **kwargs)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
else:
self.logger.info("Backfill mode set to: %s", BACKFILL_MODE)
sys.setrecursionlimit(5000)
self.process_groups.item_cls = BGProcessGroup
self.node_card_cache = dict()
......@@ -222,6 +228,12 @@ class BGSystem (BGBaseSystem):
def __setstate__(self, state):
Component.__setstate__(self, state)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
else:
self.logger.info("Backfill mode set to: %s", BACKFILL_MODE)
sys.setrecursionlimit(5000)
Cobalt.Util.fix_set(state)
self._managed_blocks = state['managed_blocks']
......
......@@ -3,6 +3,7 @@
'''
from nose import *
import Cobalt.Components.bgq_base_system
from Cobalt.Components.bgq_base_system import BGBaseSystem
class BackfillMockBlock(object):
......@@ -148,6 +149,7 @@ class TestBackfillTime(object):
job_done_1= 600.0
job_done_2= 500.0
job_end_times = {'MIR-04000-7BFF1-32768':job_done_1, 'MIR-00000-33331-512':job_done_2}
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['MIR-00000-73FF1-16384'].backfill_time == job_done_2, "MIR-00000-73FF1-16384 has backfill_time"\
" of %s should be %s" % (self.blocks['MIR-00000-73FF1-16384'].backfill_time, job_done_2)
......@@ -169,6 +171,7 @@ class TestBackfillTime(object):
jobdone = 500.0
job_end_times = {'8k-1':jobdone}
self.set_blocking_states('8k-1', 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['512-1'].backfill_time == jobdone, "Child did not recieve correct time"
......@@ -179,6 +182,7 @@ class TestBackfillTime(object):
jobdone = 300.0
job_end_times = {'8k-1':jobdone}
self.set_blocking_states('8k-1', 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['8k-1'].backfill_time == now_delta, "Minimum backfill window not set."
......@@ -191,6 +195,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1}
for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
assert self.blocks['horiz-16k-1'].backfill_time == job_done_1, 'horiz-16k-1 has time %s should be %s' % (self.blocks['horiz-16k-1'].backfill_time, job_done_1)
......@@ -209,6 +214,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_1, 'vert-16k-1':job_done_2}
for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['vert-16k-1'].backfill_time == job_done_2, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
assert self.blocks['vert-16k-2'].backfill_time == job_done_1, 'vert-16k-2 has time %s should be %s' % (self.blocks['vert-16k-2'].backfill_time, job_done_1)
......@@ -229,6 +235,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1}
for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert job_done_2 not in [val.backfill_time for val in self.blocks.values()], "Minimum backfill shadow not honored."
assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment