Commit 672d68c3 authored by Paul Rich
Browse files

Optimistic backfill now working. Preexisting tests now passing

New tests pending, but the optimistic mode backfiller does appear to be
working properly.  Old behavior is preserved and may be enabled by
setting the mode to pessimistic.
parent db6a3a4d
...@@ -732,10 +732,6 @@ class BGBaseSystem (Component): ...@@ -732,10 +732,6 @@ class BGBaseSystem (Component):
def __init__ (self, *args, **kwargs): def __init__ (self, *args, **kwargs):
Component.__init__(self, *args, **kwargs) Component.__init__(self, *args, **kwargs)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
self._blocks = BlockDict() self._blocks = BlockDict()
self._managed_blocks = set() self._managed_blocks = set()
self._io_blocks = IOBlockDict() self._io_blocks = IOBlockDict()
...@@ -1312,21 +1308,15 @@ class BGBaseSystem (Component): ...@@ -1312,21 +1308,15 @@ class BGBaseSystem (Component):
for block in blocks.itervalues(): for block in blocks.itervalues():
if block.name in job_end_times.keys(): if block.name in job_end_times.keys():
#Keep at least minimum_not_idle open for cleanup. Also, job may be running over time. # Keep at least minimum_not_idle open for cleanup. Also, job may be running over time.
if job_end_times[block.name] > block.backfill_time: if job_end_times[block.name] > block.backfill_time:
block.backfill_time = job_end_times[block.name] block.backfill_time = job_end_times[block.name]
#iterate over current jobs. Blocks with running jobs are set to the job's end time (start time + walltime) #iterate over current jobs. Blocks with running jobs are set to the job's end time (start time + walltime)
#Iterate over parents and set their time to the backfill window as well. #Iterate over parents and set their time to the backfill window as well.
# only set the parent block's time if it is earlier than the block's time # only set the parent block's time if it is earlier than the block's time
if BACKFILL_MODE == 'PESSIMISTIC': for parent_block in block._parents:
for parent_block in block._parents: if parent_block.backfill_time < block.backfill_time:
if block.backfill_time > parent_block.backfill_time: parent_block.backfill_time = block.backfill_time
parent_block.backfill_time = block.backfill_time
elif BACKFILL_MODE == 'OPTIMISTIC':
for parent_block in block._parents:
if parent_block.backfill_time == now or block.backfill_time > parent_block.backfill_time:
parent_block.backfill_time = block.backfill_time
#Over all blocks, ignore if the time has not been changed, otherwise push #Over all blocks, ignore if the time has not been changed, otherwise push
# the backfill time to children. Do so if the child is either immediately available # the backfill time to children. Do so if the child is either immediately available
......
...@@ -41,7 +41,7 @@ from Cobalt.Components.bgq_base_system import A_DIM, B_DIM, C_DIM, D_DIM, E_DIM ...@@ -41,7 +41,7 @@ from Cobalt.Components.bgq_base_system import A_DIM, B_DIM, C_DIM, D_DIM, E_DIM
from Cobalt.Components.bgq_base_system import get_extents_from_size from Cobalt.Components.bgq_base_system import get_extents_from_size
from Cobalt.Components.bgq_base_system import Wire from Cobalt.Components.bgq_base_system import Wire
from Cobalt.Components.bgq_base_system import NodeCard, BlockDict, BGProcessGroupDict, BGBaseSystem from Cobalt.Components.bgq_base_system import NodeCard, BlockDict, BGProcessGroupDict, BGBaseSystem
from Cobalt.Components.bgq_base_system import BACKFILL_MODE
#try: #try:
##compatibility for older pythons; check to see if this even matters for >= 2.6 ##compatibility for older pythons; check to see if this even matters for >= 2.6
#from elementtree import ElementTree #from elementtree import ElementTree
...@@ -168,6 +168,12 @@ class BGSystem (BGBaseSystem): ...@@ -168,6 +168,12 @@ class BGSystem (BGBaseSystem):
self.kernel = 'default' self.kernel = 'default'
BGBaseSystem.__init__(self, *args, **kwargs) BGBaseSystem.__init__(self, *args, **kwargs)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
else:
self.logger.info("Backfill mode set to: %s", BACKFILL_MODE)
sys.setrecursionlimit(5000) sys.setrecursionlimit(5000)
self.process_groups.item_cls = BGProcessGroup self.process_groups.item_cls = BGProcessGroup
self.node_card_cache = dict() self.node_card_cache = dict()
...@@ -222,6 +228,12 @@ class BGSystem (BGBaseSystem): ...@@ -222,6 +228,12 @@ class BGSystem (BGBaseSystem):
def __setstate__(self, state): def __setstate__(self, state):
Component.__setstate__(self, state) Component.__setstate__(self, state)
valid_backfill_modes = ['OPTIMISTIC', 'PESSIMISTIC']
if BACKFILL_MODE not in valid_backfill_modes:
self.logger.critical('[bgsystem] backfill_mode: %s invalid. Must be one of: %s. Terminating',
BACKFILL_MODE, ", ".join(valid_backfill_modes))
else:
self.logger.info("Backfill mode set to: %s", BACKFILL_MODE)
sys.setrecursionlimit(5000) sys.setrecursionlimit(5000)
Cobalt.Util.fix_set(state) Cobalt.Util.fix_set(state)
self._managed_blocks = state['managed_blocks'] self._managed_blocks = state['managed_blocks']
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
''' '''
from nose import * from nose import *
import Cobalt.Components.bgq_base_system
from Cobalt.Components.bgq_base_system import BGBaseSystem from Cobalt.Components.bgq_base_system import BGBaseSystem
class BackfillMockBlock(object): class BackfillMockBlock(object):
...@@ -148,6 +149,7 @@ class TestBackfillTime(object): ...@@ -148,6 +149,7 @@ class TestBackfillTime(object):
job_done_1= 600.0 job_done_1= 600.0
job_done_2= 500.0 job_done_2= 500.0
job_end_times = {'MIR-04000-7BFF1-32768':job_done_1, 'MIR-00000-33331-512':job_done_2} job_end_times = {'MIR-04000-7BFF1-32768':job_done_1, 'MIR-00000-33331-512':job_done_2}
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['MIR-00000-73FF1-16384'].backfill_time == job_done_2, "MIR-00000-73FF1-16384 has backfill_time"\ assert self.blocks['MIR-00000-73FF1-16384'].backfill_time == job_done_2, "MIR-00000-73FF1-16384 has backfill_time"\
" of %s should be %s" % (self.blocks['MIR-00000-73FF1-16384'].backfill_time, job_done_2) " of %s should be %s" % (self.blocks['MIR-00000-73FF1-16384'].backfill_time, job_done_2)
...@@ -169,6 +171,7 @@ class TestBackfillTime(object): ...@@ -169,6 +171,7 @@ class TestBackfillTime(object):
jobdone = 500.0 jobdone = 500.0
job_end_times = {'8k-1':jobdone} job_end_times = {'8k-1':jobdone}
self.set_blocking_states('8k-1', 'allocated') self.set_blocking_states('8k-1', 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['512-1'].backfill_time == jobdone, "Child did not recieve correct time" assert self.blocks['512-1'].backfill_time == jobdone, "Child did not recieve correct time"
...@@ -179,6 +182,7 @@ class TestBackfillTime(object): ...@@ -179,6 +182,7 @@ class TestBackfillTime(object):
jobdone = 300.0 jobdone = 300.0
job_end_times = {'8k-1':jobdone} job_end_times = {'8k-1':jobdone}
self.set_blocking_states('8k-1', 'allocated') self.set_blocking_states('8k-1', 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['8k-1'].backfill_time == now_delta, "Minimum backfill window not set." assert self.blocks['8k-1'].backfill_time == now_delta, "Minimum backfill window not set."
...@@ -191,6 +195,7 @@ class TestBackfillTime(object): ...@@ -191,6 +195,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1} job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1}
for key in job_end_times.keys(): for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated') self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1) assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
assert self.blocks['horiz-16k-1'].backfill_time == job_done_1, 'horiz-16k-1 has time %s should be %s' % (self.blocks['horiz-16k-1'].backfill_time, job_done_1) assert self.blocks['horiz-16k-1'].backfill_time == job_done_1, 'horiz-16k-1 has time %s should be %s' % (self.blocks['horiz-16k-1'].backfill_time, job_done_1)
...@@ -209,6 +214,7 @@ class TestBackfillTime(object): ...@@ -209,6 +214,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_1, 'vert-16k-1':job_done_2} job_end_times = {'8k-2':job_done_1, 'vert-16k-1':job_done_2}
for key in job_end_times.keys(): for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated') self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert self.blocks['vert-16k-1'].backfill_time == job_done_2, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1) assert self.blocks['vert-16k-1'].backfill_time == job_done_2, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
assert self.blocks['vert-16k-2'].backfill_time == job_done_1, 'vert-16k-2 has time %s should be %s' % (self.blocks['vert-16k-2'].backfill_time, job_done_1) assert self.blocks['vert-16k-2'].backfill_time == job_done_1, 'vert-16k-2 has time %s should be %s' % (self.blocks['vert-16k-2'].backfill_time, job_done_1)
...@@ -229,6 +235,7 @@ class TestBackfillTime(object): ...@@ -229,6 +235,7 @@ class TestBackfillTime(object):
job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1} job_end_times = {'8k-2':job_done_2, 'vert-16k-1':job_done_1}
for key in job_end_times.keys(): for key in job_end_times.keys():
self.set_blocking_states(key, 'allocated') self.set_blocking_states(key, 'allocated')
Cobalt.Components.bgq_base_system.BACKFILL_MODE = 'PESSIMISTIC'
BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now) BGBaseSystem.set_backfill_times(self.blocks, job_end_times, now)
assert job_done_2 not in [val.backfill_time for val in self.blocks.values()], "Minimum backfill shadow not honored." assert job_done_2 not in [val.backfill_time for val in self.blocks.values()], "Minimum backfill shadow not honored."
assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1) assert self.blocks['vert-16k-1'].backfill_time == job_done_1, 'vert-16k-1 has time %s should be %s' % (self.blocks['vert-16k-1'].backfill_time, job_done_1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment