Commit 3d51232a authored by Paul Rich
Browse files

Merge branch 'develop' into 90-fix-startup-race-condition

Conflicts:
	src/lib/Components/system/base_pg_manager.py
parents 9ddb7ec8 7fbe0a4b
......@@ -177,32 +177,33 @@ class ProcessGroupManager(object): #degenerate with ProcessMonitor.
#clean up orphaned process groups
for pg in self.process_groups.values():
if now < pg.startup_timeout:
#wait for startup timeout. We don't want any hasty kills
continue
pg_id = pg.id
child_uid = (pg.forker, pg.head_pid)
if child_uid not in children:
if pg.mode == 'interactive':
#interactive job, there is no child job
if pg.interactive_complete:
completed_pgs.append(pg)
#not really orphaned, but this causes the proper cleanup
#to occur
orphaned.append(pg_id)
if pg.forker in completed:
if now < pg.startup_timeout:
#wait for startup timeout. We don't want any hasty kills
continue
orphaned.append(pg_id)
_logger.warning('%s: orphaned job exited with unknown status', pg.jobid)
pg.exit_status = 1234567
completed_pgs.append(pg)
else:
children[child_uid]['found'] = True
pg.update_data(children[child_uid])
if pg.exit_status is not None:
_logger.info('%s: job exited with status %s', pg.jobid,
pg.exit_status)
completed[pg.forker].append(children[child_uid]['id'])
pg_id = pg.id
child_uid = (pg.forker, pg.head_pid)
if child_uid not in children:
if pg.mode == 'interactive':
#interactive job, there is no child job
if pg.interactive_complete:
completed_pgs.append(pg)
#not really orphaned, but this causes the proper cleanup
#to occur
orphaned.append(pg_id)
continue
orphaned.append(pg_id)
_logger.warning('%s: orphaned job exited with unknown status', pg.jobid)
pg.exit_status = 1234567
completed_pgs.append(pg)
else:
children[child_uid]['found'] = True
pg.update_data(children[child_uid])
if pg.exit_status is not None:
_logger.info('%s: job exited with status %s', pg.jobid,
pg.exit_status)
completed[pg.forker].append(children[child_uid]['id'])
completed_pgs.append(pg)
#check for children without process groups and clean
for forker, child_id in children.keys():
if not children[(forker, child_id)].has_key('found'):
......
......@@ -70,7 +70,7 @@ class RetryMethod(_Method):
return retval
except xmlrpclib.ProtocolError, err:
log.error("Server failure: Protocol Error: %s %s" % \
(err.errcode, err.errmsg))
(err.errcode, err.errmsg), exc_info=1)
raise xmlrpclib.Fault(20, "Server Failure")
except xmlrpclib.Fault as fault:
raise
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment