Commit d88268d2 authored by Michael Salim

command line interface

parent 65fe3f11
@@ -10,6 +10,14 @@ from cli_commands import newapp,newjob,newdep,ls,modify,rm,qsub
from cli_commands import kill,mkchild,launcher,service
from django.conf import settings
def main():
parser = make_parser()
args = parser.parse_args()
if len(sys.argv) == 1:
parser.print_help()
sys.exit(0)
args.func(args)
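# Each subcommand handler is bound via set_defaults(func=...) on its subparser,
# so dispatch reduces to args.func(args): e.g. "balsam ls" calls ls(args) and
# "balsam qsub ..." calls qsub(args).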
def make_parser():
parser = argparse.ArgumentParser(prog='balsam', description="Balsam command line interface")
@@ -22,51 +30,94 @@ def make_parser():
description="add a new application definition",
)
parser_app.set_defaults(func=newapp)
parser_app.add_argument('-n','--name',dest='name',
help='application name',required=True)
parser_app.add_argument('-d','--description',dest='description',
help='application description',required=True)
parser_app.add_argument('-e','--executable',dest='executable',
help='application executable with full path',required=True)
parser_app.add_argument('-r','--preprocess',dest='preprocess',
help='preprocessing script with full path', default='')
parser_app.add_argument('-o','--postprocess',dest='postprocess',
help='postprocessing script with full path', default='')
parser_app.add_argument('--name', required=True)
parser_app.add_argument('--description', nargs='+', required=True)
parser_app.add_argument('--executable', help='full path to executable',
required=True)
parser_app.add_argument('--preprocess', default='',
help='preprocessing script with full path')
parser_app.add_argument('--postprocess', default='',
help='postprocessing script with full path')
parser_app.add_argument('--env', action='append', default=[],
help="Environment variables specific "
"to this app; specify multiple envs like "
"'--env VAR1=VAL1 --env VAR2=VAL2'. ")
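    # Illustrative invocation (paths, names, and values here are hypothetical):
    #   balsam app --name my_app --description "my solver" \
    #       --executable /path/to/solver.exe --env OMP_NUM_THREADS=4
    # Repeated --env values are colon-joined by newapp() into
    # ApplicationDefinition.environ_vars, e.g. "A=1:B=2".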
# Add job
# -------
BALSAM_SITE = settings.BALSAM_SITE
parser_job = subparsers.add_parser('job',
help="add a new Balsam job",
description="add a new Balsam job",
)
parser_job.set_defaults(func=newjob)
parser_job.add_argument('-e','--name',dest='name',type=str,
help='job name',required=True)
parser_job.add_argument('-d','--description',dest='description',type=str,
help='job description',required=False,default='')
parser_job.add_argument('-t','--wall-minutes',dest='wall_time_minutes',type=int,
help='estimated job walltime in minutes',required=True)
parser_job.add_argument('-n','--num-nodes',dest='num_nodes',type=int,
help='number of nodes to use',required=True)
parser_job.add_argument('-p','--processes-per-node',dest='processes_per_node',type=int,
help='number of processes to run on each node',required=True)
parser_job.add_argument('-m','--threads-per-rank',dest='threads_per_rank',type=int,
default=1)
parser_job.add_argument('-j','--threads-per-core',dest='threads_per_core',type=int,
default=1)
parser_job.add_argument('-a','--application',dest='application',type=str,
help='Name of the application to use; must exist in ApplicationDefinition DB',
required=True)
parser_job.add_argument('-i','--input-url',dest='input_url',type=str,
help='Input URL from which input files are copied.',required=False,default='')
parser_job.add_argument('-o','--output-url',dest='output_url',type=str,
help='Output URL to which output files are copied.',required=False,default='')
parser_job.add_argument('-y',dest='yes',
help='Skip prompt confirming job details.',required=False,action='store_true')
parser_job.add_argument('--name', required=True)
parser_job.add_argument('--workflow', required=True,
help="A workflow name for grouping related jobs")
parser_job.add_argument('--application', help='Name of the '
'application to use; must exist in '
'ApplicationDefinition DB', required=True)
parser_job.add_argument('--wall-minutes', type=int, required=True)
parser_job.add_argument('--num-nodes',
type=int, required=True)
parser_job.add_argument('--processes-per-node',
type=int, required=True)
parser_job.add_argument('--allowed-site', action='append',
required=False, default=[BALSAM_SITE],
help="Balsam instances where this job can run; "
"defaults to the local Balsam instance")
parser_job.add_argument('--description', required=False, nargs='*',
default=[])
parser_job.add_argument('--threads-per-rank',type=int, default=1,
help="Equivalent to -d option in aprun")
parser_job.add_argument('--threads-per-core',type=int, default=1,
help="Equivalent to -j option in aprun")
parser_job.add_argument('--args', nargs='*', required=False, default=[],
help="Command-line args to the application")
parser_job.add_argument('--preprocessor', required=False, default='',
help="Override application-defined preprocess")
parser_job.add_argument('--postprocessor', required=False, default='',
help="Override application-defined postprocess")
parser_job.add_argument('--post_handle_error', action='store_true',
help="Flag enables job runtime error handling by "
"postprocess script")
parser_job.add_argument('--post_handle_timeout', action='store_true',
help="Flag enables job timeout handling by "
"postprocess script")
parser_job.add_argument('--disable_auto_timeout_retry', action='store_true',
help="Flag disables automatic job retry if it has "
"timed out in a previous run")
parser_job.add_argument('--input-files', nargs='*', required=False,
default=['*'], help="Dataflow: filename patterns "
"that will be searched for in the parent job "
"working directories and retrieved for input. "
"[Ex: '*.log gopt.dat geom???.xyz' ]")
parser_job.add_argument('--url-in', required=False,default='',
help='Input URL from which remote input files are copied.')
parser_job.add_argument('--url-out',required=False,default='',
help='Output URL to which output files are copied.')
parser_job.add_argument('--stage-out-files', nargs='*', required=False,default=[],
help="Filename patterns; matches will be "
"transferred to the destination specified "
"by --url-out option")
parser_job.add_argument('--env', action='append', required=False,
default=[], help="Environment variables specific "
"to this job; specify multiple envs like "
"'--env VAR1=VAL1 --env VAR2=VAL2'. "
"Application-specific variables can instead be "
"given in the ApplicationDefinition to avoid "
"repetition here.")
parser_job.add_argument('--yes', help='Skip prompt confirming job details.',
required=False,action='store_true')
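    # Illustrative invocation covering the required arguments (all names here
    # are hypothetical):
    #   balsam job --name my_job --workflow test_wf --application my_app \
    #       --wall-minutes 30 --num-nodes 2 --processes-per-node 16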
# Add dep
parser_dep = subparsers.add_parser('dep',
@@ -80,8 +131,14 @@ def make_parser():
# ls
parser_ls = subparsers.add_parser('ls', help="list jobs, applications, or jobs-by-workflow")
parser_ls.set_defaults(func=ls)
parser_ls.add_argument('object', choices=['jobs', 'apps', 'wf'],
help="list all jobs, all apps, or jobs by workflow")
parser_ls.add_argument('objects', choices=['jobs', 'apps', 'wf'], default='jobs',
nargs='?', help="list all jobs, all apps, or jobs by workflow")
parser_ls.add_argument('--name', help="match any substring of job name")
parser_ls.add_argument('--history', help="show state history", action='store_true')
parser_ls.add_argument('--id', help="match any substring of job id")
parser_ls.add_argument('--wf', help="Filter jobs matching a workflow")
parser_ls.add_argument('--verbose', action='store_true')
parser_ls.add_argument('--tree', action='store_true', help="show DAG in tree format")
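    # Typical queries (workflow name is hypothetical):
    #   balsam ls                      # defaults to listing jobs
    #   balsam ls apps --verbose
    #   balsam ls jobs --wf my_workflow --tree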
# modify
parser_modify = subparsers.add_parser('modify', help="alter job or application")
@@ -90,18 +147,100 @@ def make_parser():
# rm
parser_rm = subparsers.add_parser('rm', help="remove jobs or applications from the database")
parser_rm.set_defaults(func=rm)
parser_rm.add_argument('objects', choices=['jobs', 'apps'], default='jobs',
nargs='?', help="permanently delete jobs or apps from DB")
parser_rm.add_argument('--name', help="match any substring of job name")
parser_rm.add_argument('--id', help="match any substring of job id")
parser_rm.add_argument('--recursive', action='store_true', help="delete all jobs in subtree")
parser_rm.add_argument('--force', action='store_true', help="force removal without confirmation")
# qsub
parser_qsub = subparsers.add_parser('qsub', help="add a one-line job to the database, bypassing Application table")
parser_qsub.set_defaults(func=qsub)
parser_qsub.add_argument('command', nargs='+')
parser_qsub.add_argument('-n', '--nodes', type=int, default=1)
parser_qsub.add_argument('-N', '--ppn', type=int, default=1)
parser_qsub.add_argument('--name', default='')
parser_qsub.add_argument('-t', '--wall-minutes', type=int, required=True)
parser_qsub.add_argument('-d', '--threads-per-rank',type=int, default=1)
parser_qsub.add_argument('-j', '--threads-per-core',type=int, default=1)
parser_qsub.add_argument('--env', action='append', required=False, default=[])
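    # qsub bypasses the ApplicationDefinition table entirely; e.g. (illustrative):
    #   balsam qsub -n 2 -N 8 -t 30 echo hello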
# kill
parser_kill = subparsers.add_parser('killjob', help="Kill a job without removing it from the DB")
parser_kill.set_defaults(func=kill)
parser_kill.add_argument('--id', required=True)
parser_kill.add_argument('--recursive', action='store_true')
# makechild
parser_mkchild = subparsers.add_parser('mkchild', help="Create a child job of a specified job")
parser_mkchild.set_defaults(func=mkchild)
parser_mkchild.add_argument('--name', required=True)
parser_mkchild.add_argument('--workflow', required=True,
help="A workflow name for grouping related jobs")
parser_mkchild.add_argument('--application', help='Name of the '
'application to use; must exist in '
'ApplicationDefinition DB', required=True)
parser_mkchild.add_argument('--wall-minutes', type=int, required=True)
parser_mkchild.add_argument('--num-nodes',
type=int, required=True)
parser_mkchild.add_argument('--processes-per-node',
type=int, required=True)
parser_mkchild.add_argument('--allowed-site', action='append',
required=False, default=[BALSAM_SITE],
help="Balsam instances where this job can run; "
"defaults to the local Balsam instance")
parser_mkchild.add_argument('--description', required=False, nargs='*',
default=[])
parser_mkchild.add_argument('--threads-per-rank',type=int, default=1,
help="Equivalent to -d option in aprun")
parser_mkchild.add_argument('--threads-per-core',type=int, default=1,
help="Equivalent to -j option in aprun")
parser_mkchild.add_argument('--args', nargs='*', required=False, default=[],
help="Command-line args to the application")
parser_mkchild.add_argument('--preprocessor', required=False, default='',
help="Override application-defined preprocess")
parser_mkchild.add_argument('--postprocessor', required=False, default='',
help="Override application-defined postprocess")
parser_mkchild.add_argument('--post_handle_error', action='store_true',
help="Flag enables job runtime error handling by "
"postprocess script")
parser_mkchild.add_argument('--post_handle_timeout', action='store_true',
help="Flag enables job timeout handling by "
"postprocess script")
parser_mkchild.add_argument('--disable_auto_timeout_retry', action='store_true',
help="Flag disables automatic job retry if it has "
"timed out in a previous run")
parser_mkchild.add_argument('--input-files', nargs='*', required=False,
default=['*'], help="Dataflow: filename patterns "
"that will be searched for in the parent job "
"working directories and retrieved for input. "
"[Ex: '*.log gopt.dat geom???.xyz' ]")
parser_mkchild.add_argument('--url-in', required=False,default='',
help='Input URL from which remote input files are copied.')
parser_mkchild.add_argument('--url-out',required=False,default='',
help='Output URL to which output files are copied.')
parser_mkchild.add_argument('--stage-out-files', nargs='*', required=False,default=[],
help="Filename patterns; matches will be "
"transferred to the destination specified "
"by --url-out option")
parser_mkchild.add_argument('--env', action='append', required=False,
default=[], help="Environment variables specific "
"to this job; specify multiple envs like "
"'--env VAR1=VAL1 --env VAR2=VAL2'. "
"Application-specific variables can instead be "
"given in the ApplicationDefinition to avoid "
"repetition here.")
parser_mkchild.add_argument('--yes', help='Skip prompt confirming job details.',
required=False,action='store_true')
# launcher
parser_launcher = subparsers.add_parser('launcher', help="Start an instance of the balsam launcher")
@@ -111,14 +250,8 @@ def make_parser():
parser_service = subparsers.add_parser('service',
help="Start an instance of the balsam metascheduler service")
parser_service.set_defaults(func=service)
return parser
if __name__ == "__main__":
parser = make_parser()
args = parser.parse_args()
if len(sys.argv) == 1:
parser.print_help()
sys.exit(0)
print(args)
args.func(args)
main()
# These statements must come before any other imports
#import django
import os
#os.environ['DJANGO_SETTINGS_MODULE'] = 'argobalsam.settings'
#django.setup()
# --------------------
from django.conf import settings
import balsam.models
from balsam import dag
import ls_commands as lscmd
BalsamJob = balsam.models.BalsamJob
Job = balsam.models.BalsamJob
AppDef = balsam.models.ApplicationDefinition
def cmd_confirmation(message=''):
confirm = ''
while not confirm.lower() in ['y', 'n']:
try:
confirm = input(f"{message} [y/n]: ")
except: pass
return confirm.lower() == 'y'
def newapp(args):
pass
if AppDef.objects.filter(name=args.name).exists():
raise RuntimeError(f"An application named {args.name} exists")
if not os.path.exists(args.executable):
raise RuntimeError(f"Executable {args.executable} not found")
if args.preprocess and not os.path.exists(args.preprocess):
raise RuntimeError(f"Script {args.preprocess} not found")
if args.postprocess and not os.path.exists(args.postprocess):
raise RuntimeError(f"Script {args.postprocess} not found")
app = AppDef()
app.name = args.name
app.description = ' '.join(args.description)
app.executable = args.executable
app.default_preprocess = args.preprocess
app.default_postprocess = args.postprocess
app.environ_vars = ":".join(args.env)
app.save()
print(app)
print("Added app to database")
def newjob(args):
pass
if not AppDef.objects.filter(name=args.application).exists():
raise RuntimeError(f"App {args.application} not registered in local DB")
job = Job()
job.name = args.name
job.description = ' '.join(args.description)
job.workflow = args.workflow
job.allowed_work_sites = ' '.join(args.allowed_site)
job.wall_time_minutes = args.wall_minutes
job.num_nodes = args.num_nodes
job.processes_per_node = args.processes_per_node
job.threads_per_rank = args.threads_per_rank
job.threads_per_core = args.threads_per_core
job.application = args.application
job.application_args = ' '.join(args.args)
job.preprocess = args.preprocessor
job.postprocess = args.postprocessor
job.post_error_handler = args.post_handle_error
job.post_timeout_handler = args.post_handle_timeout
job.auto_timeout_retry = not args.disable_auto_timeout_retry
job.input_files = ' '.join(args.input_files)
job.stage_in_url = args.url_in
job.stage_out_url = args.url_out
job.stage_out_files = ' '.join(args.stage_out_files)
job.environ_vars = ":".join(args.env)
print(job)
if not args.yes:
if not cmd_confirmation('Confirm adding job to DB'):
print("Add job aborted")
return
    job.save()
    print("Added job to database")
    return job
def match_uniq_job(s):
job = Job.objects.filter(job_id__icontains=s)
if job.count() > 1:
raise ValueError(f"More than one ID matched {s}")
elif job.count() == 1: return job
job = Job.objects.filter(name__contains=s)
if job.count() > 1: job = Job.objects.filter(name=s)
if job.count() > 1:
raise ValueError(f"More than one Job name matches {s}")
elif job.count() == 1: return job
raise ValueError(f"No job in local DB matched {s}")
def newdep(args):
pass
parent = match_uniq_job(args.parent)
child = match_uniq_job(args.child)
dag.add_dependency(parent, child)
print(f"Created link [{str(parent.first().job_id)[:8]}] --> "
f"[{str(child.first().job_id)[:8]}]")
def ls(args):
pass
objects = args.objects
name = args.name
history = args.history
verbose = args.verbose
id = args.id
tree = args.tree
wf = args.wf
if objects.startswith('job'):
lscmd.ls_jobs(name, history, id, verbose, tree, wf)
elif objects.startswith('app'):
lscmd.ls_apps(name, id, verbose)
elif objects.startswith('work') or objects.startswith('wf'):
lscmd.ls_wf(name, verbose, tree, wf)
def modify(args):
pass
@@ -28,13 +121,59 @@ def rm(args):
pass
def qsub(args):
pass
job = Job()
job.name = args.name
job.description = 'Added by balsam qsub'
job.workflow = 'qsub'
job.allowed_work_sites = settings.BALSAM_SITE
job.wall_time_minutes = args.wall_minutes
job.num_nodes = args.nodes
job.processes_per_node = args.ppn
job.threads_per_rank = args.threads_per_rank
job.threads_per_core = args.threads_per_core
job.environ_vars = ":".join(args.env)
job.application = ''
job.application_args = ''
job.preprocess = ''
job.postprocess = ''
job.post_error_handler = False
job.post_timeout_handler = False
job.auto_timeout_retry = False
job.input_files = ''
job.stage_in_url = ''
job.stage_out_url = ''
job.stage_out_files = ''
job.direct_command = ' '.join(args.command)
print(job)
job.save()
print("Added to database")
def kill(args):
pass
job_id = args.id
job = Job.objects.filter(job_id__startswith=job_id)
if job.count() > 1:
raise RuntimeError(f"More than one job matches {job_id}")
    if job.count() == 0:
        print(f"No jobs match the given ID {job_id}")
        return
    job = job.first()
if cmd_confirmation(f'Really kill job {job.name} [{str(job.pk)}] ??'):
dag.kill(job, recursive=args.recursive)
print("Job killed")
def mkchild(args):
pass
if not dag.current_job:
raise RuntimeError(f"mkchild requires that BALSAM_JOB_ID is in the environment")
child_job = newjob(args)
dag.add_dependency(dag.current_job, child_job)
print(f"Created link [{str(dag.current_job.job_id)[:8]}] --> "
f"[{str(child_job.job_id)[:8]}]")
def launcher(args):
pass
......
import balsam.models
Job = balsam.models.BalsamJob
AppDef = balsam.models.ApplicationDefinition
def print_history(jobs):
for job in jobs:
print(f'Job {job.name} [{job.job_id}]')
print(f'------------------------------------------------')
print(f'{job.state_history}\n')
def print_jobs(jobs, verbose):
if not verbose:
header = Job.get_header()
print(header)
print('-'*len(header))
for job in jobs:
print(job.get_line_string())
else:
for job in jobs:
print(job)
def print_subtree(job, indent=1):
def job_str(job): return f"{job.name:10} [{str(job.job_id)[:8]}]"
print('|'*indent, end=' ')
print(5*indent*' ', job_str(job))
for job in job.get_children():
print_subtree(job, indent+1)
def print_jobs_tree(jobs):
roots = [j for j in jobs if j.parents=='[]']
for job in roots: print_subtree(job)
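# print_subtree output looks roughly like this (names and ids are made up
# for illustration):
# |       parent_job [1a2b3c4d]
# ||           child_job  [5e6f7a8b]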
def ls_jobs(namestr, show_history, jobid, verbose, tree, wf):
results = Job.objects.all()
if namestr: results = results.filter(name__icontains=namestr)
if jobid: results = results.filter(job_id__icontains=jobid)
if wf: results = results.filter(workflow__icontains=wf)
if not results:
print("No jobs found matching query")
return
if show_history: print_history(results)
elif tree: print_jobs_tree(results)
else: print_jobs(results, verbose)
def ls_apps(namestr, appid, verbose):
if namestr:
results = AppDef.objects.filter(name__icontains=namestr)
elif appid:
results = AppDef.objects.filter(job_id__icontains=appid)
else:
results = AppDef.objects.all()
if not results:
print("No apps found matching query")
return
if verbose:
for app in results: print(app)
else:
header = AppDef.get_header()
print(header)
print('-'*len(header))
for app in results:
print(app.get_line_string())
def ls_wf(name, verbose, tree, wf):
workflows = Job.objects.order_by().values('workflow').distinct()
workflows = [w['workflow'] for w in workflows]
if wf: name = wf # wf argument overrides name
if name and name not in workflows:
print(f"No workflow matching {name}")
return
if name and name in workflows:
workflows = [name]
verbose = True
print("Workflows")
print("---------")
for wf in workflows:
print(wf)
if tree:
print('-'*len(wf))
jobs_by_wf = Job.objects.filter(workflow=wf)
print_jobs_tree(jobs_by_wf)
print()
elif verbose:
print('-'*len(wf))
jobs_by_wf = Job.objects.filter(workflow=wf)
print_jobs(jobs_by_wf, False)
print()
@@ -18,8 +18,8 @@ Example usage:
>>>
'''
import django as _django
import os as _os
import django
import os
import uuid
__all__ = ['JOB_ID', 'TIMEOUT', 'ERROR',
@@ -27,22 +27,17 @@ __all__ = ['JOB_ID', 'TIMEOUT', 'ERROR',
'add_job', 'add_dependency', 'spawn_child',
'kill']
_os.environ['DJANGO_SETTINGS_MODULE'] = 'argobalsam.settings'
_django.setup()
os.environ['DJANGO_SETTINGS_MODULE'] = 'argobalsam.settings'
django.setup()
from django.conf import settings
from balsam.models import BalsamJob as _BalsamJob
x = _BalsamJob()
assert isinstance(x, _BalsamJob)
_envs = {k:v for k,v in _os.environ.items() if k.find('BALSAM')>=0}
current_job = None
parents = None
children = None
_envs = {k:v for k,v in os.environ.items() if k.find('BALSAM')>=0}
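# A minimal usage sketch from a job-side script, assuming the BALSAM_* variables
# are present in the environment (the parent/child handles below are hypothetical):
#
#   from balsam import dag
#   if dag.current_job is not None:
#       dag.add_dependency(parent, child)    # link two existing BalsamJobs
#       dag.kill(child, recursive=True)      # kill a job and its subtree
#
# add_job() and spawn_child() are also exported (see __all__), but their
# signatures are not shown in this hunk.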