Commit a10ad1ce authored by Michael Salim
Browse files

enable batch job creation for setting up benchmark test cases

parent c2fed029
......@@ -36,7 +36,8 @@ def create_job(*, name='', app='', direct_command='', site=settings.BALSAM_SITE,
ranks_per_node=1, threads_per_rank=1, threads_per_core=1, args='', workflow='',
envs={}, state='CREATED', url_in='', input_files='', url_out='', stage_out_files='',
post_error_handler=False, post_timeout_handler=False,
auto_timeout_retry=True, preproc='', postproc='', wtime=1):
auto_timeout_retry=True, preproc='', postproc='', wtime=1,
save=True):
if app and direct_command:
raise ValueError("Cannot have both application and direct command")
......@@ -71,8 +72,9 @@ def create_job(*, name='', app='', direct_command='', site=settings.BALSAM_SITE,
job.postprocess = postproc
job.wall_time_minutes = wtime
job.save()
job.create_working_path()
if save:
job.save()
job.create_working_path()
return job
def create_app(*, name='', description='', executable='', preproc='',
......
import os
import sys
import time
import subprocess
from importlib.util import find_spec
from balsam.service.models import BalsamJob
......
import itertools
import os
import sys
from socket import gethostname
import time
import subprocess
from importlib.util import find_spec
from balsam.service.models import BalsamJob
......@@ -16,20 +12,29 @@ from tests import util
class TestNoOp(BalsamTestCase):
def setUp(self):
self.max_nodes = int(os.environ.get('COBALT_JOBSIZE', 1))
num_nodes = [2**n for n in range(1,13) if 2**n <= self.max_nodes]
rpn = [16, 32]
jpn = [16, 64, 128, 256, 512, 1024]
from itertools import takewhile, product
self.launcherInfo = util.launcher_info()
max_workers = self.launcherInfo.num_workers
num_nodes = [2**n for n in range(1,13) if 2**n <= self.max_workers]
if num_nodes[-1] != max_workers:
num_nodes.append(max_workers)
rpn = [64]
jpn = [64, 256, 1024]
self.experiments = itertools.product(num_nodes, rpn, jpn)
def serial_expt(self, num_nodes, rpn, jpn):
BalsamJob.objects.all().delete()
num_jobs = num_nodes * jpn
for i in range(num_jobs):
job = create_job(name=f'task{i}', direct_command=f'echo Hello',
args=str(i), workflow='bench-no-op')
jobs = [create_job(name=f'task{i}', direct_command=f'echo Hello',
args=str(i), workflow='bench-no-op', save=False)
for i in range(num_jobs)]
BalsamJob.objects.bulk_create(jobs)
self.assertEqual(BalsamJob.objects.count(), num_jobs)
def test_no_op(self):
def test_serial(self):
for (num_nodes, rpn, jpn) in self.experiments:
self.serial_expt(num_nodes, rpn, jpn)
#!/bin/bash -x
#COBALT -A datascience
#COBALT -n 8
#COBALT -q debug-cache-quad
#COBALT -t 15
#COBALT -M msalim@anl.gov

# Cobalt job script that times repeated `balsam make_dummies` invocations
# against the Balsam database located at ~/testdb.
# NOTE(review): what `make_dummies N` creates is defined in the balsam CLI,
# not in this script -- confirm there before interpreting the timings.

source ~/.bash_profile
source activate balsam

# Start from clean logs. -f keeps this quiet when no *.log files exist yet
# (plain `rm` would report an error for the unmatched glob).
rm -f ~/testdb/log/*.log
export BALSAM_DB_PATH=~/testdb # postgres DB server must be active here!

# Small untimed run first: 8 processes in total, one per node.
aprun -n 8 -N 1 balsam make_dummies 1

# Three identical timed trials: 100 processes, at most 13 per node.
for trial in 1 2 3; do
    time aprun -n 100 -N 13 balsam make_dummies 100
done
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment