Commit 53a5dccf authored by Michael Salim
Browse files

upped sqlite-client timeout for big runs on theta

parent 7d0f133a
...@@ -8,8 +8,8 @@ import zmq ...@@ -8,8 +8,8 @@ import zmq
from django.db.utils import OperationalError from django.db.utils import OperationalError
from concurrency.exceptions import RecordModifiedError from concurrency.exceptions import RecordModifiedError
REQ_TIMEOUT = 10000 # 10 seconds REQ_TIMEOUT = 30000 # 30 seconds
REQ_RETRY = 3 REQ_RETRY = 4
class Client: class Client:
...@@ -21,7 +21,7 @@ class Client: ...@@ -21,7 +21,7 @@ class Client:
self.first_message = True self.first_message = True
if self.serverAddr: if self.serverAddr:
try: try:
response = self.send_request('TEST_ALIVE', timeout=300) response = self.send_request('TEST_ALIVE', timeout=3000)
except: except:
raise RuntimeError("Cannot reach server at {self.serverAddr}") raise RuntimeError("Cannot reach server at {self.serverAddr}")
else: else:
......
...@@ -21,7 +21,7 @@ class TestInsertion(BalsamTestCase): ...@@ -21,7 +21,7 @@ class TestInsertion(BalsamTestCase):
max_workers = self.launcherInfo.num_workers max_workers = self.launcherInfo.num_workers
worker_counts = takewhile(lambda x: x<=max_workers, (2**i for i in range(20))) worker_counts = takewhile(lambda x: x<=max_workers, (2**i for i in range(20)))
ranks_per_node = [4, 8, 16, 32] ranks_per_node = [4, 8, 16, 32]
self.experiments = list(product(worker_counts, ranks_per_node)) self.experiments = list(reversed(list(product(worker_counts, ranks_per_node))))
def test_concurrent_mpi_insert(self): def test_concurrent_mpi_insert(self):
'''Timing: many MPI ranks simultaneously call dag.add_job''' '''Timing: many MPI ranks simultaneously call dag.add_job'''
......
# BENCHMARK: test_concurrent_mpi_insert # BENCHMARK: test_concurrent_mpi_insert
# Host: alcfwl138.alcf.anl.gov # Host: thetamom1
# COBALT_BLOCKNAME: 2810-2813,2816,3171,3178-3179,4253-4255,4318,4408-4409,4446,4579
# COBALT_PARTNAME: 2810-2813,2816,3171,3178-3179,4253-4255,4318,4408-4409,4446,4579
# COBALT_JOBID: 181696
# COBALT_PARTSIZE: 16
# COBALT_NODEFILE: /var/tmp/cobalt.181696
# COBALT_JOBSIZE: 16
# COBALT_BLOCKSIZE: 16
# Each rank simultaneously calls dag.add_job (num_ranks simultaneous insertions) # Each rank simultaneously calls dag.add_job (num_ranks simultaneous insertions)
# measure total time for entire aprun (including all aprun/python overheads)
# db_writer is running on thetalogin6, aprun from thetamom1
# num_nodes ranks_per_node num_ranks total_time_sec # num_nodes ranks_per_node num_ranks total_time_sec
# -------------------------------------------------------------- # --------------------------------------------------------------
1 4 4 1.700 16 32 512 62.640
1 8 8 2.010 16 16 256 38.550
1 16 16 2.780 16 8 128 25.280
1 32 32 4.400 16 4 64 19.750
8 32 256 45.760
8 16 128 26.060
8 8 64 18.790
8 4 32 15.000
4 32 128 34.560
4 16 64 21.290
4 8 32 16.400
4 4 16 13.780
2 32 64 28.300
2 16 32 19.300
2 8 16 15.060
2 4 8 13.410
1 32 32 25.250
1 16 16 17.720
1 8 8 14.610
1 4 4 13.060
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment