Commit 36b52c8a authored by Michael Salim
Browse files

fixed logging and some bugs. launcher no longer starts up the dbserver

parent 7fb9f45e
......@@ -6,6 +6,8 @@ import sys
import signal
import subprocess
os.environ['IS_SERVER_DAEMON']="True"
from balsam.django_config.settings import resolve_db_path
from serverinfo import ServerInfo
......@@ -39,9 +41,7 @@ def main(db_path):
server_type = serverinfo['db_type']
db_cmd = f"BALSAM_DB_PATH={db_path} " + DB_COMMANDS[server_type].format(**serverinfo.data)
print("Starting balsam DB server daemon for DB at {db_path}")
print(db_cmd)
sys.exit(0)
print(f"Starting balsam DB server daemon for DB at {db_path}")
proc = run(db_cmd)
......@@ -76,7 +76,6 @@ def main(db_path):
serverinfo.update({'address': None})
if __name__ == "__main__":
os.environ['IS_SERVER_DAEMON']="True"
input_path = sys.argv[1] if len(sys.argv) == 2 else None
db_path = resolve_db_path(input_path)
main(db_path)
......@@ -12,12 +12,9 @@ https://docs.djangoproject.com/en/1.9/ref/settings/
import os
import sys
import logging
from balsam.django_config import serverinfo, sqlite_client
from balsam.user_settings import *
logger = logging.getLogger(__name__)
# ---------------
# DATABASE SETUP
# ---------------
......@@ -62,16 +59,14 @@ DATABASES = configure_db_backend(BALSAM_PATH)
# SQLITE CLIENT SETUP
# ------------------------
is_server = os.environ.get('IS_BALSAM_SERVER')=='True'
is_daemon = os.environ.get('IS_SERVER_DAEMON')=='True'
using_sqlite = DATABASES['default']['ENGINE'].endswith('sqlite3')
SAVE_CLIENT = None
if using_sqlite and not is_server:
if using_sqlite and not (is_server or is_daemon):
SAVE_CLIENT = sqlite_client.Client(serverinfo.ServerInfo(BALSAM_PATH))
if SAVE_CLIENT.serverAddr is None:
logger.debug("SQLite client: writing straight to disk")
SAVE_CLIENT = None
else:
logger.debug(f"SQL client: save() via {client.serverAddr}")
# --------------------
# SUBDIRECTORY SETUP
......@@ -118,11 +113,19 @@ LOGGING = {
'maxBytes': LOG_FILE_SIZE_LIMIT,
'backupCount': LOG_BACKUP_COUNT,
'formatter': 'standard',
}
},
'django': {
'level': LOG_HANDLER_LEVEL,
'class':'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOGGING_DIRECTORY, 'django.log'),
'maxBytes': LOG_FILE_SIZE_LIMIT,
'backupCount': LOG_BACKUP_COUNT,
'formatter': 'standard',
},
},
'loggers': {
'django':{
'handlers': ['default'],
'django': {
'handlers': ['django'],
'level': 'DEBUG',
'propagate': True,
},
......@@ -134,6 +137,8 @@ LOGGING = {
}
}
import logging
logger = logging.getLogger(__name__)
def log_uncaught_exceptions(exctype, value, tb,logger=logger):
logger.error(f"Uncaught Exception {exctype}: {value}",exc_info=(exctype,value,tb))
logger = logging.getLogger('console')
......
......@@ -10,16 +10,17 @@ from django.db.utils import OperationalError
REQ_TIMEOUT = 10000 # 10 seconds
REQ_RETRY = 3
logger = logging.getLogger(__name__)
class Client:
def __init__(self, server_info):
self.logger = logging.getLogger(__name__)
self.server_info = server_info
self.serverAddr = self.server_info.get('address')
if self.serverAddr:
response = self.send_request('TEST_ALIVE', timeout=3000)
self.logger.debug(f"trying to reach DB write server at {self.serverAddr}")
response = self.send_request('TEST_ALIVE', timeout=3)
if response != 'ACK':
logger.exception(f"sqlite client cannot reach DB write server")
self.logger.exception(f"sqlite client cannot reach DB write server")
raise RuntimeError("Cannot reach DB write server")
def send_request(self, msg, timeout=None):
......@@ -42,7 +43,7 @@ class Client:
context.term()
return reply.decode('utf-8')
else:
logger.debug("No response from server, retrying...")
self.logger.debug("No response from server, retrying...")
client.setsockopt(zmq.LINGER, 0)
client.close()
poll.unregister(client)
......@@ -58,6 +59,6 @@ class Client:
force_update=force_update, using=using,
update_fields=update_fields)
logger.info(f"client: sending request for save of {job.cute_id}")
self.logger.info(f"client: sending request for save of {job.cute_id}")
response = self.send_request(serial_data)
assert response == 'ACK_SAVE'
......@@ -5,34 +5,33 @@ import os
import logging
import time
import zmq
from socket import gethostname
import signal
from django.conf import settings
from django.db.utils import OperationalError
os.environ['IS_BALSAM_SERVER']="True"
os.environ['IS_SERVER_DAEMON']="False"
os.environ['DJANGO_SETTINGS_MODULE'] = 'balsam.django_config.settings'
import django
django.setup()
from balsam.service.models import BalsamJob
from balsam.django_config import serverinfo
logger = logging.getLogger(__name__)
logger = logging.getLogger('balsam.django_config.sqlite_server')
logger.info("HERE IS SERVER!")
SERVER_PERIOD = 1000
TERM_LINGER = 20 # if SIGTERM, wait 20 sec after final save() to exit
os.environ['IS_BALSAM_SERVER']="True"
os.environ['IS_SERVER_DAEMON']="False"
os.environ['DJANGO_SETTINGS_MODULE'] = 'balsam.django_config.settings'
class ZMQServer:
def __init__(self, db_path):
# connect to local sqlite DB thru ORM
import django
django.setup()
from balsam.service.models import BalsamJob
self.BalsamJob = BalsamJob
self.info = serverinfo.ServerInfo(db_path)
self.address = self.info['address']
port = int(self.address.split(':'][2])
port = int(self.address.split(':')[2])
self.context = zmq.Context(1)
self.socket = self.context.socket(zmq.REP)
......
......@@ -38,7 +38,6 @@ from balsam.launcher import transitions
from balsam.launcher import worker
from balsam.launcher import runners
from balsam.launcher.exceptions import *
from balsam.service import db_writer
ALMOST_RUNNABLE_STATES = ['READY','STAGED_IN']
RUNNABLE_STATES = ['PREPROCESSED', 'RESTART_READY']
......@@ -201,7 +200,7 @@ def main(args, transition_pool, runner_group, job_source):
logger.info("No jobs to process. Exiting main loop now.")
break
def on_exit(runner_group, transition_pool, job_source, writer_proc):
def on_exit(runner_group, transition_pool, job_source):
'''Exit cleanup'''
global HANDLING_EXIT
if HANDLING_EXIT: return
......@@ -214,8 +213,6 @@ def on_exit(runner_group, transition_pool, job_source, writer_proc):
logger.debug("on_exit: send end message to transition threads")
transition_pool.end_and_wait()
client = db_writer.ZMQClient()
client.term_server()
logger.debug("on_exit: Launcher exit graceful\n\n")
sys.exit(0)
......@@ -249,22 +246,10 @@ def detect_dead_runners(job_source):
logger.info(f'Picked up dead running job {job.cute_id}: marking RESTART_READY')
job.update_state('RESTART_READY', 'Detected dead runner')
def launch_db_writer_process():
import multiprocessing
INSTALL_PATH = settings.INSTALL_PATH
path = os.path.join(INSTALL_PATH, db_writer.SOCKFILE_NAME)
if os.path.exists(path):
os.remove(path)
writer_proc = multiprocessing.Process(target=db_writer.server_main)
writer_proc.daemon = True
writer_proc.start()
return writer_proc
if __name__ == "__main__":
args = get_args()
writer_proc = launch_db_writer_process()
job_source = jobreader.JobReader.from_config(args)
job_source.refresh_from_db()
transition_pool = transitions.TransitionProcessPool()
......@@ -275,10 +260,10 @@ if __name__ == "__main__":
detect_dead_runners(job_source)
handl = lambda a,b: on_exit(runner_group, transition_pool, job_source, writer_proc)
handl = lambda a,b: on_exit(runner_group, transition_pool, job_source)
signal.signal(signal.SIGINT, handl)
signal.signal(signal.SIGTERM, handl)
signal.signal(signal.SIGHUP, handl)
main(args, transition_pool, runner_group, job_source)
on_exit(runner_group, transition_pool, job_source, writer_proc)
on_exit(runner_group, transition_pool, job_source)
......@@ -48,15 +48,12 @@ logger = logging.getLogger('balsam.launcher.transitions')
class DummyLock:
    """No-op stand-in for ``multiprocessing.Lock``.

    Assigned to ``LockClass`` when real inter-process locking is not wanted,
    so calling code can acquire/release unconditionally. This object never
    blocks and provides no mutual exclusion.
    """

    def acquire(self, block=True, timeout=None):
        """Pretend to take the lock; returns immediately.

        Args:
            block: Ignored; accepted for call compatibility with
                ``multiprocessing.Lock.acquire``.
            timeout: Ignored; accepted for the same reason.

        Returns:
            True, always, mirroring a successful acquire on a real lock.
        """
        return True

    def release(self):
        """Do nothing; present so callers can pair it with ``acquire``."""

    def __enter__(self):
        # A real Lock hands back the result of acquire() from __enter__.
        return self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None implicitly, so exceptions in the with-body propagate.
        self.release()
if sys.platform.startswith('darwin'):
LockClass = multiprocessing.Lock
elif sys.platform.startswith('win32'):
LockClass = multiprocessing.Lock
if settings.SAVE_CLIENT:
LockClass = DummyLock # With db_writer proxy; no need for lock!
else:
LockClass = DummyLock
#LockClass = multiprocessing.Lock
LockClass = multiprocessing.Lock
LockClass = DummyLock # With db_writer proxy; no need for lock!
logger.debug(f'Using lock: {LockClass}')
PREPROCESS_TIMEOUT_SECONDS = 300
......
......@@ -372,7 +372,6 @@ def dbserver(args):
pid = server_pids[0]
print(f"Stopping db_daemon {pid}")
os.kill(pid, signal.SIGUSR1)
return
else:
path = args.path
if path: cmd = [sys.executable, fname, path]
......
......@@ -12,7 +12,7 @@ from django.db import models
from concurrency.fields import IntegerVersionField
from concurrency.exceptions import RecordModifiedError
logger = logging.getLogger('balsam.service')
logger = logging.getLogger('balsam.service.models')
class InvalidStateError(ValidationError):
    """ValidationError subclass; presumably signals an invalid job state (confirm at raise sites)."""
class InvalidParentsError(ValidationError):
    """ValidationError subclass; presumably signals invalid job parents (confirm at raise sites)."""
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment