Commit 70332624 authored by Michael Salim's avatar Michael Salim
Browse files

Merge remote-tracking branch 'origin/develop'

parents c3e0845e badd48f6
......@@ -2,8 +2,8 @@ import multiprocessing,logging
logger = logging.getLogger(__name__)
from django.db import utils,connections,DEFAULT_DB_ALIAS
from balsam import QueueMessage
from common import db_tools
from balsam.service import QueueMessage
from balsam.common import db_tools
class TransitionJob(multiprocessing.Process):
''' spawns subprocess which finds the DB entry for the given id
......
import logging
logger = logging.getLogger(__name__)
import asyncio
import zmq
import time
from zmq.asyncio import Context
class ZMQMessageInterface(MessageInterface.MessageInterface):
    """PUB/SUB message interface backed by ZeroMQ.

    The publisher binds tcp://127.0.0.1:5555 and subscribers connect to it.
    With the default empty routing key (b'') every published message is
    delivered to every subscriber.
    """

    def __init__(self, settings):
        # NOTE(review): `settings` is accepted but unused here — confirm the
        # MessageInterface base class consumes it, or drop it upstream.
        # NOTE(review): zmq.asyncio.install() is deprecated/removed in newer
        # pyzmq releases; Context() alone suffices there — check pinned version.
        zmq.asyncio.install()
        self.ctx = zmq.asyncio.Context()
        self.sock_sub = None
        self.sock_pub = None
        self.default_routing_key = b''
        self.host = 'tcp://127.0.0.1'
        self.port = 5555

    def setup_send(self):
        """Bind the PUB socket; brief sleep lets subscribers connect first."""
        self.sock_pub = self.ctx.socket(zmq.PUB)
        self.sock_pub.bind('%s:%d' % (self.host, self.port))
        time.sleep(1)

    def setup_receive(self, consume_msg=None, routing_key=None):
        """Connect a SUB socket and subscribe to `routing_key`.

        consume_msg: optional callback invoked per message by the receive loop.
        """
        # BUG FIX: the original only assigned self.routing_key when
        # routing_key was None, so passing an explicit key left
        # self.routing_key unset and subscribe() raised AttributeError.
        if routing_key is None:
            routing_key = self.default_routing_key
        self.routing_key = routing_key
        if consume_msg is not None:
            self.consume_msg = consume_msg
        self.sock_sub = self.ctx.socket(zmq.SUB)
        self.sock_sub.connect('%s:%d' % (self.host, self.port))
        self.sock_sub.subscribe(self.routing_key)
        time.sleep(1)

    def send_msg(self, message_body, routing_key=None):
        """Publish one message (str is utf-8 encoded first)."""
        if routing_key is None:
            routing_key = self.default_routing_key
        if isinstance(message_body, str):
            message_body = message_body.encode('utf-8')
        # NOTE(review): routing_key is resolved but never transmitted; with a
        # non-default key, subscribers filtering on it would see nothing.
        # Only the default b'' key works end-to-end — confirm intent.
        self.sock_pub.send(message_body)

    def receive_msg(self):
        """Blocking single-message receive; returns the decoded body."""
        # NOTE(review): on a zmq.asyncio socket recv_multipart() returns an
        # awaitable unless awaited in a running loop — this synchronous call
        # path looks broken; verify against actual usage.
        msg = self.sock_sub.recv_multipart()
        body = ''.join(s.decode('utf-8') for s in msg)
        return body

    def start_receive_loop(self):
        """Run the async receive loop until externally interrupted."""
        loop = asyncio.get_event_loop()
        loop.run_until_complete(self._recv_loop())
        loop.close()

    async def _recv_loop(self):
        # Deliver each decoded message to the registered consume_msg callback.
        while True:
            msg = await self.sock_sub.recv_multipart()
            body = ''.join(s.decode('utf-8') for s in msg)
            self.consume_msg(body)

    def stop_receive_loop(self):
        # No cooperative cancellation implemented; loop exits with the process.
        pass

    def close(self):
        """Close and clear both sockets; safe to call more than once."""
        if self.sock_sub is not None:
            self.sock_sub.close()
            self.sock_sub = None
        if self.sock_pub is not None:
            self.sock_pub.close()
            self.sock_pub = None
......@@ -103,17 +103,18 @@ class LocalHandler:
parts = urlparse.urlparse( source_url )
command = 'cp -p -r /%s%s* %s' % (parts.netloc,parts.path,destination_directory)
logger.debug('transfer.stage_in: command=' + command )
p = subprocess.Popen(command,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
p = subprocess.Popen(command,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,
shell=True)
stdout,stderr = p.communicate()
if p.returncode != 0:
raise Exception("Error in stage_in: %d output:\n" % (p.returncode,stdout))
def stage_out( self, source_directory, destination_url ):
    """Copy job outputs from source_directory to a local destination URL.

    Raises Exception with the cp return code and captured output on failure.
    """
    parts = urlparse.urlparse( destination_url )
    # SECURITY NOTE: shell=True with interpolated paths; paths containing
    # shell metacharacters would be interpreted by the shell.
    command = 'cp -r %s/* /%s/%s' % (source_directory,parts.netloc,parts.path)
    logger.debug( 'transfer.stage_out: command=' + command )
    # shell=True is required: the command relies on '*' glob expansion
    p = subprocess.Popen(command,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,
                         shell=True)
    stdout,stderr = p.communicate()
    if p.returncode != 0:
        # BUG FIX: original formatted two args into a single %d
        # ("%d output:\n" % (returncode, stdout)), raising TypeError
        # instead of the intended error message.
        raise Exception("Error in stage_out: %d output:\n%s" % (p.returncode, stdout))
......@@ -127,7 +128,7 @@ class SCPHandler:
def stage_in( self, source_url, destination_directory ):
    """Fetch remote inputs into destination_directory via scp.

    Raises Exception with the nonzero os.system status on failure.
    """
    parts = urlparse.urlparse( source_url )
    # BUG FIX: original had three %s placeholders but only two arguments
    # (TypeError), and `parts` was parsed but unused. The intended command
    # is host:path from the parsed URL — TODO confirm against callers.
    command = 'scp -p -r %s:%s %s' % (parts.netloc, parts.path, destination_directory)
    logger.debug('transfer.stage_in: command=' + command )
    ret = os.system(command)
    if ret:
        raise Exception("Error in stage_in: %d" % ret)
......@@ -135,7 +136,7 @@ class SCPHandler:
def stage_out( self, source_directory, destination_url ):
    """Push outputs from source_directory to destination_url via scp.

    Raises Exception with the nonzero os.system status on failure.
    """
    # NOTE(review): comment below claims trailing '/' is ensured, but no code
    # here does so — confirm whether callers guarantee it.
    # ensure that source and destination each have a trailing '/'
    command = 'scp -p -r %s %s' % (source_directory, destination_url)
    logger.debug('transfer.stage_out: command=' + command)
    ret = os.system(command)
    if ret:
        raise Exception("Error in stage_out: %d" % ret)
......
import argparse
from importlib.util import find_spec
import glob
import getpass
import os
import sys
import signal
import subprocess
import time
os.environ['IS_SERVER_DAEMON']="True"
from balsam.django_config.settings import resolve_db_path
from serverinfo import ServerInfo
# Seconds between liveness checks of the spawned DB server process
CHECK_PERIOD = 4
# Seconds to linger after a termination request before final shutdown
TERM_LINGER = 30
# Launch the sqlite write-server with the same interpreter running this daemon
PYTHON = sys.executable
SQLITE_SERVER = find_spec('balsam.django_config.sqlite_server').origin

# Shell command template per supported backend; '{pg_db_path}' is filled in
# later via str.format(**serverinfo.data).
DB_COMMANDS = {
    'sqlite3' : f'{PYTHON} {SQLITE_SERVER}',
    'postgres': f'pg_ctl -D {{pg_db_path}} -w start',
    'mysql'   : f'',  # NOTE(review): mysql start command not implemented
}

# NOTE(review): read in main()'s loop condition but never assigned anywhere
# visible here — the TERM_LINGER window appears unreachable; confirm.
term_start = 0
def run(cmd):
    """Launch *cmd* through the shell, discarding its stdout/stderr.

    Returns the subprocess.Popen handle so callers can monitor/terminate it.
    """
    return subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )
def stop(proc, serverinfo):
    """Shut down the DB server process and wait up to 30s for it to exit.

    For postgres, `proc` is rebound to a pg_ctl stop command (the managed
    server is external); otherwise the spawned child is terminated directly.
    """
    print("Balsam server shutdown...", flush=True)
    if serverinfo['db_type'] == 'postgres':
        # f-string escapes the braces so .format() can fill pg_db_path
        cmd = f'pg_ctl -D {{pg_db_path}} -w stop'.format(**serverinfo.data)
        print(cmd)
        proc = subprocess.Popen(cmd, shell=True)
        time.sleep(2)
    else:
        proc.terminate()
    # Wait on whichever process is now in `proc` (pg_ctl or the server child)
    try: retcode = proc.wait(timeout=30)
    except subprocess.TimeoutExpired:
        print("Warning: server did not quit gracefully")
        proc.kill()
def wait(proc, serverinfo):
    """Block for ~CHECK_PERIOD seconds and probe whether the DB server is alive.

    Contract (relied on by main()'s loop): raises subprocess.TimeoutExpired
    while the server is still running; returns normally once it has stopped,
    which the caller treats as "restart the server".
    """
    if serverinfo['db_type'] == 'sqlite3':
        # Popen.wait itself raises TimeoutExpired while the child is alive
        retcode = proc.wait(timeout=CHECK_PERIOD)
    elif serverinfo['db_type'] == 'postgres':
        time.sleep(CHECK_PERIOD)
        user = getpass.getuser()
        # BUG FIX: the original passed a plain (non-f) string here, so the
        # pipeline grepped for the literal text "{user}", never the username.
        ps = subprocess.Popen(f'ps aux | grep {user} | grep postgres | '
                              'grep -v grep', shell=True, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)
        stdout, _ = ps.communicate()
        # BUG FIX: ''.split('\n') == [''] so the original len(lines) >= 1 was
        # always true and a dead postgres server was never detected.
        lines = [ln for ln in stdout.decode('utf-8').split('\n') if ln.strip()]
        if lines:
            raise subprocess.TimeoutExpired('cmd', CHECK_PERIOD)
def main(db_path):
    """Run the DB server daemon for the balsam DB rooted at db_path.

    Starts the backend server, installs signal handlers for clean shutdown,
    and restarts the server whenever wait() reports it has died.
    """
    serverinfo = ServerInfo(db_path)
    serverinfo.reset_server_address()
    server_type = serverinfo['db_type']

    db_cmd = f"BALSAM_DB_PATH={db_path} " + DB_COMMANDS[server_type].format(**serverinfo.data)
    print(f"\nStarting balsam DB server daemon for DB at {db_path}")
    print(db_cmd)
    proc = run(db_cmd)

    # On SIGUSR1, stop immediately ("balsam server --stop" does this);
    # SIGINT/SIGTERM get the same treatment. The handler also clears the
    # posted address so future ServerInfo daemons can start.
    def handle_stop(signum, stack):
        stop(proc, serverinfo)
        serverinfo.update({'address': None, 'host':None,'port':None})
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_stop)
    signal.signal(signal.SIGTERM, handle_stop)
    signal.signal(signal.SIGUSR1, handle_stop)

    # NOTE(review): term_start is the module global (0) and is never assigned,
    # so this condition is always true and the loop runs until a signal exits
    # the process — the TERM_LINGER window looks like dead code; confirm.
    while not term_start or time.time() - term_start < TERM_LINGER:
        try:
            # raises TimeoutExpired while the server is alive (normal case)
            wait(proc, serverinfo)
        except subprocess.TimeoutExpired:
            pass
        else:
            # wait() returned: server died; post a fresh address and relaunch
            print("\nserver process stopped unexpectedly; restarting")
            serverinfo.reset_server_address()
            db_cmd = f"BALSAM_DB_PATH={db_path} " + DB_COMMANDS[server_type].format(**serverinfo.data)
            print(db_cmd)
            proc = run(db_cmd)

    stop(proc, serverinfo)
    serverinfo.update({'address': None, 'host':None,'port':None})
if __name__ == "__main__":
    # Optional single CLI argument: the balsam DB directory; otherwise
    # resolve_db_path falls back to BALSAM_DB_PATH / the configured default.
    input_path = sys.argv[1] if len(sys.argv) == 2 else None
    db_path = resolve_db_path(input_path)
    main(db_path)
import json
import os
import socket
ADDRESS_FNAME = 'dbwriter_address'


class ServerInfo:
    """JSON-file-backed record of the balsam DB server's connection info.

    The file lives at <balsam_db_path>/dbwriter_address and holds keys such as
    'db_type', 'balsamdb_path', 'address' (sqlite3 write-server) or
    'host'/'port'/'pg_db_path' (postgres).
    """

    def __init__(self, balsam_db_path):
        self.path = os.path.join(balsam_db_path, ADDRESS_FNAME)
        self.data = {}
        if not os.path.exists(self.path):
            self.update(self.data)  # create the info file on first use
        else:
            self.refresh()
        if not self.data.get('balsamdb_path'):
            self.update({'balsamdb_path': balsam_db_path})
        # The server daemon refuses to start when an address is already
        # posted: another daemon is running, or the last one died uncleanly.
        # (The original duplicated this identical check for 'address' and
        # 'host'; merged into one.)
        if os.environ.get('IS_SERVER_DAEMON') == 'True':
            if self.data.get('address') or self.data.get('host'):
                raise RuntimeError(f"A running server address is already posted at {self.path}\n"
                                   ' Use "balsam dbserver --stop" to shut it down.\n'
                                   ' If you are sure there is no running server process, the'
                                   ' daemon did not have a clean shutdown.\n Use "balsam'
                                   ' dbserver --reset <balsam_db_directory>" to reset the server file'
                                   )

    def get_free_port(self):
        """Ask the OS for a currently-free TCP port number."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.bind(('', 0))
        port = int(sock.getsockname()[1])
        sock.close()
        return port

    def get_free_port_and_address(self):
        """Return 'tcp://<hostname>:<free port>' for this machine."""
        hostname = socket.gethostname()
        port = self.get_free_port()  # reuse instead of duplicating the probe
        return f'tcp://{hostname}:{port}'

    def get_sqlite3_info(self):
        """Fresh connection info for the sqlite3 write-server."""
        return dict(db_type='sqlite3', address=self.get_free_port_and_address())

    def get_postgres_info(self):
        """Fresh connection info for a postgres server inside the DB dir."""
        hostname = socket.gethostname()
        port = self.get_free_port()
        pg_db_path = os.path.join(self['balsamdb_path'], 'balsamdb')
        return dict(host=hostname, port=port, pg_db_path=pg_db_path)

    def update_sqlite3_config(self):
        # sqlite3 has no server-side config file to rewrite
        pass

    def update_postgres_config(self):
        """Rewrite postgresql.conf so its port line matches self['port']."""
        conf_path = os.path.join(self['pg_db_path'], 'postgresql.conf')
        config = open(conf_path).read()
        with open(f"{conf_path}.new", 'w') as fp:
            for line in config.split('\n'):
                if line.startswith('port'):
                    fp.write(f"port={self['port']} # auto-set by balsam db\n")
                else:
                    fp.write(line + "\n")
        os.rename(f"{conf_path}.new", conf_path)  # swap in the new config

    def reset_server_address(self):
        """Pick a fresh address for the configured db_type and persist it."""
        db = self['db_type']
        info = getattr(self, f'get_{db}_info')()
        self.update(info)
        getattr(self, f'update_{db}_config')()

    def update(self, update_dict):
        """Merge update_dict into the on-disk record (re-reading it first)."""
        self.refresh()
        self.data.update(update_dict)
        with open(self.path, 'w') as fp:
            fp.write(json.dumps(self.data))

    def get(self, key, default=None):
        """dict.get-style lookup against the cached data."""
        return self.data.get(key, default)

    def refresh(self):
        """Reload self.data from disk; no-op if the file does not exist."""
        if not os.path.exists(self.path):
            return
        with open(self.path, 'r') as fp:
            self.data = json.loads(fp.read())

    def __getitem__(self, key):
        # Defensive: self.data is normally always a dict, never None
        if self.data is None:
            self.refresh()
        return self.data[key]

    def __setitem__(self, key, value):
        # Writes persist immediately through update()
        self.update({key: value})
......@@ -10,22 +10,172 @@ For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.9/ref/settings/
"""
import os,logging
logger = logging.getLogger(__name__)
logger.info('here')
from user_settings import *
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
import sys
from balsam.django_config import serverinfo
from balsam.user_settings import *
# ---------------
# DATABASE SETUP
# ---------------
def resolve_db_path(path=None):
    """Resolve the balsam DB directory and export it as BALSAM_DB_PATH.

    Resolution order: explicit *path* argument, then the BALSAM_DB_PATH
    environment variable, then the configured default_db_path. The chosen
    directory must already exist for the first two sources.
    """
    if path:
        path = os.path.expanduser(path)
        assert os.path.exists(path)
    elif os.environ.get('BALSAM_DB_PATH'):
        path = os.environ['BALSAM_DB_PATH']
        assert os.path.exists(path), f"balsamDB path {path} not found"
    else:
        path = default_db_path
        path = os.path.expanduser(path)
    # Normalize and publish so child processes agree on the DB location
    path = os.path.abspath(path)
    os.environ['BALSAM_DB_PATH'] = path
    return path
def configure_db_backend(db_path):
    """Build the Django DATABASES setting for the balsam DB at *db_path*.

    The backend type and connection details come from the ServerInfo file
    stored inside the DB directory.
    """
    engines = {
        'sqlite3' : 'django.db.backends.sqlite3',
        'postgres': 'django.db.backends.postgresql_psycopg2',
    }
    names = {
        'sqlite3' : os.path.join(db_path, 'db.sqlite3'),
        'postgres': 'balsam',
    }
    options = {
        'sqlite3' : {'timeout' : 5000},
        'postgres' : {},
    }
    info = serverinfo.ServerInfo(db_path)
    db_type = info['db_type']
    default = {
        'ENGINE': engines[db_type],
        'NAME': names[db_type],
        'OPTIONS': options[db_type],
        'USER': info.get('user', ''),
        'PASSWORD': info.get('password', ''),
        'HOST': info.get('host', ''),
        'PORT': info.get('port', ''),
        'CONN_MAX_AGE': 60,
    }
    return {'default': default}
CONCURRENCY_ENABLED = True
# Resolve the DB location once at import; side effect: sets BALSAM_DB_PATH env
BALSAM_PATH = resolve_db_path()
DATABASES = configure_db_backend(BALSAM_PATH)

# --------------------
#  SUBDIRECTORY SETUP
# --------------------
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LOGGING_DIRECTORY = os.path.join(BALSAM_PATH , 'log')
DATA_PATH = os.path.join(BALSAM_PATH ,'data')
BALSAM_WORK_DIRECTORY = DATA_PATH

# Create the directory tree on first import
for d in [
    BALSAM_PATH ,
    DATA_PATH,
    LOGGING_DIRECTORY,
    BALSAM_WORK_DIRECTORY,
]:
    if not os.path.exists(d):
        os.makedirs(d)

# ----------------
#  LOGGING SETUP
# ----------------
# LOG_FILENAME etc. come from `from balsam.user_settings import *` above
HANDLER_FILE = os.path.join(LOGGING_DIRECTORY, LOG_FILENAME)
BALSAM_DB_CONFIG_LOG = os.path.join(LOGGING_DIRECTORY, "db.log")
# Django dictConfig: the 'balsam' tree logs to a rotating file; the
# django_config and service.models subtrees get their own db.log file and do
# not propagate into the main log.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'standard': {
            # e.g. "01-Jan-2020 12:00:00|1234|   DEBUG|balsam.x:42] message"
            'format' : '%(asctime)s|%(process)d|%(levelname)8s|%(name)s:%(lineno)s] %(message)s',
            'datefmt' : "%d-%b-%Y %H:%M:%S"
        },
    },
    'handlers': {
        # stderr handler; defined but not attached to any logger below
        'console': {
            'class':'logging.StreamHandler',
            'formatter': 'standard',
            'level' : 'DEBUG'
        },
        # main rotating log file (HANDLER_FILE) for the 'balsam' tree
        'default': {
            'level':LOG_HANDLER_LEVEL,
            'class':'logging.handlers.RotatingFileHandler',
            'filename': HANDLER_FILE,
            'maxBytes': LOG_FILE_SIZE_LIMIT,
            'backupCount': LOG_BACKUP_COUNT,
            'formatter': 'standard',
        },
        # separate rotating file for DB configuration / model traffic
        'balsam-db-config': {
            'level':LOG_HANDLER_LEVEL,
            'class':'logging.handlers.RotatingFileHandler',
            'filename': BALSAM_DB_CONFIG_LOG,
            'maxBytes': LOG_FILE_SIZE_LIMIT,
            'backupCount': LOG_BACKUP_COUNT,
            'formatter': 'standard',
        },
        #'django': {
        #    'level': LOG_HANDLER_LEVEL,
        #    'class':'logging.handlers.RotatingFileHandler',
        #    'filename': os.path.join(LOGGING_DIRECTORY, 'django.log'),
        #    'maxBytes': LOG_FILE_SIZE_LIMIT,
        #    'backupCount': LOG_BACKUP_COUNT,
        #    'formatter': 'standard',
        #},
    },
    'loggers': {
        #'django': {
        #    'handlers': ['django'],
        #    'level': 'DEBUG',
        #    'propagate': True,
        #},
        'balsam': {
            'handlers': ['default'],
            'level': 'DEBUG',
            'propagate': True,
        },
        'balsam.django_config': {
            'handlers': ['balsam-db-config'],
            'level': 'DEBUG',
            'propagate': False,
        },
        'balsam.service.models': {
            'handlers': ['balsam-db-config'],
            'level': 'DEBUG',
            'propagate': False,
        },
    }
}
import logging
logger = logging.getLogger(__name__)


def log_uncaught_exceptions(exctype, value, tb, logger=logger):
    """sys.excepthook replacement: record any uncaught exception.

    The traceback is logged both on the supplied logger (file handlers by
    default) and on the 'console' logger.
    """
    details = (exctype, value, tb)
    logger.error(f"Uncaught Exception {exctype}: {value}", exc_info=details)
    console = logging.getLogger('console')
    console.error(f"Uncaught Exception {exctype}: {value}", exc_info=details)


sys.excepthook = log_uncaught_exceptions
# -----------------------
# SQLITE CLIENT SETUP
# ------------------------
is_server = os.environ.get('IS_BALSAM_SERVER')=='True'
is_daemon = os.environ.get('IS_SERVER_DAEMON')=='True'
using_sqlite = DATABASES['default']['ENGINE'].endswith('sqlite3')

# Client handle for routing writes through the sqlite write-server.
# Remains None for the server/daemon processes themselves, for non-sqlite
# backends, and when no server address has been posted yet.
SAVE_CLIENT = None
if using_sqlite and not (is_server or is_daemon):
    from balsam.django_config import sqlite_client
    SAVE_CLIENT = sqlite_client.Client(serverinfo.ServerInfo(BALSAM_PATH))
    if SAVE_CLIENT.serverAddr is None:
        SAVE_CLIENT = None

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.9/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is committed to the repository; it should be
# generated per-deployment or loaded from the environment instead.
SECRET_KEY = '=gyp#o9ac0@w3&-^@a)j&f#_n-o=k%z2=g5u@z5+klmh_*hebj'
......@@ -34,12 +184,10 @@ DEBUG = True
ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
'balsam.apps.BalsamCoreConfig',
'argo.apps.ArgoCoreConfig',
'balsam.service.apps.BalsamCoreConfig',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
......@@ -59,7 +207,7 @@ MIDDLEWARE_CLASSES = [
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'argobalsam.urls'
ROOT_URLCONF = 'balsam.django_config.urls'
TEMPLATES = [
{
......@@ -77,7 +225,7 @@ TEMPLATES = [
},
]
WSGI_APPLICATION = 'argobalsam.wsgi.application'
WSGI_APPLICATION = 'balsam.django_config.wsgi.application'
......
from io import StringIO
from traceback import print_exc
import json
import os
import uuid
import zmq
from django.db.utils import OperationalError
from concurrency.exceptions import RecordModifiedError
# These are ridiculously high to benchmark
# Should be more like 5-10 sec, 3-4 retry
REQ_TIMEOUT = 300000 # 5 minutes
REQ_RETRY = 56
class Client:
def __init__(self, server_info):
    """Connect to the DB write-server whose address server_info posts.

    server_info: mapping-like object with .get('address'). When an address
    is posted, a TEST_ALIVE handshake is performed; failure to get an 'ACK'
    raises RuntimeError.
    """
    import logging
    self.logger = logging.getLogger(__name__)
    self.server_info = server_info
    self.serverAddr = self.server_info.get('address')
    self.first_message = True
    if self.serverAddr:
        try:
            response = self.send_request('TEST_ALIVE', timeout=30000)
        # BUG FIX: was a bare `except:` (also swallowed KeyboardInterrupt)
        # and the message lacked the f-prefix, so "{self.serverAddr}" was
        # emitted literally.
        except Exception as exc:
            raise RuntimeError(f"Cannot reach server at {self.serverAddr}") from exc
        else:
            if response != 'ACK':
                # .error, not .exception: there is no active exception here,
                # so exc_info would have logged "NoneType: None"
                self.logger.error(f"sqlite client cannot reach DB write server")
                raise RuntimeError(f"Cannot reach server at {self.serverAddr}")
def send_request(self, msg, timeout=None):
if timeout is None:
timeout = REQ_TIMEOUT
if self.first_message: