Commit 6bcaf260 authored by Pavan Balaji
Browse files

[svn-r3388] Merging the code part of the hydra into trunk. The design docs are...

[svn-r3388] Merging the code part of the hydra into trunk. The design docs are still left behind; those need to be moved to the wiki anyway.
parent 1364b895
......@@ -18,6 +18,8 @@
extern "C" {
#endif
#include "mpichconf.h"
/* ------------------------------------------------------------------------- */
/* mpimem.h */
/* ------------------------------------------------------------------------- */
......
# SUBDIRS_pm_name are the names that @ pm_name @ can take in SUBDIRS
# (except for util, which is included so that simplemake will process
# the Makefile.sm in that directory)
SUBDIRS_pm_name = mpd smpd gforker remshell
SUBDIRS_pm_name = mpd smpd gforker remshell hydra
SUBDIRS = @pm_name@ @other_pm_names@ .
# Remove PMPILIBNAME from the common make variables for the mpid
# directories
......
......@@ -159,7 +159,7 @@ int main( int argc, char *argv[], char *envp[] )
init, allowing an MPI process to contact a waiting mpiexec that
would serve as a process manager. This option is not implemented */
if (getenv("MPIEXEC_USE_PORT")) {
s.pmiinfo.portName = (char *)malloc( 1024 );
s.pmiinfo.portName = (char *)MPIU_Malloc( 1024 );
if (!s.pmiinfo.portName) {
MPIU_Error_printf( "Failed to allocate storage for portName" );
}
......
#
# (C) 2008 by Argonne National Laboratory.
# See COPYRIGHT in top-level directory.
#
# Sub-directories of hydra that are listed for the build, followed by
# this directory itself (.)
SUBDIRS = utils control pm bootstrap demux launcher .
# Create the lib directory if it does not exist yet
all-preamble:
@if [ ! -d lib ] ; then mkdir lib ; fi
# The install and uninstall targets are forwarded unchanged ($@) to the
# launcher sub-directory
mpich2-build-install:
${MAKE} -C launcher $@
mpich2-build-uninstall:
${MAKE} -C launcher $@
#
# (C) 2008 by Argonne National Laboratory.
# See COPYRIGHT in top-level directory.
#
# NOTE(review): presumably SUBDIRS_HYDRA_BSS lists the names that
# @HYDRA_BSS@ can take in SUBDIRS (same convention as SUBDIRS_pm_name
# in the pm Makefile.sm) -- confirm against simplemake.
SUBDIRS_HYDRA_BSS = ssh
# utils is always listed, followed by the substituted bootstrap server
# directory and this directory (.)
SUBDIRS = utils @HYDRA_BSS@ .
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef BSCI_H_INCLUDED
#define BSCI_H_INCLUDED
#include "hydra.h"
#include "csi.h"
/*
 * Bootstrap server control interface. None of these entry points takes
 * an argument and each returns a HYD_Status. The declarations use
 * (void) so that they are real prototypes and the compiler rejects
 * calls that pass arguments.
 */
HYD_Status HYD_BSCI_Launch_procs(void);
HYD_Status HYD_BSCI_Cleanup_procs(void);
HYD_Status HYD_BSCI_Wait_for_completion(void);
HYD_Status HYD_BSCI_Finalize(void);
#endif /* BSCI_H_INCLUDED */
#
# (C) 2008 by Argonne National Laboratory.
# See COPYRIGHT in top-level directory.
#
# Directory that libhydra_a_DIR points at for this bootstrap server
HYDRA_LIB_PATH = ../../lib
libhydra_a_DIR = ${HYDRA_LIB_PATH}
# Sources of the ssh bootstrap server
libhydra_a_SOURCES = ssh_launch.c ssh_wait.c ssh_finalize.c
# Header search path: demux, control, hydra include (source and build
# tree), bootstrap include and utils, hydra utils, and the top-level
# include directory (source and build tree)
INCLUDES = -I${abs_srcdir}/../../demux -I${abs_srcdir}/../../control/include/ -I${abs_srcdir}/../../include -I../../include -I${abs_srcdir}/../include -I${abs_srcdir}/../utils -I${abs_srcdir}/../../utils -I${abs_srcdir}/../../../../include -I../../../../include
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "hydra.h"
#include "bsci.h"
#include "bscu.h"
/* Declaration only: the process-state table is shared bootstrap state
 * and must be defined in exactly one translation unit. Repeating a
 * tentative definition in every source file fails to link when the
 * compiler does not merge common symbols (-fno-common). */
extern HYD_BSCU_Procstate_t * HYD_BSCU_Procstate;
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCI_Finalize"
/*
 * HYD_BSCI_Finalize: Finalize the ssh bootstrap server. All of the work
 * is delegated to HYD_BSCU_Finalize_exit_status(), whose status is
 * returned unchanged.
 */
HYD_Status HYD_BSCI_Finalize()
{
    HYD_Status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYD_BSCU_Finalize_exit_status();

    HYDU_FUNC_EXIT();
    return status;
}
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "hydra.h"
#include "hydra_sock.h"
#include "hydra_dbg.h"
#include "hydra_mem.h"
#include "csi.h"
#include "bsci.h"
#include "bscu.h"
#define MAX_CLIENT_ARG 200
#define MAX_CLIENT_ENV 200
/*
 * Shared bootstrap/control state used by the launch and cleanup code.
 * These are declarations only: each object must be defined in exactly
 * one translation unit. Repeating the tentative definitions in every
 * source file fails to link when the compiler does not merge common
 * symbols (-fno-common).
 */
extern HYD_BSCU_Procstate_t * HYD_BSCU_Procstate;
extern int HYD_BSCU_Num_procs;
extern int HYD_BSCU_Completed_procs;
extern HYD_CSI_Handle * csi_handle;
/*
 * HYD_BSCI_Launch_procs: For each process, we create an argument
 * vector which reads like
 *
 *     /usr/bin/ssh -xq <host> export <env> ; ... cd <wdir> ; <exec args>
 *
 * and the list of environment variables, and hand both to
 * HYD_BSCU_Spawn_proc (per the original note, a worker process is
 * forked that sets the environment and execvp's this command).
 *
 * One stdout and one stderr slot per host is allocated on every
 * proc_params entry. Process 0 is additionally given the address of
 * csi_handle->stdin; every other process gets NULL for stdin.
 *
 * Returns HYD_SUCCESS, or the first failing status reported by the
 * bootstrap utilities. All temporary strings are released on every
 * path.
 */
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCI_Launch_procs"
HYD_Status HYD_BSCI_Launch_procs(void)
{
    struct HYD_CSI_Proc_params * proc_params;
    char * hostname, ** client_arg = NULL, ** client_env = NULL;
    int i, arg, process_id;
    HYD_Status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Both vectors are NULL-terminated string lists. Terminate them
     * right after allocation so that the cleanup code never walks
     * indeterminate memory when we bail out early.
     * NOTE(review): assumes HYDU_MALLOC jumps to fn_fail on failure --
     * confirm against hydra_mem.h. */
    HYDU_MALLOC(client_arg, char **, MAX_CLIENT_ARG * sizeof(char *), status);
    client_arg[0] = NULL;
    HYDU_MALLOC(client_env, char **, MAX_CLIENT_ENV * sizeof(char *), status);
    client_env[0] = NULL;

    status = HYD_BSCU_Init_exit_status();
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("bootstrap utils returned error when initializing exit status\n");
        goto fn_fail;
    }

    status = HYD_BSCU_Set_common_signals(HYD_BSCU_Signal_handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set signal\n");
        goto fn_fail;
    }

    hostname = NULL;
    process_id = 0;
    for (proc_params = csi_handle->proc_params; proc_params; proc_params = proc_params->next) {
        HYDU_MALLOC(proc_params->stdout, int *, proc_params->hostlist_length * sizeof(int), status);
        HYDU_MALLOC(proc_params->stderr, int *, proc_params->hostlist_length * sizeof(int), status);

        for (i = 0; i < proc_params->hostlist_length; i++) {
            /* A NULL host entry keeps the previously used host name.
             * NOTE(review): hostname is still NULL here if the very
             * first entry is NULL -- confirm the caller rules this
             * out. */
            if (proc_params->hostlist[i] != NULL)
                hostname = proc_params->hostlist[i];

            /* Release the strings built for the previous process
             * before the vectors are refilled; clearing each slot keeps
             * the lists consistent if a later allocation fails. */
            for (arg = 0; client_env[arg]; arg++) {
                HYDU_FREE(client_env[arg]);
                client_env[arg] = NULL;
            }
            for (arg = 0; client_arg[arg]; arg++) {
                HYDU_FREE(client_arg[arg]);
                client_arg[arg] = NULL;
            }

            HYD_BSCU_Setup_env(proc_params, client_env, process_id, status);

            /* Setup the executable arguments.
             * NOTE(review): nothing bounds the number of entries
             * against MAX_CLIENT_ARG / MAX_CLIENT_ENV. */
            arg = 0;
            client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
            client_arg[arg++] = MPIU_Strdup("-xq");
            client_arg[arg++] = MPIU_Strdup(hostname);

            HYD_BSCU_Append_env(proc_params, client_env, client_arg, arg, -1);

            client_arg[arg++] = MPIU_Strdup("cd");
            client_arg[arg++] = MPIU_Strdup(csi_handle->wdir);
            client_arg[arg++] = MPIU_Strdup(";");

            HYD_BSCU_Append_exec(proc_params, client_arg, arg, -1);

            /* The stdin pointer will be some value for process_id 0;
             * for everyone else, it's NULL. */
            status = HYD_BSCU_Spawn_proc(client_arg, client_env,
                                         (process_id == 0 ? &csi_handle->stdin : NULL),
                                         &proc_params->stdout[i], &proc_params->stderr[i],
                                         &HYD_BSCU_Procstate[process_id].pid);
            if (status != HYD_SUCCESS) {
                HYDU_Error_printf("bootstrap spawn process returned error\n");
                goto fn_fail;
            }

            process_id++;
        }
    }

  fn_exit:
    if (client_arg) {
        for (arg = 0; client_arg[arg]; arg++) {
            HYDU_FREE(client_arg[arg]);
        }
        HYDU_FREE(client_arg);
    }
    if (client_env) {
        for (arg = 0; client_env[arg]; arg++) {
            HYDU_FREE(client_env[arg]);
        }
        HYDU_FREE(client_env);
    }
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCI_Cleanup_procs"
/*
 * HYD_BSCI_Cleanup_procs: For every process slot whose recorded pid is
 * not -1, spawn
 *
 *     ssh -xq <host> killall <first exec argument>
 *
 * with an empty environment list. Slots with pid -1 are skipped.
 *
 * Returns HYD_SUCCESS, or the status of the first spawn that fails.
 * All temporary strings are released on every path.
 */
HYD_Status HYD_BSCI_Cleanup_procs(void)
{
    struct HYD_CSI_Proc_params * proc_params;
    char * hostname, ** client_arg = NULL, ** client_env = NULL;
    int i, arg, process_id;
    HYD_Status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Terminate both lists right after allocation so the cleanup code
     * below never reads indeterminate memory, even when no process is
     * killed at all.
     * NOTE(review): assumes HYDU_MALLOC jumps to fn_fail on failure --
     * confirm against hydra_mem.h. */
    HYDU_MALLOC(client_arg, char **, MAX_CLIENT_ARG * sizeof(char *), status);
    client_arg[0] = NULL;
    HYDU_MALLOC(client_env, char **, MAX_CLIENT_ENV * sizeof(char *), status);
    client_env[0] = NULL; /* the kill command gets no extra environment */

    hostname = NULL;
    process_id = 0;
    for (proc_params = csi_handle->proc_params; proc_params; proc_params = proc_params->next) {
        for (i = 0; i < proc_params->hostlist_length; i++) {
            /* A NULL host entry keeps the previously used host name. */
            if (proc_params->hostlist[i] != NULL)
                hostname = proc_params->hostlist[i];

            /* Decide whether this slot needs a kill before building
             * any strings, so that skipped slots allocate nothing. */
            if (HYD_BSCU_Procstate[process_id++].pid == -1)
                continue;

            /* Release the strings built for the previous process. */
            for (arg = 0; client_arg[arg]; arg++) {
                HYDU_FREE(client_arg[arg]);
                client_arg[arg] = NULL;
            }

            /* Setup the executable arguments */
            arg = 0;
            client_arg[arg++] = MPIU_Strdup("ssh");
            client_arg[arg++] = MPIU_Strdup("-xq");
            client_arg[arg++] = MPIU_Strdup(hostname);
            client_arg[arg++] = MPIU_Strdup("killall");

            /* Only the first exec argument is appended as the killall
             * target; the macro NULL-terminates client_arg. */
            HYD_BSCU_Append_exec(proc_params, client_arg, arg, 1);

            status = HYD_BSCU_Spawn_proc(client_arg, client_env, NULL, NULL, NULL, NULL);
            if (status != HYD_SUCCESS) {
                HYDU_Error_printf("bootstrap spawn process returned error\n");
                goto fn_fail;
            }
        }
    }

  fn_exit:
    if (client_arg) {
        for (arg = 0; client_arg[arg]; arg++) {
            HYDU_FREE(client_arg[arg]);
        }
        HYDU_FREE(client_arg);
    }
    if (client_env) {
        for (arg = 0; client_env[arg]; arg++) {
            HYDU_FREE(client_env[arg]);
        }
        HYDU_FREE(client_env);
    }
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "hydra.h"
#include "bsci.h"
#include "bscu.h"
/*
* HYD_BSCI_Wait_for_completion: We first wait for communication
* events from the available processes till all connections have
* closed. In the meanwhile, the SIGCHLD handler keeps track of all
* the terminated processes.
*/
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCI_Wait_for_completion"
HYD_Status HYD_BSCI_Wait_for_completion()
{
    HYD_Status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* The ssh bootstrap server has nothing of its own to wait on; the
     * bootstrap utility routine does the work and its status is
     * returned unchanged. */
    status = HYD_BSCU_Wait_for_completion();

    HYDU_FUNC_EXIT();
    return status;
}
#
# (C) 2008 by Argonne National Laboratory.
# See COPYRIGHT in top-level directory.
#
# Directory that libhydra_a_DIR points at for the bootstrap utilities
HYDRA_LIB_PATH = ../../lib
libhydra_a_DIR = ${HYDRA_LIB_PATH}
# Sources of the bootstrap utility layer
libhydra_a_SOURCES = bscu_spawn.c bscu_signal.c
# Header search path. The launcher headers are looked up under
# launcher/ (the directory name used by the hydra SUBDIRS list), not
# launch/.
INCLUDES = -I${abs_srcdir}/../../utils -I${abs_srcdir}/../../include -I../../include -I${abs_srcdir}/../include -I${abs_srcdir}/../../bootstrap/include -I${abs_srcdir} -I${abs_srcdir}/../../control/include -I${abs_srcdir}/../../demux -I${abs_srcdir}/../../pm/include -I${abs_srcdir}/../../launcher/mpiexec -I${abs_srcdir}/../../../../include -I../../../../include
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#ifndef BSCU_H_INCLUDED
#define BSCU_H_INCLUDED
#include "hydra.h"
#include "hydra_sig.h"
#include "csi.h"
#include "bsci.h"
/*
 * HYD_BSCU_Append_exec: Append MPIU_Strdup'ed copies of the strings on
 * the proc_params->exec list to client_arg, starting at index idx, and
 * NULL-terminate client_arg afterwards.
 *
 *   proc_params - pointer to an object whose exec member heads a list
 *                 of struct HYD_CSI_Exec linked through ->next
 *   client_arg  - destination array of char *; the caller must provide
 *                 room for every appended entry plus the terminator
 *   idx         - int lvalue; advanced past every appended entry
 *   num_args    - maximum number of entries to append (0 appends
 *                 nothing), or -1 to append the whole list
 *
 * The index parameter is deliberately not called "arg": a macro
 * parameter of that name would also be substituted into the member
 * access exec->arg, so the macro would only expand correctly when the
 * caller's variable happened to be named arg as well.
 */
#define HYD_BSCU_Append_exec(proc_params, client_arg, idx, num_args) \
    { \
        struct HYD_CSI_Exec * hyd_exec_; \
        int hyd_count_ = 0; \
        hyd_exec_ = (proc_params)->exec; \
        while (hyd_exec_ && ((num_args) == -1 || hyd_count_ < (num_args))) { \
            (client_arg)[(idx)++] = MPIU_Strdup(hyd_exec_->arg); \
            hyd_count_++; \
            hyd_exec_ = hyd_exec_->next; \
        } \
        (client_arg)[(idx)] = NULL; \
    }
/*
 * HYD_BSCU_Append_env: For each string in the NULL-terminated list
 * client_env, append the three entries "export", <string>, ";" to
 * client_arg (each MPIU_Strdup'ed), starting at index arg, and
 * NULL-terminate client_arg afterwards.
 *
 *   proc_params - not used by the macro; kept so existing call sites
 *                 keep working
 *   client_env  - NULL-terminated array of char *
 *   client_arg  - destination array of char *; the caller must provide
 *                 room for three entries per variable plus the
 *                 terminator
 *   arg         - int lvalue; advanced past every appended entry
 *   num_args    - maximum number of variables to export (0 exports
 *                 nothing), or -1 to export all of them
 */
#define HYD_BSCU_Append_env(proc_params, client_env, client_arg, arg, num_args) \
    { \
        int hyd_count_ = 0; \
        while ((client_env)[hyd_count_] && \
               ((num_args) == -1 || hyd_count_ < (num_args))) { \
            (client_arg)[(arg)++] = MPIU_Strdup("export"); \
            (client_arg)[(arg)++] = MPIU_Strdup((client_env)[hyd_count_]); \
            (client_arg)[(arg)++] = MPIU_Strdup(";"); \
            hyd_count_++; \
        } \
        (client_arg)[(arg)] = NULL; \
    }
/*
 * HYD_BSCU_Setup_env: Fill client_env with one freshly allocated
 * "name=value" string per entry of the proc_params->env_list list and
 * NULL-terminate it.
 *
 *   proc_params - pointer to an object whose env_list member heads a
 *                 list of HYD_CSI_Env_t linked through ->next
 *   client_env  - destination array of char *; the caller must provide
 *                 room for every entry plus the terminator
 *   process_id  - integer used as the value of non-static entries
 *   status      - status lvalue handed on to HYDU_Int_to_str and
 *                 HYDU_MALLOC
 *
 * For HYD_CSI_ENV_STATIC entries the value is env_value as stored on
 * the entry; a NULL env_value yields "name=". For every other entry
 * type the value is process_id rendered as text.
 */
#define HYD_BSCU_Setup_env(proc_params, client_env, process_id, status) \
    { \
        int hyd_idx_ = 0, hyd_len_; \
        char * hyd_value_, * hyd_str_; \
        HYD_CSI_Env_t * hyd_env_; \
        for (hyd_env_ = (proc_params)->env_list; hyd_env_; hyd_env_ = hyd_env_->next) { \
            if (hyd_env_->env_type == HYD_CSI_ENV_STATIC) \
                hyd_value_ = hyd_env_->env_value; \
            else { /* This is an auto-increment type */ \
                /* NOTE(review): hyd_str_ is produced by HYDU_Int_to_str */ \
                /* and is not released here -- confirm who owns it. */ \
                HYDU_Int_to_str(process_id, hyd_str_, status); \
                HYDU_MALLOC(hyd_value_, char *, strlen(hyd_str_) + 1, status); \
                MPIU_Snprintf(hyd_value_, strlen(hyd_str_) + 1, "%s", hyd_str_); \
            } \
            \
            /* name + '=' + value + terminating NUL */ \
            hyd_len_ = strlen(hyd_env_->env_name) + 2; \
            if (hyd_value_) \
                hyd_len_ += strlen(hyd_value_); \
            \
            HYDU_MALLOC((client_env)[hyd_idx_], char *, hyd_len_, status); \
            /* Never hand a NULL pointer to %s; print an empty value. */ \
            MPIU_Snprintf((client_env)[hyd_idx_++], hyd_len_, "%s=%s", \
                          hyd_env_->env_name, hyd_value_ ? hyd_value_ : ""); \
            /* Free exactly what the non-static branch allocated. */ \
            if (hyd_env_->env_type != HYD_CSI_ENV_STATIC) { \
                HYDU_FREE(hyd_value_); \
            } \
        } \
        (client_env)[hyd_idx_] = NULL; \
    }
/* Per-process bookkeeping record kept by the bootstrap utilities. */
typedef struct HYD_BSCU_Procstate {
    int pid;            /* pid recorded for the spawned process */
    int exit_status;    /* status recorded when the process is reaped */
} HYD_BSCU_Procstate_t;
/* Shared process-tracking state of the bootstrap utilities. */
extern HYD_BSCU_Procstate_t * HYD_BSCU_Procstate;
extern int HYD_BSCU_Num_procs;
extern int HYD_BSCU_Completed_procs;

HYD_Status HYD_BSCU_Init_exit_status(void);
HYD_Status HYD_BSCU_Finalize_exit_status(void);
/* The three stream parameters are named std_in/std_out/std_err rather
 * than stdin/stdout/stderr: <stdio.h> is allowed to define those names
 * as macros, which would break this prototype wherever they expand to
 * an expression. Callers may pass NULL for stream and pid pointers. */
HYD_Status HYD_BSCU_Spawn_proc(char ** client_arg, char ** client_env, int * std_in, int * std_out, int * std_err, int * pid);
HYD_Status HYD_BSCU_Wait_for_completion(void);
HYD_Status HYD_BSCU_Set_common_signals(sighandler_t handler);
void HYD_BSCU_Signal_handler(int signal);
#endif /* BSCU_H_INCLUDED */
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "hydra.h"
#include "hydra_dbg.h"
#include "hydra_sig.h"
#include "csi.h"
#include "bsci.h"
#include "bscu.h"
#include <sys/wait.h>
/*
 * Shared process-tracking state read and updated by the signal
 * handler. These are declarations only: each object must be defined in
 * exactly one translation unit. Repeating the tentative definitions in
 * every source file fails to link when the compiler does not merge
 * common symbols (-fno-common).
 */
extern HYD_BSCU_Procstate_t * HYD_BSCU_Procstate;
extern int HYD_BSCU_Num_procs;
extern int HYD_BSCU_Completed_procs;
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCU_Set_common_signals"
/*
 * HYD_BSCU_Set_common_signals: Install handler for SIGCHLD, SIGINT,
 * SIGQUIT, SIGTERM and, where defined, SIGCONT through
 * HYDU_Set_signal.
 *
 * SIGSTOP is intentionally not registered: POSIX does not allow
 * SIGSTOP (or SIGKILL) to be caught, so a registration attempt can
 * only fail.
 *
 * Returns HYD_SUCCESS, or the status of the first registration that
 * fails (an error message naming the signal is printed).
 */
HYD_Status HYD_BSCU_Set_common_signals(sighandler_t handler)
{
    HYD_Status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    status = HYDU_Set_signal(SIGCHLD, handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set SIGCHLD signal\n");
        goto fn_fail;
    }

    status = HYDU_Set_signal(SIGINT, handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set SIGINT signal\n");
        goto fn_fail;
    }

    status = HYDU_Set_signal(SIGQUIT, handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set SIGQUIT signal\n");
        goto fn_fail;
    }

    status = HYDU_Set_signal(SIGTERM, handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set SIGTERM signal\n");
        goto fn_fail;
    }

#if defined SIGCONT
    status = HYDU_Set_signal(SIGCONT, handler);
    if (status != HYD_SUCCESS) {
        HYDU_Error_printf("signal utils returned error when trying to set SIGCONT signal\n");
        goto fn_fail;
    }
#endif /* SIGCONT */

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCU_Signal_handler"
/*
 * HYD_BSCU_Signal_handler: Common signal handler of the bootstrap
 * utilities.
 *
 *   SIGCHLD - reap every child that has already terminated and, for
 *             each pid found in HYD_BSCU_Procstate, record its wait
 *             status and bump HYD_BSCU_Completed_procs.
 *   SIGINT, SIGQUIT, SIGTERM (and SIGSTOP / SIGCONT where defined) -
 *             call HYD_BSCI_Cleanup_procs() and exit with -1.
 *   anything else - ignored.
 */
void HYD_BSCU_Signal_handler(int signal)
{
    int status, pid, i;

    HYDU_FUNC_ENTER();

    if (signal == SIGCHLD) {
        /* Several children may terminate before a single SIGCHLD is
         * delivered, so reap in a loop. WNOHANG keeps the handler from
         * blocking when there is nothing left to reap; if we don't get
         * a PID at all, it means that the main thread handled it. */
        while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
            /* Find the pid in the procstate structure and mark it as
             * complete. */
            for (i = 0; i < HYD_BSCU_Num_procs; i++) {
                if (HYD_BSCU_Procstate[i].pid == pid) {
                    HYD_BSCU_Procstate[i].exit_status = status;
                    HYD_BSCU_Completed_procs++;
                    break;
                }
            }
        }
    }
    else if (signal == SIGINT || signal == SIGQUIT || signal == SIGTERM
#if defined SIGSTOP
             || signal == SIGSTOP
#endif /* SIGSTOP */
#if defined SIGCONT
             || signal == SIGCONT
#endif /* SIGCONT */
        ) {
        /* There's nothing we can do with the return value for now.
         * NOTE(review): the cleanup routine and exit() do not look
         * async-signal-safe -- confirm whether this work should be
         * deferred to the main loop. */
        HYD_BSCI_Cleanup_procs();
        exit(-1);
    }
    else {
        /* Ignore other signals for now */
    }

    HYDU_FUNC_EXIT();
    return;
}
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2008 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "hydra.h"
#include "hydra_dbg.h"
#include "hydra_mem.h"
#include "csi.h"
#include "bsci.h"
#include "bscu.h"
HYD_BSCU_Procstate_t * HYD_BSCU_Procstate;
int HYD_BSCU_Num_procs;
int HYD_BSCU_Completed_procs;
HYD_CSI_Handle * csi_handle;
#if defined FUNCNAME
#undef FUNCNAME
#endif /* FUNCNAME */
#define FUNCNAME "HYD_BSCU_Init_exit_status"
HYD_Status HYD_BSCU_Init_exit_status(void)
{
struct HYD_CSI_Proc_params * proc_params;
int i;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
/* Set the exit status of all processes to 1 (> 0 means that the
* status is not set yet). Also count the number of processes in
* the same loop. */
HYD_BSCU_Num_procs = 0;
proc_params = csi_handle->proc_params;
while (proc_params) {
HYD_BSCU_Num_procs += proc_params->hostlist_length;
HYDU_MALLOC(proc_params->exit_status, int *, proc_params->hostlist_length * sizeof(int), status);
for (i = 0; i < proc_params->hostlist_length; i++)
proc_params->exit_status[i] = 1;
proc_params = proc_params->next;
}
HYDU_MALLOC(HYD_BSCU_Procstate, HYD_BSCU_Procstate_t *,
HYD_BSCU_Num_procs * sizeof(HYD_BSCU_Procstate_t), status);
HYD_BSCU_Completed_procs = 0;
fn_exit:
HYDU_FUNC_EXIT();