Commit 009e6706 authored by Pavan Balaji
Browse files

[svn-r3409] 1. Allow reuse of hostfile from the previous executable if a...

[svn-r3409] 1. Allow reuse of hostfile from the previous executable if a multi-executable command-line does not specify one for some executables.

2. Several bug fixes in returning errors to the upper layer.
parent fc2fd0b3
......@@ -71,7 +71,7 @@ HYD_Status HYD_BSCI_Launch_procs()
/* Setup the executable arguments */
arg = 0;
client_arg[arg++] = MPIU_Strdup("/usr/bin/ssh");
client_arg[arg++] = MPIU_Strdup("-xq");
client_arg[arg++] = MPIU_Strdup("-q");
client_arg[arg++] = MPIU_Strdup(hostname);
HYD_BSCU_Append_env(proc_params, client_env, client_arg, arg, -1);
......@@ -141,7 +141,7 @@ HYD_Status HYD_BSCI_Cleanup_procs(void)
/* Setup the executable arguments */
arg = 0;
client_arg[arg++] = MPIU_Strdup("ssh");
client_arg[arg++] = MPIU_Strdup("-xq");
client_arg[arg++] = MPIU_Strdup("-q");
client_arg[arg++] = MPIU_Strdup(hostname);
client_arg[arg++] = MPIU_Strdup("cd");
......
......@@ -55,6 +55,7 @@ HYD_Status HYD_CSI_Finalize()
for (i = 0; i < proc_params->hostlist_length; i++)
HYDU_FREE(proc_params->hostlist[i]);
HYDU_FREE(proc_params->hostlist);
HYDU_FREE(proc_params);
proc_params = p;
}
csi_handle->proc_params = NULL;
......
......@@ -197,7 +197,11 @@ HYD_Status HYD_DMX_Wait_for_event()
if (pollfds[i].revents & POLLIN)
events |= HYD_CSI_OUT;
run->callback(pollfds[i].fd, events);
status = run->callback(pollfds[i].fd, events);
if (status != HYD_SUCCESS) {
HYDU_Error_printf("callback returned error status\n", errno);
goto fn_fail;
}
}
i++;
......
......@@ -52,7 +52,7 @@ int main(int argc, char ** argv)
FILE * fp;
int procs_left, current_procs, count, index, i, exit_status;
char node[MAX_HOSTNAME_LEN + 5]; /* Give 5 extra digits for number of processes */
char * nodename, * procs;
char * nodename, * procs, * hostfile;
HYD_Status status = HYD_SUCCESS;
HYDU_FUNC_ENTER();
......@@ -82,6 +82,7 @@ int main(int argc, char ** argv)
/* Break host files into nodes and pass it to the control
* system. */
local = params.local;
hostfile = NULL;
while (local) {
procs_left = local->num_procs;
......@@ -97,7 +98,15 @@ int main(int argc, char ** argv)
proc_params->next = NULL;
index = 0;
fp = fopen(local->hostfile, "r");
/* If we got a new file, open it */
if (local->hostfile != NULL) {
if (hostfile != NULL) { /* We have a previously opened file */
fclose(fp);
}
fp = fopen(local->hostfile, "r");
hostfile = local->hostfile;
}
while (1) {
fscanf(fp, "%s", node);
......@@ -136,7 +145,6 @@ int main(int argc, char ** argv)
if (!procs_left)
break;
}
fclose(fp);
if (params.global.prop != HYD_LCHI_PROPAGATE_NOTSET) {
/* There is a global environment setting */
......@@ -171,6 +179,9 @@ int main(int argc, char ** argv)
proc_params->genv_list = NULL;
}
proc_params->stdout_cb = HYD_LCHI_stdout_cb;
proc_params->stderr_cb = HYD_LCHI_stderr_cb;
if (csi_handle->proc_params == NULL) {
csi_handle->proc_params = proc_params;
}
......@@ -183,6 +194,7 @@ int main(int argc, char ** argv)
local = local->next;
}
fclose(fp);
gettimeofday(&csi_handle->start, NULL);
if (getenv("MPIEXEC_TIMEOUT"))
......@@ -193,8 +205,6 @@ int main(int argc, char ** argv)
}
csi_handle->stdin_cb = HYD_LCHI_stdin_cb;
csi_handle->proc_params->stdout_cb = HYD_LCHI_stdout_cb;
csi_handle->proc_params->stderr_cb = HYD_LCHI_stderr_cb;
/* Launch the processes */
status = HYD_CSI_Launch_procs();
......@@ -228,7 +238,8 @@ int main(int argc, char ** argv)
local = params.local;
while (local) {
tlocal = local->next;
HYDU_FREE(local->hostfile);
if (local->hostfile)
HYDU_FREE(local->hostfile);
HYD_CSU_Free_env_list(local->added_env_list);
HYD_CSU_Free_env_list(local->prop_env_list);
......
......@@ -319,7 +319,7 @@ fn_fail:
#define FUNCNAME "HYD_LCHI_Get_parameters"
HYD_Status HYD_LCHI_Get_parameters(int t_argc, char ** t_argv, HYD_LCHI_Params_t * params)
{
int argc = t_argc;
int argc = t_argc, got_hostfile;
char ** argv = t_argv;
int local_params_started;
HYD_CSI_Env_t * env;
......@@ -596,6 +596,7 @@ HYD_Status HYD_LCHI_Get_parameters(int t_argc, char ** t_argv, HYD_LCHI_Params_t
}
local = params->local;
got_hostfile = 0;
while (local) {
if (local->prop == HYD_LCHI_PROPAGATE_NOTSET &&
params->global.prop == HYD_LCHI_PROPAGATE_NOTSET)
......@@ -614,17 +615,18 @@ HYD_Status HYD_LCHI_Get_parameters(int t_argc, char ** t_argv, HYD_LCHI_Params_t
goto fn_fail;
}
if (local->hostfile == NULL && getenv("HYDRA_HOST_FILE")) {
if (local->hostfile == NULL && got_hostfile == 0 && getenv("HYDRA_HOST_FILE"))
local->hostfile = MPIU_Strdup(getenv("HYDRA_HOST_FILE"));
}
if (local->hostfile == NULL) {
HYDU_Error_printf("Host file not specified\n");
status = HYD_INTERNAL_ERROR;
goto fn_fail;
}
if (local->hostfile != NULL)
got_hostfile = 1;
local = local->next;
}
if (got_hostfile == 0) {
HYDU_Error_printf("Host file not specified\n");
status = HYD_INTERNAL_ERROR;
goto fn_fail;
}
fn_exit:
HYDU_FUNC_EXIT();
......
......@@ -132,7 +132,9 @@ HYD_Status HYD_PMCD_Central_cb(int fd, HYD_CSI_Event_t events)
}
else {
/* We don't understand the command */
printf("Unrecognized command: %s\n", cmd);
HYDU_Error_printf("Unrecognized PMI command: %s\n", cmd);
status = HYD_INTERNAL_ERROR;
goto fn_fail;
}
if (status != HYD_SUCCESS) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment