Commit f43037e8 authored by Anthony Chan's avatar Anthony Chan
Browse files

[svn-r8978] Moved PBS_NODEFILE parsing from pbs_init() to pbs_launch_procs(). ...

[svn-r8978] Moved PBS_NODEFILE parsing from pbs_init() to pbs_launch_procs().  Corrected some HYD*_ prefixes so they are consistent with hydra's coding convention.  Added a temporary hack to spawn proxy servers on every allocated node when RMK=user (something is broken here, as pmiserv complains about an incorrect proxyID).
parent d7397d3b
......@@ -11,77 +11,10 @@
#if defined(HAVE_TM_H)
struct HYDT_bscd_pbs_sys *HYDT_bscd_pbs_sys;
/*
 * Strip leading and trailing white space from str, in place.
 *
 * Trailing white space is cut off by writing a NUL terminator into str right
 * after its last non-white-space character.  Leading white space is skipped
 * by returning a pointer into str; no bytes are moved.
 *
 * Returns a pointer to the first non-white-space character of str, or NULL
 * when str is empty or holds nothing but white space (str is left untouched
 * in that case).
 */
static char *HYD_pbs_trim_space(char *str)
{
    char *first = str;
    size_t end = strlen(str);

    /* Back up over trailing white space; 'end' is one past the last
     * character that is kept.  The cast keeps isspace() well defined for
     * bytes with the high bit set. */
    while (end > 0 && isspace((unsigned char) str[end - 1]))
        end--;

    /* Nothing but white space (or an empty string): no name to return */
    if (end == 0)
        return NULL;

    str[end] = '\0';

    /* At least one non-white-space character remains, so this scan stops
     * before the terminator. */
    while (isspace((unsigned char) *first))
        first++;

    return first;
}
/*
 * Read the PBS node file and publish its entries in HYDT_bscd_pbs_sys.
 *
 * The file is read twice: once to count lines so the node array can be
 * sized, and once to fill it.  Each stored entry gets the white-space-trimmed
 * text of its line as the name and its position in the array as the id.
 * Lines that contain no name (blank or white space only) are ignored.
 *
 * On success HYDT_bscd_pbs_sys->nodes and ->num_nodes are updated.  On
 * failure the function returns a non-success status and leaves the global
 * structure untouched.  The file handle is closed on every path.
 *
 * NOTE(review): fgets() reads at most HYDT_PBS_STRLEN-1 characters per call,
 * so a longer line would be consumed as several entries -- confirm that node
 * names can never be that long.
 */
static HYD_status HYPU_pbs_parse_for_nodes(const char *nodefile)
{
    char line[HYDT_PBS_STRLEN];
    FILE *fp = NULL;
    int num_lines;
    int num_nodes;
    struct HYDT_bscd_pbs_node *nodes = NULL;
    HYD_status status = HYD_SUCCESS;

    if ((fp = fopen(nodefile, "r")) == NULL) {
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "unable to open host file: %s\n", nodefile);
    }

    /* First pass: count the lines to get an upper bound on the node count */
    for (num_lines = 0; fgets(line, HYDT_PBS_STRLEN, fp); num_lines++);

    /* Allocate the array of nodes; jumps to fn_fail on failure, where the
     * file is still closed. */
    HYDU_MALLOC(nodes, struct HYDT_bscd_pbs_node *,
                num_lines * sizeof(struct HYDT_bscd_pbs_node), status);

    /* Second pass: fill in the entries.  The loop is capped at the count
     * from the first pass so that a file which grew in between cannot
     * overrun the array. */
    rewind(fp);
    num_nodes = 0;
    while (num_nodes < num_lines && fgets(line, HYDT_PBS_STRLEN, fp)) {
        char *name = HYD_pbs_trim_space(line);

        /* The trim helper returns NULL when the line holds no name */
        if (name == NULL)
            continue;

        nodes[num_nodes].id = num_nodes;
        /* 'name' points into 'line', so it is shorter than HYDT_PBS_STRLEN
         * and strncpy() NUL-terminates (and zero-pads) the destination.
         * Assumes the name field holds HYDT_PBS_STRLEN bytes -- TODO confirm
         * against the struct definition. */
        strncpy(nodes[num_nodes].name, name, HYDT_PBS_STRLEN);
        num_nodes++;
    }

    /* Update global PBS data structure */
    HYDT_bscd_pbs_sys->num_nodes = num_nodes;
    HYDT_bscd_pbs_sys->nodes = nodes;

  fn_exit:
    if (fp != NULL)
        fclose(fp);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
HYD_status HYDT_bsci_launcher_pbs_init(void)
{
char *nodefile = NULL;
int ierr;
HYD_status status = HYD_SUCCESS;
int idx;
int ierr;
HYDU_FUNC_ENTER();
......@@ -109,22 +42,6 @@ HYD_status HYDT_bsci_launcher_pbs_init(void)
HYDT_bscd_pbs_sys->num_nodes = 0;
HYDT_bscd_pbs_sys->nodes = NULL;
/* Parse PBS_NODEFILE for all the node names */
nodefile = (char *) getenv("PBS_NODEFILE");
if (!nodefile)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"PBS_NODEFILE is undefined in PBS launcher.\n");
HYPU_pbs_parse_for_nodes(nodefile);
if (HYDT_bsci_info.debug) {
for (idx=0; idx<HYDT_bscd_pbs_sys->num_nodes; idx++) {
HYDU_dump(stdout, "ID=%d, name=%s.\n",
(HYDT_bscd_pbs_sys->nodes[idx]).id,
(HYDT_bscd_pbs_sys->nodes[idx]).name);
}
}
fn_exit:
HYDU_FUNC_EXIT();
return status;
......
......@@ -21,6 +21,71 @@ double TS_Wtime( void )
}
#endif
/*
 * Strip leading and trailing white space from str, in place.
 *
 * Trailing white space is cut off by writing a NUL terminator into str right
 * after its last non-white-space character.  Leading white space is skipped
 * by returning a pointer into str; no bytes are moved.
 *
 * Returns a pointer to the first non-white-space character of str, or NULL
 * when str is empty or holds nothing but white space (str is left untouched
 * in that case).
 */
static char *HYDI_pbs_trim_space(char *str)
{
    char *first = str;
    size_t end = strlen(str);

    /* Back up over trailing white space; 'end' is one past the last
     * character that is kept.  The cast keeps isspace() well defined for
     * bytes with the high bit set. */
    while (end > 0 && isspace((unsigned char) str[end - 1]))
        end--;

    /* Nothing but white space (or an empty string): no name to return */
    if (end == 0)
        return NULL;

    str[end] = '\0';

    /* At least one non-white-space character remains, so this scan stops
     * before the terminator. */
    while (isspace((unsigned char) *first))
        first++;

    return first;
}
/*
 * Read the PBS node file and publish its entries in HYDT_bscd_pbs_sys.
 *
 * The file is read twice: once to count lines so the node array can be
 * sized, and once to fill it.  Each stored entry gets the white-space-trimmed
 * text of its line as the name and its position in the array as the id.
 * Lines that contain no name (blank or white space only) are ignored.
 *
 * On success HYDT_bscd_pbs_sys->nodes and ->num_nodes are updated.  On
 * failure the function returns a non-success status and leaves the global
 * structure untouched.  The file handle is closed on every path.
 *
 * NOTE(review): fgets() reads at most HYDT_PBS_STRLEN-1 characters per call,
 * so a longer line would be consumed as several entries -- confirm that node
 * names can never be that long.
 */
static HYD_status HYDI_pbs_parse_for_nodes(const char *nodefile)
{
    char line[HYDT_PBS_STRLEN];
    FILE *fp = NULL;
    int num_lines;
    int num_nodes;
    struct HYDT_bscd_pbs_node *nodes = NULL;
    HYD_status status = HYD_SUCCESS;

    if ((fp = fopen(nodefile, "r")) == NULL) {
        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                            "PBS nodefile, %s, can't be opened.\n", nodefile);
    }

    /* First pass: count the lines to get an upper bound on the node count */
    for (num_lines = 0; fgets(line, HYDT_PBS_STRLEN, fp); num_lines++);

    /* Allocate the array of nodes; jumps to fn_fail on failure, where the
     * file is still closed. */
    HYDU_MALLOC(nodes, struct HYDT_bscd_pbs_node *,
                num_lines * sizeof(struct HYDT_bscd_pbs_node), status);

    /* Second pass: fill in the entries.  The loop is capped at the count
     * from the first pass so that a file which grew in between cannot
     * overrun the array. */
    rewind(fp);
    num_nodes = 0;
    while (num_nodes < num_lines && fgets(line, HYDT_PBS_STRLEN, fp)) {
        char *name = HYDI_pbs_trim_space(line);

        /* The trim helper returns NULL when the line holds no name */
        if (name == NULL)
            continue;

        nodes[num_nodes].id = num_nodes;
        /* 'name' points into 'line', so it is shorter than HYDT_PBS_STRLEN
         * and strncpy() NUL-terminates (and zero-pads) the destination.
         * Assumes the name field holds HYDT_PBS_STRLEN bytes -- TODO confirm
         * against the struct definition. */
        strncpy(nodes[num_nodes].name, name, HYDT_PBS_STRLEN);
        num_nodes++;
    }

    /* Update global PBS data structure */
    HYDT_bscd_pbs_sys->num_nodes = num_nodes;
    HYDT_bscd_pbs_sys->nodes = nodes;

  fn_exit:
    if (fp != NULL)
        fclose(fp);
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
/* Comparison function for bsearch() of array of pbs_node[] based on name. */
static int cmp_pbsnode(const void *m1, const void *m2)
{
......@@ -33,9 +98,10 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd)
{
int proxy_count, spawned_count, args_count;
int ierr;
int is_rmk_pbs, idx, ierr;
struct HYD_proxy *proxy;
char *targs[HYD_NUM_TMP_STRINGS];
char *nodefile = NULL;
HYD_status status = HYD_SUCCESS;
#if defined(TS_PROFILE)
......@@ -44,12 +110,24 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
HYDU_FUNC_ENTER();
/* If the RMK is not PBS, error out for the time being. This needs
* to be modified to reparse the host file and find the spawn IDs
* separately. */
if (strcmp(HYDT_bsci_info.rmk, "pbs"))
/* Determine which RMK is being used */
is_rmk_pbs = !strcmp(HYDT_bsci_info.rmk, "pbs");
/* Parse PBS_NODEFILE for all the node names */
nodefile = (char *) getenv("PBS_NODEFILE");
if (!nodefile)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"using a non-PBS RMK with the PBS launcher is not supported\n");
"PBS_NODEFILE is undefined in PBS launcher.\n");
HYDI_pbs_parse_for_nodes(nodefile);
if (HYDT_bsci_info.debug) {
for (idx=0; idx<HYDT_bscd_pbs_sys->num_nodes; idx++) {
HYDU_dump(stdout, "ID=%d, name=%s.\n",
(HYDT_bscd_pbs_sys->nodes[idx]).id,
(HYDT_bscd_pbs_sys->nodes[idx]).name);
}
}
proxy_count = 0;
for (proxy = proxy_list; proxy; proxy = proxy->next)
......@@ -73,9 +151,11 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
HYDU_MALLOC(HYDT_bscd_pbs_sys->events, tm_event_t *,
HYDT_bscd_pbs_sys->size * sizeof(tm_event_t), status);
/* Sort the pbs_node[] in ascending name order for bsearch() */
qsort(HYDT_bscd_pbs_sys->nodes, HYDT_bscd_pbs_sys->num_nodes,
sizeof(struct HYDT_bscd_pbs_node), cmp_pbsnode);
/* Sort the pbs_node[] in ascending name order for bsearch() when RMK=PBS */
if (is_rmk_pbs)
qsort(HYDT_bscd_pbs_sys->nodes, HYDT_bscd_pbs_sys->num_nodes,
sizeof(struct HYDT_bscd_pbs_node), cmp_pbsnode);
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
......@@ -83,33 +163,67 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
* returns a taskID for the process + a eventID for the spawning.
* The returned taskID won't be ready for access until tm_poll()
* has returned the corresponding eventID. */
spawned_count = 0;
for (proxy = proxy_list; proxy; proxy = proxy->next) {
struct HYDT_bscd_pbs_node key, *found;
strncpy(key.name, proxy->node->hostname, HYDT_PBS_STRLEN);
found = bsearch(&key,
HYDT_bscd_pbs_sys->nodes,
HYDT_bscd_pbs_sys->num_nodes,
sizeof(struct HYDT_bscd_pbs_node), cmp_pbsnode);
if (found == NULL) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"Cannot locate proxy, %s, in PBS nodefile\n",
proxy->node->hostname);
if (is_rmk_pbs) {
if (HYDT_bsci_info.debug)
HYDU_dump(stdout,"RMK == PBS\n");
spawned_count = 0;
for (proxy = proxy_list; proxy; proxy = proxy->next) {
struct HYDT_bscd_pbs_node key, *found;
strncpy(key.name, proxy->node->hostname, HYDT_PBS_STRLEN);
found = bsearch(&key,
HYDT_bscd_pbs_sys->nodes,
HYDT_bscd_pbs_sys->num_nodes,
sizeof(struct HYDT_bscd_pbs_node), cmp_pbsnode);
if (found == NULL) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"Can't locate proxy, %s, in PBS nodefile\n",
proxy->node->hostname);
}
targs[args_count] = HYDU_int_to_str(proxy->proxy_id);
ierr = tm_spawn(args_count + 1, targs, NULL, found->id,
HYDT_bscd_pbs_sys->taskIDs + spawned_count,
HYDT_bscd_pbs_sys->events + spawned_count);
if (HYDT_bsci_info.debug)
HYDU_dump(stdout, "DEBUG: %d, tm_spawn(TM_nodeID=%d,name=%s)\n",
spawned_count, found->id, found->name);
if (ierr != TM_SUCCESS) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_spawn() fails with TM err %d!\n", ierr);
}
spawned_count++;
}
targs[args_count] = HYDU_int_to_str(proxy->proxy_id);
ierr = tm_spawn(args_count + 1, targs, NULL, found->id,
HYDT_bscd_pbs_sys->taskIDs + spawned_count,
HYDT_bscd_pbs_sys->events + spawned_count);
HYDT_bscd_pbs_sys->spawned_count = spawned_count;
}
else {
char *spawned_name;
spawned_count = 0;
spawned_name = NULL;
if (HYDT_bsci_info.debug)
HYDU_dump(stdout, "PBS_DEBUG: %d, tm_spawn(TM_nodeID=%d,name=%s)\n",
spawned_count, found->id, proxy->node->hostname);
if (ierr != TM_SUCCESS) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_spawn() fails with TM err %d!\n", ierr);
HYDU_dump(stdout,"RMK != PBS\n");
for (idx=0; idx < HYDT_bscd_pbs_sys->num_nodes; idx++) {
struct HYDT_bscd_pbs_node *found;
if ( !spawned_name
|| strcmp(spawned_name, (HYDT_bscd_pbs_sys->nodes[idx]).name) )
spawned_name = (HYDT_bscd_pbs_sys->nodes[idx]).name;
else
continue;
found = &(HYDT_bscd_pbs_sys->nodes[idx]);
/* ? Pavan : Not sure what proxyID is, use spawned_count for now */
targs[args_count] = HYDU_int_to_str(spawned_count);
ierr = tm_spawn(args_count + 1, targs, NULL, found->id,
HYDT_bscd_pbs_sys->taskIDs + spawned_count,
HYDT_bscd_pbs_sys->events + spawned_count);
if (HYDT_bsci_info.debug)
HYDU_dump(stdout, "DEBUG: %d, tm_spawn(TM_nodeID=%d,name=%s)\n",
spawned_count, found->id, found->name);
if (ierr != TM_SUCCESS) {
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_spawn() fails with TM err %d!\n", ierr);
}
spawned_count++;
}
spawned_count++;
HYDT_bscd_pbs_sys->spawned_count = spawned_count;
}
HYDT_bscd_pbs_sys->spawned_count = spawned_count;
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_spawn() loop takes %f\n", etime-stime );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment