Commit 72f1aaf6 authored by Anthony Chan
Browse files

[svn-r8963] temporary commit, moved development back to laptop as fusion has negative allocation.

parent 895b5bd5
......@@ -31,7 +31,8 @@ HYD_status HYDT_bsci_launcher_pbs_init(void)
* the number of processes allocated in this PBS job. */
ierr = tm_init(NULL, &(HYDT_bscd_pbs_sys->tm_root));
if (ierr != TM_SUCCESS)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "tm_init() fails with TM err=%d.\n",
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_init() fails with TM err=%d.\n",
ierr);
HYDT_bscd_pbs_sys->spawned_count = 0;
HYDT_bscd_pbs_sys->size = 0;
......
......@@ -8,16 +8,32 @@
#include "bsci.h"
#include "pbs.h"
/* #define TS_PROFILE 1 */
#if defined(TS_PROFILE)
#include <sys/time.h>
/* Forward declaration; the definition follows immediately below. */
double TS_Wtime(void);

/*
 * TS_Wtime - read the current time of day as a floating-point second count.
 *
 * Queries gettimeofday() and folds the whole-second and microsecond fields
 * of the resulting timeval into a single double.  Callers take the
 * difference of two readings to time a section of code.
 *
 * Returns: tv_sec + tv_usec * 1e-6, in seconds.  The return status of
 * gettimeofday() is not inspected.
 */
double TS_Wtime(void)
{
    struct timeval now;
    double whole_seconds;
    double fractional_seconds;

    gettimeofday(&now, NULL);

    whole_seconds = (double) now.tv_sec;
    /* tv_usec counts microseconds; scale it down to seconds. */
    fractional_seconds = (double) now.tv_usec * 0.000001;

    return whole_seconds + fractional_seconds;
}
#endif
HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
int *control_fd)
{
int proxy_count, spawned_count;
int args_count, events_count;
int ierr, idx, spawned_hostID;
int proxy_count, spawned_count, args_count;
int ierr, spawned_hostID;
struct HYD_proxy *proxy;
char *targs[HYD_NUM_TMP_STRINGS];
HYD_status status = HYD_SUCCESS;
#if defined(TS_PROFILE)
double stime, etime;
#endif
HYDU_FUNC_ENTER();
/* If the RMK is not PBS, error out for the time being. This needs
......@@ -35,7 +51,8 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
/* Check if number of proxies > number of processes in this PBS job */
if (proxy_count > (HYDT_bscd_pbs_sys->tm_root).tm_nnodes)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"Number of proxies(%d) > TM node count(%d)!\n", proxy_count,
"Number of proxies(%d) > TM node count(%d)!\n",
proxy_count,
(HYDT_bscd_pbs_sys->tm_root).tm_nnodes);
/* Duplicate the args in local copy, targs */
......@@ -48,14 +65,17 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
HYDU_MALLOC(HYDT_bscd_pbs_sys->events, tm_event_t *,
HYDT_bscd_pbs_sys->size * sizeof(tm_event_t), status);
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
/* Spawn a process on each allocated node through tm_spawn() which
* returns a taskID for the process + an eventID for the spawning.
* The returned taskID won't be ready for access until tm_poll()
* has returned the corresponding eventID. */
spawned_count = 0;
spawned_hostID = 0;
spawned_count = 0;
spawned_hostID = 0;
for (proxy = proxy_list; proxy; proxy = proxy->next) {
targs[args_count] = HYDU_int_to_str(spawned_count);
targs[args_count] = HYDU_int_to_str(proxy->proxy_id);
ierr = tm_spawn(args_count + 1, targs, NULL, spawned_hostID,
HYDT_bscd_pbs_sys->taskIDs + spawned_count,
HYDT_bscd_pbs_sys->events + spawned_count);
......@@ -70,9 +90,17 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
spawned_count++;
}
HYDT_bscd_pbs_sys->spawned_count = spawned_count;
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_spawn() loop takes %f\n", etime-stime );
#endif
#ifdef 0
/* Poll the TM for the spawning eventID returned by tm_spawn() to
* determine if the spawned process has started. */
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
events_count = 0;
while (events_count < spawned_count) {
tm_event_t event = -1;
......@@ -80,7 +108,8 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
ierr = tm_poll(TM_NULL_EVENT, &event, 0, &poll_err);
if (ierr != TM_SUCCESS)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_poll(spawn_event) fails with TM err %d.\n", ierr);
"tm_poll(spawn_event) fails with TM err %d.\n",
ierr);
if (event != TM_NULL_EVENT) {
for (idx = 0; idx < spawned_count; idx++) {
if (HYDT_bscd_pbs_sys->events[idx] == event) {
......@@ -90,11 +119,16 @@ HYD_status HYDT_bscd_pbs_launch_procs(char **args, struct HYD_proxy *proxy_list,
event, HYDT_bscd_pbs_sys->taskIDs[idx]);
}
events_count++;
break; /* break from for(idx<spawned_count) loop */
break; /* break from for(idx<spawned_count) loop */
}
}
}
}
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_poll(spawn_events) loop takes %f\n", etime-stime );
#endif
#endif
fn_exit:
HYDU_FUNC_EXIT();
......
......@@ -8,6 +8,12 @@
#include "bsci.h"
#include "pbs.h"
/* #define TS_PROFILE 1 */
#if defined(TS_PROFILE)
double TS_Wtime( void );
#endif
HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
{
int time_elapsed;
......@@ -16,6 +22,10 @@ HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
struct timeval start_tval, curr_tval;
HYD_status status = HYD_SUCCESS;
#if defined(TS_PROFILE)
double stime, etime;
#endif
HYDU_FUNC_ENTER();
/* Allocate memory for taskobits[] */
......@@ -30,6 +40,42 @@ HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
*/
gettimeofday(&start_tval, NULL);
/* Poll the TM for the spawning eventID returned by tm_spawn() to
* determine if the spawned process has started. */
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
events_count = 0;
while (events_count < spawned_count) {
tm_event_t event = -1;
int poll_err;
ierr = tm_poll(TM_NULL_EVENT, &event, 0, &poll_err);
if (ierr != TM_SUCCESS)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_poll(spawn_event) fails with TM err %d.\n",
ierr);
if (event != TM_NULL_EVENT) {
for (idx = 0; idx < spawned_count; idx++) {
if (HYDT_bscd_pbs_sys->events[idx] == event) {
if (HYDT_bsci_info.debug) {
HYDU_dump(stdout,
"PBS_DEBUG: Event %d received, task %d has started.\n",
event, HYDT_bscd_pbs_sys->taskIDs[idx]);
}
events_count++;
break; /* break from for(idx<spawned_count) loop */
}
}
}
}
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_poll(spawn_events) loop takes %f\n", etime-stime );
#endif
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
/* Register with TM to be notified of the obituary of each spawned process. */
for (idx = 0; idx < spawned_count; idx++) {
/*
......@@ -37,7 +83,8 @@ HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
* the process labelled by taskID dies
*/
ierr = tm_obit(HYDT_bscd_pbs_sys->taskIDs[idx],
HYDT_bscd_pbs_sys->taskobits + idx, HYDT_bscd_pbs_sys->events + idx);
HYDT_bscd_pbs_sys->taskobits + idx,
HYDT_bscd_pbs_sys->events + idx);
if (ierr != TM_SUCCESS)
HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
"tm_obit() fails with TM err=%d.\n", ierr);
......@@ -49,7 +96,14 @@ HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
HYDT_bscd_pbs_sys->taskIDs[idx],
HYDT_bscd_pbs_sys->taskobits[idx]);
}
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_obit() loop takes %f\n", etime-stime );
#endif
#if defined(TS_PROFILE)
stime = TS_Wtime();
#endif
/* Poll if the spawned process has exited */
events_count = 0;
/* Subtract all the processes that have already exited */
......@@ -93,6 +147,10 @@ HYD_status HYDT_bscd_pbs_wait_for_completion(int timeout)
}
}
}
#if defined(TS_PROFILE)
etime = TS_Wtime();
HYDU_dump(stdout, "tm_poll(obit_events) loop takes %f\n", etime-stime );
#endif
if (HYDT_bsci_info.debug) {
HYDU_dump(stdout, "\nPBS_DEBUG: Done with polling obit events!\n");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment