Commit e7c122ef authored by Misbah Mubarak

Merge branch 'opt-mode-fix' into 'master'

Opt mode fix

See merge request !32
parents b25e8d10 fa3141ea
...@@ -156,7 +156,7 @@ struct codes_workload_op ...@@ -156,7 +156,7 @@ struct codes_workload_op
*/ */
/* what type of operation this is */ /* what type of operation this is */
enum codes_workload_op_type op_type; int op_type;
/* currently only used by network workloads */ /* currently only used by network workloads */
double start_time; double start_time;
double end_time; double end_time;
...@@ -198,7 +198,7 @@ struct codes_workload_op ...@@ -198,7 +198,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the recv */ int16_t data_type; /* MPI data type to be matched with the recv */
int count; /* number of elements to be received */ int count; /* number of elements to be received */
int tag; /* tag of the message */ int tag; /* tag of the message */
int32_t req_id; int req_id;
} send; } send;
struct { struct {
/* TODO: not sure why source rank is here */ /* TODO: not sure why source rank is here */
...@@ -208,7 +208,7 @@ struct codes_workload_op ...@@ -208,7 +208,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the send */ int16_t data_type; /* MPI data type to be matched with the send */
int count; /* number of elements to be sent */ int count; /* number of elements to be sent */
int tag; /* tag of the message */ int tag; /* tag of the message */
int32_t req_id; int req_id;
} recv; } recv;
/* TODO: non-stub for other collectives */ /* TODO: non-stub for other collectives */
struct { struct {
...@@ -216,14 +216,14 @@ struct codes_workload_op ...@@ -216,14 +216,14 @@ struct codes_workload_op
} collective; } collective;
struct { struct {
int count; int count;
int32_t* req_ids; int* req_ids;
} waits; } waits;
struct { struct {
int32_t req_id; int req_id;
} wait; } wait;
struct struct
{ {
int32_t req_id; int req_id;
} }
free; free;
}u; }u;
......
...@@ -13,13 +13,13 @@ LPGROUPS ...@@ -13,13 +13,13 @@ LPGROUPS
PARAMS PARAMS
{ {
# packet size in the network # packet size in the network
packet_size="1024"; packet_size="2048";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" ); modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options # scheduler options
modelnet_scheduler="fcfs"; modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be # chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks) # divided into chunks)
chunk_size="1024"; chunk_size="2048";
# modelnet_scheduler="round-robin"; # modelnet_scheduler="round-robin";
# number of routers within each group # number of routers within each group
# this is dictated by the dragonfly configuration files # this is dictated by the dragonfly configuration files
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "codes/codes-workload.h" #include "codes/codes-workload.h"
#include "codes/quickhash.h" #include "codes/quickhash.h"
#include "codes/codes-jobmap.h" #include "codes/codes-jobmap.h"
#include "codes/jenkins-hash.h"
#include "codes/model-net.h" #include "codes/model-net.h"
#if ENABLE_CORTEX #if ENABLE_CORTEX
...@@ -46,6 +47,7 @@ extern struct codes_jobmap_ctx *jobmap_ctx; ...@@ -46,6 +47,7 @@ extern struct codes_jobmap_ctx *jobmap_ctx;
static struct qhash_table *rank_tbl = NULL; static struct qhash_table *rank_tbl = NULL;
static int rank_tbl_pop = 0; static int rank_tbl_pop = 0;
static unsigned int max_threshold = INT_MAX;
/* context of the MPI workload */ /* context of the MPI workload */
typedef struct rank_mpi_context typedef struct rank_mpi_context
{ {
...@@ -54,6 +56,7 @@ typedef struct rank_mpi_context ...@@ -54,6 +56,7 @@ typedef struct rank_mpi_context
// whether we've seen an init op (needed for timing correctness) // whether we've seen an init op (needed for timing correctness)
int is_init; int is_init;
int num_reqs; int num_reqs;
unsigned int num_ops;
int64_t my_rank; int64_t my_rank;
double last_op_time; double last_op_time;
double init_time; double init_time;
...@@ -106,6 +109,12 @@ static inline double time_to_us_lf(dumpi_clock t){ ...@@ -106,6 +109,12 @@ static inline double time_to_us_lf(dumpi_clock t){
static inline double time_to_ns_lf(dumpi_clock t){ static inline double time_to_ns_lf(dumpi_clock t){
return (double) t.sec * 1e9 + (double) t.nsec; return (double) t.sec * 1e9 + (double) t.nsec;
} }
/*
 * Derive a hashed 32-bit id from a request id.
 *
 * The sizeof(int32_t) bytes of request_id are passed to bj_hashlittle2()
 * together with two 32-bit in/out words that both start at zero; the value
 * left in the first word is returned, converted to int32_t.
 *
 * NOTE(review): presumably the two zero words act as hash seeds -- confirm
 * against codes/jenkins-hash.h.
 * NOTE(review): the result is a hash, so distinct request ids could in
 * principle map to the same value -- confirm callers tolerate that.
 */
static int32_t get_unique_req_id(int32_t request_id)
{
    uint32_t primary = 0;
    uint32_t secondary = 0;

    /* secondary must be supplied to the call but its result is not used */
    bj_hashlittle2(&request_id, sizeof request_id, &primary, &secondary);

    return (int32_t)primary;
}
/*static inline double time_to_s_lf(dumpi_clock t){ /*static inline double time_to_s_lf(dumpi_clock t){
return (double) t.sec + (double) t.nsec / 1e9; return (double) t.sec + (double) t.nsec / 1e9;
}*/ }*/
...@@ -338,10 +347,10 @@ int handleDUMPIWaitsome(const dumpi_waitsome *prm, uint16_t thread, ...@@ -338,10 +347,10 @@ int handleDUMPIWaitsome(const dumpi_waitsome *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITSOME; wrkld_per_rank.op_type = CODES_WK_WAITSOME;
wrkld_per_rank.u.waits.count = prm->count; wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t)); wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ ) for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int32_t)prm->requests[i]; wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
update_times_and_insert(&wrkld_per_rank, wall, myctx); update_times_and_insert(&wrkld_per_rank, wall, myctx);
return 0; return 0;
...@@ -363,10 +372,10 @@ int handleDUMPIWaitany(const dumpi_waitany *prm, uint16_t thread, ...@@ -363,10 +372,10 @@ int handleDUMPIWaitany(const dumpi_waitany *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITANY; wrkld_per_rank.op_type = CODES_WK_WAITANY;
wrkld_per_rank.u.waits.count = prm->count; wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t)); wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ ) for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int32_t)prm->requests[i]; wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
update_times_and_insert(&wrkld_per_rank, wall, myctx); update_times_and_insert(&wrkld_per_rank, wall, myctx);
return 0; return 0;
...@@ -389,7 +398,7 @@ int handleDUMPIWaitall(const dumpi_waitall *prm, uint16_t thread, ...@@ -389,7 +398,7 @@ int handleDUMPIWaitall(const dumpi_waitall *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITALL; wrkld_per_rank.op_type = CODES_WK_WAITALL;
wrkld_per_rank.u.waits.count = prm->count; wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t)); wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ ) for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i]; wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
...@@ -785,6 +794,7 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank) ...@@ -785,6 +794,7 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank)
my_ctx->is_init = 0; my_ctx->is_init = 0;
my_ctx->num_reqs = 0; my_ctx->num_reqs = 0;
my_ctx->dumpi_mpi_array = dumpi_init_op_data(); my_ctx->dumpi_mpi_array = dumpi_init_op_data();
my_ctx->num_ops = 0;
if(rank < 10) if(rank < 10)
sprintf(file_name, "%s000%d.bin", dumpi_params->file_name, rank); sprintf(file_name, "%s000%d.bin", dumpi_params->file_name, rank);
...@@ -931,8 +941,20 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank) ...@@ -931,8 +941,20 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank)
while(active && !finalize_reached) while(active && !finalize_reached)
{ {
num_calls++; num_calls++;
my_ctx->num_ops++;
#ifdef ENABLE_CORTEX #ifdef ENABLE_CORTEX
active = cortex_undumpi_read_single_call(profile, callarr, transarr, (void*)my_ctx, &finalize_reached); if(my_ctx->num_ops < max_threshold)
active = cortex_undumpi_read_single_call(profile, callarr, transarr, (void*)my_ctx, &finalize_reached);
else
{
struct codes_workload_op op;
op.op_type = CODES_WK_END;
op.start_time = my_ctx->last_op_time;
op.end_time = my_ctx->last_op_time + 1;
dumpi_insert_next_op(my_ctx->dumpi_mpi_array, &op);
break;
}
#else #else
active = undumpi_read_single_call(profile, callarr, (void*)my_ctx, &finalize_reached); active = undumpi_read_single_call(profile, callarr, (void*)my_ctx, &finalize_reached);
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment