Commit e7c122ef authored by Misbah Mubarak's avatar Misbah Mubarak

Merge branch 'opt-mode-fix' into 'master'

Opt mode fix

See merge request !32
parents b25e8d10 fa3141ea
......@@ -156,7 +156,7 @@ struct codes_workload_op
*/
/* what type of operation this is */
enum codes_workload_op_type op_type;
int op_type;
/* currently only used by network workloads */
double start_time;
double end_time;
......@@ -198,7 +198,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the recv */
int count; /* number of elements to be received */
int tag; /* tag of the message */
int32_t req_id;
int req_id;
} send;
struct {
/* TODO: not sure why source rank is here */
......@@ -208,7 +208,7 @@ struct codes_workload_op
int16_t data_type; /* MPI data type to be matched with the send */
int count; /* number of elements to be sent */
int tag; /* tag of the message */
int32_t req_id;
int req_id;
} recv;
/* TODO: non-stub for other collectives */
struct {
......@@ -216,14 +216,14 @@ struct codes_workload_op
} collective;
struct {
int count;
int32_t* req_ids;
int* req_ids;
} waits;
struct {
int32_t req_id;
int req_id;
} wait;
struct
{
int32_t req_id;
int req_id;
}
free;
}u;
......
......@@ -13,13 +13,13 @@ LPGROUPS
PARAMS
{
# packet size in the network
packet_size="1024";
packet_size="2048";
modelnet_order=( "dragonfly_custom","dragonfly_custom_router" );
# scheduler options
modelnet_scheduler="fcfs";
# chunk size in the network (when chunk size = packet size, packets will not be
# divided into chunks)
chunk_size="1024";
chunk_size="2048";
# modelnet_scheduler="round-robin";
# number of routers within each group
# this is dictated by the dragonfly configuration files
......
......@@ -15,6 +15,7 @@
#include "codes/codes-workload.h"
#include "codes/quickhash.h"
#include "codes/codes-jobmap.h"
#include "codes/jenkins-hash.h"
#include "codes/model-net.h"
#if ENABLE_CORTEX
......@@ -46,6 +47,7 @@ extern struct codes_jobmap_ctx *jobmap_ctx;
static struct qhash_table *rank_tbl = NULL;
static int rank_tbl_pop = 0;
static unsigned int max_threshold = INT_MAX;
/* context of the MPI workload */
typedef struct rank_mpi_context
{
......@@ -54,6 +56,7 @@ typedef struct rank_mpi_context
// whether we've seen an init op (needed for timing correctness)
int is_init;
int num_reqs;
unsigned int num_ops;
int64_t my_rank;
double last_op_time;
double init_time;
......@@ -106,6 +109,12 @@ static inline double time_to_us_lf(dumpi_clock t){
/* Convert a dumpi_clock timestamp (seconds + nanoseconds) to a single
 * double-precision value expressed in nanoseconds. */
static inline double time_to_ns_lf(dumpi_clock t){
    const double whole_seconds_ns = (double) t.sec * 1e9;
    const double fractional_ns = (double) t.nsec;
    return whole_seconds_ns + fractional_ns;
}
/* Hash a DUMPI request id through Bob Jenkins' lookup3 hashlittle2 to
 * produce a well-scattered id.  Only the primary 32-bit hash word is
 * returned; the secondary word is discarded.
 * NOTE(review): the uint32_t result is implicitly narrowed to int32_t on
 * return — values above INT32_MAX convert implementation-defined. */
static int32_t get_unique_req_id(int32_t request_id)
{
    uint32_t primary_hash = 0;
    uint32_t secondary_hash = 0;
    bj_hashlittle2(&request_id, sizeof(int32_t), &primary_hash, &secondary_hash);
    return primary_hash;
}
/*static inline double time_to_s_lf(dumpi_clock t){
return (double) t.sec + (double) t.nsec / 1e9;
}*/
......@@ -338,10 +347,10 @@ int handleDUMPIWaitsome(const dumpi_waitsome *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITSOME;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t));
wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int32_t)prm->requests[i];
wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
update_times_and_insert(&wrkld_per_rank, wall, myctx);
return 0;
......@@ -363,10 +372,10 @@ int handleDUMPIWaitany(const dumpi_waitany *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITANY;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t));
wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int32_t)prm->requests[i];
wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
update_times_and_insert(&wrkld_per_rank, wall, myctx);
return 0;
......@@ -389,7 +398,7 @@ int handleDUMPIWaitall(const dumpi_waitall *prm, uint16_t thread,
wrkld_per_rank.op_type = CODES_WK_WAITALL;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int32_t*)malloc(prm->count * sizeof(int32_t));
wrkld_per_rank.u.waits.req_ids = (int*)malloc(prm->count * sizeof(int));
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
......@@ -785,6 +794,7 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank)
my_ctx->is_init = 0;
my_ctx->num_reqs = 0;
my_ctx->dumpi_mpi_array = dumpi_init_op_data();
my_ctx->num_ops = 0;
if(rank < 10)
sprintf(file_name, "%s000%d.bin", dumpi_params->file_name, rank);
......@@ -931,8 +941,20 @@ int dumpi_trace_nw_workload_load(const char* params, int app_id, int rank)
while(active && !finalize_reached)
{
num_calls++;
my_ctx->num_ops++;
#ifdef ENABLE_CORTEX
active = cortex_undumpi_read_single_call(profile, callarr, transarr, (void*)my_ctx, &finalize_reached);
if(my_ctx->num_ops < max_threshold)
active = cortex_undumpi_read_single_call(profile, callarr, transarr, (void*)my_ctx, &finalize_reached);
else
{
struct codes_workload_op op;
op.op_type = CODES_WK_END;
op.start_time = my_ctx->last_op_time;
op.end_time = my_ctx->last_op_time + 1;
dumpi_insert_next_op(my_ctx->dumpi_mpi_array, &op);
break;
}
#else
active = undumpi_read_single_call(profile, callarr, (void*)my_ctx, &finalize_reached);
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment