Commit 59db60d9 authored by Misbah Mubarak

some fixes in workload-combine

parent a990bdc3
@@ -24,6 +24,7 @@ typedef struct codes_workload_info codes_workload_info;
typedef struct scala_trace_params scala_trace_params;
typedef struct dumpi_trace_params dumpi_trace_params;
struct bgp_params
{
/* We have the number of ranks passed in from the bg/p model because
@@ -117,15 +118,15 @@ enum codes_workload_op_type
/* Generic collective operation */
CODES_WK_COL,
/* Waitall operation */
-CODES_NW_WAITALL,
+CODES_WK_WAITALL,
/* Wait operation */
-CODES_NW_WAIT,
+CODES_WK_WAIT,
/* Waitsome operation */
-CODES_NW_WAITSOME,
+CODES_WK_WAITSOME,
/* Waitany operation */
-CODES_NW_WAITANY,
+CODES_WK_WAITANY,
/* Testall operation */
-CODES_NW_TESTALL,
+CODES_WK_TESTALL,
};
/* I/O operation parameters */
......
1- The scala-trace-data file has the MPI event data recorded from the phold benchmark for 16 MPI ranks.
2- The offsets file has the offset at which each LP should start reading from the scala-trace-data file (a small sketch of this lookup follows below).
3- To run the program for the 16 recorded MPI ranks (here the simulation itself runs on 8 MPI processes) do:
mpirun -np 8 ./codes-nw-test --sync=3 --workload_type=scalatrace --total_nw_lps=16 --offset_file="offsets" --workload_file="scala-trace-data"
The total_nw_lps argument equals the number of MPI ranks on which the scala-trace data was recorded. In this case,
total_nw_lps = 16 because the data was recorded on 16 MPI ranks.
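
The snippet below is a minimal sketch, not part of this commit, of how an LP could use its entry in the offsets file to position itself in the scala-trace-data file; the function name and the one-offset-per-line text format of the offsets file are assumptions made for illustration.

#include <stdio.h>

/* hypothetical helper: open the trace file and seek to the offset recorded
 * for this LP, assuming the offsets file lists one decimal offset per line
 * and line i belongs to LP i */
static FILE* open_trace_at_offset(const char* trace_path, const char* offset_path, int lp_id)
{
    long offset = 0;
    int i;
    FILE* offs = fopen(offset_path, "r");
    if(!offs)
        return NULL;
    for(i = 0; i <= lp_id; i++)
    {
        if(fscanf(offs, "%ld", &offset) != 1)
        {
            fclose(offs);
            return NULL;
        }
    }
    fclose(offs);
    FILE* trace = fopen(trace_path, "r");
    if(!trace)
        return NULL;
    /* this LP starts reading its MPI events at its own offset */
    fseek(trace, offset, SEEK_SET);
    return trace;
}

With this, a call such as open_trace_at_offset("scala-trace-data", "offsets", my_lp_id) would return a stream already positioned at the first event belonging to that LP.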
/*
* Copyright (C) 2014 University of Chicago
* See COPYRIGHT notice in top-level directory.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>
#include <ross.h>
#include <assert.h>
#include "dumpi/libundumpi/bindings.h"
#include "dumpi/libundumpi/libundumpi.h"
#include "codes/codes-nw-workload.h"
#include "codes-nw-workload-method.h"
#include "codes/quickhash.h"
#define RANK_HASH_TABLE_SIZE 400
#define MAX_LENGTH 512
#define MAX_OPERATIONS 32768
#define DUMPI_IGNORE_DELAY 100
static struct qhash_table *rank_tbl = NULL;
static int rank_tbl_pop = 0;
/* context of the MPI workload */
typedef struct rank_mpi_context
{
int64_t my_rank;
double last_op_time;
void* dumpi_mpi_array;
struct qhash_head hash_link;
} rank_mpi_context;
/* Holds all the data about MPI operations from the log */
typedef struct dumpi_op_data_array
{
struct mpi_event_list* op_array;
int64_t op_arr_ndx;
int64_t op_arr_cnt;
} dumpi_op_data_array;
/* load the trace */
int dumpi_trace_nw_workload_load(const char* params, int rank);
/* dumpi implementation of get next operation in the workload */
void dumpi_trace_nw_workload_get_next(int rank, struct mpi_event_list *op);
/* get the number of bytes for a DUMPI data type */
int get_num_bytes(dumpi_datatype dt);
/* computes the delay between MPI operations */
void update_compute_time(const dumpi_time* time, rank_mpi_context* my_ctx);
/* initializes the data structures */
static void* dumpi_init_op_data();
/* removes the next operation from the dynamic array */
static void dumpi_remove_next_op(void *mpi_op_array, struct mpi_event_list *mpi_op,
double last_op_time);
/* resets the counters for the dynamic array once the workload is completely loaded */
static void dumpi_finalize_mpi_op_data(void *mpi_op_array);
/* insert next operation */
static void dumpi_insert_next_op(void *mpi_op_array, struct mpi_event_list *mpi_op);
/* initialize the array data structure */
static void* dumpi_init_op_data()
{
dumpi_op_data_array* tmp;
tmp = malloc(sizeof(dumpi_op_data_array));
assert(tmp);
tmp->op_array = malloc(MAX_OPERATIONS * sizeof(struct mpi_event_list));
assert(tmp->op_array);
tmp->op_arr_ndx = 0;
tmp->op_arr_cnt = MAX_OPERATIONS;
return (void *)tmp;
}
/* inserts next operation in the array */
static void dumpi_insert_next_op(void *mpi_op_array, struct mpi_event_list *mpi_op)
{
dumpi_op_data_array *array = (dumpi_op_data_array*)mpi_op_array;
struct mpi_event_list *tmp;
/* check if the array is full */
if (array->op_arr_ndx == array->op_arr_cnt)
{
tmp = malloc((array->op_arr_cnt + MAX_OPERATIONS) * sizeof(struct mpi_event_list));
assert(tmp);
memcpy(tmp, array->op_array, array->op_arr_cnt * sizeof(struct mpi_event_list));
free(array->op_array);
array->op_array = tmp;
array->op_arr_cnt += MAX_OPERATIONS;
}
/* add the MPI operation to the op array */
array->op_array[array->op_arr_ndx] = *mpi_op;
//printf("\n insert time %f end time %f ", array->op_array[array->op_arr_ndx].start_time, array->op_array[array->op_arr_ndx].end_time);
array->op_arr_ndx++;
return;
}
/* resets the counters after file is fully loaded */
static void dumpi_finalize_mpi_op_data(void *mpi_op_array)
{
struct dumpi_op_data_array* array = (struct dumpi_op_data_array*)mpi_op_array;
array->op_arr_cnt = array->op_arr_ndx;
array->op_arr_ndx = 0;
}
/* removes the next operation from the array */
static void dumpi_remove_next_op(void *mpi_op_array, struct mpi_event_list *mpi_op,
double last_op_time)
{
dumpi_op_data_array *array = (dumpi_op_data_array*)mpi_op_array;
//printf("\n op array index %d array count %d ", array->op_arr_ndx, array->op_arr_cnt);
if (array->op_arr_ndx == array->op_arr_cnt)
{
mpi_op->op_type = CODES_NW_END;
}
else
{
struct mpi_event_list *tmp = &(array->op_array[array->op_arr_ndx]);
//printf("\n tmp end time %f ", tmp->end_time);
*mpi_op = *tmp;
array->op_arr_ndx++;
}
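/* once the end marker is returned, the operation array is no longer needed */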
if(mpi_op->op_type == CODES_NW_END)
{
free(array->op_array);
free(array);
}
}
/* introduce delay between operations: delay is the compute time NOT spent in MPI operations */
void update_compute_time(const dumpi_time* time, rank_mpi_context* my_ctx)
{
if((time->start.nsec - my_ctx->last_op_time) > DUMPI_IGNORE_DELAY)
{
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_DELAY;
wrkld_per_rank.start_time = my_ctx->last_op_time;
wrkld_per_rank.end_time = time->start.nsec;
wrkld_per_rank.u.delay.nsecs = time->start.nsec - my_ctx->last_op_time;
my_ctx->last_op_time = time->stop.nsec;
dumpi_insert_next_op(my_ctx->dumpi_mpi_array, &wrkld_per_rank);
}
}
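/* generic callback for MPI calls that are not modeled individually: only the preceding compute gap is recorded */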
int handleDUMPIGeneric(const void* prm, uint16_t thread, const dumpi_time *cpu, const dumpi_time *wall, const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIISend(const dumpi_isend *prm, uint16_t thread, const dumpi_time *cpu, const dumpi_time *wall, const dumpi_perfinfo *perf, void *userarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ISEND;
wrkld_per_rank.u.send.tag = prm->tag;
wrkld_per_rank.u.send.count = prm->count;
wrkld_per_rank.u.send.data_type = prm->datatype;
wrkld_per_rank.u.send.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.u.send.dest_rank = prm->dest;
wrkld_per_rank.u.send.source_rank = myctx->my_rank;
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.u.send.req_id = prm->request;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.send.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIIRecv(const dumpi_irecv *prm, uint16_t thread, const dumpi_time *cpu, const dumpi_time *wall, const dumpi_perfinfo *perf, void *userarg)
{
//printf("\n irecv source %d count %d data type %d", prm->source, prm->count, prm->datatype);
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list* wrkld_per_rank = malloc(sizeof(struct mpi_event_list));
wrkld_per_rank->op_type = CODES_NW_IRECV;
wrkld_per_rank->u.recv.data_type = prm->datatype;
wrkld_per_rank->u.recv.count = prm->count;
wrkld_per_rank->u.recv.tag = prm->tag;
wrkld_per_rank->u.recv.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank->u.recv.source_rank = prm->source;
wrkld_per_rank->u.recv.dest_rank = -1;
wrkld_per_rank->u.recv.req_id = prm->request;
wrkld_per_rank->start_time = cpu->start.nsec;
wrkld_per_rank->end_time = cpu->stop.nsec;
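/* zero-byte receives get a nominal 8-byte payload so the assert below holds */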
if(wrkld_per_rank->u.recv.num_bytes == 0)
wrkld_per_rank->u.recv.num_bytes = 8;
assert(wrkld_per_rank->u.recv.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, wrkld_per_rank);
free(wrkld_per_rank); /* the operation has been copied into the array, so the temporary can be freed */
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPISend(const dumpi_send *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_SEND;
wrkld_per_rank.u.send.tag = prm->tag;
wrkld_per_rank.u.send.count = prm->count;
wrkld_per_rank.u.send.data_type = prm->datatype;
wrkld_per_rank.u.send.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.u.send.dest_rank = prm->dest;
wrkld_per_rank.u.send.source_rank = myctx->my_rank;
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
if(wrkld_per_rank.u.send.num_bytes < 0)
printf("\n Number of bytes %d count %d data type %d num_bytes %d", prm->count * get_num_bytes(prm->datatype), prm->count, prm->datatype, get_num_bytes(prm->datatype));
if(wrkld_per_rank.u.send.num_bytes == 0)
wrkld_per_rank.u.send.num_bytes = 8;
// printf("\n Number of data bytes %d ", prm->count);
assert(wrkld_per_rank.u.send.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIRecv(const dumpi_recv *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
//printf("\n irecv source %d count %d data type %d", prm->source, prm->count, prm->datatype);
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_RECV;
wrkld_per_rank.u.recv.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.u.recv.source_rank = prm->source;
wrkld_per_rank.u.recv.dest_rank = -1;
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.recv.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIBcast(const dumpi_bcast *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_BCAST;
wrkld_per_rank.u.collective.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIAllgather(const dumpi_allgather *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ALLGATHER;
wrkld_per_rank.u.collective.num_bytes = prm->sendcount * get_num_bytes(prm->sendtype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIAllgatherv(const dumpi_allgatherv *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ALLGATHERV;
wrkld_per_rank.u.collective.num_bytes = prm->sendcount * get_num_bytes(prm->sendtype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIAlltoall(const dumpi_alltoall *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ALLTOALL;
wrkld_per_rank.u.collective.num_bytes = prm->sendcount * get_num_bytes(prm->sendtype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIWait(const dumpi_wait *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *userarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_WAIT;
wrkld_per_rank.u.wait.req_id = prm->request;
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIWaitsome(const dumpi_waitsome *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *userarg)
{
int i;
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_WAITSOME;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int16_t*)malloc(prm->count * sizeof(int16_t));
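/* copy the DUMPI request handles, narrowing them to the 16-bit ids used by the workload */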
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int16_t)prm->requests[i];
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIWaitany(const dumpi_waitany *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *userarg)
{
int i;
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_WAITANY;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int16_t*)malloc(prm->count * sizeof(int16_t));
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = (int16_t)prm->requests[i];
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIWaitall(const dumpi_waitall *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *userarg)
{
int i;
rank_mpi_context* myctx = (rank_mpi_context*)userarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_WAITALL;
wrkld_per_rank.u.waits.count = prm->count;
wrkld_per_rank.u.waits.req_ids = (int16_t*)malloc(prm->count * sizeof(int16_t));
for( i = 0; i < prm->count; i++ )
wrkld_per_rank.u.waits.req_ids[i] = prm->requests[i];
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIAlltoallv(const dumpi_alltoallv *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ALLTOALLV;
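/* approximate the collective payload size using the first entry of the send counts array */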
wrkld_per_rank.u.collective.num_bytes = prm->sendcounts[0] * get_num_bytes(prm->sendtype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIReduce(const dumpi_reduce *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_REDUCE;
wrkld_per_rank.u.collective.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIAllreduce(const dumpi_allreduce *prm, uint16_t thread,
const dumpi_time *cpu, const dumpi_time *wall,
const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_ALLREDUCE;
wrkld_per_rank.u.collective.num_bytes = prm->count * get_num_bytes(prm->datatype);
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
assert(wrkld_per_rank.u.collective.num_bytes > 0);
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
int handleDUMPIFinalize(const dumpi_finalize *prm, uint16_t thread, const dumpi_time *cpu, const dumpi_time *wall, const dumpi_perfinfo *perf, void *uarg)
{
rank_mpi_context* myctx = (rank_mpi_context*)uarg;
struct mpi_event_list wrkld_per_rank;
wrkld_per_rank.op_type = CODES_NW_END;
wrkld_per_rank.start_time = cpu->start.nsec;
wrkld_per_rank.end_time = cpu->stop.nsec;
dumpi_insert_next_op(myctx->dumpi_mpi_array, &wrkld_per_rank);
update_compute_time(cpu, myctx);
return 0;
}
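/* quickhash comparison callback: matches a stored rank context against an MPI rank key */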
static int hash_rank_compare(void *key, struct qhash_head *link)
{
int *in_rank = (int *)key;
rank_mpi_context *tmp;
tmp = qhash_entry(link, rank_mpi_context, hash_link);
if (tmp->my_rank == *in_rank)
return 1;
return 0;
}
int dumpi_trace_nw_workload_load(const char* params, int rank)
{
libundumpi_callbacks callbacks;
libundumpi_cbpair callarr[DUMPI_END_OF_STREAM];
dumpi_profile* profile;
dumpi_trace_params* dumpi_params = (dumpi_trace_params*)params;
char file_name[MAX_LENGTH];
if(rank >= dumpi_params->num_net_traces)
return -1;
if(!rank_tbl)
{
rank_tbl = qhash_init(hash_rank_compare, quickhash_64bit_hash, RANK_HASH_TABLE_SIZE);
if(!rank_tbl)
return -1;
}
rank_mpi_context *my_ctx;
my_ctx = malloc(sizeof(rank_mpi_context));
assert(my_ctx);
my_ctx->my_rank = rank;
my_ctx->last_op_time = 0.0;
my_ctx->dumpi_mpi_array = dumpi_init_op_data();
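/* per-rank DUMPI trace files carry a zero-padded, 4-digit rank suffix */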
sprintf(file_name, "%s%04d.bin", dumpi_params->file_name, rank);
profile = undumpi_open(file_name);
if(NULL == profile) {
printf("Error: unable to open DUMPI trace: %s", file_name);
exit(-1);
}
memset(&callbacks, 0, sizeof(libundumpi_callbacks));
memset(&callarr, 0, sizeof(libundumpi_cbpair) * DUMPI_END_OF_STREAM);