/* * Copyright (C) 2017 University of Chicago. * See COPYRIGHT notice in top-level directory. * */ #define _XOPEN_SOURCE 500 #define _GNU_SOURCE #include "darshan-runtime-config.h" #include #include #include #include #include #include #include "uthash.h" #include "darshan.h" #include "darshan-dynamic.h" #include "darshan-apmpi-log-format.h" #include "darshan-apmpi-utils.h" typedef long long ap_bytes_t; #define MAX(x,y) ((x>y)?x:y) #define MIN(x,y) ((x==0.0)?y:((xperf_record->counters[i] = 0; } for (i = 0; i < APMPI_F_MPIOP_TOTALTIME_NUM_INDICES; i++) { apmpi_runtime->perf_record->fcounters[i] = 0; } for (i = 0; i < APMPI_F_MPIOP_SYNCTIME_NUM_INDICES; i++) { apmpi_runtime->perf_record->fsynccounters[i] = 0; } for (i = 0; i < APMPI_F_MPI_GLOBAL_NUM_INDICES; i++) { apmpi_runtime->perf_record->fsynccounters[i] = 0; } return; } static void finalize_counters (void) { return; } /* * Function which updates all the counter data */ static void capture(struct darshan_apmpi_perf_record *rec, darshan_record_id rec_id) { rec->base_rec.id = rec_id; rec->base_rec.rank = my_rank; int name_len; MPI_Get_processor_name(rec->node_name, &name_len); return; } void apmpi_runtime_initialize() { int apmpi_buf_size; char rec_name[128]; darshan_module_funcs mod_funcs = { #ifdef HAVE_MPI .mod_redux_func = &apmpi_mpi_redux, #endif .mod_shutdown_func = &apmpi_shutdown }; APMPI_LOCK(); /* don't do anything if already initialized */ if(apmpi_runtime) { APMPI_UNLOCK(); return; } apmpi_buf_size = sizeof(struct darshan_apmpi_header_record) + sizeof(struct darshan_apmpi_perf_record); /* register the apmpi module with the darshan-core component */ darshan_core_register_module( APMPI_MOD, mod_funcs, &apmpi_buf_size, &my_rank, NULL); /* not enough memory to fit apmpi module record */ if(apmpi_buf_size < sizeof(struct darshan_apmpi_header_record) + sizeof(struct darshan_apmpi_perf_record)) { darshan_core_unregister_module(APMPI_MOD); APMPI_UNLOCK(); return; } /* initialize module's global state */ 
apmpi_runtime = malloc(sizeof(*apmpi_runtime)); if(!apmpi_runtime) { darshan_core_unregister_module(APMPI_MOD); APMPI_UNLOCK(); return; } memset(apmpi_runtime, 0, sizeof(*apmpi_runtime)); if (my_rank == 0) { apmpi_runtime->header_id = darshan_core_gen_record_id("darshan-apmpi-header"); /* register the apmpi record with darshan-core */ apmpi_runtime->header_record = darshan_core_register_record( apmpi_runtime->header_id, //NULL, "darshan-apmpi-header", APMPI_MOD, sizeof(struct darshan_apmpi_header_record), NULL); if(!(apmpi_runtime->header_record)) { darshan_core_unregister_module(APMPI_MOD); free(apmpi_runtime); apmpi_runtime = NULL; APMPI_UNLOCK(); return; } apmpi_runtime->header_record->base_rec.id = apmpi_runtime->header_id; apmpi_runtime->header_record->base_rec.rank = my_rank; apmpi_runtime->header_record->magic = APMPI_MAGIC; #ifdef __APMPI_COLL_SYNC apmpi_runtime->header_record->sync_flag = 1; #else apmpi_runtime->header_record->sync_flag = 0; #endif apmpi_runtime->header_record->version = APMPI_VER; } apmpi_runtime->rec_id = darshan_core_gen_record_id("APMPI"); //record name apmpi_runtime->perf_record = darshan_core_register_record( apmpi_runtime->rec_id, "APMPI", APMPI_MOD, sizeof(struct darshan_apmpi_perf_record), NULL); if(!(apmpi_runtime->perf_record)) { darshan_core_unregister_module(APMPI_MOD); free(apmpi_runtime); apmpi_runtime = NULL; APMPI_UNLOCK(); return; } initialize_counters(); /* collect perf counters */ capture(apmpi_runtime->perf_record, apmpi_runtime->rec_id); APMPI_UNLOCK(); return; } #if 0 static void apmpi_record_reduction_op (void* inrec_v, void* inoutrec_v, int *len, MPI_Datatype *datatype) { struct darshan_apmpi_perf_record tmp_rec; struct darshan_apmpi_perf_record *inrec = inrec_v; struct darshan_apmpi_perf_record *inoutrec = inoutrec_v; int i, j, k; for (i=0; i<*len; i++) { memset(&tmp_rec, 0, sizeof(struct darshan_apmpi_perf_record)); tmp_rec.base_rec.id = inrec->base_rec.id; tmp_file.base_rec.rank = -1; } } #endif static void 
apmpi_shared_record_variance(MPI_Comm mod_comm)
{
    /*
     * Compute, on rank 0, the variance across ranks of
     *   - fglobalcounters[RANK_TOTAL_MPITIME]
     *   - fglobalcounters[RANK_TOTAL_MPISYNCTIME]
     * and store them in the header record.
     *
     * Every rank contributes an {n = 1, S = 0, T = <its value>} triple that
     * is combined with the user-defined operator darshan_variance_reduce
     * (defined outside this function); rank 0 then records S / n.
     *
     * mod_comm: communicator over which the two reductions are performed.
     *           Both PMPI_Reduce calls are collective, so every rank of
     *           mod_comm must reach them.
     *
     * The send/receive buffers live on the stack on purpose: the previous
     * heap-allocated version returned early on allocation failure, which
     * leaked the datatype/op (and the send buffer) and left the failing rank
     * out of the collective reductions the remaining ranks still entered.
     * With automatic storage there is no failure path at all.
     */
    MPI_Datatype var_dt;
    MPI_Op var_op;
    struct darshan_variance_dt var_send_buf = {0};
    /* Only significant at the root (rank 0); ignored by MPI elsewhere. */
    struct darshan_variance_dt var_recv_buf = {0};

    /* Opaque byte blob datatype so the struct can be reduced as one element. */
    PMPI_Type_contiguous(sizeof(struct darshan_variance_dt),
        MPI_BYTE, &var_dt);
    PMPI_Type_commit(&var_dt);

    /* Second argument (1) declares the operator commutative. */
    PMPI_Op_create(darshan_variance_reduce, 1, &var_op);

    /* get total mpi time variances across the ranks */
    var_send_buf.n = 1;
    var_send_buf.S = 0;
    var_send_buf.T = apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME];

    PMPI_Reduce(&var_send_buf, &var_recv_buf, 1,
        var_dt, var_op, 0, mod_comm);

    if(my_rank == 0)
    {
        apmpi_runtime->header_record->apmpi_f_variance_total_mpitime =
            (var_recv_buf.S / var_recv_buf.n);
    }

    /* get total mpi sync time variances across the ranks */
    var_send_buf.n = 1;
    var_send_buf.S = 0;
    var_send_buf.T = apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME];

    PMPI_Reduce(&var_send_buf, &var_recv_buf, 1,
        var_dt, var_op, 0, mod_comm);

    if(my_rank == 0)
    {
        apmpi_runtime->header_record->apmpi_f_variance_total_mpisynctime =
            (var_recv_buf.S / var_recv_buf.n);
    }

    PMPI_Type_free(&var_dt);
    PMPI_Op_free(&var_op);
}

/********************************************************************************
 * shutdown function exported by this module for coordinating with darshan-core *
 ********************************************************************************/

/* Pass data for the apmpi module back to darshan-core to log to file.
*/ //#ifdef HAVE_MPI static void apmpi_mpi_redux( void *apmpi_buf, MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count) { int i; #if 0 struct darshan_apmpi_perf_record *red_send_buf = NULL; struct darshan_apmpi_perf_record *red_recv_buf = NULL; struct darshan_apmpi_perf_record *apmpi_rec_buf = (struct darshan_apmpi_perf_record *)apmpi_buf; #endif MPI_Datatype red_type; //MPI_Op red_op; APMPI_LOCK(); if (!apmpi_runtime) { APMPI_UNLOCK(); return; } double mpisync_time = 0.0; /* Compute Total MPI time per rank: RANK_TOTAL_MPITIME */ for (i=MPI_SEND_TOTAL_TIME; iperf_record->fglobalcounters[RANK_TOTAL_MPITIME] += apmpi_runtime->perf_record->fcounters[i]; } for (i=MPI_BARRIER_TOTAL_SYNC_TIME; iperf_record->fsynccounters[i]; } apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPITIME] += mpisync_time; apmpi_runtime->perf_record->fglobalcounters[RANK_TOTAL_MPISYNCTIME] = mpisync_time; #if 0 red_send_buf = apmpi_runtime->perf_record; if (my_rank == 0){ red_recv_buf = malloc(sizeof(struct darshan_apmpi_perf_record)); if(!red_recv_buf) { APMPI_UNLOCK(); return; } } /* construct a datatype for a APMPI file record. 
This is serving no purpose * except to make sure we can do a reduction on proper boundaries */ PMPI_Type_contiguous(sizeof(struct darshan_apmpi_perf_record), MPI_BYTE, &red_type); PMPI_Type_commit(&red_type); /* register a APMPI file record reduction operator */ PMPI_Op_create(apmpi_record_reduction_op, 1, &red_op); /* reduce shared APMPI file records */ PMPI_Reduce(red_send_buf, red_recv_buf, shared_rec_count, red_type, red_op, 0, mod_comm); #endif /* get the time variance across all ranks */ apmpi_shared_record_variance(mod_comm); #if 0 /* clean up reduction state */ if(my_rank == 0) { free(red_recv_buf); } #endif //PMPI_Type_free(&red_type); //PMPI_Op_free(&red_op); APMPI_UNLOCK(); return; } //#endif static void apmpi_shutdown( void **apmpi_buf, int *apmpi_buf_sz) { int apmpi_rec_count; APMPI_LOCK(); assert(apmpi_runtime); *apmpi_buf_sz = 0; if(my_rank == 0) { *apmpi_buf_sz += sizeof( *apmpi_runtime->header_record); } *apmpi_buf_sz += sizeof( *apmpi_runtime->perf_record); finalize_counters(); free(apmpi_runtime); apmpi_runtime = NULL; APMPI_UNLOCK(); return; } #define APMPI_PRE_RECORD() do { \ APMPI_LOCK(); \ if(!darshan_core_disabled_instrumentation()) { \ if(!apmpi_runtime) { \ apmpi_runtime_initialize(); \ } \ if(apmpi_runtime) break; \ } \ APMPI_UNLOCK(); \ return(ret); \ } while(0) #define APMPI_POST_RECORD() do { \ APMPI_UNLOCK(); \ } while(0) #define APMPI_RECORD_UPDATE(MPI_OP) do { \ if(ret != MPI_SUCCESS) break; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _CALL_COUNT)]++; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _TOTAL_BYTES)] += bytes; \ DARSHAN_MSG_BUCKET_INC(&(apmpi_runtime->perf_record->counters[Y(MPI_OP ## _MSG_SIZE_AGG_0_256)]), bytes); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _TOTAL_TIME)] += tdiff; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)] = MAX(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)], tdiff); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)] = 
MIN(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)], tdiff); \ } while(0) #define APMPI_RECORD_UPDATE_NOMSG(MPI_OP) do { \ if(ret != MPI_SUCCESS) break; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _CALL_COUNT)]++; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _TOTAL_TIME)] += tdiff; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)] = MAX(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)], tdiff); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)] = MIN(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)], tdiff); \ } while(0) #define APMPI_RECORD_UPDATE_SYNC(MPI_OP) do { \ if(ret != MPI_SUCCESS) break; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _CALL_COUNT)]++; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _TOTAL_BYTES)] += bytes; \ DARSHAN_MSG_BUCKET_INC(&(apmpi_runtime->perf_record->counters[Y(MPI_OP ## _MSG_SIZE_AGG_0_256)]), bytes); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _TOTAL_TIME)] += tdiff; \ apmpi_runtime->perf_record->fsynccounters[Y(MPI_OP ## _TOTAL_SYNC_TIME)] += tsync; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)] = MAX(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)], tdiff); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)] = MIN(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)], tdiff); \ } while(0) #define APMPI_RECORD_UPDATE_SYNC_NOMSG(MPI_OP) do { \ if(ret != MPI_SUCCESS) break; \ apmpi_runtime->perf_record->counters[Y(MPI_OP ## _CALL_COUNT)]++; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _TOTAL_TIME)] += tdiff; \ apmpi_runtime->perf_record->fsynccounters[Y(MPI_OP ## _TOTAL_SYNC_TIME)] += tsync; \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)] = MAX(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MAX_TIME)], tdiff); \ apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)] = MIN(apmpi_runtime->perf_record->fcounters[Y(MPI_OP ## _MIN_TIME)], tdiff); \ } while(0) 
#define Y(a) a /********************************************************** * Wrappers for MPI functions of interest * **********************************************************/ int DARSHAN_DECL(MPI_Send)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Send); TIME(__real_PMPI_Send(buf, count, datatype, dest, tag, comm)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); // Potential optimization: Lock around the count - lock only if MPI_THREAD_MULTIPLE is used ... locking mutex APMPI_RECORD_UPDATE(MPI_SEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Send, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm), MPI_Send) int DARSHAN_DECL(MPI_Ssend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Ssend); TIME(__real_PMPI_Ssend(buf, count, datatype, dest, tag, comm)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_SSEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Ssend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm), MPI_Ssend) int DARSHAN_DECL(MPI_Rsend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Rsend); TIME(__real_PMPI_Rsend(buf, count, datatype, dest, tag, comm)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_RSEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Rsend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm), MPI_Rsend) int DARSHAN_DECL(MPI_Bsend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Bsend); TIME(__real_PMPI_Bsend(buf, count, datatype, dest, tag, comm)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_BSEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Bsend, int, 
(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm), MPI_Bsend) int DARSHAN_DECL(MPI_Isend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Isend); TIME(__real_PMPI_Isend(buf, count, datatype, dest, tag, comm, request)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_ISEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Isend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request), MPI_Isend) int DARSHAN_DECL(MPI_Issend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Issend); TIME(__real_PMPI_Issend(buf, count, datatype, dest, tag, comm, request)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_ISSEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Issend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request), MPI_Issend) int DARSHAN_DECL(MPI_Irsend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Irsend); TIME(__real_PMPI_Irsend(buf, count, datatype, dest, tag, comm, request)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_IRSEND); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Irsend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request), MPI_Irsend) int DARSHAN_DECL(MPI_Ibsend)(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Ibsend); TIME(__real_PMPI_Ibsend(buf, count, datatype, dest, tag, comm, request)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_IBSEND); APMPI_POST_RECORD(); return ret; } 
DARSHAN_WRAPPER_MAP(PMPI_Ibsend, int, (const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request), MPI_Ibsend) int DARSHAN_DECL(MPI_Recv)(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) { MAP_OR_FAIL(PMPI_Recv); TIME(__real_PMPI_Recv(buf, count, datatype, source, tag, comm, status)); int count_received, src; if (status != MPI_STATUS_IGNORE) { PMPI_Get_count(status, datatype, &count_received); if (count_received == MPI_UNDEFINED) count_received = count; //src = status->MPI_SOURCE; } else { count_received = count; //src = source; } BYTECOUNT(datatype, count_received); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_RECV); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Recv, int, (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status), MPI_Recv) int DARSHAN_DECL(MPI_Irecv)(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request * request) { MAP_OR_FAIL(PMPI_Irecv); TIME(__real_PMPI_Irecv(buf, count, datatype, source, tag, comm, request)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_IRECV); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Irecv, int, (void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request), MPI_Irecv) int DARSHAN_DECL(MPI_Sendrecv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status) { MAP_OR_FAIL(PMPI_Sendrecv); TIME(__real_PMPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status)); int count_received; //, src; if (status != MPI_STATUS_IGNORE) { PMPI_Get_count(status, recvtype, &count_received); if (count_received == MPI_UNDEFINED) count_received = recvcount; //src = 
status->MPI_SOURCE; } else { count_received = recvcount; //src = source; } BYTECOUNT(sendtype, sendcount); ap_bytes_t sbytes = bytes; BYTECOUNTND(recvtype, count_received); bytes += sbytes; APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_SENDRECV); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Sendrecv, int, (const void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status), MPI_Sendrecv) int DARSHAN_DECL(MPI_Sendrecv_replace)(void *buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status * status) { MAP_OR_FAIL(PMPI_Sendrecv_replace); TIME(__real_PMPI_Sendrecv_replace(buf, count, datatype, dest, sendtag, source, recvtag, comm, status)); int count_received; //, src; if (status != MPI_STATUS_IGNORE) { PMPI_Get_count(status, datatype, &count_received); if (count_received == MPI_UNDEFINED) count_received = count; //src = status->MPI_SOURCE; } else { count_received = count; //src = source; } BYTECOUNT(datatype, count + count_received); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_SENDRECV_REPLACE); APMPI_POST_RECORD(); return ret; } /* int DARSHAN_DECL(MPI_Isendrecv)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Isendrecv); TIME(__real_PMPI_Isendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, request)); BYTECOUNT(sendtype, sendcount + recvcount); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_ISENDRECV); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Isendrecv, int, (const void *sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, 
MPI_Request *request), MPI_Isendrecv) int DARSHAN_DECL(MPI_Isendrecv_replace)(void *buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Request *request) { MAP_OR_FAIL(PMPI_Isendrecv_replace); TIME(__real_PMPI_Isendrecv_replace(buf, count, datatype, dest, sendtag, source, recvtag, comm, request)); BYTECOUNT(datatype, count + count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_ISENDRECV_REPLACE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Isendrecv_replace, int, (void *buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Request *request), MPI_Isendrecv_replace) */ int DARSHAN_DECL(MPI_Put)(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win) { MAP_OR_FAIL(PMPI_Put); TIME(__real_PMPI_Put(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win)); BYTECOUNT(origin_datatype, origin_count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_PUT); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Put, int, (const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win), MPI_Put) int DARSHAN_DECL(MPI_Get)(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win) { MAP_OR_FAIL(PMPI_Get); TIME(__real_PMPI_Get(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win)); BYTECOUNT(target_datatype, target_count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_GET); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Get, int, (void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint 
target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win), MPI_Get) int DARSHAN_DECL(MPI_Accumulate)(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { MAP_OR_FAIL(PMPI_Accumulate); TIME(__real_PMPI_Accumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win)); BYTECOUNT(target_datatype, target_count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_ACCUMULATE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Accumulate, int, (const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win), MPI_Accumulate) int DARSHAN_DECL(MPI_Get_accumulate)(const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) { MAP_OR_FAIL(PMPI_Get_accumulate); TIME(__real_PMPI_Get_accumulate(origin_addr, origin_count, origin_datatype, result_addr, result_count, result_datatype, target_rank, target_disp, target_count, target_datatype, op, win)); BYTECOUNT(target_datatype, target_count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE(MPI_GET_ACCUMULATE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Get_accumulate, int, (const void *origin_addr, int origin_count, MPI_Datatype origin_datatype, void *result_addr, int result_count, MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win), MPI_Get_accumulate) int DARSHAN_DECL(MPI_Fetch_and_op)(const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win) { 
MAP_OR_FAIL(PMPI_Fetch_and_op); TIME(__real_PMPI_Fetch_and_op(origin_addr, result_addr, datatype, target_rank, target_disp, op, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_FETCH_AND_OP); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Fetch_and_op, int, (const void *origin_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Op op, MPI_Win win), MPI_Fetch_and_op) int DARSHAN_DECL(MPI_Compare_and_swap)(const void *origin_addr, const void *compare_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win) { MAP_OR_FAIL(PMPI_Compare_and_swap); TIME(__real_PMPI_Compare_and_swap(origin_addr, compare_addr, result_addr, datatype, target_rank, target_disp, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_COMPARE_AND_SWAP); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Compare_and_swap, int, (const void *origin_addr, const void *compare_addr, void *result_addr, MPI_Datatype datatype, int target_rank, MPI_Aint target_disp, MPI_Win win), MPI_Compare_and_swap) int DARSHAN_DECL(MPI_Win_fence)(int assert, MPI_Win win) { MAP_OR_FAIL(PMPI_Win_fence); TIME(__real_PMPI_Win_fence(assert, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_FENCE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_fence, int, (int assert, MPI_Win win), MPI_Win_fence) int DARSHAN_DECL(MPI_Win_start)(MPI_Group group, int assert, MPI_Win win) { MAP_OR_FAIL(PMPI_Win_start); TIME(__real_PMPI_Win_start(group, assert, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_START); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_start, int, (MPI_Group group, int assert, MPI_Win win), MPI_Win_start) int DARSHAN_DECL(MPI_Win_complete)(MPI_Win win) { MAP_OR_FAIL(PMPI_Win_complete); TIME(__real_PMPI_Win_complete(win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_COMPLETE); APMPI_POST_RECORD(); return ret; } 
DARSHAN_WRAPPER_MAP(PMPI_Win_complete, int, (MPI_Win win), MPI_Win_complete)

/*
 * RMA synchronization wrappers.
 *
 * Each one resolves the PMPI symbol (MAP_OR_FAIL), runs the real call
 * under TIME, and then - between APMPI_PRE_RECORD / APMPI_POST_RECORD,
 * i.e. with the module lock held - applies APMPI_RECORD_UPDATE_NOMSG,
 * which on MPI_SUCCESS records the call count and total/max/min time
 * for the operation.  No byte counts are tracked for these calls.
 * The status code of the real MPI call is returned unchanged.
 */

/* Instrumented MPI_Win_post; accounted under MPI_WIN_POST. */
int DARSHAN_DECL(MPI_Win_post)(MPI_Group group, int assert, MPI_Win win)
{
    MAP_OR_FAIL(PMPI_Win_post);

    TIME(__real_PMPI_Win_post(group, assert, win));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_POST);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_post, int, (MPI_Group group, int assert, MPI_Win win), MPI_Win_post)

/* Instrumented MPI_Win_wait; accounted under MPI_WIN_WAIT. */
int DARSHAN_DECL(MPI_Win_wait)(MPI_Win win)
{
    MAP_OR_FAIL(PMPI_Win_wait);

    TIME(__real_PMPI_Win_wait(win));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_WAIT);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_wait, int, (MPI_Win win), MPI_Win_wait)

/* Instrumented MPI_Win_test; accounted under MPI_WIN_TEST. */
int DARSHAN_DECL(MPI_Win_test)(MPI_Win win, int *flag)
{
    MAP_OR_FAIL(PMPI_Win_test);

    TIME(__real_PMPI_Win_test(win, flag));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_TEST);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_test, int, (MPI_Win win, int *flag), MPI_Win_test)

/* Instrumented MPI_Win_lock; accounted under MPI_WIN_LOCK. */
int DARSHAN_DECL(MPI_Win_lock)(int lock_type, int rank, int assert, MPI_Win win)
{
    MAP_OR_FAIL(PMPI_Win_lock);

    TIME(__real_PMPI_Win_lock(lock_type, rank, assert, win));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_LOCK);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_lock, int, (int lock_type, int rank, int assert, MPI_Win win), MPI_Win_lock)

/* Instrumented MPI_Win_unlock; accounted under MPI_WIN_UNLOCK. */
int DARSHAN_DECL(MPI_Win_unlock)(int rank, MPI_Win win)
{
    MAP_OR_FAIL(PMPI_Win_unlock);

    TIME(__real_PMPI_Win_unlock(rank, win));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_UNLOCK);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_unlock, int, (int rank, MPI_Win win), MPI_Win_unlock)

/* Instrumented MPI_Win_unlock_all; accounted under MPI_WIN_UNLOCK_ALL. */
int DARSHAN_DECL(MPI_Win_unlock_all)(MPI_Win win)
{
    MAP_OR_FAIL(PMPI_Win_unlock_all);

    TIME(__real_PMPI_Win_unlock_all(win));

    APMPI_PRE_RECORD();
    APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_UNLOCK_ALL);
    APMPI_POST_RECORD();

    return ret;
}
DARSHAN_WRAPPER_MAP(PMPI_Win_unlock_all, int, (MPI_Win win), MPI_Win_unlock_all)

/* Return type of the MPI_Win_flush wrapper, whose declarator follows. */
int
DARSHAN_DECL(MPI_Win_flush)(int rank, MPI_Win win) { MAP_OR_FAIL(PMPI_Win_flush); TIME(__real_PMPI_Win_flush(rank, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_FLUSH); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_flush, int, (int rank, MPI_Win win), MPI_Win_flush) int DARSHAN_DECL(MPI_Win_flush_all)(MPI_Win win) { MAP_OR_FAIL(PMPI_Win_flush_all); TIME(__real_PMPI_Win_flush_all(win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_FLUSH_ALL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_flush_all, int, (MPI_Win win), MPI_Win_flush_all) int DARSHAN_DECL(MPI_Win_flush_local)(int rank, MPI_Win win) { MAP_OR_FAIL(PMPI_Win_flush_local); TIME(__real_PMPI_Win_flush_local(rank, win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_FLUSH_LOCAL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_flush_local, int, (int rank, MPI_Win win), MPI_Win_flush_local) int DARSHAN_DECL(MPI_Win_flush_local_all)(MPI_Win win) { MAP_OR_FAIL(PMPI_Win_flush_local_all); TIME(__real_PMPI_Win_flush_local_all(win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_FLUSH_LOCAL_ALL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_flush_local_all, int, (MPI_Win win), MPI_Win_flush_local_all) int DARSHAN_DECL(MPI_Win_sync)(MPI_Win win) { MAP_OR_FAIL(PMPI_Win_sync); TIME(__real_PMPI_Win_sync(win)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WIN_SYNC); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Win_sync, int, (MPI_Win win), MPI_Win_sync) int DARSHAN_DECL(MPI_Probe)(int source, int tag, MPI_Comm comm, MPI_Status * status) { MAP_OR_FAIL(PMPI_Probe); TIME(__real_PMPI_Probe(source, tag, comm, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_PROBE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Probe, int, (int source, int tag, MPI_Comm comm, MPI_Status * status), MPI_Probe) int DARSHAN_DECL(MPI_Iprobe)(int source, int tag, MPI_Comm comm, int *flag, 
MPI_Status * status) { MAP_OR_FAIL(PMPI_Iprobe); TIME(__real_PMPI_Iprobe(source, tag, comm, flag, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_IPROBE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Iprobe, int, (int source, int tag, MPI_Comm comm, int *flag, MPI_Status * status), MPI_Iprobe) int DARSHAN_DECL(MPI_Test)(MPI_Request *request, int *flag, MPI_Status *status) { MAP_OR_FAIL(PMPI_Test); TIME(__real_PMPI_Test(request, flag, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_TEST); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Test, int, (MPI_Request *request, int *flag, MPI_Status *status), MPI_Test) int DARSHAN_DECL(MPI_Testany)(int count, MPI_Request array_of_requests[], int *indx, int *flag, MPI_Status *status) { MAP_OR_FAIL(PMPI_Testany); TIME(__real_PMPI_Testany(count, array_of_requests, indx, flag, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_TESTANY); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Testany, int, (int count, MPI_Request array_of_requests[], int *indx, int *flag, MPI_Status *status), MPI_Testany) int DARSHAN_DECL(MPI_Testall)(int count, MPI_Request array_of_requests[], int *flag, MPI_Status array_of_statuses[]) { MAP_OR_FAIL(PMPI_Testall); TIME(__real_PMPI_Testall(count, array_of_requests, flag, array_of_statuses)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_TESTALL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Testall, int, (int count, MPI_Request array_of_requests[], int *flag, MPI_Status array_of_statuses[]), MPI_Testall) int DARSHAN_DECL(MPI_Testsome)(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]) { MAP_OR_FAIL(PMPI_Testsome); TIME(__real_PMPI_Testsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_TESTSOME); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Testsome, int, 
(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]), MPI_Testsome) int DARSHAN_DECL(MPI_Wait)(MPI_Request * request, MPI_Status * status) { MAP_OR_FAIL(PMPI_Wait); TIME(__real_PMPI_Wait(request, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WAIT); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Wait, int, (MPI_Request * request, MPI_Status * status), MPI_Wait) int DARSHAN_DECL(MPI_Waitany)(int count, MPI_Request array_of_requests[], int *indx, MPI_Status * status) { MAP_OR_FAIL(PMPI_Waitany); TIME(__real_PMPI_Waitany(count, array_of_requests, indx, status)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WAITANY); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Waitany, int, (int count, MPI_Request array_of_requests[], int *indx, MPI_Status * status), MPI_Waitany) int DARSHAN_DECL(MPI_Waitall)(int count, MPI_Request array_of_requests[], MPI_Status array_of_statuses[]) { MAP_OR_FAIL(PMPI_Waitall); TIME(__real_PMPI_Waitall(count, array_of_requests, array_of_statuses)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WAITALL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Waitall, int, (int count, MPI_Request array_of_requests[], MPI_Status array_of_statuses[]), MPI_Waitall) int DARSHAN_DECL(MPI_Waitsome)(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]) { MAP_OR_FAIL(PMPI_Waitsome); TIME(__real_PMPI_Waitsome(incount, array_of_requests, outcount, array_of_indices, array_of_statuses)); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_NOMSG(MPI_WAITSOME); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Waitsome, int, (int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]), MPI_Waitsome) int DARSHAN_DECL(MPI_Barrier)(MPI_Comm comm) { MAP_OR_FAIL(PMPI_Barrier); TIME_SYNC(__real_PMPI_Barrier(comm)); APMPI_PRE_RECORD(); 
APMPI_RECORD_UPDATE_SYNC_NOMSG(MPI_BARRIER); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Barrier, int, (MPI_Comm comm), MPI_Barrier) int DARSHAN_DECL(MPI_Bcast)(void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Bcast); TIME_SYNC(__real_PMPI_Bcast(buffer, count, datatype, root, comm)); ap_bytes_t bytes = 0; if (root != MPI_PROC_NULL) { BYTECOUNTND(datatype, count); } APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_SYNC(MPI_BCAST); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Bcast, int, (void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm), MPI_Bcast) int DARSHAN_DECL(MPI_Reduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Reduce); TIME_SYNC(__real_PMPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm)); ap_bytes_t bytes = 0; if (root != MPI_PROC_NULL) { BYTECOUNTND(datatype, count); } APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_SYNC(MPI_REDUCE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Reduce, int, (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm), MPI_Reduce) int DARSHAN_DECL(MPI_Allreduce)(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Allreduce); TIME_SYNC(__real_PMPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm)); BYTECOUNT(datatype, count); APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_SYNC(MPI_ALLREDUCE); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Allreduce, int, (const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm), MPI_Allreduce) int DARSHAN_DECL(MPI_Alltoall)(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Alltoall); TIME_SYNC(__real_PMPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, 
recvcount, recvtype, comm)); BYTECOUNT(recvtype, recvcount); int tasks; PMPI_Comm_size(comm, &tasks); bytes = bytes*tasks; APMPI_PRE_RECORD(); APMPI_RECORD_UPDATE_SYNC(MPI_ALLTOALL); APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_Alltoall, int, (const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm), MPI_Alltoall) int DARSHAN_DECL(MPI_Alltoallv)(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) { MAP_OR_FAIL(PMPI_Alltoallv); TIME_SYNC(__real_PMPI_Alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm)); int tsize; PMPI_Type_size(recvtype, &tsize); int i, tasks, count = 0; PMPI_Comm_size(comm, &tasks); for (i=0; iperf_record->counters[MPI_ _COUNT]++; APMPI_POST_RECORD(); return ret; } DARSHAN_WRAPPER_MAP(PMPI_ , int, (), MPI_ ) */ #undef Y /* * Local variables: * c-indent-level: 4 * c-basic-offset: 4 * End: * * vim: ts=8 sts=4 sw=4 expandtab */