Commit 64b3e69c authored by Xin Wang

Delete cosmoflow.c

parent 15f3c1b6
/**********************************************************************
* This file was generated by coNCePTuaL on Mon Jan 7 23:10:01 2019
* using the c_mpi backend (C + MPI).
* Do not modify this file; modify /Users/xin/macworkspace/codes-dev/codes/scripts/conceptual_benchmarks/cosmoflow.ncptl instead.
*
* Entire source program
* ---------------------
* # Measure the performance of MPI_Allgather()
* # By Scott Pakin <pakin@lanl.gov>
* #
* # N.B. Requires the c_mpi backend.
* Require language version "1.5".
* # Parse the command line.
* numwords is "Message size (words)" and comes from "--msgsize" or "-s" with default 28825K.
* reps is "Number of repetitions" and comes from "--reps" or "-r" with default 100.
* computetime is "Computation time (ms)" and comes from "--compute" or "-c" with default 129.
*
* # Allocate a send buffer and a receive buffer.
* Task 0 multicasts a numwords*num_tasks word message from buffer 0 to all other tasks.
* Task 0 multicasts a numwords*num_tasks word message from buffer 1 to all other tasks.
*
* # Measure the performance of MPI_Allreduce().
* Task 0 resets its counters then
* for reps repetitions {
* all tasks COMPUTES FOR computetime MILLISECONDS then
* all tasks backend execute "
* MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)" and numwords and ",
* MPI_INT, MPI_SUM, MPI_COMM_WORLD);
* " then
* all tasks backend execute "
* MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)" and numwords and ",
* MPI_INT, MPI_SUM, MPI_COMM_WORLD);
* "
* } then
* task 0 logs elapsed_usecs/1000 as "Elapse time (ms)".
**********************************************************************/
/*****************
* Include files *
*****************/
/* Header files needed by all C-based backends */
#include <stdio.h>
#include <string.h>
#include <ncptl/ncptl.h>
/* Header files specific to the c_mpi backend */
#include <mpi.h>
#include <stdarg.h>
/**********
* Macros *
**********/
/* Define the maximum loop trip count that we're willing to unroll fully. */
#define CONC_MAX_UNROLL 5
/* Specify the minimum number of trial iterations in each FOR <time> loop. */
#define CONC_FOR_TIME_TRIALS 1
/* Define a macro that rounds a double to a ncptl_int.  The rounding is done
 * by adding 0.5 and letting the cast truncate, so the result is the nearest
 * integer only for nonnegative arguments. */
#define CONC_DBL2INT(D) ((ncptl_int)((D)+0.5))
/* Define a macro that increments a buffer pointer by a byte offset.  S names
 * a member of a CONC_EVENT's "s" union that has "buffer" and "bufferofs"
 * fields.  A CONC_EVENT pointer named thisev must be in scope wherever the
 * macro is expanded. */
#define CONC_GETBUFPTR(S) ((void *)((char *)thisev->s.S.buffer + thisev->s.S.bufferofs))
/* Implement ncptl_func_task_of in terms of ncptl_physical_to_virtual.
 * A processor number outside of [0, var_num_tasks) yields -1.  The argument
 * is evaluated more than once, and names procmap and var_num_tasks must be
 * in scope at the point of expansion. */
#define ncptl_func_task_of(P) ((ncptl_int)(P) < 0LL || (ncptl_int)(P) >= var_num_tasks ? -1LL : ncptl_physical_to_virtual (procmap, (ncptl_int)(P)))
/* Same as ncptl_func_task_of but with the result converted to a double. */
#define ncptl_dfunc_task_of(P) ((double) ncptl_func_task_of(P))
/* Implement ncptl_func_processor_of in terms of ncptl_virtual_to_physical.
 * A task number outside of [0, var_num_tasks) yields -1.  The argument is
 * evaluated more than once, and names procmap and var_num_tasks must be in
 * scope at the point of expansion. */
#define ncptl_func_processor_of(V) ((ncptl_int)(V) < 0LL || (ncptl_int)(V) >= var_num_tasks ? -1LL : ncptl_virtual_to_physical (procmap, (ncptl_int)(V)))
/* Same as ncptl_func_processor_of but with the result converted to a double. */
#define ncptl_dfunc_processor_of(V) ((double) ncptl_func_processor_of(V))
/* Define a macro that increments REDUCE's alternate buffer pointer by a byte
 * offset.  It is the counterpart of CONC_GETBUFPTR for the "altbuffer" field
 * and applies the same "bufferofs" offset; thisev must be in scope. */
#define CONC_GETALTBUFPTR(S) ((void *)((char *)thisev->s.S.altbuffer + thisev->s.S.bufferofs))
/* Estimate the number of unique communicators that this program will need.
* (The tradeoff is one of initialization time versus memory consumption.) */
#define ESTIMATED_COMMUNICATORS 128
/* Specify an operation to use for all reduction operations.  The second
 * macro is the same operation spelled as a string for use in log comments. */
#define REDUCE_OPERATION MPI_SUM
#define REDUCE_OPERATION_NAME "MPI_SUM"
/*********************
* Type declarations *
*********************/
/* Enumerate the various mechanisms used to implement MULTICAST statements.
 * The last enumerator is not a mechanism: because it follows all of the
 * others it equals their count, and it is used to size conc_mcast_tallies[]. */
typedef enum {
CONC_MCAST_MPI_BCAST, /* One to many */
CONC_MCAST_MPI_ALLTOALL, /* Many to many, same data to all */
CONC_MCAST_MPI_ALLTOALLV, /* General many to many */
CONC_MCAST_MPI_NUM_FUNCS /* Number of the above */
} CONC_MCAST_MPI_FUNC;
/* Define the type of event to perform.  A value of this type is stored in
 * the "type" field of every CONC_EVENT.  NUM_EVS is not an event; because
 * it is listed last it equals the number of event types that precede it. */
typedef enum {
EV_SEND, /* Synchronous send */
EV_ASEND, /* Asynchronous send */
EV_RECV, /* Synchronous receive */
EV_ARECV, /* Asynchronous receive */
EV_WAIT, /* Wait for all asynchronous sends/receives to complete */
EV_DELAY, /* Spin or sleep */
EV_TOUCH, /* Touch a region of memory */
EV_SYNC, /* Barrier synchronization */
EV_RESET, /* Reset counters */
EV_STORE, /* Store all counters' current values */
EV_RESTORE, /* Restore the previously pushed counter values */
EV_FLUSH, /* Compute aggregate functions for log-file columns */
EV_MCAST, /* Synchronous multicast */
EV_REDUCE, /* Reduction with or without a subsequent multicast */
EV_BTIME, /* Beginning of a timed loop */
EV_ETIME, /* Ending of a timed loop */
EV_REPEAT, /* Repeatedly process the next N events */
EV_SUPPRESS, /* Suppress writing to the log and standard output */
EV_NEWSTMT, /* Beginning of a new top-level statement */
EV_CODE, /* None of the above */
NUM_EVS /* Number of event types in CONC_EVENT_TYPE */
} CONC_EVENT_TYPE;
/* Describe a synchronous or asynchronous send event.  This structure is the
 * "send" member of the union inside CONC_EVENT.  Its "buffer" and
 * "bufferofs" fields are the ones that CONC_GETBUFPTR(send) combines. */
typedef struct {
ncptl_int dest; /* Destination task */
ncptl_int size; /* Number of bytes to send */
ncptl_int alignment; /* Message alignment (in bytes) */
ncptl_int pendingsends; /* # of outstanding sends */
ncptl_int pendingrecvs; /* # of outstanding receives */
ncptl_int buffernum; /* Buffer # to send from */
ncptl_int bufferofs; /* Byte offset into the message buffer */
ncptl_int tag; /* Tag to use for selective receives */
int misaligned; /* 1=misaligned from a page; 0=align as specified */
int touching; /* 1=touch every word before sending */
int verification; /* 1=fill message buffer with known contents */
void *buffer; /* Pointer to message memory */
MPI_Request * handle; /* MPI handle representing an asynchronous send */
} CONC_SEND_EVENT;
/* Describe a synchronous or asynchronous receive event.  This structure is
 * the "recv" member of the union inside CONC_EVENT.  Its fields parallel
 * those of CONC_SEND_EVENT, with "source" taking the place of "dest". */
typedef struct {
ncptl_int source; /* Source task */
ncptl_int size; /* Number of bytes to receive */
ncptl_int alignment; /* Message alignment (in bytes) */
ncptl_int pendingsends; /* # of outstanding sends */
ncptl_int pendingrecvs; /* # of outstanding receives */
ncptl_int buffernum; /* Buffer # to receive into */
ncptl_int bufferofs; /* Byte offset into the message buffer */
ncptl_int tag; /* Tag to use for selective receives */
int misaligned; /* 1=misaligned from a page; 0=align as specified */
int touching; /* 1=touch every word after reception */
int verification; /* 1=verify that all bits are correct */
void *buffer; /* Pointer to message memory */
MPI_Request * handle; /* MPI handle representing an asynchronous receive */
} CONC_RECV_EVENT;
/* Describe a wait-for-asynchronous-completions event.  This structure is
 * the "wait" member of the union inside CONC_EVENT. */
typedef struct {
ncptl_int numsends; /* # of sends we expect to complete. */
ncptl_int numrecvs; /* # of receives we expect to complete. */
ncptl_int numrecvbytes; /* # of bytes we expect to receive-complete */
ncptl_int *touchedlist; /* List of receives that need to be touched */
ncptl_int numtouches; /* # of elements in the above */
} CONC_WAIT_EVENT;
/* Describe a spin or sleep delay.  This structure is the "delay" member of
 * the union inside CONC_EVENT. */
typedef struct {
uint64_t microseconds; /* Length of delay in microseconds */
int spin0sleep1; /* 0=spin; 1=sleep */
} CONC_DELAY_EVENT;
/* Describe a barrier synchronization event.  This structure is the "sync"
 * member of the union inside CONC_EVENT. */
typedef struct {
MPI_Comm communicator; /* Set of tasks to synchronize */
} CONC_SYNC_EVENT;
/* Describe a walk over a memory-region.  This structure is the "touch"
 * member of the union inside CONC_EVENT.  All quantities other than
 * numaccesses are expressed in bytes. */
typedef struct {
ncptl_int regionbytes; /* Size in bytes of the region to touch */
ncptl_int bytestride; /* Stride in bytes to touch */
ncptl_int numaccesses; /* Number of words to touch */
ncptl_int wordsize; /* Size in bytes of each touch */
ncptl_int firstbyte; /* Byte offset of the first byte to touch */
} CONC_TOUCH_EVENT;
/* Describe a synchronous multicast event.  This structure is the "mcast"
 * member of the union inside CONC_EVENT.  The fields whose names end in "2"
 * and the four per-rank int arrays are documented as applying to the
 * many-to-many case; mpi_func records which MPI function was selected. */
typedef struct {
ncptl_int source; /* Source task, -1 in the many-to-many case */
ncptl_int size; /* Number of bytes to send */
ncptl_int alignment; /* Message alignment (in bytes) */
ncptl_int pendingsends; /* # of outstanding sends */
ncptl_int pendingrecvs; /* # of outstanding receives */
ncptl_int buffernum; /* Buffer # to send/receive from */
ncptl_int bufferofs; /* Byte offset into the message buffer */
ncptl_int tag; /* Tag to use for selective receives */
int misaligned; /* 1=misaligned from a page; 0=align as specified */
int touching; /* 1=touch every word before sending */
int verification; /* 1=verify that all bits are correct */
void *buffer; /* Pointer to message memory */
ncptl_int size2; /* Number of bytes to receive in the many-to-many case */
ncptl_int bufferofs2; /* Byte offset into the message buffer in the many-to-many case */
void * buffer2; /* Pointer to receive-message memory in the many-to-many case */
MPI_Comm communicator; /* Set of tasks to multicast to/from */
int root; /* source's rank within communicator */
int * sndvol; /* Volume of data to send to each rank in the communicator */
int * snddisp; /* Offset from buffer of each message to send */
int * rcvvol; /* Volume of data to receive from each rank in the communicator */
int * rcvdisp; /* Offset from buffer2 of each message to receive */
CONC_MCAST_MPI_FUNC mpi_func; /* MPI function to use to perform the multicast */
} CONC_MCAST_EVENT;
/* Describe a reduction event.  This structure is the "reduce" member of the
 * union inside CONC_EVENT.  It is the only event structure with an
 * "altbuffer" field, which CONC_GETALTBUFPTR(reduce) offsets by bufferofs. */
typedef struct {
ncptl_int numitems; /* # of items to reduce */
ncptl_int itemsize; /* # of bytes per item */
ncptl_int alignment; /* Message alignment (in bytes) */
ncptl_int pendingsends; /* # of outstanding sends */
ncptl_int pendingrecvs; /* # of outstanding receives */
ncptl_int buffernum; /* Buffer # to send/receive from */
ncptl_int bufferofs; /* Byte offset into the message buffer */
ncptl_int tag; /* Tag to use for selective receives */
int misaligned; /* 1=misaligned from a page; 0=align as specified */
int touching; /* 1=touch every word before sending/after receiving */
int sending; /* 1=we're a sender */
int receiving; /* 1=we're a receiver */
void *buffer; /* Pointer to message memory */
void * altbuffer; /* Pointer to additional message memory */
MPI_Comm sendcomm; /* Set of tasks to reduce from */
MPI_Comm recvcomm; /* Set of tasks to reduce to */
MPI_Datatype datatype; /* MPI datatype to reduce */
int reducetype; /* 0=reduce; 1=allreduce; 2=reduce+bcast */
ncptl_int reduceroot; /* Root task of the reduction if reducetype is 0 or 2 */
ncptl_int bcastroot; /* Root task of the multicast if reducetype is 2 */
} CONC_REDUCE_EVENT;
/* Describe an event representing the beginning of a timed loop.  This
 * structure is the "btime" member of the union inside CONC_EVENT.  The
 * three time-valued fields are in microseconds, as their names indicate
 * (NOTE(review): starttime's unit is inferred from the neighboring fields
 * -- confirm against the code that sets it). */
typedef struct {
uint64_t usecs; /* Requested loop duration */
uint64_t warmup_usecs; /* Requested duration of warmup loops */
uint64_t starttime; /* Time at which the loop state last changed */
uint64_t itersleft; /* # of iterations remaining */
uint64_t previters; /* # of iterations we performed last time */
int prev_quiet; /* Previous value of suppress_output */
int timing_trial; /* 1=performing a timing trial; 0=running for real */
volatile int finished; /* 1=time has expired; 0=still ticking */
} CONC_BTIME_EVENT;
/* Describe an event representing the end of a timed loop.  This structure
 * is the "etime" member of the union inside CONC_EVENT; it refers back to
 * its matching beginning-of-loop event by index rather than by pointer. */
typedef struct {
ncptl_int begin_event; /* Index into eventlist[] of the corresponding BTIME event */
} CONC_ETIME_EVENT;
/* Describe an event representing repetitions of subsequent events.  This
 * structure is the "rep" member of the union inside CONC_EVENT. */
typedef struct {
ncptl_int end_event; /* Index into eventlist[] of the last event to repeat */
ncptl_int numreps; /* # of repetitions to perform */
} CONC_REPEAT_EVENT;
/* Describe an event representing output suppression (either on or off).
 * This structure is the "suppress" member of the union inside CONC_EVENT.
 * Unlike the other event structures it also carries a struct tag
 * (conc_suppress_event). */
typedef struct conc_suppress_event {
int quiet; /* 0=allow output; 1=suppress it */
int prev_quiet; /* Previous value of suppress_output */
ncptl_int matching_event; /* Event ID of the "suppression on" event */
uint64_t stop_elapsed_usecs; /* Time at which we suppressed output */
} CONC_SUPPRESS_EVENT;
/* State carried by an EV_CODE-style event, i.e., one that executes an
 * arbitrary piece of code at run time.  This structure is the "code" member
 * of the union inside CONC_EVENT. */
typedef struct {
  ncptl_int number;               /* Unique number identifying one specific piece of code */
  NCPTL_VIRT_PHYS_MAP *procmap;   /* Mapping between tasks and processors that is current for this event */
  ncptl_int var_numwords;         /* Copy of var_numwords for use from within the piece of code */
} CONC_CODE_EVENT;
/* Describe an arbitrary coNCePTuaL event.  The "type" field records the kind
 * of event and the union "s" holds the state for that kind.  The member
 * names of "s" are what the CONC_GETBUFPTR and CONC_GETALTBUFPTR macros
 * take as their argument.  Several event types (e.g., EV_RESET, EV_STORE,
 * EV_RESTORE, EV_FLUSH, and EV_NEWSTMT) have no corresponding union member. */
typedef struct {
CONC_EVENT_TYPE type; /* Type of event */
union {
CONC_SEND_EVENT send; /* Send state */
CONC_RECV_EVENT recv; /* Receive state */
CONC_WAIT_EVENT wait; /* Wait-for-completions state */
CONC_DELAY_EVENT delay; /* State for spins and sleeps */
CONC_TOUCH_EVENT touch; /* State for memory touching */
CONC_SYNC_EVENT sync; /* Synchronization state */
CONC_MCAST_EVENT mcast; /* Multicast state */
CONC_REDUCE_EVENT reduce; /* Reduction state */
CONC_BTIME_EVENT btime; /* Timed-loop state */
CONC_ETIME_EVENT etime; /* Additional timed-loop state */
CONC_REPEAT_EVENT rep; /* Repeated-events state */
CONC_SUPPRESS_EVENT suppress; /* State for suppressing output */
CONC_CODE_EVENT code; /* State for arbitrary code */
} s;
} CONC_EVENT;
/* Fully specify an arbitrary for() loop (used by FOR EACH).  The loop state
 * is kept twice in the union "u": once with ncptl_int fields ("i") and once
 * with double fields ("d").  Presumably the "integral" flag selects which
 * of the two is live -- confirm against the code that walks the loop. */
typedef struct {
NCPTL_QUEUE *list_comp; /* NULL=ordinary list; other=list comprehension values */
int integral; /* 1=integral values; 0=floating-point values */
enum { /* Comparison of loop variable to end variable */
CONC_LEQ, /* Increasing progression */
CONC_GEQ /* Decreasing progression */
} comparator;
enum { /* How to increment the loop variable */
CONC_ADD, /* Arithmetically */
CONC_MULT, /* Geometrically increasing */
CONC_DIV /* Geometrically decreasing */
} increment;
union {
struct { /* Integer version of the loop state */
ncptl_int loopvar; /* Loop variable */
ncptl_int prev_loopvar; /* Previous value of loop variable */
ncptl_int startval; /* Initial value of loop variable */
ncptl_int endval; /* Value not to exceed */
ncptl_int incval; /* Loop-variable increment */
} i;
struct { /* Floating-point version of the loop state */
double loopvar; /* Loop variable */
double prev_loopvar; /* Previous value of loop variable */
double startval; /* Initial value of loop variable */
double endval; /* Value not to exceed */
double incval; /* Loop-variable increment */
} d;
} u;
} LOOPBOUNDS;
/********************
* Global variables *
********************/
/* Variables exported to coNCePTuaL programs */
static ncptl_int var_bytes_received = 0; /* Total number of bytes received */
static ncptl_int var_msgs_received = 0; /* Total number of messages received */
static ncptl_int var_bit_errors = 0; /* Total number of bit errors observed */
static ncptl_int var_total_msgs = 0; /* Sum of messages sent and messages received */
static ncptl_int var_msgs_sent = 0; /* Total number of messages sent */
static ncptl_int var_bytes_sent = 0; /* Total number of bytes sent */
static ncptl_int var_num_tasks = 1; /* Number of tasks running the program; conc_initialize() overwrites the initial 1 with the size of MPI_COMM_WORLD */
static ncptl_int var_elapsed_usecs = 0; /* Elapsed time in microseconds */
static ncptl_int var_total_bytes = 0; /* Sum of bytes sent and bytes received */
/* Dummy variable to help mark other variables as used.  It is only ever
 * written to (see conc_mark_variables_used() and handle_MPI_error()); one
 * member exists per type of value that needs to be "used". */
union {
ncptl_int ni;
int i;
void *vp;
} conc_dummy_var;
/* Variables used internally by boilerplate code */
static uint64_t starttime; /* Time the clock was last reset (microseconds) */
static ncptl_int pendingrecvs = 0; /* Current # of outstanding receives */
static ncptl_int pendingrecvbytes = 0; /* Current # of bytes in outstanding receives */
static NCPTL_QUEUE *touchedqueue; /* Queue of asynchronous receives to touch */
static ncptl_int pendingsends = 0; /* Current # of outstanding sends */
static NCPTL_QUEUE *eventqueue; /* List of coNCePTuaL events to perform; conc_allocate_event() appends to it */
static int within_time_loop = 0; /* 1=we're within a FOR <time> loop */
static int suppress_output = 0; /* 1=suppress output to stdout and the log file */
static void *touch_region = NULL; /* Memory region to touch */
static ncptl_int touch_region_size = 0; /* # of bytes in the above */
static int virtrank; /* This task's virtual rank in the computation */
static int physrank; /* This task's physical rank in the computation (its rank in MPI_COMM_WORLD) */
static NCPTL_VIRT_PHYS_MAP *procmap; /* Virtual to physical rank mapping */
static NCPTL_LOG_FILE_STATE *logstate; /* Opaque object representing all log-file state */
static char *logfile_uuid; /* Execution UUID to write to every log file */
static char *logfiletmpl; /* Template for the log file's name */
static char *logfiletmpl_default; /* Default value of the above */
/* Global variables specific to the c_mpi backend */
static ncptl_int mpi_is_running = 0; /* 1=MPI has been initialized; conc_exit_handler() calls MPI_Abort() only while this is nonzero */
static NCPTL_QUEUE * recvreqQ; /* List of MPI receive requests */
static MPI_Request * recvrequests; /* List version of recvreqQ */
static NCPTL_QUEUE * recvstatQ; /* List of MPI receive statuses */
static MPI_Status * recvstatuses; /* List version of recvstatQ */
static NCPTL_QUEUE * sendreqQ; /* List of MPI send requests */
static MPI_Request * sendrequests; /* List version of sendreqQ */
static NCPTL_QUEUE * sendstatQ; /* List of MPI send statuses */
static MPI_Status * sendstatuses; /* List version of sendstatQ */
static NCPTL_SET * communicators; /* Map from an array of processor flags to an MPI communicator */
static MPI_Errhandler mpi_error_handler; /* Handle to handle_MPI_error() */
static ncptl_int mpi_tag_ub; /* Upper bound on an MPI tag value; taken from the MPI_TAG_UB attribute, or 32767 if that attribute is unavailable */
static ncptl_int conc_mcast_tallies[CONC_MCAST_MPI_NUM_FUNCS] = {0}; /* Tallies of (static) multicast implementation functions, indexed by CONC_MCAST_MPI_FUNC */
/* Program-specific variables.  Each is filled in by command-line parsing;
 * the defaults are assigned in conc_initialize(). */
ncptl_int var_numwords; /* Message size (words) (command-line argument) */
ncptl_int var_reps; /* Number of repetitions (command-line argument) */
ncptl_int var_computetime; /* Computation time (ms) (command-line argument) */
/*************************
* Function declarations *
*************************/
/* Make MPI errors invoke ncptl_fatal().
 *
 * This function is registered as an MPI error handler (its address is what
 * mpi_error_handler refers to).
 *
 *   comm     Communicator on which the error was raised.  It is not needed
 *            for the report and is stored into conc_dummy_var only so that
 *            compilers do not flag the parameter as unused.
 *   errcode  Pointer to the MPI error code to report.
 *   ...      Additional handler arguments; they are never examined.
 *
 * The function reports the error through ncptl_fatal() and is not expected
 * to return.  Everything that has to happen is therefore done before the
 * call to ncptl_fatal(); nothing is placed after it, where it would be dead
 * code. */
static void handle_MPI_error (MPI_Comm *comm, int *errcode, ...)
{
  char errstring[MPI_MAX_ERROR_STRING];  /* Textual form of *errcode */
  int errstrlen;                         /* Length of the above (required by MPI_Error_string()) */

  /* Prevent the compiler from complaining that comm is unused. */
  conc_dummy_var.vp = (void *) comm;

  /* Prefer MPI's own description of the error; fall back to the raw code. */
  if (MPI_Error_string (*errcode, errstring, &errstrlen) == MPI_SUCCESS)
    ncptl_fatal ("MPI run-time error: %s", errstring);
  else
    ncptl_fatal ("MPI aborted with unrecognized error code %d", *errcode);
}
/* Perform the equivalent of MPI_Comm_rank() for an arbitrary process.
 *
 *   subcomm      Communicator in which to look up the process.
 *   global_rank  The process's rank within MPI_COMM_WORLD.
 *
 * Returns global_rank's rank within subcomm.  Per the MPI specification of
 * MPI_Group_translate_ranks(), the result is MPI_UNDEFINED if the process
 * is not a member of subcomm.
 *
 * MPI_Comm_group() hands back a new group handle on every call, so both
 * groups are released with MPI_Group_free() before returning; otherwise
 * each call would leak two group handles. */
static int rank_in_MPI_communicator (MPI_Comm subcomm, int global_rank)
{
  MPI_Group world_group;          /* Group associated with MPI_COMM_WORLD */
  MPI_Group subgroup;             /* Group associated with subcomm */
  int subrank = MPI_UNDEFINED;    /* global_rank's rank within subcomm */

  MPI_Comm_group (MPI_COMM_WORLD, &world_group);
  MPI_Comm_group (subcomm, &subgroup);
  MPI_Group_translate_ranks (world_group, 1, &global_rank, subgroup, &subrank);

  /* The groups were needed only for the translation above. */
  (void) MPI_Group_free (&subgroup);
  (void) MPI_Group_free (&world_group);
  return subrank;
}
/* Fold an arbitrary tag value into MPI's valid tag range, [0, mpi_tag_ub].
 *
 *   tag  Any ncptl_int, including negative values.
 *
 * Returns a nonnegative tag.  The absolute value of the input is taken and,
 * unless mpi_tag_ub is already NCPTL_INT_MAX, reduced modulo mpi_tag_ub+1. */
static ncptl_int map_tag_into_MPI_range (ncptl_int tag)
{
  ncptl_int nonneg_tag;   /* Absolute value of the (possibly substituted) tag */

  /* NCPTL_INT_MIN has no representable absolute value, so it is replaced
   * by an arbitrary constant before the absolute value is computed. */
  nonneg_tag = ncptl_func_abs (tag == NCPTL_INT_MIN ? 555666773LL : tag);

  /* When the upper bound is NCPTL_INT_MAX every nonnegative value is
   * already in range (and mpi_tag_ub+1 could not be computed anyway). */
  if (!(mpi_tag_ub < NCPTL_INT_MAX))
    return nonneg_tag;
  return nonneg_tag % (mpi_tag_ub + 1);
}
/* Given an array of task in/out booleans return an MPI
* communicator that represents the "in" tasks. */
static MPI_Comm define_MPI_communicator (char *procflags)
{
MPI_Comm *existing_comm; /* Previously defined MPI communicator */
MPI_Comm new_comm; /* Newly defined MPI communicator */
existing_comm = (MPI_Comm *) ncptl_set_find (communicators, (void *)procflags);
if (existing_comm)
return *existing_comm;
(void) MPI_Comm_split (MPI_COMM_WORLD, (int)procflags[physrank], physrank, &new_comm);
(void) MPI_Errhandler_set (new_comm, mpi_error_handler);
ncptl_set_insert (communicators, (void *)procflags, (void *)&new_comm);
return define_MPI_communicator (procflags);
}
/* Reference every otherwise-unused file-scope variable (and one otherwise-
 * unused function) so that the compiler does not warn about them.
 * This function exists only for that purpose and should never be called. */
void conc_mark_variables_used (void)
{
  /* Counters exported to coNCePTuaL programs */
  conc_dummy_var.ni = var_bytes_received;
  conc_dummy_var.ni = var_msgs_received;
  conc_dummy_var.ni = var_bit_errors;
  conc_dummy_var.ni = var_total_msgs;
  conc_dummy_var.ni = var_msgs_sent;
  conc_dummy_var.ni = var_bytes_sent;
  conc_dummy_var.ni = var_num_tasks;
  conc_dummy_var.ni = var_elapsed_usecs;
  conc_dummy_var.ni = var_total_bytes;

  /* Internal boilerplate state */
  conc_dummy_var.ni = pendingrecvbytes;
  conc_dummy_var.ni = touch_region_size;
  conc_dummy_var.vp = touch_region;
  conc_dummy_var.i = within_time_loop;
  conc_dummy_var.i = suppress_output;

  /* Helper function that this program may not otherwise call */
  rank_in_MPI_communicator (MPI_COMM_WORLD, 0);
}
/* Append a new event to eventqueue and tag it with the given type.
 *
 *   type  The kind of event being created.
 *
 * Returns a pointer to the newly allocated event.  Only the "type" field is
 * set here; the caller fills in the rest of the event's state. */
static CONC_EVENT *conc_allocate_event (CONC_EVENT_TYPE type)
{
  CONC_EVENT *slot;   /* Storage handed out by the event queue */

  slot = (CONC_EVENT *) ncptl_queue_allocate (eventqueue);
  slot->type = type;
  return slot;
}
/* Exit handler that runs automatically when the program terminates, whether
 * successfully or not.  If MPI is still marked as running at that point,
 * the whole MPI job is aborted with exit code 1; otherwise nothing is done. */
static void conc_exit_handler (void)
{
  if (!mpi_is_running)
    return;
  MPI_Abort (MPI_COMM_WORLD, 1);
}
/* Initialize coNCePTuaL, the messaging layer, and this program itself. */
static void conc_initialize (int argc, char *argv[])
{
/* Variables needed by all C-based backends */
CONC_EVENT * eventlist; /* List of events to execute */
ncptl_int numevents; /* Number of entries in eventlist[] */
int help_only = 0; /* 1=User specified --help; save time by skipping ncptl_init() */
char * argv0 = strrchr(argv[0], '/') ? strrchr(argv[0], '/')+1 : argv[0]; /* Base name of the executable program */
int i; /* Generic loop variable */
/* Declare all of our command-line arguments. */
NCPTL_CMDLINE arguments[] = {
{ NCPTL_TYPE_STRING, NULL, "logfile", 'L', "Log-file template", {0}},
{ NCPTL_TYPE_INT, NULL, "msgsize", 's', "Message size (words)", {0}},
{ NCPTL_TYPE_INT, NULL, "reps", 'r', "Number of repetitions", {0}},
{ NCPTL_TYPE_INT, NULL, "compute", 'c', "Computation time (ms)", {0}}
};
/* Incorporate the complete coNCePTuaL source code as an array
* for use by ncptl_log_write_prologue(). */
char *sourcecode[] = {
"# Measure the performance of MPI_Allgather()",
"# By Scott Pakin <pakin@lanl.gov>",
"#",
"# N.B. Requires the c_mpi backend.",
"Require language version \"1.5\".",
"# Parse the command line.",
"numwords is \"Message size (words)\" and comes from \"--msgsize\" or \"-s\" with default 28825K.",
"reps is \"Number of repetitions\" and comes from \"--reps\" or \"-r\" with default 100.",
"computetime is \"Computation time (ms)\" and comes from \"--compute\" or \"-c\" with default 129.",
"",
"# Allocate a send buffer and a receive buffer.",
"Task 0 multicasts a numwords*num_tasks word message from buffer 0 to all other tasks.",
"Task 0 multicasts a numwords*num_tasks word message from buffer 1 to all other tasks.",
"",
"# Measure the performance of MPI_Allreduce().",
"Task 0 resets its counters then",
"for reps repetitions {",
" all tasks COMPUTES FOR computetime MILLISECONDS then",
" all tasks backend execute \"",
" MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)\" and numwords and \",",
" MPI_INT, MPI_SUM, MPI_COMM_WORLD);",
" \" then",
" all tasks backend execute \"",
" MPI_Allreduce([MESSAGE BUFFER 0], [MESSAGE BUFFER 1], (int)\" and numwords and \",",
" MPI_INT, MPI_SUM, MPI_COMM_WORLD);",
" \"",
"} then",
"task 0 logs elapsed_usecs/1000 as \"Elapse time (ms)\".",
NULL
};
/* Variables specific to the c_mpi backend */
int num_tasks; /* int version of var_num_tasks needed by MPI_Comm_size() */
char * procflags; /* Array of 1s representing an all-task MPI communicator */
MPI_Comm comm_world = MPI_COMM_WORLD; /* Copy of MPI_COMM_WORLD that we can take the address of */
void * attr_val; /* Pointed to the value of MPI_TAG_UB */
int attr_flag = 0; /* true=MPI_TAG_UB was extracted; false=not extracted */
char log_key_str[128]; /* String representing the range of valid MPI tags */
/* As a special case, if the command line contains --help, then skip
* the coNCePTuaL initialization step. */
for (i=1; i<argc; i++)
if (!strcmp(argv[i], "--"))
break;
else
if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-?")) {
argv[1] = "-?"; /* Guaranteed to work, even with getopt() */
help_only = 1;
break;
}
/* Perform various initializations specific to the c_mpi backend. */
/* Initialize MPI. */
(void) MPI_Init(&argc, &argv);
mpi_is_running = 1;
/* Initialize the coNCePTuaL run-time library. */
if (!help_only)
ncptl_init (NCPTL_RUN_TIME_VERSION, argv[0]);
(void) atexit (conc_exit_handler);
/* Initialize the communication routines needed by the c_mpi backend. */
(void) MPI_Errhandler_create ((MPI_Handler_function *)handle_MPI_error, &mpi_error_handler);
(void) MPI_Errhandler_set (MPI_COMM_WORLD, mpi_error_handler);
(void) MPI_Comm_rank(MPI_COMM_WORLD, &physrank);
(void) MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
var_num_tasks = (ncptl_int) num_tasks;
(void) MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &attr_val, &attr_flag);
mpi_tag_ub = (ncptl_int) (attr_flag ? *(int *)attr_val : 32767);
/* Generate and broadcast a UUID. */
logfile_uuid = ncptl_log_generate_uuid();
(void) MPI_Bcast ((void *)logfile_uuid, 37, MPI_CHAR, 0, MPI_COMM_WORLD);
/* Plug variables and default values into the NCPTL_CMDLINE structure. */
arguments[0].variable = (CMDLINE_VALUE *) &logfiletmpl;
arguments[1].variable = (CMDLINE_VALUE *) &var_numwords;
arguments[1].defaultvalue.intval = 29516800LL;
arguments[2].variable = (CMDLINE_VALUE *) &var_reps;
arguments[2].defaultvalue.intval = 100LL;
arguments[3].variable = (CMDLINE_VALUE *) &var_computetime;
arguments[3].defaultvalue.intval = 129LL;
logfiletmpl_default = (char *) ncptl_malloc (strlen(argv0) + 15, 0);
sprintf (logfiletmpl_default, "%s-%%p.log", argv0);
arguments[0].defaultvalue.stringval = logfiletmpl_default;
/* Parse the command line. */
mpi_is_running = 0; /* Don't invoke MPI_Abort() after --help. */
ncptl_parse_command_line (argc, argv, arguments, sizeof(arguments)/sizeof(NCPTL_CMDLINE));
if (help_only)
ncptl_fatal ("Internal error in the c_generic backend: failed to exit after giving help");
mpi_is_running = 1;
/* Establish a mapping from (virtual) task IDs to (physical) ranks. */
procmap = ncptl_allocate_task_map (var_num_tasks);
virtrank = ncptl_physical_to_virtual (procmap, physrank);
/* Perform initializations specific to the c_mpi backend. */
ncptl_log_add_comment ("MPI send routines", "MPI_Send() and MPI_Isend()");
ncptl_log_add_comment ("MPI reduction operation", REDUCE_OPERATION_NAME);
sprintf (log_key_str, "[0, %" NICS "]", mpi_tag_ub);
ncptl_log_add_comment ("MPI tag range", log_key_str);
/* Open the log file and write some standard prologue information to it. */
logstate = ncptl_log_open (logfiletmpl, physrank);
ncptl_log_write_prologue (logstate, argv[0], logfile_uuid, "c_mpi", "C + MPI",
var_num_tasks,
arguments, sizeof(arguments)/sizeof(NCPTL_CMDLINE),
sourcecode);
ncptl_free (logfile_uuid);
/* Allocate a variety of dynamically growing queues. */
eventqueue = ncptl_queue_init (sizeof (CONC_EVENT));
touchedqueue = ncptl_queue_init (sizeof (ncptl_int));
/* Perform initializations specific to the c_mpi backend. */
sendreqQ = ncptl_queue_init (sizeof (MPI_Request));
sendstatQ = ncptl_queue_init (sizeof (MPI_Status));
recvreqQ = ncptl_queue_init (sizeof (MPI_Request));
recvstatQ = ncptl_queue_init (sizeof (MPI_Status));
communicators = ncptl_set_init (ESTIMATED_COMMUNICATORS, var_num_tasks*sizeof(char), sizeof(MPI_Comm));procflags = (char *) ncptl_malloc (var_num_tasks*sizeof(char), 0);
for (i=0; i<var_num_tasks; i++)
procflags[i] = 1;
ncptl_set_insert (communicators, (void *)procflags, (void *)&comm_world);
ncptl_free (procflags);
/****************************************************
* Generated, program-specific initialization code. *
****************************************************/
/* TASK 0LL MULTICAST...TO ALL OTHER TASKS */
{
ncptl_int numsenders = 0LL; /* Number of sending tasks */
int * sndvol; /* Number of bytes we send to each other task */
int * snddisp; /* Buffer offset of each send */
int sndnum = 0; /* Total number of sends from us */
int * rcvvol; /* Number of bytes sent to us from each other task */
int * rcvdisp; /* Buffer offset of each receive */
int rcvnum = 0; /* Total number of sends to us */
int peervar; /* Physical rank of one of our peer tasks */
char * procflags = (char *) ncptl_malloc (var_num_tasks*sizeof(char), 0); /* Flags indicating whether each task is in or out */
ncptl_int * sendsfrom = (ncptl_int *) ncptl_malloc (var_num_tasks*sizeof(ncptl_int), 0); /* Tally of sends from each rank */
ncptl_int * recvsby = (ncptl_int *) ncptl_malloc (var_num_tasks*sizeof(ncptl_int), 0); /* Tally of receives by each rank */
int stasknum; /* A single source task mapped by ncptl_virtual_to_physical() */
int ttasknum; /* A single target task mapped by ncptl_virtual_to_physical() */
CONC_MCAST_MPI_FUNC mpi_func; /* The MPI function that will implement the multicast */
MPI_Comm subcomm; /* MPI subcommunicator to use */
/* Determine all participants in the many-to-many multicast. */
memset(procflags, 0, var_num_tasks);
memset(sendsfrom, 0, var_num_tasks*sizeof(ncptl_int));
memset(recvsby, 0, var_num_tasks*sizeof(ncptl_int));
if ((0LL) >= 0 && (0LL) < var_num_tasks) {
stasknum = (int) ncptl_virtual_to_physical(procmap, 0LL);
procflags[stasknum] = 1;
numsenders = 1;
{
ncptl_int ivar_a;
for (ivar_a=0; ivar_a<var_num_tasks; ivar_a++)
if (ivar_a != 0LL) {
ttasknum = (int) ncptl_virtual_to_physical(procmap, ivar_a);
sendsfrom[stasknum]++;
recvsby[ttasknum]++;
procflags[ttasknum] = 1;
}
}
}
subcomm = define_MPI_communicator(procflags);
/* Determine if all participants are sending and receiving the same number
* and volume of messages. If so, then we can use the faster MPI_Alltoall()
* function for the multicast instead of the slower MPI_Alltoallv(). */
{
ncptl_int msgtally = -1; /* Messages sent or received by any participant */
ncptl_int i;
mpi_func = CONC_MCAST_MPI_ALLTOALL; /* Use MPI_Alltoall() unless we require MPI_Alltoallv(). */
for (i=0; i<var_num_tasks; i++) {
stasknum = (int) ncptl_virtual_to_physical(procmap, i);
if (procflags[stasknum]) {
if (msgtally == -1)
msgtally = sendsfrom[stasknum];
if (sendsfrom[stasknum] != msgtally || recvsby[stasknum] != msgtally) {
mpi_func = CONC_MCAST_MPI_ALLTOALLV;
break;
}
}
}
}
/* The following steps are performed only by those tasks who are
* involved in the communication (as senders and/or receivers). */
if (numsenders > 0 && procflags[physrank]) {
int groupsize = 0; /* Number of MPI ranks represented by subcomm */
ncptl_int ivar_b_loop;
if (numsenders == 1) {
/* As a special case, use MPI_Bcast() if there's a single sender. */
conc_mcast_tallies[CONC_MCAST_MPI_BCAST]++;
CONC_EVENT *thisev = conc_allocate_event (EV_MCAST);
thisev->s.mcast.source = ncptl_virtual_to_physical (procmap, 0LL);
thisev->s.mcast.size = ((var_numwords)*(var_num_tasks))*4LL;