Commit 03e8d607 authored by Richard Zamora's avatar Richard Zamora
Browse files

improving topology version of exerciser to allow contrived example, where agg...

Improve the topology version of the exerciser to allow a contrived example where aggregator selection should matter.
parent 70a1b8d8
......@@ -80,9 +80,11 @@
//#define METACOLOK // Are collective functions available?
//#define USING_CCIO // Using CCIO branch of HDF5?
// MPI_CHECK will display a custom error message as well as an error string
// from the MPI_STATUS and then exit the program. This macro is borrowed
// directly from the HPC-IOR code
/*
* MPI_CHECK will display a custom error message as well as an error string
* from the MPI_STATUS and then exit the program. This macro is borrowed
* directly from the HPC-IOR code
*/
#define MPI_CHECK(MPI_STATUS, MSG) do { \
char resultString[MPI_MAX_ERROR_STRING]; \
int resultLength; \
......@@ -385,6 +387,9 @@ int main( int argc, char* argv[] )
createPropList = H5Pcreate(H5P_FILE_CREATE);
MPI_Info mpiHints = MPI_INFO_NULL;
//MPI_Info mpiHints; // = MPI_INFO_NULL;
//MPI_Info_create(&mpiHints);
//MPI_Info_set(mpiHints, "cb_nodes", "4");
// set the parallel driver - don't repeatedly do this, as the overhead is quite large for some MPI implementations and most
// apps will just do this once
......@@ -398,19 +403,55 @@ int main( int argc, char* argv[] )
// lets create an aggregator list and pass a "hint" here.
if (topohint) {
// Override cb_nodes via an environment variable, e.g.: export HDF5_CB_NODES_OVERRIDE=4
char* cb_override = getenv("HDF5_CB_NODES_OVERRIDE");
char* div_data_I = getenv("HDF5_I_DIV");
//char* all_to_all = getenv("HDF5_ALL2ALL");
// Call the topology API to get the collective-buffer count and size
get_cb_props( &agg_size, &agg_count, testFileName);
get_cb_props( &agg_size, &agg_count, testFileName, cb_override);
// Now create a fake data set that will have all ranks writing to all aggs
int64_t chunk_size = agg_size * agg_count; // Assume all to all
int64_t num_chunks = 1; // Assume single chunk
int64_t dperiod = (int64_t) (chunk_size * nprocs);
int64_t* data_size = (int64_t*) malloc(sizeof(int64_t*) * (num_chunks) );
int64_t* offset_size = (int64_t*) malloc(sizeof(int64_t*) * (num_chunks) );
for (i=0; i<num_chunks; i++) {
data_size[i] = (int64_t) (chunk_size);
offset_size[i] = (int64_t) (i * dperiod + rank * chunk_size);
// Real expected data pattern for first data size (Dim-1 Only)
if ( div_dat_I && !(strcmp(div_data_I,"0") == 0) && (numDims == 1)) {
/*
* n_el = ( I_f * S_stripe * C_stripe ) / ( np * sizeof( double ) )
*
* n_el = element count in 1-D case to ensure data-pattern is divisible by aggs
* I_f = must be 1,2,..
* S_stripe = Size of the aggregator buffer (stripe size in lustre)
* C_stripe = Number of aggregator buffers (stripe count in lustre)
* np = Total number of MPI ranks
*
*/
int I_f = atoi( div_data_I ); // Let's assume we want to adjust n_el according to above eqn:
minNEls[0] = ( I_f * agg_size * agg_count ) / ( nprocs * sizeof(double) );
chunk_size = minNEls[0] * sizeof( double );
dperiod = (int64_t) (chunk_size * nprocs);
for (i=0; i<num_chunks; i++) {
data_size[i] = (int64_t) (chunk_size);
offset_size[i] = (int64_t) (i * dperiod + rank * chunk_size);
}
// Create a fake data set that will have all ranks writing to all aggs
} else {
for (i=0; i<num_chunks; i++) {
data_size[i] = (int64_t) (chunk_size);
offset_size[i] = (int64_t) (i * dperiod + rank * chunk_size);
}
}
// Define agg_array
char agg_array[256];
get_cb_config_list ( data_size, offset_size, num_chunks, &agg_array[0], agg_size, agg_count );
if (rank==0) printf("Aggregator List:\n%s\n", agg_array);
......@@ -418,10 +459,20 @@ int main( int argc, char* argv[] )
free(data_size);
free(offset_size);
//sprintf(agg_array, "TEST_STRING");
//if (rank == 0) printf("agg_array is :%s:\n",agg_array);
// Set the MPIO cb_config_list Hint
MPI_Info_create(&mpiHints);
MPI_Info_set(mpiHints, "cb_config_list", agg_array);
// Double check the cb_config_list setting
int mpi_code;
int info_flag;
char info_value[MPI_MAX_INFO_VAL];
mpi_code = MPI_Info_get(mpiHints, "cb_config_list", MPI_MAX_INFO_VAL, info_value, &info_flag);
if (rank == 0) printf("cb_config_list is :%s:\n",info_value);
// Re-open the property list to pass the hint
H5Pclose(accessPropList);
accessPropList = H5Pcreate(H5P_FILE_ACCESS);
......
......@@ -106,7 +106,7 @@ int distance_to_io_node ( int src_rank ) {
return 1;
}
void get_cb_props( int64_t *buffer_size, int64_t *nb_aggr, char* fname ) {
void get_cb_props( int64_t *buffer_size, int64_t *nb_aggr, char* fname, char* cb_override ) {
int rank, nprocs;
int info_flag;
......@@ -147,6 +147,12 @@ void get_cb_props( int64_t *buffer_size, int64_t *nb_aggr, char* fname ) {
#elif ROMIO
MPI_File_get_info(fh, &mpi_file_info);
/* Override the number of aggregators (if HDF5_CB_NODES_OVERRIDE env is set) */
if ( cb_override && !(strcmp(cb_override,"0") == 0) ) {
MPI_Info_set(mpi_file_info, "cb_nodes", cb_override);
}
mpi_code = MPI_Info_get(mpi_file_info, "cb_nodes", MPI_MAX_INFO_VAL, info_value, &info_flag);
*nb_aggr = (int64_t) atoi(info_value);
if (rank == 0) printf("cb_nodes is :%lld:\n",*nb_aggr);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment