#include #include #include #include #include #include "tapioca.hpp" #define RED "\x1b[31m" #define GREEN "\x1b[32m" #define BLUE "\x1b[34m" #define RESET "\x1b[0m" int main (int argc, char * argv[]) { int world_numtasks, world_myrank, mycolor, mykey, sub_numtasks, sub_myrank, i, file_id; int64_t num_particles; int64_t sub_particles, tot_particles, particle_size, file_size, tot_size; int64_t scan_size = 0, offset, hdr = 0; double start_time, end_time, tot_time, max_time; double io_bw; MPI_Comm sub_comm; MPI_Status status; char output[100]; Tapioca tp; int64_t chunkCount[9], chunkOffset[9]; int chunkSize[9]; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &world_numtasks); MPI_Comm_rank(MPI_COMM_WORLD, &world_myrank); mycolor = tp.topology.BridgeNodeId (); //mycolor = 42; mykey = world_myrank; MPI_Comm_split (MPI_COMM_WORLD, mycolor, mykey, &sub_comm); MPI_Comm_size(sub_comm, &sub_numtasks); MPI_Comm_rank(sub_comm, &sub_myrank); num_particles = atoi ( argv[1] ); file_id = 0; if ( argv[2] != NULL ) file_id = atoi ( argv[2] ); #ifdef BGQ snprintf (output, 100, "/projects/visualization/ftessier/debug/HACC-AOS-%08d-%d.dat", mycolor, file_id); #elif XC40 snprintf (output, 100, "/lus/theta-fs0/projects/Performance/ftessier/HACC/HACC-AOS-%08d-%d.dat", mycolor, file_id); #else snprintf (output, 100, "./HACC-SOA-%08d-%d.dat", mycolor, file_id); #endif /*****************/ /* WRITE */ /*****************/ float *xx, *yy, *zz, *vx, *vy, *vz, *phi; int64_t* pid; uint16_t* mask; xx = new float[num_particles]; yy = new float[num_particles]; zz = new float[num_particles]; vx = new float[num_particles]; vy = new float[num_particles]; vz = new float[num_particles]; phi = new float[num_particles]; pid = new int64_t[num_particles]; mask = new uint16_t[num_particles]; for (uint64_t i = 0; i< num_particles; i++) { xx[i] = (float)i; yy[i] = (float)i; zz[i] = (float)i; vx[i] = (float)i; vy[i] = (float)i; vz[i] = (float)i; phi[i] = (float)i; pid[i] = (int64_t)i; mask[i] = (uint16_t)world_myrank; } MPI_Allreduce(&num_particles, &sub_particles, 1, MPI_LONG_LONG, MPI_SUM, sub_comm); MPI_Allreduce(&num_particles, &tot_particles, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); particle_size = (7 * sizeof(float)) + sizeof(int64_t) + sizeof(uint16_t); file_size = particle_size * sub_particles; tot_size = particle_size * tot_particles; MPI_Exscan (&num_particles, &scan_size, 1, MPI_LONG_LONG, MPI_SUM, sub_comm); if (0 == sub_myrank) { fprintf (stdout, GREEN "[INFO]" RESET " [%08d] miniHACC-AoS\n", mycolor); fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Read output file (AoS data layout)\n", mycolor); fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> %lld particles per rank\n", mycolor, num_particles); fprintf (stdout, GREEN "[INFO]" RESET " [%08d] --> File size: %.2f MB (%lld particles)\n", mycolor, (double)file_size/(1024*1024), sub_particles); } /*****************/ /* READ */ /*****************/ float *xx_r, *yy_r, *zz_r, *vx_r, *vy_r, *vz_r, *phi_r; int64_t* pid_r; uint16_t* mask_r; xx_r = new float[num_particles]; yy_r = new float[num_particles]; zz_r = new float[num_particles]; vx_r = new float[num_particles]; vy_r = new float[num_particles]; vz_r = new float[num_particles]; phi_r = new float[num_particles]; pid_r = new int64_t[num_particles]; mask_r = new uint16_t[num_particles]; /*****************/ /* INIT TAPIOCA */ /*****************/ for ( i = 0; i < 9; i++ ) { chunkCount[i] = num_particles; } chunkSize[0] = sizeof(float); chunkSize[1] = sizeof(float); chunkSize[2] = sizeof(float); chunkSize[3] = sizeof(float); chunkSize[4] = sizeof(float); chunkSize[5] = sizeof(float); chunkSize[6] = sizeof(float); chunkSize[7] = sizeof(int64_t); chunkSize[8] = sizeof(uint16_t); chunkOffset[0] = hdr + scan_size * particle_size; for ( i = 1; i < 9; i++ ) { chunkOffset[i] = chunkOffset[i - 1] + chunkCount[i - 1] * chunkSize[i - 1]; } tp.Init (chunkCount, chunkSize, chunkOffset, 9, hdr, sub_comm); tp.setAggregationTier (2, DDR, "/scratch/tmp"); tp.setTargetTier (HDD, file_size, output); /*****************/ start_time = MPI_Wtime(); if (0 == sub_myrank) fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Read output file\n", mycolor); offset = scan_size * particle_size; tp.Read (offset, xx_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, yy_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, zz_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, vx_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, vy_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, vz_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, phi_r, num_particles, MPI_FLOAT, &status); offset += num_particles * sizeof(float); tp.Read (offset, pid_r, num_particles, MPI_LONG_LONG, &status); offset += num_particles * sizeof(int64_t); tp.Read (offset, mask_r, num_particles, MPI_UNSIGNED_SHORT, &status); tp.Finalize (); end_time = MPI_Wtime(); tot_time = end_time - start_time; MPI_Reduce (&tot_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (0 == world_myrank) { io_bw = (double)tot_size / max_time / (1024 * 1024); fprintf (stdout, BLUE "[TIMING]" RESET " Read I/O bandwidth: %.2f MBps (%.2f MB in %.2f ms)\n", io_bw, (double)tot_size/(1024*1024), max_time * 1000); } /*****************/ /* VERIFICATION */ /*****************/ for (uint64_t i = 0; i< num_particles; i++) { if ((xx[i] != xx_r[i]) || (yy[i] != yy_r[i]) || (zz[i] != zz_r[i]) || (vx[i] != vx_r[i]) || (vy[i] != vy_r[i]) || (vz[i] != vz_r[i]) || (phi[i] != phi_r[i])|| (pid[i] != pid_r[i]) || (mask[i] != mask_r[i])) { fprintf (stdout, RED "[ERROR]" RESET " Wrong value for particle %d\n", i); MPI_Abort (MPI_COMM_WORLD, -1); } } if (0 == sub_myrank) fprintf (stdout, GREEN "[INFO]" RESET " [%08d] Content verified and consistent\n", mycolor); /*****************/ /* FREE */ /*****************/ delete [] xx; delete [] xx_r; delete [] yy; delete [] yy_r; delete [] zz; delete [] zz_r; delete [] vx; delete [] vx_r; delete [] vy; delete [] vy_r; delete [] vz; delete [] vz_r; delete [] phi; delete [] phi_r; delete [] pid; delete [] pid_r; delete [] mask; delete [] mask_r; MPI_Finalize (); }