Commit 45635698 authored by Sudheer Chunduri's avatar Sudheer Chunduri
Browse files

uploading MILC and HACC SWMs

parent 0bd993ef
#include "hacc_swm_user_code.h"
/**
 * Builds the HACC skeleton workload from the per-job configuration.
 *
 * Reads "request_vc", "response_vc" and "pkt_rsp_bytes" from `cfg` (with
 * defaults 0, 4 and 0), then opens the JSON file named by the required
 * "gen_cfg_filename" key and extracts "ng", "nranks", "box_length" and the
 * four rank-shape tuples, each stored as a string of the form "(a, b, c)".
 *
 * @param user_if       forwarded to the SWMUserCode base and stored in the user_if member
 * @param cfg           job property tree; copied into the cfg member
 * @param generic_ptrs  not referenced by this constructor
 */
HACCSWMUserCode::HACCSWMUserCode(
    SWMUserIF* user_if,
    boost::property_tree::ptree cfg,
    void**& generic_ptrs
) :
    SWMUserCode(user_if),
    user_if(user_if),
    request_vc(cfg.get<uint32_t>("request_vc", 0)),
    response_vc(cfg.get<uint32_t>("response_vc", 4)),
    pkt_rsp_bytes(cfg.get<uint32_t>("pkt_rsp_bytes", 0)),
    gen_cfg_filename(cfg.get<std::string>("gen_cfg_filename")),
    cfg(cfg)
{
    // The configured path may contain environment variables; expand them first.
    gen_cfg_filename = expand_environment_variables(gen_cfg_filename);

    std::ifstream gen_cfg_file;
    gen_cfg_file.open(gen_cfg_filename);
    ASSERT(gen_cfg_file.is_open(), "Could not open gen_cfg_file: " << gen_cfg_filename << std::endl);

    // Slurp the file line by line and hand the text to the JSON parser.
    std::stringstream ss;
    std::string line;
    while(getline(gen_cfg_file, line)) {
        ss << line;
    }
    boost::property_tree::read_json(ss, gen_cfg);

    ng = gen_cfg.get<int>("ng");
    nranks = gen_cfg.get<int>("nranks"); //8;

    // Parse the rank-shape tuples. The sscanf calls must NOT live inside
    // assert(): when NDEBUG is defined the assert argument is never evaluated,
    // which would silently leave every shape array unparsed.
    std::string shape_text;
    int fields = 0;

    shape_text = gen_cfg.get<std::string>("rank_shape_3d");
    fields = sscanf(shape_text.c_str(), "(%d, %d, %d)", &(rank_shape_3d[0]), &(rank_shape_3d[1]), &(rank_shape_3d[2]));
    ASSERT(fields == 3, "Could not parse rank_shape_3d: " << shape_text << std::endl);

    shape_text = gen_cfg.get<std::string>("rank_shape_2d_x");
    fields = sscanf(shape_text.c_str(), "(%d, %d, %d)", &(rank_shape_2d_x[0]), &(rank_shape_2d_x[1]), &(rank_shape_2d_x[2]));
    ASSERT(fields == 3, "Could not parse rank_shape_2d_x: " << shape_text << std::endl);

    shape_text = gen_cfg.get<std::string>("rank_shape_2d_y");
    fields = sscanf(shape_text.c_str(), "(%d, %d, %d)", &(rank_shape_2d_y[0]), &(rank_shape_2d_y[1]), &(rank_shape_2d_y[2]));
    ASSERT(fields == 3, "Could not parse rank_shape_2d_y: " << shape_text << std::endl);

    shape_text = gen_cfg.get<std::string>("rank_shape_2d_z");
    fields = sscanf(shape_text.c_str(), "(%d, %d, %d)", &(rank_shape_2d_z[0]), &(rank_shape_2d_z[1]), &(rank_shape_2d_z[2]));
    ASSERT(fields == 3, "Could not parse rank_shape_2d_z: " << shape_text << std::endl);

    box_length = gen_cfg.get<double>("box_length"); //96.1458;

    gen_cfg_file.close();
}
/**
 * Runs the HACC skeleton for this process.
 *
 * Assembles a HaccConfig from the values parsed in the constructor, creates
 * the HaccTimestep model with fixed performance-model parameters, drives it
 * (repeatedly, calling yield() between invocations, when enable_contexts is
 * set) and finally calls SWM_Finalize().
 */
void
HACCSWMUserCode::call() {
    // Optional phases, read from the job configuration; all default to true.
    bool enable_hacc_fft = cfg.get<bool>("enable_hacc_fft",true);
    bool enable_hacc_exchange = cfg.get<bool>("enable_hacc_exchange",true);
    bool enable_hacc_checksum = cfg.get<bool>("enable_hacc_checksum",true);

    // Performance-model parameters (hard-coded).
    const double interactions_mean       = 1e10;
    const double interactions_delta      = 0.01;
    const double interactions_per_second = 1e9;
    const double copy_MBps               = 1000.0;
    const double fft_rate                = 1e9;

    // Problem/layout description for this rank.
    HaccConfig config(
        ng,
        box_length,
        process_cnt,
        process_id,
        rank_shape_3d,
        rank_shape_2d_x,
        rank_shape_2d_y,
        rank_shape_2d_z,
        request_vc,
        response_vc,
        pkt_rsp_bytes
    );

    // Timestep model; it reports completion through done_to_child.
    timestep = new HaccTimestep (
        user_if,
        &done_to_child,
        config,
        interactions_mean,
        interactions_delta,
        interactions_per_second,
        copy_MBps,
        fft_rate,
        enable_hacc_fft,
        enable_hacc_exchange,
        enable_hacc_checksum
    );

    if (enable_contexts) {
        // Keep invoking the timestep until it reports completion, calling
        // yield() after every unfinished invocation.
        for (;;) {
            (*timestep)();
            if (done_to_child) {
                break;
            }
            yield();
        }
    } else {
        // Single invocation when contexts are disabled.
        (*timestep)();
    }

    SWM_Finalize();
}
// NOTE(review): DLL_POSTAMBLE is a macro defined outside this file; presumably it
// emits the boilerplate that exposes HACCSWMUserCode to the loader -- confirm.
DLL_POSTAMBLE(HACCSWMUserCode)
{"rank_shape_2d_z": "(768, 512, 1)", "rank_shape_2d_y": "(768, 1, 512)", "rank_shape_2d_x": "(1, 768, 512)", "nranks": "393216", "ng": "26112", "rank_shape_3d": "(96, 64, 64)", "box_length": "19613.75", "nthreads_per_rank": "16", "config": "CORAL_2.8M_32tiles"}
\ No newline at end of file
#include "hacc_compute_rcbtree.h"
/**
 * Sets up the compute phase model.
 *
 * @param user_if               forwarded to the SWMUserCode base
 * @param done_from_parent      flag owned by the caller; stored as done_to_parent
 * @param config                run configuration, held by reference
 * @param nint_mean             mean number of force interactions per rank
 * @param nint_delta            normalized spread (sigma/mu) of that number
 * @param nint_per_wall_second  interactions computed per wall second
 */
HaccComputeRCBTree::HaccComputeRCBTree(
    SWMUserIF* user_if,
    bool* done_from_parent,
    HaccConfig & config,
    double nint_mean,
    double nint_delta,
    double nint_per_wall_second
) :
    SWMUserCode(user_if),
    done_to_parent(done_from_parent),
    config(config),
    nint_mean(nint_mean),
    nint_delta(nint_delta),
    nint_per_wall_second(nint_per_wall_second),
    // The per-rank interaction count is currently always the mean, whatever
    // the value of nint_delta (see below).
    nint(nint_mean)
{
    // Intended behaviour for nint_delta > 0.0: draw a Gaussian sample with
    // boost::random, seeding the RNG with the rank id so that the result is
    // reproducible and ranks do not all get the same variate. That path is
    // disabled (BOZO -- this is causing trouble):
    //   double nint_sigma = nint_mean*nint_delta;
    //   boost::mt19937 rng(config.myrank);
    //   boost::normal_distribution<double> d(nint_mean, nint_sigma);
    //   nint = d(rng);
}
/**
 * Models the tree-build / force-evaluation phase (build_tree_and_evaluate_forces).
 *
 * Each pass clears *done_to_parent, charges nint/nint_per_wall_second seconds
 * via SWM_Compute, sets *done_to_parent and calls yield(). The pass repeats
 * forever when enable_contexts is set and runs exactly once otherwise.
 */
void
HaccComputeRCBTree::call() {
    // Sampled once on entry, matching the original if/else dispatch.
    const bool repeat_forever = static_cast<bool>(enable_contexts);

    do {
        // TODO:compute: tree build
        *done_to_parent = false;
        // Compute interactions
        SWM_Compute(nint/nint_per_wall_second);
        *done_to_parent = true;
        yield();
    } while (repeat_forever);
}
#ifndef _HACC_COMPUTE_RCBTREE_HPP
#define _HACC_COMPUTE_RCBTREE_HPP
#include <stdio.h>
#include <boost/random/normal_distribution.hpp>
//#include <boost/random/mersenne_twister.hpp> //BOZO -- this is causing issues
#include "hacc_config.h"
#include "swm_user_code.h"
#include "swm.h"
#include "swm_process_app_if.h"
#include "app_base_swm_user_code.h"
// Compute-phase model of the HACC skeleton. call() charges
// nint/nint_per_wall_second seconds through SWM_Compute and signals completion
// to its parent through the done_to_parent flag.
class HaccComputeRCBTree : public SWMUserCode {
public:
// user_if is forwarded to SWMUserCode; done_from_parent is stored as
// done_to_parent; config is held by reference; the three doubles are stored in
// the members of the same name.
HaccComputeRCBTree(
SWMUserIF* user_if,
bool* done_from_parent,
HaccConfig & config,
double nint_mean,
double nint_delta,
double nint_per_wall_second
);
// Runs the compute phase (see hacc_compute_rcbtree implementation).
void call();
protected:
// Completion flag shared with the parent; written by call().
bool* done_to_parent;
// Run configuration; a reference, so the referenced object must outlive this one.
HaccConfig & config;
// Ensemble mean (across ranks) of number of force interactions/rank
double nint_mean;
// Standard deviation (across ranks) of number of force interactions/rank,
// normalized by nint_mean (i.e. delta=sigma/mu)
double nint_delta;
// Number of interactions computed per wall second
double nint_per_wall_second;
// Number of interactions charged per call(); set by the constructor.
double nint;
};
#endif
#include "hacc_config.h"
/**
 * Captures the problem size, rank layout and network settings for one rank.
 *
 * Copies the four NDIM-element rank-shape arrays, fills rank_shape_1d, and
 * derives the overload (halo) depth in grid cells from the fixed overload
 * length and the grid spacing.
 *
 * @param _ng              stored as ng
 * @param _box_length      stored as box_length; ng is divided by it to get phys2grid
 * @param _nranks          stored as nranks
 * @param _myrank          stored as myrank
 * @param _rank_shape_3d   NDIM-element array, copied
 * @param _rank_shape_2d_x NDIM-element array, copied
 * @param _rank_shape_2d_y NDIM-element array, copied
 * @param _rank_shape_2d_z NDIM-element array, copied
 * @param request_vc       stored as request_vc
 * @param response_vc      stored as response_vc
 * @param pkt_rsp_bytes    stored as pkt_rsp_bytes
 */
HaccConfig::HaccConfig (
    int _ng, double _box_length,
    int _nranks, int _myrank,
    const int * _rank_shape_3d,
    const int * _rank_shape_2d_x,
    const int * _rank_shape_2d_y,
    const int * _rank_shape_2d_z,
    uint32_t request_vc,
    uint32_t response_vc,
    uint32_t pkt_rsp_bytes) :
    ng(_ng),
    box_length(_box_length),
    nranks(_nranks),
    myrank(_myrank),
    request_vc(request_vc),
    response_vc(response_vc),
    pkt_rsp_bytes(pkt_rsp_bytes)
{
    for (int i=0; i<NDIM; i++) {
        rank_shape_3d  [i] = _rank_shape_3d  [i];
        rank_shape_2d_x[i] = _rank_shape_2d_x[i];
        rank_shape_2d_y[i] = _rank_shape_2d_y[i];
        rank_shape_2d_z[i] = _rank_shape_2d_z[i];
    }

    // NOTE(review): the first extent is set to this rank's id, not the rank
    // count; a 1-D "shape" would normally be (_nranks, 1, 1) -- confirm intent
    // against the users of rank_shape_1d before changing.
    rank_shape_1d[0] = _myrank;
    rank_shape_1d[1] = 1;
    rank_shape_1d[2] = 1;

    overload_len = 8.0; // Fixed in CORAL indat file
    phys2grid = ng / box_length;

    // Round up in double precision. ceilf() would first narrow the double
    // product to float, which can round a value just above an integer down
    // onto that integer and yield one cell too few.
    ng_overload = static_cast< int > (ceil(overload_len*phys2grid));
}
#ifndef _HACC_CONFIG_HPP
#define _HACC_CONFIG_HPP
#include <stdint.h>
#include <math.h>
// Number of spatial dimensions; the length of every rank-shape array below.
const int NDIM = 3;
// Plain data holder describing one rank's view of a HACC run: problem size,
// rank layouts and the network parameters used for messages. All fields are
// public and filled by the constructor.
class HaccConfig {
public:
// Grid size parameter (constructor argument _ng).
int ng;
// Box length (constructor argument _box_length); divides ng to give phys2grid.
double box_length;
// Rank count and this rank's id, as passed to the constructor.
int nranks;
int myrank;
// Rank layouts copied element-wise from the constructor arguments.
int rank_shape_3d [NDIM];
int rank_shape_2d_x[NDIM];
int rank_shape_2d_y[NDIM];
int rank_shape_2d_z[NDIM];
// Set by the constructor to { _myrank, 1, 1 }.
int rank_shape_1d [NDIM];
// Overload length; the constructor fixes it to 8.0.
double overload_len;
// ng / box_length.
double phys2grid;
// overload_len * phys2grid rounded up to an integer.
int ng_overload;
// Network parameters, stored unchanged from the constructor arguments.
uint32_t request_vc;
uint32_t response_vc;
uint32_t pkt_rsp_bytes;
// The four rank-shape pointers must each reference at least NDIM ints.
HaccConfig (
int _ng, double _box_length,
int _nranks, int _myrank,
const int * _rank_shape_3d,
const int * _rank_shape_2d_x,
const int * _rank_shape_2d_y,
const int * _rank_shape_2d_z,
uint32_t request_vc,
uint32_t response_vc,
uint32_t pkt_rsp_bytes);
};
#endif
{
"jobs": [
{
"name": "StandaloneSWM",
"app": "hacc",
"size": 786432,
"time": 0,
"placement": {
"type": "offset_stride_weight",
"offset": 0,
"stride": 1,
"weight": 1
},
"cfg": {
"request_vc": 0,
"response_vc": 1,
"iteration_cnt": 1,
"gen_cfg_filename": "hacc_gen_coral.json"
}
}
]
}
#include "hacc_exchange.h"
/**
 * Prepares the halo-exchange model for this rank.
 *
 * Locates the rank in the 3D rank layout, sizes the exchange buffer from the
 * largest per-rank grid extent and the overload depth, and resolves the ranks
 * of all NUM_OF_NEIGHBORS periodic neighbors.
 *
 * @param user_if           forwarded to the SWMUserCode base
 * @param done_from_parent  flag owned by the caller; stored as done_to_parent
 * @param config            run configuration, held by reference
 * @param buffer_copy_MBps  modeled buffer copy rate in MB/s
 */
HaccExchange::HaccExchange(
    SWMUserIF* user_if,
    bool* done_from_parent,
    HaccConfig & config,
    double buffer_copy_MBps
) :
    SWMUserCode(user_if),
    done_to_parent(done_from_parent),
    config(config),
    buffer_copy_MBps(buffer_copy_MBps),
    index3d()
{
    // Set our position in the 3D rank layout
    index3d.index_to_tuple(config.rank_shape_3d, config.myrank, mytuple);

    // Per-dimension local grid extent, tracking the largest one.
    int largest_extent = 0;
    for (int dim = 0; dim < 3; ++dim) {
        ng_alive[dim] = config.ng/config.rank_shape_3d[dim];
        if (largest_extent < ng_alive[dim]) {
            largest_extent = ng_alive[dim];
        }
    }

    // Buffer size (max of all halo sizes). The depth is max(ng_overload,
    // ng_overload+1) written so that the +1 is skipped at INT_MAX.
    //Thomas needs to check this change for a compiler warning... BOZO, JOHNT
    const int halo_depth =
        (config.ng_overload == INT_MAX) ? config.ng_overload : (config.ng_overload+1);
    buffer_size = largest_extent * largest_extent * halo_depth;

    // Neighbor shifts, consumed pairwise by call().
    // CAUTION: Do not change the order of the following rows
    // unless you know what you're doing!
    static const int neighbor_shift[][3] = {
        // Face neighbors
        {-1,  0,  0}, { 1,  0,  0},
        { 0, -1,  0}, { 0,  1,  0},
        { 0,  0, -1}, { 0,  0,  1},
        // Edge neighbors
        {-1, -1,  0}, { 1,  1,  0},
        {-1,  1,  0}, { 1, -1,  0},
        { 0, -1, -1}, { 0,  1,  1},
        { 0, -1,  1}, { 0,  1, -1},
        {-1,  0, -1}, { 1,  0,  1},
        { 1,  0, -1}, {-1,  0,  1},
        // Corner neighbors
        {-1, -1, -1}, { 1,  1,  1},
        {-1, -1,  1}, { 1,  1, -1},
        {-1,  1, -1}, { 1, -1,  1},
        {-1,  1,  1}, { 1, -1, -1}
    };
    const int shift_count =
        static_cast<int>(sizeof(neighbor_shift) / sizeof(neighbor_shift[0]));

    for (int n = 0; n < shift_count; ++n) {
        neighbor_rank[n] = get_my_neighbor(
            neighbor_shift[n][0], neighbor_shift[n][1], neighbor_shift[n][2]);
    }
    assert(shift_count == NUM_OF_NEIGHBORS);
}
int
HaccExchange::get_my_neighbor(int ishift, int jshift, int kshift) {
int nbtuple[3] = { mytuple[0]+ishift, mytuple[1]+jshift, mytuple[2]+kshift };
// Handle periodicity
for (int i=0; i<3; i++) {
nbtuple[i] = (nbtuple[i]+config.rank_shape_3d[i]) % config.rank_shape_3d[i];
}
int nbrank;
index3d.tuple_to_index(config.rank_shape_3d, nbtuple, &nbrank);
return nbrank;
}
/**
 * Models packing or unpacking of an exchange buffer as pure compute time.
 *
 * @param buffer_size  number of elements copied; each is SIZEOF_ELT bytes
 *
 * The byte count is formed in double precision: the int product
 * buffer_size * SIZEOF_ELT can exceed INT_MAX for large buffers, which is
 * undefined behaviour. For products that fit in an int the result is unchanged.
 */
void
HaccExchange::do_buffer_copy(int buffer_size) {
    const double nbytes = static_cast<double>(buffer_size) * SIZEOF_ELT;
    // bytes -> MB, then divide by the copy rate in MB/s.
    const double seconds = (nbytes/1e6) / buffer_copy_MBps;
    SWM_Compute(seconds);
}
/**
 * Performs one modeled halo exchange: pack, SWM_Sendrecv, unpack.
 *
 * @param inbor_send_to    index into neighbor_rank of the destination
 * @param inbor_recv_from  index into neighbor_rank of the source
 *
 * Important note about buffer copies: while the MPI op size is exactly
 * buffer_size, the real copy size can be smaller. Because the copies are not
 * a major hotspot this is overlooked and buffer_size is used for them too;
 * the idea is just to introduce some delay between the MPI comms.
 */
void
HaccExchange::exchange(int inbor_send_to, int inbor_recv_from) {
    int dest_rank = neighbor_rank[inbor_send_to  ];
    int src_rank  = neighbor_rank[inbor_recv_from];
    int message_bytes = buffer_size * SIZEOF_ELT;
    // should really be nonzero to have network send rsp pkt when matching completes
    uint32_t response_bytes = 0;

    // Pack send buffer
    do_buffer_copy(buffer_size);

    // Perform the MPI exchange
    SWM_Sendrecv(
        SWM_COMM_WORLD,      // comm_id
        dest_rank,           // sendpeer
        0,                   // sendtag
        config.request_vc,   // sendreqvc
        config.response_vc,  // sendrspvc
        NO_BUFFER,           // sendbuf
        message_bytes,       // sendbytes
        response_bytes,      // rspbytes
        src_rank,            // recvpeer
        0,                   // recvtag
        NO_BUFFER            // recvbuf
    );

    // Unpack receive buffer
    do_buffer_copy(buffer_size);
}
/**
 * Models the grid exchange (exchange_grid).
 *
 * Each pass clears *done_to_parent, walks the neighbor list in consecutive
 * pairs exchanging in both directions, sets *done_to_parent and calls
 * yield(). The pass repeats forever when enable_contexts is set and runs
 * exactly once otherwise.
 */
void
HaccExchange::call() {
    // Sampled once on entry, matching the original if/else dispatch.
    const bool repeat_forever = static_cast<bool>(enable_contexts);

    do {
        *done_to_parent = false;
        for (int first = 0; first < NUM_OF_NEIGHBORS; first += 2) {
            const int second = first + 1;
            exchange(first, second);
            exchange(second, first);
        }
        *done_to_parent = true;
        yield();
    } while (repeat_forever);
}
#ifndef _HACC_EXCHANGE_H_
#define _HACC_EXCHANGE_H_
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "app_base_swm_user_code.h"
#include "hacc_config.h"
#include "hacc_ndindex.h"
class SWMProcessUserIF;
// Halo-exchange model of the HACC skeleton: exchanges a fixed-size buffer with
// each of the 26 periodic neighbors in the 3D rank layout, with modeled
// pack/unpack copies around every SWM_Sendrecv.
class HaccExchange : public SWMUserCode {
protected:
// Completion flag shared with the parent; written by call().
bool* done_to_parent;
// Run configuration; a reference, so the referenced object must outlive this one.
HaccConfig & config;
// Modeled buffer copy rate in MB/s, used by do_buffer_copy().
double buffer_copy_MBps;
// Converts between rank ids and 3D tuples for config.rank_shape_3d.
RowMajorIndexer<3> index3d;
// Exchange buffer size in elements; computed in the constructor.
int buffer_size;
// Per-dimension value of config.ng / config.rank_shape_3d[i].
int ng_alive[3];
// This rank's position in the 3D rank layout.
int mytuple[3];
// NOTE(review): per-instance const member, unlike the static NUM_OF_NEIGHBORS
// below -- consider making it static as well.
const int SIZEOF_ELT = 4; // We have -DGRID_32, so we're using MPI_FLOAT
static const int NUM_OF_NEIGHBORS = 26;
// Neighbor ranks in the order fixed by the constructor (faces, edges, corners).
int neighbor_rank[NUM_OF_NEIGHBORS];
public:
// user_if is forwarded to SWMUserCode; done_from_parent is stored as
// done_to_parent; config is held by reference.
HaccExchange(
SWMUserIF* user_if,
bool* done_from_parent,
HaccConfig & config,
double buffer_copy_MBps
);
// Rank of the neighbor at the given shift from mytuple, with periodic wrap.
int get_my_neighbor(int ishift, int jshift, int kshift);
// Charges the time to copy buffer_size elements at buffer_copy_MBps.
void do_buffer_copy(int buffer_size);
// One pack / SWM_Sendrecv / unpack step; arguments index neighbor_rank.
void exchange(int inbor_send_to, int inbor_recv_from);
// Runs the full exchange over all neighbor pairs.
void call();
};
#endif
#include "hacc_fft.h"
//#include <asim/restricted/swm_process.h>
HaccFFT::HaccFFT(
SWMUserIF* user_if,
bool* done_from_parent,
HaccConfig & config,
double buffer_copy_MBps,
double fft_work_per_second
) :
SWMUserCode(user_if),
done_to_parent(done_from_parent),
config(config),
buffer_copy_MBps(buffer_copy_MBps),
fft_work_per_second(fft_work_per_second)//,
//indexer_3d()
{