Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
codes
workloads
Commits
316208ec
Commit
316208ec
authored
Feb 21, 2018
by
Misbah Mubarak
Browse files
Adding workloads provided by intel
parents
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
swm/lammps/lammps.cpp
0 → 100644
View file @
316208ec
This diff is collapsed.
Click to expand it.
swm/lammps/lammps.h
0 → 100644
View file @
316208ec
#ifndef _LAMMPS_
#define _LAMMPS_
#include "app_base_swm_user_code.h"
#include <list>
// Internal LAMMPS parameters (compile-time constants of the workload model).
// Skin cutoff for ghost neighbor exchange (on comm)
#define GHOST_SKIN_CUTOFF 12.0
// Skin cutoff for fft neighbor exchange (on commgrid)
#define FFT_SKIN_CUTOFF 2.0
// Number of atoms in a basic block
#define N_ATOMS_BASE 32000
// Neighbor check after NEIGH_DELAY, then every NEIGH_EVERY
// NOTE(review): presumably both are counted in time steps -- confirm in lammps.cpp.
#define NEIGH_DELAY 5
#define NEIGH_EVERY 1
// Dimensions of the basic block: low/high bound for each of X, Y and Z.
// The values are parenthesized so the negative bounds expand safely inside expressions.
#define XLO_BASE (-27.5)
#define XHI_BASE (27.5)
#define YLO_BASE (-38.5)
#define YHI_BASE (38.5)
#define ZLO_BASE (-36.3646)
#define ZHI_BASE (36.3615)
// lammps factors for determining required decomposition
#define GEWALD 0.243177
#define FFT_ACCURACY 0.033206
// number of transposes in fft
// (also the length of the k_* per-transpose arrays declared in LAMMPS_SWM)
#define NUM_TRANSPOSE 13
// number of allreduces at the end of neighbor exchange
// (also the length of LAMMPS_SWM::neigh_end_cyc)
#define NUM_NEIGH_ALLREDUCE 5
// The constant pi, written out to 20 decimal places.
#define PI 3.14159265358979323846
/**
 * LAMMPS workload model expressed as SWM user code.
 *
 * This header only declares the class: the configuration fields, the
 * per-phase communication tables and the helper routines. Every member
 * function is defined elsewhere.
 *
 * NOTE(review): the meaning of the private tables is inferred from their
 * names and from the parameter list of doP2P(); confirm against lammps.cpp.
 */
class LAMMPS_SWM : public AppBaseSWMUserCode
{
public:
    LAMMPS_SWM(SWMUserIF* user_if,
               boost::property_tree::ptree cfg,
               void**& generic_ptrs);
    ~LAMMPS_SWM();

    void call();

protected:
    uint32_t x_rep;           ///< number of replicas in X dimension
    uint32_t y_rep;           ///< number of replicas in Y dimension
    uint32_t z_rep;           ///< number of replicas in Z dimension
    uint32_t num_timesteps;   ///< number of time steps to simulate
    uint32_t req_vc;          ///< request vc
    uint32_t resp_vc;         ///< response vc
    double   router_freq;     ///< router frequency in Hz
    double   cpu_freq;        ///< CPU frequency in Hz
    double   cpu_sim_speedup; ///< simulation speedup factor (makes CPU faster)
    uint32_t rsp_bytes;       ///< NOTE(review): undocumented; presumably a response size in bytes -- confirm

private:
    // Three-element arrays; the decomposition helpers below take
    // parameters with the same names.
    double prd[3];
    double pppmGrid[3];
    int    procNums[3];

    // One entry per FFT transpose (NUM_TRANSPOSE entries each).
    int* k_r_targets[NUM_TRANSPOSE];
    int* k_s_targets[NUM_TRANSPOSE];
    int* k_s_sizes[NUM_TRANSPOSE];
    long k_cyc[NUM_TRANSPOSE];
    int  k_len[NUM_TRANSPOSE];

    // The following groups share one layout: r_targets / s_targets /
    // s_sizes / cyc / len, which matches the parameter list of doP2P().

    // gh_fw group
    int*  gh_fw_r_targets;
    int*  gh_fw_s_targets;
    int*  gh_fw_s_sizes;
    long* gh_fw_cyc;
    int   gh_fw_len;

    // gh_rw group
    int*  gh_rw_r_targets;
    int*  gh_rw_s_targets;
    int*  gh_rw_s_sizes;
    long* gh_rw_cyc;
    int   gh_rw_len;

    // k_pre group
    int*  k_pre_r_targets;
    int*  k_pre_s_targets;
    int*  k_pre_s_sizes;
    long* k_pre_cyc;
    int   k_pre_len;

    // k_post group
    int*  k_post_r_targets;
    int*  k_post_s_targets;
    int*  k_post_s_sizes;
    long* k_post_cyc;
    int   k_post_len;

    // fix group
    int*  fix_r_targets;
    int*  fix_s_targets;
    int*  fix_s_sizes;
    long* fix_cyc;
    int   fix_len;

    // neigh_e group
    int*  neigh_e_r_targets;
    int*  neigh_e_s_targets;
    int*  neigh_e_s_sizes;
    long* neigh_e_cyc;
    int   neigh_e_len;

    // neigh_b group
    int*  neigh_b_r_targets;
    int*  neigh_b_s_targets;
    int*  neigh_b_s_sizes;
    long* neigh_b_cyc;
    int   neigh_b_len;

    // Neighbor-check bookkeeping.
    long   neigh_check_cyc;
    double neigh_check_average;
    double neigh_check_cumulative;
    int    neigh_check_count;

    // One entry per allreduce at the end of neighbor exchange.
    long neigh_end_cyc[NUM_NEIGH_ALLREDUCE];

    long start_cyc;
    long k_energy_cyc;
    long final_cyc;

    void modelInit();
    void doP2P(int len, int* r_targets, int* s_targets, int* s_sizes, long* cyc_cnt);
    void doNeighExch();
    void doFFT();
    bool neigh_check();

    // process decomposition
    void proc_decomposition(int n, double prd[], int procNums[]);

    // PPPM decomposition
    void   pppm_decomposition(int n, double prd[], double pppmGrid[]);
    double pppm_estimate_ik_error(double h, double prd, int n, double all_prd[]);
    int    pppm_factorable(int n);

    // neighbor comm setup
    void ghost_setup(double cutoff, int rank, double t_vol);
    void k_pre_setup(double cutoff, int rank, double f_vol);
    void k_post_setup(double cutoff, int rank, double f_vol);
    void neigh_e_setup(double cutoff, int rank, double t_vol);

    // k space parameters
    void get_k_params(int rank, double f_vol);
    void get_nx_in(int rank, int nx[10]);
    void get_nx_fft(int rank, int nx[10]);
    void get_nx_mid1(int rank, int nx[10]);
    void get_nx_mid2(int rank, int nx[10]);

    int  find_one_overlap(int a[6], int b[6], int s[3]);
    void find_overlap(int all_in[], int in_shift,
                      int all_out[], int out_shift,
                      int rank,
                      int r_r[], int* r_len,
                      int s_r[], int s_rs[], int* s_len);

    void best_2d_mapping(int* px, int* py, int nx, int ny);
    void bifactor(int n, int* f1, int* f2);

    void rank_to_xyz(int rank, int coord[3]);
    int  xyz_to_rank(int coord[3]);
    void rank_to_neigh(int rank, int neighs[6]);
};
#endif
swm/lammps/lammps_model.h
0 → 100644
View file @
316208ec
// Fitted constants for the LAMMPS workload model.
//
// Every floating-point literal is written as a single token; a literal broken
// up by whitespace (e.g. "2 . 48839990371") is not valid C++.
//
// NOTE(review): the code that consumes these values is not visible here.
// Judging by the names only, msg_* look like per-phase message-size factors,
// each ins_*_a / ins_*_b pair like the two coefficients of a per-step fit, and
// ins_*_cpi like a cycles-per-instruction ratio -- confirm against lammps.cpp.
//
// NOTE(review): these are non-const namespace-scope definitions, so including
// this header from more than one translation unit yields duplicate definitions.

// ---- message factors ----
double msg_ghost_fw = 2.48839990371;
double msg_ghost_rw = 2.48841071356;
double msg_k_pre = 8.0;
double msg_k_post = 24.0;
double msg_fix = 2.48841071356;
double msg_neigh_exch = 3.08673789851;
double msg_neigh_border = 6.63563071593;

// ---- start ----
double ins_start_a[1] = {8.51937488057};
double ins_start_b[1] = {1544.46231029};
double ins_start_cpi = 0.843141163755;

// ---- neighbor check ----
double ins_neigh_check_a[1] = {89.6202085326};
double ins_neigh_check_b[1] = {195042.694781};
double ins_neigh_check_cpi = 0.951841661097;

// ---- neighbor exchange send/recv ----
double ins_neigh_exch_sr_a[3] = {11.5746361748, 1.3778877165, 1.34223584427};
double ins_neigh_exch_sr_b[3] = {438096.47233, 4800.95420873, 8838.30958016};
double ins_neigh_exch_sr_cpi = 1.58963777201;

// ---- neighbor border send/recv ----
double ins_neigh_border_sr_a[6] = {1.8243979135, 2.03810250649, 3.06679631198,
                                   3.0870981696, 3.58608401984, 2.7521157202};
double ins_neigh_border_sr_b[6] = {32382.7816726, 51218.9714454, 83557.2150064,
                                   99920.5231836, 248049.508775, 357653.369027};
double ins_neigh_border_sr_cpi = 1.75604132297;

// ---- neighbor end (5 entries, matching NUM_NEIGH_ALLREDUCE in lammps.h) ----
double ins_neigh_end_a[5] = {1.21665755465, 6595.30712353, 29.6655250587,
                             58.3229990241, 1.69059035676};
double ins_neigh_end_b[5] = {139153.690154, 11183101.9944, 44150.0262654,
                             91071.0968296, 2057.50606924};
double ins_neigh_end_cpi = 0.784053776222;

// ---- k-space pre ----
double ins_k_pre_a[6] = {43360.7612799, 0.780443563075, 0.999500801383,
                         1.23253340415, 1.11044737418, 0.813347233046};
double ins_k_pre_b[6] = {-8151826.36712, 12652.2538632, 10478.6380748,
                         5362.54935036, 4946.43943567, 2809.82745824};
double ins_k_pre_cpi = 0.897392796161;

// ---- k-space FFT (13 entries, matching NUM_TRANSPOSE in lammps.h) ----
double ins_k_fft_a[13] = {12.7660165971, 49.5132610315, 36.7957959,
                          48.6517835605, 102.611869648, 36.7771213175,
                          48.6352484315, 71.1465535394, 36.7760281598,
                          48.624983362, 71.1461101858, 36.7661712493,
                          48.6196591605};
double ins_k_fft_b[13] = {2755.32405875, -14031.7206559, -268.944769389,
                          -34509.6094468, -15768.811004, 468.975498509,
                          -34540.223738, -23736.2087919, -119.26574367,
                          -34526.5937504, -23716.4687588, 130.351010748,
                          -34514.7315393};
double ins_k_fft_cpi = 0.700575655531;

// ---- k-space post ----
double ins_k_post_a[6] = {15.7888010275, 1.98489719387, 2.63496119567,
                          3.00664450319, 2.41731560611, 1.9085386988};
double ins_k_post_b[6] = {390.234582372, 6358.92071557, 11170.1400931,
                          12661.0465342, 24102.3020575, 30456.6591775};
double ins_k_post_cpi = 1.72457235374;

// ---- k-space energy ----
double ins_k_energy_a[1] = {5476.95439615};
double ins_k_energy_b[1] = {-1073884.00556};
double ins_k_energy_cpi = 0.475585305054;

// ---- ghost forward ----
double ins_ghost_fw_a[6] = {43.2730897193, 0.0, 9.23745386168e-09,
                            2.35234627328e-08, 2.00592476871e-08, 1.33019109126e-07};
double ins_ghost_fw_b[6] = {96380.315439, 313.368687371, 313.346132312,
                            313.327974814, 313.346666244, 313.3420572};
double ins_ghost_fw_cpi = 0.924608655408;

// ---- ghost reverse ----
double ins_ghost_rw_a[6] = {0.0, 0.31849027582, 0.316224042474,
                            0.513934022608, 0.501449013036, 0.298131697301};
double ins_ghost_rw_b[6] = {566.574947244, 81855.4482201, 82072.2144336,
                            18124.8399648, 22463.1938402, 12639.7445334};
double ins_ghost_rw_cpi = 1.91339519762;

// ---- fix ----
double ins_fix_a[6] = {6.31029520441, 0.519042043438, 0.699422916624,
                       0.862615710189, 0.708782029108, 0.552896821411};
double ins_fix_b[6] = {25475.6674291, 22357.989938, 30827.99377,
                       38715.8802264, 89865.0435751, 140845.450888};
double ins_fix_cpi = 1.96053897728;

// ---- final ----
double ins_final_a[1] = {43.3348975221};
double ins_final_b[1] = {141841.44285};
double ins_final_cpi = 1.56863134534;

// NOTE(review): presumably the average outcome of the neighbor check -- confirm.
double neigh_check_avg = 0.196428571429;
swm/lammps/lammps_workload.json
0 → 100644
View file @
316208ec
{
"jobs"
:
[
{
"name"
:
"StandaloneSWM"
,
"app"
:
"dll"
,
"dll_path"
:
"apps/dll/lammps.so"
,
"size"
:
128
,
"time"
:
0
,
"cfg"
:
{
"num_x_replicas"
:
1
,
#
number
of
replicas
in
X
dimension
"num_y_replicas"
:
1
,
#
number
of
replicas
in
Y
dimension
"num_z_replicas"
:
1
,
#
number
of
replicas
in
Z
dimension
"num_time_steps"
:
30
,
#
number
of
time
steps
to
simulate
"req_vc"
:
0
,
#
request
vc
"resp_vc"
:
1
,
#
response
vc
"router_freq"
:
800e6
,
#
router
frequency
in
Hz
"cpu_freq"
:
1.2e9
,
#
CPU
frequency
in
Hz
"cpu_sim_speedup"
:
1e6
#
simulation
speedup
factor
(makes
CPU
faster)
(use
this
to
shorten
computation
periods)
}
}
]
}
\ No newline at end of file
swm/nearest_neighbor/nearest_neighbor_swm_user_code.cpp
0 → 100644
View file @
316208ec
#include "nearest_neighbor_swm_user_code.h"
#include "boost_ptree_array_to_std_vector.h"
// Declaration only: the definition of global_cycle lives in another
// translation unit.
extern
uint64_t
global_cycle
;
/**
 * Builds the nearest-neighbor workload from its configuration tree.
 *
 * Keys read from cfg, with the fallback used when a key is absent:
 *   dimension_cnt (0), dimension_sizes ({0}), max_dimension_distance (0),
 *   synchronous (false), iterations_per_sync (1),
 *   randomize_communication_order (false).
 *
 * Afterwards it asserts that one size was supplied per dimension and that
 * the product of all sizes equals process_cnt, printing both numbers first.
 *
 * @param user_if       forwarded to AppBaseSWMUserCode
 * @param cfg           configuration tree; also forwarded to the base class
 * @param generic_ptrs  not used by this constructor
 */
NearestNeighborSWMUserCode::NearestNeighborSWMUserCode(
    SWMUserIF* user_if,
    boost::property_tree::ptree cfg,
    void**& generic_ptrs
) :
    AppBaseSWMUserCode(user_if, cfg, "nearest_neighbor"),
    dimension_cnt(cfg.get<uint32_t>("dimension_cnt", 0)),
    dimension_sizes(boost_ptree_array_to_std_vector<uint32_t>(cfg, "dimension_sizes", {0})),
    max_dimension_distance(cfg.get<uint32_t>("max_dimension_distance", 0)),
    synchronous(cfg.get<bool>("synchronous", false)),
    iterations_per_sync(cfg.get<uint32_t>("iterations_per_sync", 1)),
    randomize_communication_order(cfg.get<bool>("randomize_communication_order", false))
{
    // One size entry must exist for every declared dimension.
    assert(dimension_sizes.size() == dimension_cnt);

    // Total number of grid positions described by the configured sizes.
    size_t dim_product = 1;
    for (const auto& extent : dimension_sizes) {
        dim_product *= extent;
    }

    std::cout << "dim_product is " << dim_product
              << " and process_cnt is " << process_cnt << std::endl;

    // The grid must hold exactly one position per process.
    assert(dim_product == process_cnt);
}
/**
 * Converts a linear process id into per-dimension grid coordinates.
 *
 * Dimension 0 varies fastest: coordinate d is (pid / stride_d) % size_d,
 * where stride_d is the product of the sizes of all lower dimensions.
 *
 * @param pid     process id to decompose
 * @param coords  output; cleared, then filled with dimension_cnt entries
 */
void NearestNeighborSWMUserCode::xlat_pid_to_coords(uint32_t pid, std::vector<uint32_t>& coords)
{
    coords.clear();

    uint32_t stride = 1;
    for (uint32_t axis = 0; axis < dimension_cnt; ++axis) {
        const uint32_t extent = dimension_sizes[axis];
        coords.push_back((pid / stride) % extent);
        stride *= extent;
    }
}
/**
 * Converts per-dimension grid coordinates back into a linear process id.
 *
 * Inverse of xlat_pid_to_coords(): each coordinate is weighted by the product
 * of the sizes of all lower dimensions and the terms are summed.
 *
 * @param coords  coordinates; the first dimension_cnt entries are read
 * @param pid     output; reset to 0 and then accumulated in place
 */
void NearestNeighborSWMUserCode::xlat_coords_to_pid(std::vector<uint32_t> coords, uint32_t& pid)
{
    pid = 0;

    uint32_t stride = 1;
    uint32_t axis = 0;
    while (axis < dimension_cnt) {
        pid += coords[axis] * stride;
        stride *= dimension_sizes[axis];
        ++axis;
    }
}
/**
 * Describes where neighbor_pid sits relative to my_pid.
 *
 * For every dimension in which the two coordinates differ, the result gets
 * "p<dim>" when the neighbor is one step up (wrapping from the last position
 * to 0) or "m<dim>" when it is one step down (wrapping from 0 to the last
 * position). Any other difference trips assert(0). Dimensions with equal
 * coordinates contribute nothing, so the result is empty for identical pids.
 *
 * @param my_pid        reference process id
 * @param neighbor_pid  process id being described
 * @return the concatenated direction tokens, in dimension order
 */
std::string NearestNeighborSWMUserCode::get_neighbor_string(uint32_t my_pid, uint32_t neighbor_pid)
{
    std::vector<uint32_t> self_pos;
    std::vector<uint32_t> other_pos;
    xlat_pid_to_coords(my_pid, self_pos);
    xlat_pid_to_coords(neighbor_pid, other_pos);
    assert(self_pos.size() == other_pos.size());

    std::ostringstream label;
    for (size_t dim = 0; dim < self_pos.size(); dim++) {
        const uint32_t mine = self_pos[dim];
        const uint32_t theirs = other_pos[dim];
        if (mine == theirs) {
            continue;  // same position in this dimension: nothing to report
        }

        const uint32_t extent = dimension_sizes[dim];
        // The coordinates differ, so this dimension has at least two
        // positions and the modulo below is well defined.
        const uint32_t step_up = (mine + 1) % extent;
        const uint32_t step_down = (mine == 0) ? (extent - 1) : (mine - 1);

        // "p" is checked first, so it wins when both directions reach the
        // same position (a dimension of size 2).
        if (theirs == step_up) {
            label << "p" << dim;
        } else if (theirs == step_down) {
            label << "m" << dim;
        } else {
            assert(0);
        }
    }
    return label.str();
}
/**
 * Recursively enumerates neighbor positions around the given coordinates.
 *
 * At each dimension the walk branches three ways, in this order: one step
 * down (with wraparound), unchanged, one step up (with wraparound). A step
 * down or up adds 1 to accumulated_dimension_distance.
 *
 * A position is appended to neighbors when either
 *   - the accumulated distance equals max_dimension_distance, or
 *   - every dimension has been visited and the accumulated distance is > 0.
 * In both cases the walk stops at that point. Each appended entry is the
 * tuple (pid, GetFirstMatch(get_neighbor_string(process_id, pid))).
 *
 * @param coords                          position reached so far
 * @param neighbors                       output list, appended to
 * @param dimension_to_vary               index of the dimension to branch on
 * @param accumulated_dimension_distance  number of non-zero steps taken so far
 */
void NearestNeighborSWMUserCode::derive_neighbors_recurse(
    std::vector<uint32_t> coords,
    std::vector<neighbor_tuple>& neighbors,
    uint32_t dimension_to_vary,
    uint32_t accumulated_dimension_distance
)
{
    const bool at_max_distance = (accumulated_dimension_distance == max_dimension_distance);
    const bool dims_exhausted = (dimension_to_vary == dimension_cnt);

    if (at_max_distance || dims_exhausted) {
        // With all dimensions visited, a distance of 0 means no step was
        // taken on the way here; such a position is not recorded.
        if (at_max_distance || accumulated_dimension_distance > 0) {
            uint32_t neighbor_pid;
            xlat_coords_to_pid(coords, neighbor_pid);
            std::string neighbor_string = get_neighbor_string(process_id, neighbor_pid);
            std::string regexed_string = GetFirstMatch(neighbor_string);
            neighbors.push_back(std::make_tuple(neighbor_pid, regexed_string));
        }
        return;
    }

    const uint32_t here = coords[dimension_to_vary];
    const uint32_t last = dimension_sizes[dimension_to_vary] - 1;
    std::vector<uint32_t> shifted = coords;

    // negative: one step down, wrapping from 0 to the last position
    shifted[dimension_to_vary] = (here == 0) ? last : (here - 1);
    derive_neighbors_recurse(shifted, neighbors, dimension_to_vary + 1, accumulated_dimension_distance + 1);

    // none: this dimension stays as it is
    derive_neighbors_recurse(coords, neighbors, dimension_to_vary + 1, accumulated_dimension_distance);

    // positive: one step up, wrapping from the last position to 0
    shifted[dimension_to_vary] = (here == last) ? 0 : (here + 1);
    derive_neighbors_recurse(shifted, neighbors, dimension_to_vary + 1, accumulated_dimension_distance + 1);
}
void
NearestNeighborSWMUserCode
::
call
()
{
/*
if(process_id == 0) { //lets print every pid in coords and back again
std::vector<uint32_t> coords;
uint32_t pid_again;
for(uint32_t pid=0; pid<process_cnt; pid++) {
coords.clear();
pid_again=0;
xlat_pid_to_coords(pid, coords);
std::cout << "pid " << pid << " has coords.size " << coords.size() << " ";
for(size_t i=0; i<coords.size(); i++) {
std::cout << " " << coords[i];
}
std::cout << "; which have pid ";
xlat_coords_to_pid(coords, pid_again);
std::cout << pid_again << endl;
}
}
*/
std
::
vector
<
uint32_t
>
my_coords
;
std
::
vector
<
uint32_t
>
neighbor_pids
;
xlat_pid_to_coords
(
process_id
,
my_coords
);
derive_neighbors_recurse
(
my_coords
,
neighbors
);
/*
if(process_id == 0)
{
std::cout << "neighbors of pid " << process_id << " are: ";
for(size_t neighbors_idx=0; neighbors_idx<neighbors.size(); neighbors_idx++) {
std::cout << " " << std::get<0>(neighbors[neighbors_idx]) << "," << std::get<1>(neighbors[neighbors_idx]);
}
std::cout << "\n";
}
*/
uint32_t
*
send_handles
=
NULL
;
uint32_t
*
recv_handles
=
NULL
;
if
(
synchronous
)
{
send_handles
=
new
uint32_t
[
neighbors
.
size
()
*
iterations_per_sync
];
recv_handles
=
new
uint32_t
[
neighbors
.
size
()
*
iterations_per_sync
];
}
uint32_t
iter_before_sync
=
0
;
uint32_t
neighbors_size
=
neighbors
.
size
();
for
(
uint32_t
iter
=
0
;
iter
<
iteration_cnt
;
iter
++
)
{
//shuffle the neighbors
if
(
randomize_communication_order
)
{
std
::
default_random_engine
e
{
rng_unique_seed
->
Get
(
INT_MAX
)};
std
::
shuffle
(
neighbors
.
begin
(),
neighbors
.
end
(),
e
);
}
//send to each neighbor
for
(
size_t
neighbor_idx
=
0
;
neighbor_idx
<
neighbors
.
size
();
neighbor_idx
++
)
{
msg_traffic_desc
msg_desc
;
GetMsgDetails
(
&
msg_desc
,
std
::
get
<
1
>
(
neighbors
[
neighbor_idx
]));
if
(
synchronous
)
{
//send/recv pair that we'll later wait on
SWM_Isend
(
std
::
get
<
0
>
(
neighbors
[
neighbor_idx
]),
SWM_COMM_WORLD
,
process_id
,
msg_desc
.
msg_req_vc
,
msg_desc
.
msg_rsp_vc
,
NO_BUFFER
,
msg_desc
.
msg_req_bytes
,
msg_desc
.
pkt_rsp_bytes
,
&
(
send_handles
[
neighbor_idx
+
iter_before_sync
*
neighbors_size
]),
msg_desc
.
msg_req_routing_type
,
msg_desc
.
msg_rsp_routing_type
);
SWM_Irecv
(
std
::
get
<
0
>
(
neighbors
[
neighbor_idx
]),
SWM_COMM_WORLD
,
std
::
get
<
0
>
(
neighbors
[
neighbor_idx
]),
NO_BUFFER
,
&
(
recv_handles
[
neighbor_idx
+
iter_before_sync
*
neighbors_size
])
);
for
(
uint32_t
noop
=
0
;
noop
<
noop_cnt
;
noop
++
)
{
SWM_Noop
();
}
}
else
{
//fire and forget
SWM_Synthetic
(
std
::
get
<
0
>
(
neighbors
[
neighbor_idx
]),
//dst
msg_desc
.
msg_req_vc
,
msg_desc
.
msg_rsp_vc
,
msg_desc
.
pkt_rsp_vc
,
msg_desc
.
msg_req_bytes
,
msg_desc
.
msg_rsp_bytes
,
msg_desc
.
pkt_rsp_bytes
,
msg_desc
.
msg_req_routing_type
,
msg_desc
.
msg_rsp_routing_type
,
msg_desc
.
pkt_rsp_routing_type
,
NULL
,
msg_desc
.
attribute
#ifdef FABSIM_EMULATION
,
msg_desc
.
l2_encoding
#endif
);
for
(
uint32_t
noop
=
0
;
noop
<
noop_cnt
;
noop
++
)
{
SWM_Noop
();
}
}
}