Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Rob Latham
MPICH-BlueGene
Commits
b0b46bb0
Commit
b0b46bb0
authored
Oct 30, 2010
by
William Gropp
Browse files
[svn-r7401] Expanded manyrma and added option to select individual tests
parent
819834c8
Changes
1
Hide whitespace changes
Inline
Side-by-side
test/mpi/perf/manyrma.c
View file @
b0b46bb0
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* (C) 2010 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
/* This test measures the performance of many rma operations to a single
   target process.
   It uses a number of operations (put or accumulate) to different
   locations in the target window.
   This is one of the ways that RMA may be used, and is used in the
   reference implementation of the graph500 benchmark.
*/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Together with MAX_RMA_SIZE this sizes the window buffer
   (MAX_RMA_SIZE * MAX_COUNT ints are allocated in main) */
#define MAX_COUNT 65536
/* Largest number of ints moved by a single RMA operation; the element
   count is doubled from 1 up to this value */
#define MAX_RMA_SIZE 16
/* Number of timed repetitions of each measurement; also the length of
   the timing arrays */
#define MAX_RUNS 10
/* Synchronization methods that may be selected for a run.  The individual
   methods are distinct bits so that several can be OR-ed together and
   tested with '&'; SYNC_ALL is -1 so that it tests true against every
   individual method. */
typedef enum {
    SYNC_ALL   = -1,        /* every method (the default) */
    SYNC_NONE  = 0,         /* nothing selected yet */
    SYNC_FENCE = 1 << 0,    /* MPI_Win_fence */
    SYNC_LOCK  = 1 << 1,    /* MPI_Win_lock / MPI_Win_unlock */
    SYNC_PSCW  = 1 << 2     /* post / start / complete / wait */
} sync_t;
/* RMA operations that may be selected for a run.  As with sync_t, the
   individual operations are distinct bits and RMA_ALL (-1) matches all
   of them. */
typedef enum {
    RMA_ALL  = -1,          /* every operation (the default) */
    RMA_NONE = 0,           /* nothing selected yet */
    RMA_PUT  = 1 << 0,      /* MPI_Put */
    RMA_ACC  = 1 << 1,      /* MPI_Accumulate */
    RMA_GET  = 1 << 2       /* MPI_Get */
} rma_t;
/* Note GET not yet implemented */
/* Current selection of RMA operations and synchronization methods to
   run.  Both start out as "all"; the command-line options in main narrow
   them to the requested subset. */
rma_t  rmaChoice  = RMA_ALL;
sync_t syncChoice = SYNC_ALL;
/* Time stamps (from MPI_Wtime) recorded for one timed run */
typedef struct {
    double startOp;     /* just before the first RMA operation is issued */
    double endOp;       /* just after the last RMA operation is issued */
    double endSync;     /* after the closing synchronization call returns */
} timing;
/* When nonzero, PrintResults writes a line of timings for each count */
static int verbose = 1;
/* Forward declarations.  Each Run* routine performs MAX_RUNS timed
   repetitions of cnt RMA operations of sz ints each to destRank and
   stores the time stamps in t[]; PrintResults reports them. */

/* Fence synchronization */
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);

/* Lock/unlock synchronization */
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);

/* Post/start/complete/wait synchronization */
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);

/* Reporting */
void PrintResults(int cnt, timing t[]);
int
main
(
int
argc
,
char
*
argv
[]
)
{
int
arraysize
,
i
,
k
,
cnt
,
maxCount
,
*
arraybuffer
;
int
arraysize
,
i
,
k
,
cnt
,
sz
,
maxCount
,
*
arraybuffer
;
int
one
=
1
;
int
wrank
,
wsize
,
destRank
;
int
wrank
,
wsize
,
destRank
,
srcRank
;
MPI_Win
win
;
MPI_Group
wgroup
,
accessGroup
,
exposureGroup
;
double
t1
[
MAX_RUNS
],
t2
[
MAX_RUNS
],
t3
[
MAX_RUNS
];
timing
t
[
MAX_RUNS
];
MPI_Init
(
&
argc
,
&
argv
);
for
(
i
=
1
;
i
<
argc
;
i
++
)
{
if
(
strcmp
(
argv
[
i
],
"-put"
)
==
0
)
{
if
(
rmaChoice
==
RMA_ALL
)
rmaChoice
=
RMA_NONE
;
rmaChoice
|=
RMA_PUT
;
}
else
if
(
strcmp
(
argv
[
i
],
"-acc"
)
==
0
)
{
if
(
rmaChoice
==
RMA_ALL
)
rmaChoice
=
RMA_NONE
;
rmaChoice
|=
RMA_ACC
;
}
else
if
(
strcmp
(
argv
[
i
],
"-fence"
)
==
0
)
{
if
(
syncChoice
==
SYNC_ALL
)
syncChoice
=
SYNC_NONE
;
syncChoice
|=
SYNC_FENCE
;
}
else
if
(
strcmp
(
argv
[
i
],
"-lock"
)
==
0
)
{
if
(
syncChoice
==
SYNC_ALL
)
syncChoice
=
SYNC_NONE
;
syncChoice
|=
SYNC_LOCK
;
}
else
if
(
strcmp
(
argv
[
i
],
"-pscw"
)
==
0
)
{
if
(
syncChoice
==
SYNC_ALL
)
syncChoice
=
SYNC_NONE
;
syncChoice
|=
SYNC_PSCW
;
}
else
{
fprintf
(
stderr
,
"Unrecognized argument %s
\n
"
,
argv
[
i
]
);
MPI_Abort
(
MPI_COMM_WORLD
,
1
);
}
}
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
wrank
);
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
wsize
);
destRank
=
wrank
+
1
;
while
(
destRank
>=
wsize
)
destRank
=
destRank
-
wsize
;
arraysize
=
MAX_COUNT
;
srcRank
=
wrank
-
1
;
if
(
srcRank
<
0
)
srcRank
+=
wsize
;
/* Create groups for PSCW */
MPI_Comm_group
(
MPI_COMM_WORLD
,
&
wgroup
);
MPI_Group_incl
(
wgroup
,
1
,
&
destRank
,
&
accessGroup
);
MPI_Group_incl
(
wgroup
,
1
,
&
srcRank
,
&
exposureGroup
);
MPI_Group_free
(
&
wgroup
);
arraysize
=
MAX_RMA_SIZE
*
MAX_COUNT
;
arraybuffer
=
(
int
*
)
malloc
(
arraysize
*
sizeof
(
int
)
);
if
(
!
arraybuffer
)
{
fprintf
(
stderr
,
"Unable to allocate %d words
\n
"
,
arraysize
);
...
...
@@ -35,44 +104,263 @@ int main( int argc, char *argv[] )
MPI_Win_create
(
arraybuffer
,
arraysize
*
sizeof
(
int
),
(
int
)
sizeof
(
int
),
MPI_INFO_NULL
,
MPI_COMM_WORLD
,
&
win
);
/* FIXME: we need a test on performance consistency.
The test needs to have both a relative growth limit and
an absolute limit.
*/
maxCount
=
MAX_COUNT
;
maxCount
=
33000
;
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
for
(
k
=
0
;
k
<
MAX_RUNS
;
k
++
)
{
MPI_Win_fence
(
0
,
win
);
MPI_Barrier
(
MPI_COMM_WORLD
);
t1
[
k
]
=
MPI_Wtime
();
for
(
i
=
0
;
i
<
cnt
;
i
++
)
{
MPI_Accumulate
(
&
one
,
1
,
MPI_INT
,
destRank
,
i
,
1
,
MPI_INT
,
MPI_SUM
,
win
);
if
((
syncChoice
&
SYNC_FENCE
)
&&
(
rmaChoice
&
RMA_ACC
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Accumulate with fence, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunAccFence
(
win
,
destRank
,
cnt
,
sz
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
t2
[
k
]
=
MPI_Wtime
();
MPI_Win_fence
(
0
,
win
);
t3
[
k
]
=
MPI_Wtime
();
}
if
(
wrank
==
0
)
{
double
d1
=
0
,
d2
=
0
;
for
(
k
=
0
;
k
<
MAX_RUNS
;
k
++
)
{
d1
+=
t2
[
k
]
-
t1
[
k
];
d2
+=
t3
[
k
]
-
t2
[
k
];
}
}
if
((
syncChoice
&
SYNC_LOCK
)
&&
(
rmaChoice
&
RMA_ACC
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Accumulate with lock, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunAccLock
(
win
,
destRank
,
cnt
,
sz
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
if
(
verbose
)
{
printf
(
"%d
\t
%e
\t
%e
\t
%e
\t
%e
\n
"
,
cnt
,
d1
/
MAX_RUNS
,
d2
/
MAX_RUNS
,
d1
/
(
MAX_RUNS
*
cnt
),
d2
/
(
MAX_RUNS
*
cnt
)
);
}
}
if
((
syncChoice
&
SYNC_FENCE
)
&&
(
rmaChoice
&
RMA_PUT
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Put with fence, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunPutFence
(
win
,
destRank
,
cnt
,
sz
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
}
}
if
((
syncChoice
&
SYNC_LOCK
)
&&
(
rmaChoice
&
RMA_PUT
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Put with lock, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunPutLock
(
win
,
destRank
,
cnt
,
sz
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
}
}
if
((
syncChoice
&
SYNC_PSCW
)
&&
(
rmaChoice
&
RMA_PUT
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Put with pscw, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunPutPSCW
(
win
,
destRank
,
cnt
,
sz
,
exposureGroup
,
accessGroup
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
}
}
if
((
syncChoice
&
SYNC_PSCW
)
&&
(
rmaChoice
&
RMA_ACC
))
{
for
(
sz
=
1
;
sz
<=
MAX_RMA_SIZE
;
sz
=
sz
+
sz
)
{
if
(
wrank
==
0
)
printf
(
"Accumulate with pscw, %d elements
\n
"
,
sz
);
cnt
=
1
;
while
(
cnt
<=
maxCount
)
{
RunAccPSCW
(
win
,
destRank
,
cnt
,
sz
,
exposureGroup
,
accessGroup
,
t
);
if
(
wrank
==
0
)
{
PrintResults
(
cnt
,
t
);
}
cnt
=
2
*
cnt
;
}
/* FIXME: we need a test on performance consistency.
The test needs to have both a relative growth limit and
an absolute limit.
*/
}
cnt
=
2
*
cnt
;
}
MPI_Win_free
(
&
win
);
MPI_Group_free
(
&
accessGroup
);
MPI_Group_free
(
&
exposureGroup
);
MPI_Finalize
();
return
0
;
}
/*
 * Time cnt MPI_Accumulate (MPI_SUM) operations of sz ints each to destRank,
 * using MPI_Win_fence for synchronization.  The measurement is repeated
 * MAX_RUNS times.
 *
 *   win      - window to operate on
 *   destRank - target rank of every operation
 *   cnt      - number of accumulate calls per run
 *   sz       - number of MPI_INT elements per call; successive calls target
 *              consecutive displacements 0, sz, 2*sz, ...
 *   t        - output, at least MAX_RUNS entries; for each run startOp is
 *              taken before the first call, endOp after the last call, and
 *              endSync after the closing fence
 */
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        /* Open the epoch, then line the processes up before timing */
        MPI_Win_fence(0, win);
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(src, sz, MPI_INT, destRank,
                           j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* The closing fence completes the operations */
        MPI_Win_fence(0, win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Time cnt MPI_Accumulate (MPI_SUM) operations of sz ints each to destRank,
 * issued while holding a shared lock on destRank.  The measurement is
 * repeated MAX_RUNS times.
 *
 *   win      - window to operate on
 *   destRank - rank that is locked and targeted
 *   cnt      - number of accumulate calls per run
 *   sz       - number of MPI_INT elements per call; successive calls target
 *              consecutive displacements 0, sz, 2*sz, ...
 *   t        - output, at least MAX_RUNS entries; for each run startOp is
 *              taken before the first call, endOp after the last call, and
 *              endSync after MPI_Win_unlock returns
 */
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
        /* Line the processes up before timing */
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(src, sz, MPI_INT, destRank,
                           j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* Unlock completes the operations */
        MPI_Win_unlock(destRank, win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Time cnt MPI_Put operations of sz ints each to destRank, using
 * MPI_Win_fence for synchronization.  The measurement is repeated
 * MAX_RUNS times.
 *
 *   win      - window to operate on
 *   destRank - target rank of every operation
 *   cnt      - number of put calls per run
 *   sz       - number of MPI_INT elements per call; successive calls target
 *              consecutive displacements 0, sz, 2*sz, ...
 *   t        - output, at least MAX_RUNS entries; for each run startOp is
 *              taken before the first call, endOp after the last call, and
 *              endSync after the closing fence
 */
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        /* Open the epoch, then line the processes up before timing */
        MPI_Win_fence(0, win);
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(src, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* The closing fence completes the operations */
        MPI_Win_fence(0, win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Time cnt MPI_Put operations of sz ints each to destRank, issued while
 * holding a shared lock on destRank.  The measurement is repeated
 * MAX_RUNS times.
 *
 *   win      - window to operate on
 *   destRank - rank that is locked and targeted
 *   cnt      - number of put calls per run
 *   sz       - number of MPI_INT elements per call; successive calls target
 *              consecutive displacements 0, sz, 2*sz, ...
 *   t        - output, at least MAX_RUNS entries; for each run startOp is
 *              taken before the first call, endOp after the last call, and
 *              endSync after MPI_Win_unlock returns
 */
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
        /* Line the processes up before timing */
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(src, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* Unlock completes the operations */
        MPI_Win_unlock(destRank, win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Time cnt MPI_Put operations of sz ints each to destRank, using
 * post/start/complete/wait synchronization.  The measurement is repeated
 * MAX_RUNS times.
 *
 *   win           - window to operate on
 *   destRank      - target rank of every operation
 *   cnt           - number of put calls per run
 *   sz            - number of MPI_INT elements per call; successive calls
 *                   target consecutive displacements 0, sz, 2*sz, ...
 *   exposureGroup - group passed to MPI_Win_post
 *   accessGroup   - group passed to MPI_Win_start
 *   t             - output, at least MAX_RUNS entries; for each run startOp
 *                   is taken before the first call, endOp after the last
 *                   call, and endSync after MPI_Win_complete and
 *                   MPI_Win_wait have both returned
 */
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        /* Open the exposure epoch first, then the access epoch */
        MPI_Win_post(exposureGroup, 0, win);
        MPI_Win_start(accessGroup, 0, win);
        /* Line the processes up before timing */
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(src, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* Close the access epoch, then the exposure epoch */
        MPI_Win_complete(win);
        MPI_Win_wait(win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Time cnt MPI_Accumulate (MPI_SUM) operations of sz ints each to destRank,
 * using post/start/complete/wait synchronization.  The measurement is
 * repeated MAX_RUNS times.
 *
 *   win           - window to operate on
 *   destRank      - target rank of every operation
 *   cnt           - number of accumulate calls per run
 *   sz            - number of MPI_INT elements per call; successive calls
 *                   target consecutive displacements 0, sz, 2*sz, ...
 *   exposureGroup - group passed to MPI_Win_post
 *   accessGroup   - group passed to MPI_Win_start
 *   t             - output, at least MAX_RUNS entries; for each run startOp
 *                   is taken before the first call, endOp after the last
 *                   call, and endSync after MPI_Win_complete and
 *                   MPI_Win_wait have both returned
 */
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
{
    int k, i, j;
    int nelm = (sz > 0) ? sz : 1;
    int *src;

    /* The origin buffer must really contain sz ints: handing MPI the
       address of a single int with a count of sz reads past the end of
       that int whenever sz > 1.  Allocate it outside the timed region. */
    src = (int *) malloc(nelm * sizeof(int));
    if (!src) {
        fprintf(stderr, "Unable to allocate %d words\n", nelm);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nelm; i++)
        src[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        /* Open the exposure epoch first, then the access epoch */
        MPI_Win_post(exposureGroup, 0, win);
        MPI_Win_start(accessGroup, 0, win);
        /* Line the processes up before timing */
        MPI_Barrier(MPI_COMM_WORLD);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(src, sz, MPI_INT, destRank,
                           j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        /* Close the access epoch, then the exposure epoch */
        MPI_Win_complete(win);
        MPI_Win_wait(win);
        t[k].endSync = MPI_Wtime();
    }

    free(src);
}
/*
 * Report the timings collected for one operation count.
 *
 *   cnt - number of RMA operations issued in each run
 *   t   - MAX_RUNS time-stamp records filled in by one of the Run* routines
 *
 * When verbose is set, prints one tab-separated line containing: cnt, the
 * smallest operation-issue time over the runs, the smallest
 * synchronization time over the runs, each of those two divided by cnt,
 * and cnt divided by the smallest synchronization time (0 if that time is
 * not positive).
 */
void PrintResults(int cnt, timing t[])
{
    double bestOp = 1e10, bestSync = 1e10;
    double opTime, syncTime;
    long rate = 0;
    int run;

    /* Use the minimum times because they are more stable - if timing
       accuracy is an issue, use the min over multiple trials */
    for (run = 0; run < MAX_RUNS; run++) {
        opTime = t[run].endOp - t[run].startOp;
        syncTime = t[run].endSync - t[run].endOp;
        bestOp = (opTime < bestOp) ? opTime : bestOp;
        bestSync = (syncTime < bestSync) ? syncTime : bestSync;
    }

    if (!verbose)
        return;

    if (bestSync > 0)
        rate = (long) (cnt / bestSync);
    printf("%d\t%e\t%e\t%e\t%e\t%ld\n",
           cnt, bestOp, bestSync, bestOp / cnt, bestSync / cnt, rate);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment