Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
HeteroFlow
THAPI
Commits
06cdd63c
Commit
06cdd63c
authored
Oct 13, 2020
by
Brice Videau
Browse files
WIP CUDA profiling.
parent
d73cca23
Pipeline
#11594
failed with stage
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
cuda/Makefile.am
View file @
06cdd63c
...
...
@@ -12,13 +12,16 @@ CUDA_PROBES_SRC = \
cuda_tracepoints.c
CUDA_STATIC_PROBES_TP
=
\
cuda_args.tp
cuda_args.tp
\
cuda_profiling.tp
CUDA_STATIC_PROBES_INCL
=
\
cuda_args.h
cuda_args.h
\
cuda_profiling.h
CUDA_STATIC_PROBES_SRC
=
\
cuda_args.c
cuda_args.c
\
cuda_profiling.c
CUDART_PROBES_TP
=
\
cudart_tracepoints.tp
...
...
@@ -146,6 +149,9 @@ cudart_tracepoints.tp: $(srcdir)/gen_cudart_tracepoints.rb $(CUDART_MODEL) gen_p
cuda_args.tp
:
$(srcdir)/gen_cuda_custom_probes.rb $(CUDA_MODEL)
SRC_DIR
=
$(srcdir)
$(RUBY)
$(srcdir)
/gen_cuda_custom_probes.rb lttng_ust_cuda_args
>
$@
cuda_profiling.tp
:
$(srcdir)/gen_cuda_custom_probes.rb $(CUDA_MODEL)
SRC_DIR
=
$(srcdir)
$(RUBY)
$(srcdir)
/gen_cuda_custom_probes.rb lttng_ust_cuda_profiling
>
$@
%.h %.c
:
%.tp
$(LTTNG_GEN_TP)
$<
-o
$*
.c
-o
$*
.h
...
...
cuda/cuda_events.yaml
View file @
06cdd63c
...
...
@@ -19,3 +19,27 @@ lttng_ust_cuda_args:
-
[
ctf_integer
,
size_t
,
argIndex
,
argIndex
]
-
[
ctf_integer_hex
,
uintptr_t
,
kernelParam
,
"
(uintptr_t)kernelParam"
]
-
[
ctf_sequence_text
,
uint8_t
,
kernelParam_val
,
"
(uint8_t
*)kernelParam"
,
size_t
,
"
kernelParam
?
argSize
:
0"
]
lttng_ust_cuda_profiling
:
events
:
-
name
:
event_profiling
args
:
-
[
CUevent
,
hStart
]
-
[
CUevent
,
hStop
]
fields
:
-
[
ctf_integer_hex
,
uintptr_t
,
hStart
,
"
(uintptr_t)hStart"
]
-
[
ctf_integer_hex
,
uintptr_t
,
hStop
,
"
(uintptr_t)hStop"
]
-
name
:
event_profiling_results
args
:
-
[
CUevent
,
hStart
]
-
[
CUevent
,
hStop
]
-
[
CUresult
,
startStatus
]
-
[
CUresult
,
stopStatus
]
-
[
CUresult
,
status
]
-
[
float
,
milliseconds
]
fields
:
-
[
ctf_integer_hex
,
uintptr_t
,
hStart
,
"
(uintptr_t)hStart"
]
-
[
ctf_integer_hex
,
uintptr_t
,
hStop
,
"
(uintptr_t)hStop"
]
-
[
ctf_integer
,
CUresult
,
startStatus
,
startStatus
]
-
[
ctf_integer
,
CUresult
,
stopStatus
,
stopStatus
]
-
[
ctf_integer
,
CUresult
,
status
,
status
]
-
[
ctf_float
,
float
,
milliseconds
,
milliseconds
]
cuda/cuda_export_tables.yaml
View file @
06cdd63c
...
...
@@ -17,3 +17,8 @@
-
offset
:
0x110
name
:
cuFunction_get_arg_descriptor
declaration
:
"
CUresult
cuFunction_get_arg_descriptor(CUfunction
f,
size_t
argIndex,
CUfunction_arg_desc_query
*pArgDescQuery)"
-
uuid
:
[
0xa3
,
0x17
,
0x46
,
0xd1
,
0x54
,
0x75
,
0xef
,
0x4e
,
0xae
,
0xb6
,
0xdf
,
0xb1
,
0x47
,
0x6a
,
0xdd
,
0x6c
]
functions
:
-
offset
:
0x288
name
:
cuDevice_get_primary_context_refcount
declaration
:
"
uint
cuDevice_get_primary_context_refcount(CUdevice
device)"
cuda/cuda_model.rb
View file @
06cdd63c
...
...
@@ -54,7 +54,7 @@ all_types.select { |t| t.type.kind_of? YAMLCAst::Struct }.each { |t|
end
}
INIT_FUNCTIONS
=
/cuInit|cuDriverGetVersion/
INIT_FUNCTIONS
=
/cuInit|cuDriverGetVersion
|cuGetExportTable
/
FFI_TYPE_MAP
=
{
"unsigned char"
=>
"ffi_type_uint8"
,
...
...
@@ -795,3 +795,44 @@ register_epilogue "cuGraphKernelNodeGetParams", <<EOF
_dump_kernel_args(nodeParams->func, nodeParams->kernelParams, nodeParams->extra);
}
EOF
# Event-based profiling hooks.
#
# Each lambda returns a C snippet for the generated wrapper. The prologue
# snippet declares `_hStart` and, when `_do_profile` is set, records a start
# event on `stream`; the epilogue snippet hands the wrapped call's `_retval`,
# the start event and the same stream to `_event_profile`.
# `stream` is a C expression: "NULL" or the name of the wrapped function's
# stream parameter.
profiling_start = lambda { |stream|
  <<EOF
  CUevent _hStart = NULL;
  if (_do_profile)
    _hStart = _create_record_event(#{stream});
EOF
}

profiling_stop = lambda { |stream|
  <<EOF
  if (_do_profile)
    _event_profile(_retval, _hStart, #{stream});
EOF
}

# Pre-rendered snippets for the two stream flavours.
profiling_start_no_stream = profiling_start.call("NULL")
profiling_start_stream = profiling_start.call("hStream")
profiling_stop_no_stream = profiling_stop.call("NULL")
profiling_stop_stream = profiling_stop.call("hStream")

# Kernel launches are timed on the stream they are submitted to.
# NOTE(review): the _ptsz variant is hooked with the same snippet as the
# plain one — confirm this is what is wanted for per-thread default streams.
["cuLaunchKernel", "cuLaunchKernel_ptsz"].each do |command|
  register_prologue command, profiling_start_stream
end
["cuLaunchKernel", "cuLaunchKernel_ptsz"].each do |command|
  register_epilogue command, profiling_stop_stream
end

# These copies take no stream argument, so the events are recorded with NULL.
["cuMemcpyHtoD_v2", "cuMemcpyDtoH_v2"].each do |command|
  register_prologue command, profiling_start_no_stream
end
["cuMemcpyHtoD_v2", "cuMemcpyDtoH_v2"].each do |command|
  register_epilogue command, profiling_stop_no_stream
end
cuda/gen_babeltrace_cuda_lib.rb
View file @
06cdd63c
...
...
@@ -138,6 +138,8 @@ EOF
else
"s <<
\"
#{
field
.
name
}
:
\#
{defi[
\"
#{
field
.
name
}
\"
]}
\"
"
end
when
:ctf_float
"s <<
\"
#{
field
.
name
}
:
\#
{defi[
\"
#{
field
.
name
}
\"
]}
\"
"
when
:ctf_sequence_text
arg
=
e
[
"args"
].
find
{
|
type
,
name
|
name
==
field
.
expression
...
...
cuda/gen_cuda.rb
View file @
06cdd63c
...
...
@@ -6,6 +6,8 @@ puts <<EOF
#include <pthread.h>
#include "cuda_tracepoints.h"
#include "cuda_args.h"
#include "cuda_profiling.h"
#include "utlist.h"
EOF
$cuda_commands
.
each
{
|
c
|
...
...
@@ -42,12 +44,14 @@ EOF
export_tables
=
YAML
::
load_file
(
File
.
join
(
SRC_DIR
,
"cuda_export_tables.yaml"
))
export_tables
.
each
{
|
table
|
table
[
"structures"
].
each
{
|
struct
|
puts
<<
EOF
if
table
[
"structures"
]
table
[
"structures"
].
each
{
|
struct
|
puts
<<
EOF
typedef
#{
struct
[
"declaration"
].
chomp
}
#{
struct
[
"name"
]
}
;
EOF
}
}
end
table
[
"functions"
].
each
{
|
func
|
puts
<<
EOF
#define
#{
upper_snake_case
(
func
[
"name"
]
+
"_ptr"
)
}
#{
func
[
"name"
]
+
"_ptr"
}
...
...
cuda/tracer_cuda.sh.in
View file @
06cdd63c
...
...
@@ -21,6 +21,7 @@ while true; do
case
"
$1
"
in
--cudart
)
shift
;
cudart
=
1
;;
-a
|
--arguments
)
shift
;
arguments
=
1
;;
-p
|
--profiling
)
shift
;
profiling
=
1
;
LTTNG_UST_CUDA_PROFILE
=
1
;;
-v
|
--visualize
)
shift
;
lttng_view
=
1
;;
--
)
shift
;
break
;;
*
)
break
;;
...
...
@@ -45,6 +46,10 @@ if [ ! -z "$arguments" ]
then
lttng enable-event
--channel
=
blocking-channel
--userspace
lttng_ust_cuda_args:
*
fi
if
[
!
-z
"
$profiling
"
]
then
lttng enable-event
--channel
=
blocking-channel
--userspace
lttng_ust_cuda_profiling:
*
fi
if
[
-z
"
$LTTNG_UST_CUDA_LIBCUDA
"
]
then
export
LTTNG_UST_CUDA_LIBCUDA
=
$(
whichlib libcuda.so |
head
-n
1
)
...
...
cuda/tracer_cuda_helpers.include.c
View file @
06cdd63c
...
...
@@ -53,6 +53,106 @@ static inline void _dump_kernel_args(CUfunction f, void **kernelParams, void** e
}
}
/* Non-zero when event-based profiling is active. The generated
 * prologue/epilogue snippets and _lib_cleanup() test this flag. */
static int _do_profile = 0;

/* Guards the _events list. */
static pthread_mutex_t _cuda_events_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * One pending measurement: a (start, stop) event pair together with the
 * context that was current when the pair was registered.
 * prev/next are the link fields required by the utlist DL_* macros.
 */
struct _cuda_event_s {
  struct _cuda_event_s *prev;
  struct _cuda_event_s *next;
  CUevent start;
  CUevent stop;
  CUcontext context;
};

/* Head of the list of pending measurements. Kept file-local, like the other
 * profiling globals, so the tracer library does not export a symbol named
 * `_events`. */
static struct _cuda_event_s *_events = NULL;
/*
 * Queue a (start, stop) event pair so its timing can be reported later.
 *
 * On success the pair is announced through the
 * lttng_ust_cuda_profiling:event_profiling tracepoint and appended to
 * _events under _cuda_events_mutex; the list then owns both events.
 * If the current context cannot be obtained or the node cannot be allocated,
 * both events are destroyed and nothing is queued.
 */
static inline void _register_cuda_event(CUevent hStart, CUevent hStop) {
  CUcontext context;
  struct _cuda_event_s *ev = NULL;

  /* Only allocate the node once the owning context is known. */
  if (CU_CTX_GET_CURRENT_PTR(&context) == CUDA_SUCCESS)
    ev = (struct _cuda_event_s *)calloc(1, sizeof(*ev));

  if (!ev) {
    /* Nobody will ever read these events: release them right away. */
    CU_EVENT_DESTROY_V2_PTR(hStart);
    CU_EVENT_DESTROY_V2_PTR(hStop);
    return;
  }

  ev->start = hStart;
  ev->stop = hStop;
  ev->context = context;

  tracepoint(lttng_ust_cuda_profiling, event_profiling, hStart, hStop);

  pthread_mutex_lock(&_cuda_events_mutex);
  DL_APPEND(_events, ev);
  pthread_mutex_unlock(&_cuda_events_mutex);
}
/*
 * Create an event with CU_EVENT_DEFAULT and record it on hStream.
 *
 * Returns the recorded event, or NULL when either creation or recording
 * fails (an event that could not be recorded is destroyed before returning).
 */
static inline CUevent _create_record_event(CUstream hStream) {
  CUevent event;

  if (CU_EVENT_CREATE_PTR(&event, CU_EVENT_DEFAULT) != CUDA_SUCCESS)
    return NULL;

  if (CU_EVENT_RECORD_PTR(event, hStream) == CUDA_SUCCESS)
    return event;

  /* Recording failed: do not leak the freshly created event. */
  CU_EVENT_DESTROY_V2_PTR(event);
  return NULL;
}
/*
 * Epilogue half of a profiled call.
 *
 * status  - result returned by the wrapped call.
 * hStart  - event recorded by the prologue, or NULL if none was recorded.
 * hStream - stream on which the matching stop event is recorded.
 *
 * Without a start event there is nothing to do. If the wrapped call failed,
 * or the stop event cannot be created/recorded, the start event is destroyed.
 * Otherwise the pair is handed to _register_cuda_event().
 */
static inline void _event_profile(CUresult status, CUevent hStart, CUstream hStream) {
  CUevent hStop;

  /* Checked first so a NULL handle is never passed to the destroy call. */
  if (!hStart)
    return;

  if (status != CUDA_SUCCESS) {
    CU_EVENT_DESTROY_V2_PTR(hStart);
    return;
  }

  hStop = _create_record_event(hStream);
  if (!hStop) {
    CU_EVENT_DESTROY_V2_PTR(hStart);
    return;
  }

  _register_cuda_event(hStart, hStop);
}
/*
 * Emit lttng_ust_cuda_profiling:event_profiling_results for one event pair.
 *
 * Does nothing unless that tracepoint is enabled. Otherwise both events are
 * queried, the elapsed time between them is requested, and the two query
 * statuses, the elapsed-time status and the duration are recorded.
 * The events themselves are left untouched; the caller destroys them.
 */
static void _profile_event_results(struct _cuda_event_s *ev) {
  /* Defined value for the trace when the elapsed-time call fails and leaves
   * the output unwritten; `status` tells the reader whether it is valid. */
  float milliseconds = 0.0f;
  CUresult startStatus, stopStatus, status;

  if (!tracepoint_enabled(lttng_ust_cuda_profiling, event_profiling_results))
    return;

  startStatus = CU_EVENT_QUERY_PTR(ev->start);
  stopStatus = CU_EVENT_QUERY_PTR(ev->stop);
  status = CU_EVENT_ELAPSED_TIME_PTR(&milliseconds, ev->start, ev->stop);

  do_tracepoint(lttng_ust_cuda_profiling, event_profiling_results,
                ev->start, ev->stop, startStatus, stopStatus, status,
                milliseconds);
}
static
void
_event_cleanup
()
{
struct
_cuda_event_s
*
ev
,
*
tmp
;
DL_FOREACH_SAFE
(
_events
,
ev
,
tmp
)
{
DL_DELETE
(
_events
,
ev
);
_profile_event_results
(
ev
);
CU_EVENT_DESTROY_V2_PTR
(
ev
->
start
);
CU_EVENT_DESTROY_V2_PTR
(
ev
->
stop
);
free
(
ev
);
}
}
/*
 * Exit-time hook: when profiling is active, flush and release the pending
 * event pairs; otherwise there is nothing to clean up.
 */
static void _lib_cleanup() {
  if (!_do_profile)
    return;
  _event_cleanup();
}
static
pthread_once_t
_init
=
PTHREAD_ONCE_INIT
;
static
__thread
volatile
int
in_init
=
0
;
static
volatile
int
_initialized
=
0
;
...
...
@@ -72,7 +172,15 @@ static void _load_tracer(void) {
}
find_cuda_symbols
(
handle
);
CU_INIT_PTR
(
0
);
find_cuda_extensions
();
s
=
getenv
(
"LTTNG_UST_CUDA_PROFILE"
);
if
(
s
)
_do_profile
=
1
;
if
(
_do_profile
)
atexit
(
&
_lib_cleanup
);
}
static
inline
void
_init_tracer
(
void
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment