Commit 06cdd63c authored by Brice Videau
Browse files

WIP CUDA profiling.

parent d73cca23
Pipeline #11594 failed with stage
......@@ -12,13 +12,16 @@ CUDA_PROBES_SRC = \
cuda_tracepoints.c
CUDA_STATIC_PROBES_TP = \
cuda_args.tp
cuda_args.tp \
cuda_profiling.tp
CUDA_STATIC_PROBES_INCL = \
cuda_args.h
cuda_args.h \
cuda_profiling.h
CUDA_STATIC_PROBES_SRC = \
cuda_args.c
cuda_args.c \
cuda_profiling.c
CUDART_PROBES_TP = \
cudart_tracepoints.tp
......@@ -146,6 +149,9 @@ cudart_tracepoints.tp: $(srcdir)/gen_cudart_tracepoints.rb $(CUDART_MODEL) gen_p
# Generate the cuda_args tracepoint template: runs gen_cuda_custom_probes.rb
# with the provider name lttng_ust_cuda_args and writes its stdout to the
# target. Rebuilt when the generator script or $(CUDA_MODEL) changes.
cuda_args.tp: $(srcdir)/gen_cuda_custom_probes.rb $(CUDA_MODEL)
SRC_DIR=$(srcdir) $(RUBY) $(srcdir)/gen_cuda_custom_probes.rb lttng_ust_cuda_args > $@
# Generate the cuda_profiling tracepoint template: same generator script as
# cuda_args.tp, invoked with the provider name lttng_ust_cuda_profiling; its
# stdout becomes the target. Rebuilt when the script or $(CUDA_MODEL) changes.
cuda_profiling.tp: $(srcdir)/gen_cuda_custom_probes.rb $(CUDA_MODEL)
SRC_DIR=$(srcdir) $(RUBY) $(srcdir)/gen_cuda_custom_probes.rb lttng_ust_cuda_profiling > $@
# Pattern rule: from a .tp template, run $(LTTNG_GEN_TP) once to emit both the
# matching .c and .h files ($* is the stem shared by the template and outputs).
%.h %.c: %.tp
$(LTTNG_GEN_TP) $< -o $*.c -o $*.h
......
......@@ -19,3 +19,27 @@ lttng_ust_cuda_args:
- [ ctf_integer, size_t, argIndex, argIndex ]
- [ ctf_integer_hex, uintptr_t, kernelParam, "(uintptr_t)kernelParam" ]
- [ ctf_sequence_text, uint8_t, kernelParam_val, "(uint8_t *)kernelParam", size_t, "kernelParam ? argSize : 0" ]
# Tracepoint provider used for event-based profiling.
# Each event lists its C arguments ("args": [type, name]) and the trace fields
# built from them ("fields": [ctf macro, field type, field name, expression]).
lttng_ust_cuda_profiling:
events:
# event_profiling: carries the pair of CUevent handles (start, stop); both are
# stored as hexadecimal integers after a cast to uintptr_t.
- name: event_profiling
args:
- [ CUevent, hStart ]
- [ CUevent, hStop ]
fields:
- [ ctf_integer_hex, uintptr_t, hStart, "(uintptr_t)hStart" ]
- [ ctf_integer_hex, uintptr_t, hStop, "(uintptr_t)hStop" ]
# event_profiling_results: the same two handles plus three CUresult codes
# (startStatus, stopStatus, status) and a float named milliseconds.
- name: event_profiling_results
args:
- [ CUevent, hStart ]
- [ CUevent, hStop ]
- [ CUresult, startStatus ]
- [ CUresult, stopStatus ]
- [ CUresult, status ]
- [ float, milliseconds ]
fields:
- [ ctf_integer_hex, uintptr_t, hStart, "(uintptr_t)hStart" ]
- [ ctf_integer_hex, uintptr_t, hStop, "(uintptr_t)hStop" ]
- [ ctf_integer, CUresult, startStatus, startStatus ]
- [ ctf_integer, CUresult, stopStatus, stopStatus ]
- [ ctf_integer, CUresult, status, status ]
- [ ctf_float, float, milliseconds, milliseconds ]
......@@ -17,3 +17,8 @@
- offset: 0x110
name: cuFunction_get_arg_descriptor
declaration: "CUresult cuFunction_get_arg_descriptor(CUfunction f, size_t argIndex, CUfunction_arg_desc_query *pArgDescQuery)"
# Export table identified by the 16-byte uuid below; it declares a single
# function entry and no "structures" key.
# NOTE(review): "offset" is presumably the position of the function pointer
# inside the export table - confirm against the generator that consumes it.
- uuid: [0xa3, 0x17, 0x46, 0xd1, 0x54, 0x75, 0xef, 0x4e, 0xae, 0xb6, 0xdf, 0xb1, 0x47, 0x6a, 0xdd, 0x6c]
functions:
- offset: 0x288
name: cuDevice_get_primary_context_refcount
declaration: "uint cuDevice_get_primary_context_refcount(CUdevice device)"
......@@ -54,7 +54,7 @@ all_types.select { |t| t.type.kind_of? YAMLCAst::Struct }.each { |t|
end
}
INIT_FUNCTIONS = /cuInit|cuDriverGetVersion/
INIT_FUNCTIONS = /cuInit|cuDriverGetVersion|cuGetExportTable/
FFI_TYPE_MAP = {
"unsigned char" => "ffi_type_uint8",
......@@ -795,3 +795,44 @@ register_epilogue "cuGraphKernelNodeGetParams", <<EOF
_dump_kernel_args(nodeParams->func, nodeParams->kernelParams, nodeParams->extra);
}
EOF
# Produces the C snippet inserted before a wrapped call: it declares _hStart
# and, when _do_profile is set, records a start event on the stream whose C
# expression is given by +stream+.
profiling_start = lambda do |stream|
  <<EOF
CUevent _hStart = NULL;
if (_do_profile)
_hStart = _create_record_event(#{stream});
EOF
end

# Ready-made variants: one recording on the NULL stream, one on the wrapped
# call's hStream argument.
profiling_start_no_stream = profiling_start.("NULL")
profiling_start_stream = profiling_start.("hStream")
# Produces the C snippet inserted after a wrapped call: when _do_profile is
# set, it hands the call's return value, the start event and the stream
# expression given by +stream+ to _event_profile.
profiling_stop = lambda do |stream|
  <<EOF
if (_do_profile)
_event_profile(_retval, _hStart, #{stream});
EOF
end

# Ready-made variants mirroring the prologue ones: NULL stream and hStream.
profiling_stop_no_stream = profiling_stop.("NULL")
profiling_stop_stream = profiling_stop.("hStream")
# Kernel launches take a stream argument, so their profiling events are
# recorded on hStream. All prologues are registered before the epilogues.
profiled_kernel_launches = %w[cuLaunchKernel cuLaunchKernel_ptsz]
profiled_kernel_launches.each do |launch|
  register_prologue launch, profiling_start_stream
end
profiled_kernel_launches.each do |launch|
  register_epilogue launch, profiling_stop_stream
end

# These memory copies have no stream argument: events go on the NULL stream.
profiled_memcpys = %w[cuMemcpyHtoD_v2 cuMemcpyDtoH_v2]
profiled_memcpys.each do |copy|
  register_prologue copy, profiling_start_no_stream
end
profiled_memcpys.each do |copy|
  register_epilogue copy, profiling_stop_no_stream
end
......@@ -138,6 +138,8 @@ EOF
else
"s << \"#{field.name}: \#{defi[\"#{field.name}\"]}\""
end
when :ctf_float
"s << \"#{field.name}: \#{defi[\"#{field.name}\"]}\""
when :ctf_sequence_text
arg = e["args"].find { |type, name|
name == field.expression
......
......@@ -6,6 +6,8 @@ puts <<EOF
#include <pthread.h>
#include "cuda_tracepoints.h"
#include "cuda_args.h"
#include "cuda_profiling.h"
#include "utlist.h"
EOF
$cuda_commands.each { |c|
......@@ -42,12 +44,14 @@ EOF
export_tables = YAML::load_file(File.join(SRC_DIR,"cuda_export_tables.yaml"))
export_tables.each { |table|
table["structures"].each { |struct|
puts <<EOF
if table["structures"]
table["structures"].each { |struct|
puts <<EOF
typedef #{struct["declaration"].chomp} #{struct["name"]};
EOF
}
}
end
table["functions"].each { |func|
puts <<EOF
#define #{upper_snake_case(func["name"]+"_ptr")} #{func["name"]+"_ptr"}
......
......@@ -21,6 +21,7 @@ while true; do
case "$1" in
--cudart) shift; cudart=1;;
-a | --arguments ) shift; arguments=1;;
# -p/--profiling: remember that profiling events must be enabled and export
# LTTNG_UST_CUDA_PROFILE so the traced process can see it through getenv();
# a plain assignment would stay local to this script.
-p | --profiling ) shift; profiling=1; export LTTNG_UST_CUDA_PROFILE=1;;
-v | --visualize ) shift; lttng_view=1;;
-- ) shift; break ;;
* ) break ;;
......@@ -45,6 +46,10 @@ if [ ! -z "$arguments" ]
then
lttng enable-event --channel=blocking-channel --userspace lttng_ust_cuda_args:*
fi
# When profiling was requested, enable every userspace event of the
# lttng_ust_cuda_profiling provider on the blocking channel.
if [ -n "$profiling" ]; then
  lttng enable-event --channel=blocking-channel --userspace lttng_ust_cuda_profiling:*
fi
if [ -z "$LTTNG_UST_CUDA_LIBCUDA" ]
then
export LTTNG_UST_CUDA_LIBCUDA=$(whichlib libcuda.so | head -n 1)
......
......@@ -53,6 +53,106 @@ static inline void _dump_kernel_args(CUfunction f, void **kernelParams, void** e
}
}
/* Non-zero when event-based profiling is active; set by the tracer loader
 * when the LTTNG_UST_CUDA_PROFILE environment variable is present. */
static int _do_profile = 0;

/* Serializes every modification of the _events list. */
static pthread_mutex_t _cuda_events_mutex = PTHREAD_MUTEX_INITIALIZER;

/* One pending measurement: a start/stop event pair and the context that was
 * current when the pair was registered. prev/next are the utlist links. */
struct _cuda_event_s {
  struct _cuda_event_s *prev;
  struct _cuda_event_s *next;
  CUevent start;
  CUevent stop;
  CUcontext context;
};

/* Head of the doubly-linked list of pending measurements. Kept static like
 * the rest of the profiling state so the symbol is not exported. */
static struct _cuda_event_s *_events = NULL;
/*
 * Takes ownership of a start/stop event pair: emits the event_profiling
 * tracepoint and appends the pair, together with the current context, to the
 * pending list. If the current context cannot be obtained or the list node
 * cannot be allocated, both events are destroyed instead.
 */
static inline void _register_cuda_event(CUevent hStart, CUevent hStop) {
  CUcontext current_ctx;
  struct _cuda_event_s *node = NULL;

  /* Only allocate the node once the context lookup has succeeded. */
  if (CU_CTX_GET_CURRENT_PTR(&current_ctx) == CUDA_SUCCESS)
    node = (struct _cuda_event_s *)calloc(1, sizeof(struct _cuda_event_s));

  if (node == NULL) {
    /* Shared failure path: nothing will track the events, so release them. */
    CU_EVENT_DESTROY_V2_PTR(hStart);
    CU_EVENT_DESTROY_V2_PTR(hStop);
    return;
  }

  node->start = hStart;
  node->stop = hStop;
  node->context = current_ctx;
  tracepoint(lttng_ust_cuda_profiling, event_profiling, hStart, hStop);

  pthread_mutex_lock(&_cuda_events_mutex);
  DL_APPEND(_events, node);
  pthread_mutex_unlock(&_cuda_events_mutex);
}
/*
 * Creates an event with CU_EVENT_DEFAULT and records it on hStream.
 * Returns the event, or NULL when creation or recording fails; an event that
 * was created but could not be recorded is destroyed before returning.
 */
static inline CUevent _create_record_event(CUstream hStream) {
  CUevent recorded = NULL;

  if (CU_EVENT_CREATE_PTR(&recorded, CU_EVENT_DEFAULT) != CUDA_SUCCESS)
    return NULL;

  if (CU_EVENT_RECORD_PTR(recorded, hStream) == CUDA_SUCCESS)
    return recorded;

  CU_EVENT_DESTROY_V2_PTR(recorded);
  return NULL;
}
/*
 * Epilogue half of a profiled call.
 *
 * status  - return value of the wrapped call.
 * hStart  - start event from the prologue, or NULL if it could not be created.
 * hStream - stream on which the stop event is recorded.
 *
 * Without a start event there is nothing to measure or release. If the
 * wrapped call failed, or the stop event cannot be created/recorded, the
 * start event is destroyed. Otherwise the pair is handed to
 * _register_cuda_event, which takes ownership of both events.
 */
static inline void _event_profile(CUresult status, CUevent hStart, CUstream hStream) {
  CUevent hStop;

  /* Never pass a NULL handle to the destroy call below. */
  if (!hStart)
    return;

  if (status != CUDA_SUCCESS) {
    CU_EVENT_DESTROY_V2_PTR(hStart);
    return;
  }

  hStop = _create_record_event(hStream);
  if (!hStop) {
    CU_EVENT_DESTROY_V2_PTR(hStart);
    return;
  }
  _register_cuda_event(hStart, hStop);
}
/*
 * Emits the event_profiling_results tracepoint for one pending measurement.
 * The events are queried only when the tracepoint is enabled. The trace gets
 * the status of both event queries, the status of the elapsed-time call and
 * the elapsed time itself.
 */
static void _profile_event_results(struct _cuda_event_s *ev) {
  /* Defined value for the trace when the elapsed-time call fails and leaves
   * the output untouched; consumers should check the status field. */
  float milliseconds = 0.0f;
  CUresult startStatus, stopStatus, status;

  if (tracepoint_enabled(lttng_ust_cuda_profiling, event_profiling_results)) {
    startStatus = CU_EVENT_QUERY_PTR(ev->start);
    stopStatus = CU_EVENT_QUERY_PTR(ev->stop);
    status = CU_EVENT_ELAPSED_TIME_PTR(&milliseconds, ev->start, ev->stop);
    do_tracepoint(lttng_ust_cuda_profiling, event_profiling_results,
                  ev->start, ev->stop, startStatus, stopStatus,
                  status, milliseconds);
  }
}
static void _event_cleanup() {
struct _cuda_event_s *ev, *tmp;
DL_FOREACH_SAFE(_events, ev, tmp) {
DL_DELETE(_events, ev);
_profile_event_results(ev);
CU_EVENT_DESTROY_V2_PTR(ev->start);
CU_EVENT_DESTROY_V2_PTR(ev->stop);
free(ev);
}
}
/*
 * Process-exit hook (registered with atexit by the tracer loader): flushes
 * the pending profiling events, but only when profiling is enabled.
 */
static void _lib_cleanup() {
  if (!_do_profile)
    return;
  _event_cleanup();
}
/* One-time-initialization control object.
 * NOTE(review): presumably passed to pthread_once() by the tracer init path,
 * which is not visible here - confirm. */
static pthread_once_t _init = PTHREAD_ONCE_INIT;
/* Per-thread (__thread) flag, initially 0.
 * NOTE(review): looks like a re-entrancy guard for initialization - confirm. */
static __thread volatile int in_init = 0;
/* Process-wide flag, initially 0.
 * NOTE(review): presumably set once the tracer has been loaded - confirm. */
static volatile int _initialized = 0;
......@@ -72,7 +172,15 @@ static void _load_tracer(void) {
}
find_cuda_symbols(handle);
CU_INIT_PTR(0);
find_cuda_extensions();
s = getenv("LTTNG_UST_CUDA_PROFILE");
if (s)
_do_profile = 1;
if (_do_profile)
atexit(&_lib_cleanup);
}
static inline void _init_tracer(void) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment