Commit 46ca2835 authored by Thomas Applencourt's avatar Thomas Applencourt
Browse files

xprof tally + refactoring

parent 553c481a
......@@ -113,13 +113,6 @@ install-exec-hook::
clinterval_callbacks.cpp: $(srcdir)/clinterval_callbacks.cpp.erb $(srcdir)/gen_clprof.rb opencl_model.yaml babeltrace_cl_callbacks.h tracer_opencl.h
SRC_DIR=$(srcdir) $(RUBY) $(srcdir)/gen_clprof.rb production
#clprof_callbacks.cpp clprof_callbacks.h: clprof.c
# @if test -f $@; then \
# touch $@; \
# else \
# rm -f clprof.c; \
# $(MAKE) $(AM_MAKEFLAGS) clprof.c; \
# fi
nodist_libCLProf_la_SOURCES = \
clinterval_callbacks.cpp \
......
......@@ -4,13 +4,11 @@
#include <unordered_map>
#include <tuple>
#include <climits>
#include <queue>
#include "clinterval.h" // For self_message_iterator_g
#include "clinterval_callbacks.h"
#include "clinterval_helpers.h"
#include "clinterval_callbacks_state.h"
//std::queue<const bt_message*> downstream_message_queue;
#include "xprof_utils.h"
void *init_clinterval_callbacks_state() {
clinterval_callbacks_state *s = new clinterval_callbacks_state;
......@@ -62,8 +60,8 @@ static void create_and_enqueue_host_message(const char* hostname, const process_
bt_field_integer_unsigned_set_value(dur_field, duration);
// err
bt_field *err_field_err = bt_field_structure_borrow_member_field_by_index(payload_field, 2);
bt_field_integer_unsigned_set_value(err_field_err, err);
bt_field *err_field = bt_field_structure_borrow_member_field_by_index(payload_field, 2);
bt_field_integer_unsigned_set_value(err_field, err);
/* Set message */
clinterval_callbacks_state* state = (clinterval_callbacks_state*) clinterval_iter_g->callbacks_state;
......@@ -202,7 +200,7 @@ static void clinterval_<%= dbt_event.name %>_callback(
<%# To do handle Alloc and cl_mem_host_ptr %>
<%if dbt_event.name.include?("clEnqueue") and dbt_event.fields.key?("size") %>
state->memory_trafic[hpt_function_name_t(hostname,process_id, thread_id, "<%= dbt_event.name_striped %>")].delta(size);
// state->memory_trafic[hpt_function_name_t(hostname,process_id, thread_id, "<%= dbt_event.name_striped %>")].delta(size);
<% end %>
<%# ___ __ _
......
......@@ -6,6 +6,11 @@
#include <babeltrace2/babeltrace.h>
#include <unordered_map>
// Tuple keys pairing a hostname and a process id with an OpenCL handle
// (command queue, event or kernel).
using hp_command_queue_t = std::tuple<hostname_t, process_id_t, cl_command_queue>;
using hp_event_t = std::tuple<hostname_t, process_id_t, cl_event>;
using hp_kernel_t = std::tuple<hostname_t, process_id_t, cl_kernel>;
struct clinterval_callbacks_state {
std::unordered_map<hp_command_queue_t, dsd_t> command_queue_to_device;
std::unordered_map<hp_event_t,tfn_ts_t> event_to_function_name_and_ts;
......@@ -15,7 +20,6 @@ struct clinterval_callbacks_state {
std::unordered_map<hp_kernel_t, thapi_function_name> kernel_to_name;
std::unordered_map<hpt_function_name_t, uint64_t> host_start;
std::unordered_map<hpt_function_name_t, StatByte> memory_trafic;
std::unordered_map<hp_device_t, std::string> device_to_name;
std::unordered_map<hp_device_t, thapi_device_id> device_to_rootdevice;
std::unordered_map<hpt_t, thapi_device_id> start_device;
......
noinst_HEADERS = lttng/tracepoint.h utarray.h uthash.h utlist.h
nodist_noinst_HEADERS = lttng/tracepoint_gen.h xprof_utils.h xprof.sh.erb
nodist_noinst_HEADERS = lttng/tracepoint_gen.h xprof.sh.erb
lttng/tracepoint_gen.h: $(srcdir)/tracepoint_gen.rb
mkdir -p lttng
$(RUBY) $(srcdir)/tracepoint_gen.rb 25 > lttng/tracepoint_gen.h
xprof_utils.h: $(srcdir)/xprof_utils.h.erb
$(ERB) $(srcdir)/xprof_utils.h.erb > xprof_utils.h
CLEANFILES = lttng/tracepoint_gen.h xprof_utils.h
CLEANFILES = lttng/tracepoint_gen.h
EXTRA_DIST = LTTng.rb tracepoint_gen.rb yaml_ast.rb xprof_utils.h.erb
#pragma once
#include <tuple>
#include <string>
#include "babeltrace2/babeltrace.h"
// Scalar identifiers.
using process_id_t = intptr_t;
using thread_id_t = uintptr_t;
using hostname_t = std::string;
using thapi_function_name = std::string;
using thapi_device_id = uintptr_t;

// Represent a device and a sub device
using dsd_t = std::tuple<thapi_device_id, thapi_device_id>;

// Composite keys. The letters in each name abbreviate the tuple members:
// h = hostname, p = process id, t = thread id.
using hp_t = std::tuple<hostname_t, process_id_t>;
using hpt_t = std::tuple<hostname_t, process_id_t, thread_id_t>;
using hpt_function_name_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_function_name>;
using t_function_name_t = std::tuple<thread_id_t, thapi_function_name>;
using hpt_dsd_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi_device_id>;
using hpt_device_function_name_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi_device_id, thapi_function_name>;
using hp_device_t = std::tuple<hostname_t, process_id_t, thapi_device_id>;
using hp_dsd_t = std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_device_id>;

using sd_t = std::tuple<long,long>;
using tfn_ts_t = std::tuple<thread_id_t, thapi_function_name, long>;
using fn_ts_t = std::tuple<thapi_function_name, long>;
// https://stackoverflow.com/questions/7110301/generic-hash-for-tuples-in-unordered-map-unordered-set
// Hash of std tuple
namespace std{
  // The helpers below used to sit in an anonymous namespace. In a header that
  // gives every translation unit its own internal-linkage copy, while the
  // hash<tuple> specialization that calls them has external linkage. They are
  // now ordinary templates and stay reachable under the same names.
  // NOTE(review): adding names to namespace std is not sanctioned by the
  // standard; moving them to a project namespace would be a follow-up.

  // Code from boost
  // Reciprocal of the golden ratio helps spread entropy
  // and handles duplicates.
  // See Mike Seymour in magic-numbers-in-boosthash-combine:
  // https://stackoverflow.com/questions/4948780
  //
  // Folds the hash of `v` into `seed`.
  template <class T>
  inline void hash_combine(std::size_t& seed, T const& v)
  {
    seed ^= std::hash<T>()(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
  }

  // Recursive template code derived from Matthieu M.
  // Combines elements 0..Index of `tuple` into `seed`, lowest index first.
  // The default Index is tuple_size - 1, so the tuple must not be empty.
  template <class Tuple, size_t Index = std::tuple_size<Tuple>::value - 1>
  struct HashValueImpl
  {
    static void apply(size_t& seed, Tuple const& tuple)
    {
      HashValueImpl<Tuple, Index-1>::apply(seed, tuple);
      hash_combine(seed, std::get<Index>(tuple));
    }
  };

  // Recursion anchor: only element 0 is left to combine.
  template <class Tuple>
  struct HashValueImpl<Tuple,0>
  {
    static void apply(size_t& seed, Tuple const& tuple)
    {
      hash_combine(seed, std::get<0>(tuple));
    }
  };

  // Hash for any std::tuple whose element types are themselves hashable,
  // so tuples can be used as unordered_map / unordered_set keys.
  template <typename ... TT>
  struct hash<std::tuple<TT...>>
  {
    size_t
    operator()(std::tuple<TT...> const& tt) const
    {
      size_t seed = 0;
      HashValueImpl<std::tuple<TT...> >::apply(seed, tt);
      return seed;
    }
  };
}
/*
Utils function
*/
// Returns the value of the "hostname" environment entry of the trace that
// owns `event`'s stream, or an empty hostname when the trace has no such entry.
// Declared inline because this definition lives in a header.
inline const hostname_t borrow_hostname(const bt_event *event){
  const bt_stream *stream = bt_event_borrow_stream_const(event);
  const bt_trace *trace = bt_stream_borrow_trace_const(stream);
  const bt_value *host_name_str = bt_trace_borrow_environment_entry_value_by_name_const(trace, "hostname");
  // The lookup yields NULL when the entry is absent; reading a string out of
  // a null value must be avoided.
  if (host_name_str == nullptr) {
    return hostname_t{};
  }
  return bt_value_string_get(host_name_str);
}
// Reads member 0 of `event`'s common context structure as a signed integer
// and returns it as the process id.
// Declared inline because this definition lives in a header.
inline process_id_t borrow_process_id(const bt_event *event){
  const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
  const bt_field *field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 0);
  return bt_field_integer_signed_get_value(field);
}
// Reads member 1 of `event`'s common context structure as an unsigned integer
// and returns it as the thread id.
// Declared inline because this definition lives in a header.
inline thread_id_t borrow_thread_id(const bt_event *event){
  const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
  const bt_field *field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 1);
  return bt_field_integer_unsigned_get_value(field);
}
.DELETE_ON_ERROR:
BUILT_SOURCES = xprof.c
# tally_utils.h is listed here to force its generation.
# This looks like an ugly hack...
BUILT_SOURCES = xprof.c tally_utils.h
bin_SCRIPTS = iprof
......@@ -8,12 +11,21 @@ iprof: $(top_builddir)/utils/xprof.sh.erb
$(ERB) -T 1 languages=["opencl","ze"] $(top_builddir)/utils/xprof.sh.erb > $@
chmod a+x $@
tally_utils.h: tally_utils.h.erb
SRC_DIR=$(srcdir) ${ERB} $(srcdir)/tally_utils.h.erb > $@
# Library to be packaged
lib_LTLIBRARIES = libXProf.la
# Files to compile
nodist_libXProf_la_SOURCES = \
tally_utils.h
# Files to compile
libXProf_la_SOURCES = \
xprof.c \
tally_callbacks_state.h \
tally.h \
tally.cpp
......@@ -32,3 +44,8 @@ tmplibdir = $(libdir)/tmp
install-data-hook:
$(RM) -r $(DESTDIR)$(tmplibdir)
EXTRA_DIST = \
tally_utils.h.erb
CLEANFILES = \
tally_utils.h
#include "tally.h"
#include "xprof_utils.h" //Typedef and hashtuple
#include "tally_utils.h"
#include "tally_callbacks_state.h"
#include <stdlib.h> // calloc
#include <stdio.h> // printf
#include <string.h> // strcmp
......@@ -45,18 +49,42 @@ bt_component_class_sink_consume_method_status tally_dispatch_consume(
const bt_event *event = bt_message_event_borrow_event_const(message);
const bt_event_class *event_class = bt_event_borrow_class_const(event);
const char * class_name = bt_event_class_get_name(event_class);
//Common context field
const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
const bt_field *hostname_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 0);
const hostname_t hostname = std::string{bt_field_string_get_value(hostname_field)};
const bt_field *process_id_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 1);
const process_id_t process_id = bt_field_integer_signed_get_value(process_id_field);
const bt_field *thread_id_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 2);
const thread_id_t thread_id = bt_field_integer_unsigned_get_value(thread_id_field);
//Payload
const bt_field *payload_field = bt_event_borrow_payload_field_const(event);
const bt_field *name_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 0);
const std::string name = std::string{bt_field_string_get_value(name_field)};
const bt_field *dur_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 1);
const long dur = bt_field_integer_unsigned_get_value(dur_field);
// TODO: compare the event class type rather than a name string.
if (strcmp(class_name,"lttng:host") == 0 ) {
const bt_field *err_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 2);
const bool err = bt_field_bool_get_value(err_field);
//const hostname_t hostname = borrow_hostname(bt_evt);
//const process_id_t process_id = borrow_process_id(bt_evt);
//const thread_id_t thread_id = borrow_thread_id(bt_evt);
if (strcmp(class_name,"lttng:host") == 0 ) {
printf("%s, host\n",class_name);
//dispatch->tally_host[hpt_function_name_t(hostname,process_id, thread_id, event_name)].delta(duration, error);
//printf("%s %s, host\n",class_name, hostname.c_str() );
dispatch->host[hpt_function_name_t(hostname,process_id, thread_id, name)].delta(dur, err);
} else if ( strcmp(class_name,"lttng:device") == 0 ) {
printf("device\n");
//dispatch->tally_device[hpt_function_name_t(hostname,process_id, thread_id, device, subdevice, function_name)].delta(duration, error);
const bt_field *did_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 2);
const thapi_device_id did = bt_field_integer_unsigned_get_value(did_field);
const bt_field *sdid_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 3);
const thapi_device_id sdid = bt_field_integer_unsigned_get_value(sdid_field);
dispatch->device[hpt_device_function_name_t(hostname,process_id, thread_id, did, sdid, (thapi_function_name) name)].delta(dur, false);
}
}
......@@ -75,7 +103,7 @@ bt_component_class_initialize_method_status tally_dispatch_initialize(
const bt_value *params, void *initialize_method_data)
{
/* Allocate a private data structure */
struct tally_dispatch *dispatch = (tally_dispatch*) calloc(1, sizeof(struct tally_dispatch));
struct tally_dispatch *dispatch = new tally_dispatch; //(tally_dispatch*) calloc(1, sizeof(struct tally_dispatch));
/* Set the component's user data to our private data structure */
bt_self_component_set_data(
......@@ -96,6 +124,18 @@ bt_component_class_initialize_method_status tally_dispatch_initialize(
return BT_COMPONENT_CLASS_INITIALIZE_METHOD_STATUS_OK;
}
/*
 * Finalizes the sink component: prints the accumulated host and device
 * tallies, then releases the component's private data.
 */
void tally_dispatch_finalize(bt_self_component_sink *self_component_sink)
{
  struct tally_dispatch *dispatch = static_cast<tally_dispatch*>(bt_self_component_get_data(
      bt_self_component_sink_as_self_component(self_component_sink)));
  /* Nothing to report or release if no private data was ever attached. */
  if (dispatch == nullptr) {
    return;
  }

  print_compact_api_call(dispatch->host);
  print_compact_device_id_result(dispatch->device);

  /* The private data is created with `new` in tally_dispatch_initialize;
   * without this it would never be released. */
  delete dispatch;
}
/*
* Called when the trace processing graph containing the sink component
* is configured.
......
......@@ -6,11 +6,6 @@
extern "C" {
#endif
/* Sink component's private data */
struct tally_dispatch {
/* Message iterator handle kept with the component.
 * NOTE(review): presumably the upstream iterator the sink consumes from;
 * confirm against tally.cpp. */
bt_message_iterator *message_iterator;
};
bt_component_class_sink_consume_method_status tally_dispatch_consume(
bt_self_component_sink *self_component_sink);
......
#pragma once
#include "xprof_utils.h"
#include "tally_utils.h"
/* Sink component's private data */
struct tally_dispatch {
  // Explicitly null-initialized: the instance is created with
  // `new tally_dispatch`, which, unlike the previous calloc, does not zero
  // a plain pointer member.
  bt_message_iterator *message_iterator = nullptr;
  // StatTime per (hostname, process id, thread id, function name).
  std::unordered_map<hpt_function_name_t, StatTime> host;
  // StatTime per (hostname, process id, thread id, device, sub-device, function name).
  std::unordered_map<hpt_device_function_name_t, StatTime> device;
};
#pragma once
#include <string>
#include <iomanip>
#include <climits>
......@@ -7,147 +9,47 @@
#include <algorithm>
#include <set>
#include "babeltrace2/babeltrace.h"
using process_id_t = intptr_t;
using thread_id_t = uintptr_t;
using hostname_t = std::string;
using thapi_function_name = std::string;
using thapi_device_id = uintptr_t;
<%# Represent a device and a sub device %>
using dsd_t = std::tuple<thapi_device_id, thapi_device_id>;
<%# Represent a start and a timestamp %>
using fn_ts_t = std::tuple<thapi_function_name, size_t>;
using tfn_ts_t = std::tuple<thread_id_t, thapi_function_name, size_t>;
// Composite keys. The letters in each name abbreviate the tuple members:
// h = hostname, p = process id, t = thread id.
using hp_t = std::tuple<hostname_t, process_id_t>;
using hpt_t = std::tuple<hostname_t, process_id_t, thread_id_t>;
using hpt_function_name_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_function_name>;
using t_function_name_t = std::tuple<thread_id_t, thapi_function_name>;
using hpt_dsd_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi_device_id>;
using hpt_device_function_name_t = std::tuple<hostname_t, process_id_t, thread_id_t, thapi_device_id, thapi_device_id, thapi_function_name>;
using hp_device_t = std::tuple<hostname_t, process_id_t, thapi_device_id>;
using hp_dsd_t = std::tuple<hostname_t, process_id_t, thapi_device_id, thapi_device_id>;
using hp_command_queue_t = std::tuple<hostname_t, process_id_t, cl_command_queue>;
using hp_event_t = std::tuple<hostname_t, process_id_t, cl_event>;
using hp_kernel_t = std::tuple<hostname_t, process_id_t, cl_kernel>;
using sd_t = std::tuple<uint64_t, uint64_t>; // Start, Duration
// https://stackoverflow.com/questions/7110301/generic-hash-for-tuples-in-unordered-map-unordered-set
// Hash of std tuple
namespace std{
  // The helpers below used to sit in an anonymous namespace. In a header that
  // gives every translation unit its own internal-linkage copy, while the
  // hash<tuple> specialization that calls them has external linkage. They are
  // now ordinary templates and stay reachable under the same names.
  // NOTE(review): adding names to namespace std is not sanctioned by the
  // standard; moving them to a project namespace would be a follow-up.

  // Code from boost
  // Reciprocal of the golden ratio helps spread entropy
  // and handles duplicates.
  // See Mike Seymour in magic-numbers-in-boosthash-combine:
  // https://stackoverflow.com/questions/4948780
  //
  // Folds the hash of `v` into `seed`.
  template <class T>
  inline void hash_combine(std::size_t& seed, T const& v)
  {
    seed ^= std::hash<T>()(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);
  }

  // Recursive template code derived from Matthieu M.
  // Combines elements 0..Index of `tuple` into `seed`, lowest index first.
  // The default Index is tuple_size - 1, so the tuple must not be empty.
  template <class Tuple, size_t Index = std::tuple_size<Tuple>::value - 1>
  struct HashValueImpl
  {
    static void apply(size_t& seed, Tuple const& tuple)
    {
      HashValueImpl<Tuple, Index-1>::apply(seed, tuple);
      hash_combine(seed, std::get<Index>(tuple));
    }
  };

  // Recursion anchor: only element 0 is left to combine.
  template <class Tuple>
  struct HashValueImpl<Tuple,0>
  {
    static void apply(size_t& seed, Tuple const& tuple)
    {
      hash_combine(seed, std::get<0>(tuple));
    }
  };

  // Hash for any std::tuple whose element types are themselves hashable,
  // so tuples can be used as unordered_map / unordered_set keys.
  template <typename ... TT>
  struct hash<std::tuple<TT...>>
  {
    size_t
    operator()(std::tuple<TT...> const& tt) const
    {
      size_t seed = 0;
      HashValueImpl<std::tuple<TT...> >::apply(seed, tt);
      return seed;
    }
  };
}
/*
Utils function
*/
// Returns the value of the "hostname" environment entry of the trace that
// owns `event`'s stream, or an empty hostname when the trace has no such entry.
// Declared inline because this definition lives in a header.
inline const hostname_t borrow_hostname(const bt_event *event){
  const bt_stream *stream = bt_event_borrow_stream_const(event);
  const bt_trace *trace = bt_stream_borrow_trace_const(stream);
  const bt_value *host_name_str = bt_trace_borrow_environment_entry_value_by_name_const(trace, "hostname");
  // The lookup yields NULL when the entry is absent; reading a string out of
  // a null value must be avoided.
  if (host_name_str == nullptr) {
    return hostname_t{};
  }
  return bt_value_string_get(host_name_str);
}
// Reads member 0 of `event`'s common context structure as a signed integer
// and returns it as the process id.
// Declared inline because this definition lives in a header.
inline process_id_t borrow_process_id(const bt_event *event){
  const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
  const bt_field *field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 0);
  return bt_field_integer_signed_get_value(field);
}
// Reads member 1 of `event`'s common context structure as an unsigned integer
// and returns it as the thread id.
// Declared inline because this definition lives in a header.
inline thread_id_t borrow_thread_id(const bt_event *event){
  const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
  const bt_field *field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 1);
  return bt_field_integer_unsigned_get_value(field);
}
#include "xprof_utils.h"
// Formats `a_value` in fixed-point notation with `n` digits after the decimal
// point and appends `extension` (typically a unit suffix).
// The body previously appeared twice, which redeclared `out` and left dead
// code after the first `return`; only one copy is kept.
template <typename T>
std::string to_string_with_precision(const T a_value, const std::string extension, const int n = 2)
{
    std::ostringstream out;
    out.precision(n);
    out << std::fixed << a_value << extension;
    return out.str();
}
template <typename T>
std::string format_byte(const T duration) {
<%[ ['TB', '1e+12'],
<%[ ['TB', '1e+12'],
['GB', '1e+9'],
['MB', '1e+6'],
['kB', '1e+3'],
['MB', '1e+6'],
['kB', '1e+3'],
['B', '1']].each do | unit, factor | %>
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
<% end %>
return "";
return "";
}
template <typename T>
std::string format_time(const T duration) {
<% [['h', '3.6e+12'],
<% [['h', '3.6e+12'],
['min', '6e+10'],
['s', '1e+9'],
['ms', '1e+6'],
['us', '1e+3'],
['ns', '1']].each do | unit, factor | %>
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
<% end %>
return "";
return "";
}
/*
......@@ -155,25 +57,21 @@ Class for time
*/
// Text form of one statistics row: every column is held as a string.
// Each member was previously declared twice, which cannot compile; the
// single set below keeps the original declaration order.
struct StatIprof_string {
    std::string time;
    std::string time_ratio;
    std::string count;
    std::string avg;
    std::string min;
    std::string max;
    std::string error;
};
class StatIprof {
uint64_t _start;
uint64_t _start;
public:
uint64_t _total_time{1};
void start(const int64_t);
void stop(const int64_t);
void delta(const uint64_t);
void delta(const uint64_t, const bool);
void merge(StatIprof);
void set_error(void);
StatIprof_string to_string(const uint64_t);
//Used in merge
......@@ -181,33 +79,19 @@ class StatIprof {
uint64_t _min{ULONG_MAX};
uint64_t _max{0};
uint64_t _time{0};
bool _finished{true};
uint64_t _error{0};
};
// Records `start` as the beginning of a measurement and marks the
// statistic as not finished.
// Declared inline because this out-of-class definition lives in a header.
inline void StatIprof::start(const int64_t start) {
    _start = start;
    _finished = false;
}
// Closes the measurement opened by start(): the elapsed value
// `stop - _start` updates min, max, call count and total, and the
// statistic is marked finished.
// Declared inline because this out-of-class definition lives in a header.
inline void StatIprof::stop(const int64_t stop) {
    const uint64_t delta = stop - _start;
    _min = std::min(_min,delta);
    _max = std::max(_max,delta);
    _count++;
    _time += delta;
    _finished = true;
}
// Counts one more failed call.
// Declared inline because this out-of-class definition lives in a header.
inline void StatIprof::set_error() {
    _error++;
}
void StatIprof::delta(const uint64_t delta) {
void StatIprof::delta(const uint64_t delta,const bool error) {
_min = std::min(_min,delta);
_max = std::max(_max,delta);
_count++;
// total can overflow. Need to handle that correctly at some point...
_time += delta;
if (error) {
_error += 1;
}
}
void StatIprof::merge(StatIprof st) {
......@@ -216,7 +100,6 @@ void StatIprof::merge(StatIprof st) {
// total can overflow. Need to handle that correctly at some point...
_count += st._count;
_time += st._time;
_finished = _finished && st._finished;
_error += st._error;
}
......@@ -237,23 +120,6 @@ class <%= n %>: public StatIprof {
};
<% end %>
<%# Global variables %>
// StatTime per (hostname, process id, thread id, device, sub-device, function name).
std::unordered_map<hpt_device_function_name_t, StatTime> device_id_result;
// StatTime per (hostname, process id, thread id, function name).
std::unordered_map<hpt_function_name_t, StatTime> api_call;
// StatByte per (hostname, process id, thread id, function name).
std::unordered_map<hpt_function_name_t, StatByte> memory_trafic;
// Lookups keyed by (hostname, process id, device).
std::unordered_map<hp_device_t, std::string> device_to_name;
std::unordered_map<hp_device_t, thapi_device_id> device_to_rootdevice;
// Lookups keyed by (hostname, process id, thread id).
std::unordered_map<hpt_t, thapi_device_id> start_device;
std::unordered_map<hpt_t, thapi_function_name> profiled_function_name;
std::unordered_map<hpt_t, fn_ts_t> profiled_function_name_and_ts;
// Device / sub-device pair per (hostname, process id, thread id, function name).
std::unordered_map<hpt_function_name_t, dsd_t> function_name_to_dsd;
<%#
_
|_) ._ o ._ _|_ /\ ._ ._ _.
......@@ -265,7 +131,7 @@ std::unordered_map<hpt_function_name_t, dsd_t> function_name_to_dsd;
<% d_headers = [ ["StatTime","format_time", [ 'Name', 'Time', 'Time(%)', 'Calls', 'Average', 'Min', 'Max', 'Failed'] ],
["StatByte", "format_byte", [ 'Name', 'Byte', 'Byte(%)', 'Calls', 'Average', 'Min', 'Max', 'Failed'] ] ] %>
<% d_headers.each do | main_type, main_function, l_headers | %>
<% d_headers.each do | main_type, main_function, l_headers | %>
void print_array(std::unordered_map<thapi_function_name, <%= main_type %> > aggregated, std::string header, unsigned int _print_error= 0) {
<%# print_error = 0 -> Never print error
= 1 -> Print only when error
......@@ -368,24 +234,24 @@ void print_array(std::unordered_map<thapi_function_name, <%= main_type %> > aggr
/
%>
<% output = [ ['StatTime', 'api_call', 'API calls', '1', 'hpt_t',
<% output = [ ['StatTime', 'std::unordered_map<hpt_function_name_t, StatTime>', 'api_call', 'API calls', '1', 'hpt_t',