Commit 4c609030 authored by Thomas Applencourt's avatar Thomas Applencourt
Browse files

Add tally

parent 99cd02cf
Pipeline #13149 canceled with stage
......@@ -92,4 +92,5 @@ AC_CONFIG_FILES([ze/test_wrapper.sh], [chmod +x ze/test_wrapper.sh])
AC_CONFIG_FILES([cuda/babeltrace_cuda], [chmod +x cuda/babeltrace_cuda])
AC_CONFIG_FILES([cuda/tracer_cuda.sh], [chmod +x cuda/tracer_cuda.sh])
AC_CONFIG_FILES([cuda/test_wrapper.sh], [chmod +x cuda/test_wrapper.sh])
AC_CONFIG_FILES([xprof/test_wrapper.sh], [chmod +x xprof/test_wrapper.sh])
AC_OUTPUT
.DELETE_ON_ERROR:
bin_SCRIPTS = iprof
iprof: $(top_builddir)/utils/xprof.sh.erb
$(ERB) -T 1 languages=["opencl","ze"] $(top_builddir)/utils/xprof.sh.erb > $@
chmod a+x $@
CLEANFILES = iprof
tally_utils.hpp: tally_utils.hpp.erb
SRC_DIR=$(srcdir) ${ERB} $(srcdir)/tally_utils.hpp.erb > $@
xprof_utils.hpp: $(top_srcdir)/utils/xprof_utils.hpp
cp $< $@
BUILT_SOURCES = \
tally_utils.hpp \
xprof_utils.hpp
# Library to be packaged
lib_LTLIBRARIES = libXProf.la
# Files to be compiled
nodist_libXProf_la_SOURCES = \
tally_utils.hpp \
xprof_utils.hpp
# Files to be compiled
libXProf_la_SOURCES = \
xprof.c \
tally.hpp \
tally.h \
tally.cpp \
tally_utils.hpp
# Compiler flags
libXProf_la_CPPFLAGS = -I$(top_srcdir)/utils -I$(srcdir)/include -I./
libXProf_la_CFLAGS = -Wall -Wextra -Wno-unused-parameter -Werror $(BABELTRACE2_CFLAGS)
libXProf_la_CXXFLAGS = -std=c++17 -Wall -Wextra -Wno-unused-parameter -Werror $(BABELTRACE2_CFLAGS)
libXProf_la_LDFLAGS = $(BABELTRACE2_LIBS) -avoid-version -module
# Cannot use check_LTLIBRARIES because we need the shared version of those
# Thanks Vincent Danjean
# noinst_LTLIBRARIES would be the correct thing but then libtool
# only built non shared version :-( So, declaring the libs as
# pkglib_LTLIBRARIES and using an install hook to remove them.
tmplibdir = $(libdir)/tmp
install-data-hook:
$(RM) -r $(DESTDIR)$(tmplibdir)
TALLY_DUST_FILES = \
tests/interval_profiling_normal.dust
$(TALLY_DUST_FILES): tests/%.dust: tests/tally.dust.erb
mkdir -p tests/
$(ERB) namespace="$@" $(srcdir)/tests/tally.dust.erb > $@
TESTS = \
$(TALLY_DUST_FILES)
TEST_EXTENSIONS = .dust
DUST_LOG_COMPILER = ./test_wrapper.sh
check_DATA = \
test_wrapper.sh
data_DATA = \
$(TRACE_FILES)
EXTRA_DIST = \
tally_utils.hpp.erb \
interval.c.erb \
interval.h.erb \
interval_model.yaml
interval_model.yaml \
xprof_utils.hpp
CLEANFILES = \
iprof \
tally_utils.hpp \
xprof_utils.hpp \
$(TALLY_DUST_FILES)
#include "tally.h"
#include "tally.hpp"
#include "xprof_utils.hpp" //Typedef and hashtuple
#include "tally_utils.hpp"
#include <string.h> // strcmp
/*
 * Sink "consume" method.
 *
 * Pulls one batch of messages from the upstream message iterator and, for
 * every event message, adds the event to one of the tallies stored in the
 * component's private data:
 *   - `lttng:host`    -> dispatch->host    (duration, error flag)
 *   - `lttng:device`  -> dispatch->device  (duration, device and sub-device id)
 *   - `lttng:traffic` -> dispatch->traffic (size)
 * Events of any other class are ignored. Every message of the batch has its
 * reference released once it has been looked at.
 *
 * Returns the consume status matching the iterator status (END, AGAIN,
 * MEMORY_ERROR, ERROR), or OK when a batch was processed.
 */
bt_component_class_sink_consume_method_status tally_dispatch_consume(
    bt_self_component_sink *self_component_sink)
{
    /* Retrieve our private data from the component's user data */
    struct tally_dispatch *dispatch = (tally_dispatch*) bt_self_component_get_data(
        bt_self_component_sink_as_self_component(self_component_sink));

    /* Consume a batch of messages from the upstream message iterator */
    bt_message_array_const messages;
    uint64_t message_count;
    const bt_message_iterator_next_status next_status =
        bt_message_iterator_next(dispatch->message_iterator, &messages,
                                 &message_count);

    switch (next_status) {
    case BT_MESSAGE_ITERATOR_NEXT_STATUS_END:
        /* End of iteration: put the message iterator's reference */
        bt_message_iterator_put_ref(dispatch->message_iterator);
        return BT_COMPONENT_CLASS_SINK_CONSUME_METHOD_STATUS_END;
    case BT_MESSAGE_ITERATOR_NEXT_STATUS_AGAIN:
        return BT_COMPONENT_CLASS_SINK_CONSUME_METHOD_STATUS_AGAIN;
    case BT_MESSAGE_ITERATOR_NEXT_STATUS_MEMORY_ERROR:
        return BT_COMPONENT_CLASS_SINK_CONSUME_METHOD_STATUS_MEMORY_ERROR;
    case BT_MESSAGE_ITERATOR_NEXT_STATUS_ERROR:
        return BT_COMPONENT_CLASS_SINK_CONSUME_METHOD_STATUS_ERROR;
    default:
        break;
    }

    /* For each consumed message */
    for (uint64_t i = 0; i < message_count; i++) {
        const bt_message *message = messages[i];
        if (bt_message_get_type(message) == BT_MESSAGE_TYPE_EVENT) {
            const bt_event *event = bt_message_event_borrow_event_const(message);
            const bt_event_class *event_class = bt_event_borrow_class_const(event);
            const char *class_name = bt_event_class_get_name(event_class);

            /* Common context: member 0 = hostname, 1 = process id, 2 = thread id */
            const bt_field *common_context_field = bt_event_borrow_common_context_field_const(event);
            const bt_field *hostname_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 0);
            const hostname_t hostname = std::string{bt_field_string_get_value(hostname_field)};
            const bt_field *process_id_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 1);
            const process_id_t process_id = bt_field_integer_signed_get_value(process_id_field);
            const bt_field *thread_id_field = bt_field_structure_borrow_member_field_by_index_const(common_context_field, 2);
            const thread_id_t thread_id = bt_field_integer_unsigned_get_value(thread_id_field);

            /* Payload: member 0 is always the name */
            const bt_field *payload_field = bt_event_borrow_payload_field_const(event);
            const bt_field *name_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 0);
            const std::string name = std::string{bt_field_string_get_value(name_field)};

            /*
             * TODO: dispatch on the event class itself rather than on its name.
             *
             * The unsigned 64-bit field values are kept as uint64_t (the type
             * taken by delta()) so that they are never narrowed.
             */
            if (strcmp(class_name, "lttng:host") == 0) {
                const bt_field *dur_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 1);
                const uint64_t dur = bt_field_integer_unsigned_get_value(dur_field);
                const bt_field *err_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 2);
                const bool err = bt_field_bool_get_value(err_field);
                dispatch->host[hpt_function_name_t(hostname,process_id, thread_id, name)].delta(dur, err);
            } else if (strcmp(class_name, "lttng:device") == 0) {
                const bt_field *dur_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 1);
                const uint64_t dur = bt_field_integer_unsigned_get_value(dur_field);
                const bt_field *did_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 2);
                const thapi_device_id did = bt_field_integer_unsigned_get_value(did_field);
                const bt_field *sdid_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 3);
                const thapi_device_id sdid = bt_field_integer_unsigned_get_value(sdid_field);
                dispatch->device[hpt_device_function_name_t(hostname,process_id, thread_id, did, sdid, (thapi_function_name) name)].delta(dur, false);
            } else if (strcmp(class_name, "lttng:traffic") == 0) {
                const bt_field *size_field = bt_field_structure_borrow_member_field_by_index_const(payload_field, 1);
                const uint64_t size = bt_field_integer_unsigned_get_value(size_field);
                dispatch->traffic[hpt_function_name_t(hostname,process_id, thread_id, name)].delta(size, false);
            }
        }
        /* The batch hands us one reference per message: release it */
        bt_message_put_ref(message);
    }

    return BT_COMPONENT_CLASS_SINK_CONSUME_METHOD_STATUS_OK;
}
/*
 * Initializes the sink component.
 *
 * Allocates the component's private `tally_dispatch` state, attaches it to
 * the component as user data and adds the input port named `in`.
 * `configuration`, `params` and `initialize_method_data` are not used.
 *
 * Always returns BT_COMPONENT_CLASS_INITIALIZE_METHOD_STATUS_OK.
 */
bt_component_class_initialize_method_status tally_dispatch_initialize(
    bt_self_component_sink *self_component_sink,
    bt_self_component_sink_configuration *configuration,
    const bt_value *params, void *initialize_method_data)
{
    /*
     * Allocate the private data structure. The braces value-initialize it,
     * so `message_iterator` starts out as a null pointer instead of an
     * indeterminate value.
     */
    struct tally_dispatch *dispatch = new tally_dispatch{};

    /* Set the component's user data to our private data structure */
    bt_self_component_set_data(
        bt_self_component_sink_as_self_component(self_component_sink),
        dispatch);

    /*
     * Add an input port named `in` to the sink component.
     *
     * This is needed so that this sink component can be connected to a
     * filter or a source component. With a connected upstream
     * component, this sink component can create a message iterator
     * to consume messages.
     *
     * NOTE(review): the status returned by the call is not checked.
     */
    bt_self_component_sink_add_input_port(self_component_sink,
                                          "in", NULL, NULL);

    return BT_COMPONENT_CLASS_INITIALIZE_METHOD_STATUS_OK;
}
/*
 * Finalizes the sink component.
 *
 * Prints the compact host, device and traffic tables from the tallies
 * accumulated in the component's private data, then destroys that private
 * data (it was allocated with `new` by tally_dispatch_initialize).
 */
void tally_dispatch_finalize(bt_self_component_sink *self_component_sink)
{
    struct tally_dispatch *dispatch = (tally_dispatch*) bt_self_component_get_data(
        bt_self_component_sink_as_self_component(self_component_sink));

    print_compact_host(dispatch->host);
    print_compact_device(dispatch->device);
    print_compact_traffic(dispatch->traffic);

    /* Release the private state; nothing else frees it */
    delete dispatch;
}
/*
 * "Graph is configured" method of the sink component.
 *
 * Creates the upstream message iterator on the sink's input port and stores
 * it in the component's private data, where the consume method reads it.
 *
 * NOTE(review): the status returned by the iterator creation is not checked;
 * OK is returned unconditionally.
 */
bt_component_class_sink_graph_is_configured_method_status
tally_dispatch_graph_is_configured(bt_self_component_sink *self_component_sink)
{
    /* The sink has a single input port: borrow the one at index 0 */
    bt_self_component_port_input *input_port =
        bt_self_component_sink_borrow_input_port_by_index(self_component_sink, 0);

    /* Private state previously attached to the component as user data */
    struct tally_dispatch *state = (tally_dispatch*) bt_self_component_get_data(
        bt_self_component_sink_as_self_component(self_component_sink));

    /* Create the upstream message iterator and keep it in the private state */
    bt_message_iterator_create_from_sink_component(
        self_component_sink, input_port, &state->message_iterator);

    return BT_COMPONENT_CLASS_SINK_GRAPH_IS_CONFIGURED_METHOD_STATUS_OK;
}
#pragma once
#include <babeltrace2/babeltrace.h>
/*
 * Entry points of the tally sink component, declared with C linkage when
 * compiled as C++.
 * NOTE(review): presumably so that the C source of the plugin can reference
 * them - confirm against xprof.c.
 */
#ifdef __cplusplus
extern "C" {
#endif
/* Consumes one batch of upstream messages and updates the tallies. */
bt_component_class_sink_consume_method_status tally_dispatch_consume(
bt_self_component_sink *self_component_sink);
/* Allocates the private data of the sink and adds its `in` input port. */
bt_component_class_initialize_method_status tally_dispatch_initialize(
bt_self_component_sink *self_component_sink,
bt_self_component_sink_configuration *configuration,
const bt_value *params, void *initialize_method_data);
/* Prints the accumulated host, device and traffic tables. */
void tally_dispatch_finalize(bt_self_component_sink *self_component_sink);
/* Creates the upstream message iterator once the graph is configured. */
bt_component_class_sink_graph_is_configured_method_status
tally_dispatch_graph_is_configured(bt_self_component_sink *self_component_sink);
#ifdef __cplusplus
}
#endif
#pragma once
#include "xprof_utils.hpp"
#include "tally_utils.hpp"
/* Sink component's private data */
struct tally_dispatch {
    /* Upstream message iterator; null until one has been created */
    bt_message_iterator *message_iterator = nullptr;
    /* Duration tally of `lttng:host` events */
    std::unordered_map<hpt_function_name_t, StatTime> host;
    /* Duration tally of `lttng:device` events (key also holds device ids) */
    std::unordered_map<hpt_device_function_name_t, StatTime> device;
    /* Size tally of `lttng:traffic` events */
    std::unordered_map<hpt_function_name_t, StatByte> traffic;
};
#pragma once
#include <algorithm>
#include <climits>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "xprof_utils.hpp"
/*
 * Formats `a_value` in fixed notation with `n` digits after the decimal
 * point (2 by default) and appends `extension` to it.
 *
 * `extension` is taken by const reference so that no string copy is made on
 * each call.
 */
template <typename T>
std::string to_string_with_precision(const T a_value, const std::string &extension, const int n = 2)
{
    std::ostringstream out;
    out.precision(n);
    out << std::fixed << a_value << extension;
    return out.str();
}
/*
 * Formats a byte count with a unit suffix.
 *
 * The template expands to one test per unit, from the largest to the
 * smallest (TB, GB, MB, kB, B, using powers of 1000). The first unit for
 * which the scaled value is at least 1 is used, and the value is printed
 * with the default precision of to_string_with_precision.
 */
template <typename T>
std::string format_byte(const T duration) {
<%[ ['TB', '1e+12'],
['GB', '1e+9'],
['MB', '1e+6'],
['kB', '1e+3'],
['B', '1']].each do | unit, factor | %>
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
<% end %>
/* No unit matched: the value is below 1 (for instance 0) */
return "";
}
/*
 * Formats a duration with a unit suffix. The input is read as a number of
 * nanoseconds (the `ns` unit has a factor of 1).
 *
 * The template expands to one test per unit, from the largest to the
 * smallest (h, min, s, ms, us, ns). The first unit for which the scaled
 * value is at least 1 is used, and the value is printed with the default
 * precision of to_string_with_precision.
 */
template <typename T>
std::string format_time(const T duration) {
<% [['h', '3.6e+12'],
['min', '6e+10'],
['s', '1e+9'],
['ms', '1e+6'],
['us', '1e+3'],
['ns', '1']].each do | unit, factor | %>
const double <%= unit %> = duration / <%= factor %>;
if (<%= unit %> >= 1.) {
return to_string_with_precision(<%= unit %>, "<%= unit %>");
}
<% end %>
/* No unit matched: the value is below 1 (for instance 0) */
return "";
}
/*
Statistics accumulators and their printable (string) form
*/
/* Printable form of one statistics row: every column already formatted. */
struct StatIprof_string {
    std::string time;        // accumulated total
    std::string time_ratio;  // share of the grand total, as a percentage
    std::string count;       // number of samples
    std::string avg;         // average sample
    std::string min;         // smallest sample
    std::string max;         // largest sample
    std::string error;       // number of samples flagged as errors
};

/*
 * Running statistics over a stream of unsigned 64-bit samples: count, min,
 * max, sum and number of samples flagged as errors.
 */
class StatIprof {
    // Not used by any member defined here; zero-initialized so that it never
    // holds an indeterminate value.
    uint64_t _start{0};
public:
    // Adds one sample; the second argument flags the sample as an error.
    void delta(const uint64_t, const bool);
    // Folds the statistics of another accumulator into this one.
    void merge(const StatIprof &);
    StatIprof_string to_string(const uint64_t);
    //Used in merge
    uint64_t _count{0};
    uint64_t _min{ULONG_MAX};  // ULONG_MAX means "no sample recorded yet"
    uint64_t _max{0};
    uint64_t _time{0};
    uint64_t _error{0};
};

// `inline`: this definition lives in a header, so it must be allowed to
// appear in every translation unit that includes it.
inline void StatIprof::delta(const uint64_t delta, const bool error) {
    _min = std::min(_min, delta);
    _max = std::max(_max, delta);
    _count++;
    // total can overflow. Need to handle that correctly at some point...
    _time += delta;
    if (error) {
        _error += 1;
    }
}

// `inline` for the same reason as delta(). The argument is only read, so it
// is taken by const reference instead of being copied.
inline void StatIprof::merge(const StatIprof &st) {
    _min = std::min(_min, st._min);
    _max = std::max(_max, st._max);
    // total can overflow. Need to handle that correctly at some point...
    _count += st._count;
    _time += st._time;
    _error += st._error;
}
<% { "StatTime" => "format_time",
"StatByte" => "format_byte" }.each do |n,t| %>
/*
 * Statistics accumulator whose values are rendered with <%= t %>.
 */
class <%= n %>: public StatIprof {
public:
    /*
     * Returns the printable form of the statistics. `total_time` is the grand
     * total used to compute the percentage column.
     */
    StatIprof_string to_string(const uint64_t total_time){
        // Divide in floating point: an integer division would truncate the average.
        const double avg = _count ? static_cast<double>(_time) / _count : 0.;
        // A zero grand total would give NaN: report 0% instead.
        const double ratio = total_time ? (100.*_time / total_time) : 0.;
        return StatIprof_string{<%= t %>(_time), // Time
            to_string_with_precision(ratio, "%"), // Percent
            to_string_with_precision(_count,"",0), // Count
            <%= t %>(avg),
            <%= t %>(_min != ULONG_MAX ? _min : 0), // ULONG_MAX marks "no sample"
            <%= t %>(_max),
            to_string_with_precision(_error,"",0)};
    }
};
<% end %>
<%#
_
|_) ._ o ._ _|_ /\ ._ ._ _.
| | | | | |_ /--\ | | (_| \/
/
%>
<% l_menbers = ['function_name', 'time','time_ratio','count','avg','min','max','error'] %>
<% d_headers = [ ["StatTime","format_time", [ 'Name', 'Time', 'Time(%)', 'Calls', 'Average', 'Min', 'Max', 'Failed'] ],
["StatByte", "format_byte", [ 'Name', 'Byte', 'Byte(%)', 'Calls', 'Average', 'Min', 'Max', 'Failed'] ] ] %>
<% d_headers.each do | main_type, main_function, l_headers | %>
/*
 * Prints `header` followed by a right-aligned table of `aggregated` on
 * std::cout: one row per name, sorted by decreasing total, then a "Total"
 * footer. Nothing is printed when the total sample count is zero.
 *
 * `_print_error` selects when the error column is shown:
 *   0 -> never, 1 -> only if at least one error was counted, >= 2 -> always.
 *
 * Declared `inline` because the definition lives in a header. The map and the
 * header string are only read, so both are taken by const reference.
 */
inline void print_array(const std::unordered_map<thapi_function_name, <%= main_type %> > &aggregated, const std::string &header, unsigned int _print_error= 0) {
<%# print_error = 0 -> Never print error
= 1 -> Print only when error
>= 2 -> Always print error
%>
    uint64_t total_time{0};
    uint64_t total_count{0};
    uint64_t total_error{0};
    for (const auto &element: aggregated) {
        total_time += element.second._time;
        total_count += element.second._count;
        total_error += element.second._error;
    }
    if (!total_count) {
        return;
    }
    std::cout << header << std::endl;
    bool print_error;
    if (_print_error == 0)
        print_error = false;
    else if (_print_error == 1)
        print_error = total_error != 0;
    else
        print_error = true;
<%# Need to be bigger than the header %>
    // Column widths are std::size_t, the type returned by std::string::size(),
    // so that both arguments of every std::max call below have the same type.
<% l_menbers.zip(l_headers).each do |v, n| %>
    std::size_t len_max_<%= v %> = <%= n.size() %>;
<% end %>
<%# Need to be bigger than the footer %>
    len_max_function_name = std::max(len_max_function_name, std::size_t{<%= "Total".size() %>} );
    len_max_time = std::max(len_max_time, <%= main_function %>(total_time).size());
    len_max_count = std::max(len_max_count, to_string_with_precision(total_count,"",0).size() );
    len_max_error = std::max(len_max_error, to_string_with_precision(total_error,"",0).size() );
<%# Need to be bigger than the body %>
    // The element is copied on purpose: to_string() is called on it and is
    // not required to be a const member function.
    for (std::pair<thapi_function_name, <%= main_type %> > element: aggregated) {
        len_max_function_name = std::max(len_max_function_name, element.first.size());
        const auto [time, time_ratio, count, avg, min, max, error ] = element.second.to_string(total_time);
<% l_menbers.drop(1).each do |v| %>
        len_max_<%= v %> = std::max(len_max_<%= v %>, <%= v %>.size());
<% end %>
    }
<%# Sort the table %>
    std::vector<std::pair<thapi_function_name, <%= main_type %> >> array_sorted(aggregated.begin(), aggregated.end());
    std::sort(array_sorted.begin(), array_sorted.end(), [](const auto &a, const auto &b) { return a.second._time > b.second._time;} );
<%# Print Header %>
<% l_menbers.zip(l_headers).each do |v, n| %>
<% if v == "error" %>
    if (print_error)
<% end %>
    std::cout << std::setw(len_max_<%= v %>) << std::right << "<%= n %>" << " | ";
<% end %>
    std::cout << std::endl;
<%# Print Body %>
    for (auto &element: array_sorted) {
        const thapi_function_name &function_name = element.first;
        const auto [time, time_ratio, count, avg, min, max, error ] = element.second.to_string(total_time);
<% l_menbers.each do |v | %>
<% if v == "error" %>
        if (print_error)
<% end %>
        std::cout << std::setw(len_max_<%= v %>) << std::right << <%= v %> << " | ";
<% end %>
        std::cout << std::endl;
    }
<%# Print footer %>
    std::cout << std::setw(len_max_function_name) << std::right << "Total" << " | "
              << std::setw(len_max_time) << std::right << <%= main_function %>(total_time) << " | "
              << std::setw(len_max_time_ratio) << std::right << "100.00%" << " | "
              << std::setw(len_max_count) << std::right << to_string_with_precision(total_count,"",0) << " | ";
    if (print_error) {
        // Blank average/min/max cells, then the total number of errors
        std::cout
<% l_menbers.zip(l_headers).drop(4).each do |v,n| %>
<% if v == "error" %>
            << std::setw(len_max_error) << std::right << to_string_with_precision(total_error,"",0) << " | "
<% else %>
            << std::setw(len_max_<%= v %>) << "" << " "
<% end %>
<% end %>
            ;
    }
    std::cout << std::endl;
}
<% end %>
<%#
_
|_) ._ o ._ _|_ /\ ._ ._ _.
| | | | | |_ /--\ | | (_| \/
/
%>
<% output = [ ['StatTime', 'std::unordered_map<hpt_function_name_t, StatTime>', 'host', 'API calls', '1', 'hpt_t',
[ ['hostname_t','Hostnames','Hostname'],
['process_id_t','Processes','Process'],
['thread_id_t','Threads','Thread'] ] ],
['StatTime', 'std::unordered_map<hpt_device_function_name_t, StatTime>', 'device','Device profiling', '0', 'hpt_dsd_t',
[ ['hostname_t','Hostnames','Hostname'],
['hp_t','Processes','Process'],
['hpt_t','Threads','Thread'] ,
['hp_device_t','Device pointers','Device'],
['hp_dsd_t','SubDevice pointers','SubDevice'] ] ],
['StatByte', 'std::unordered_map<hpt_function_name_t, StatByte>', 'traffic', 'Explicit memory traffic', '0', 'hpt_t',
[ ['hostname_t','Hostnames','Hostname'],
['process_id_t','Processes','Process'],
['thread_id_t','Threads','Thread'] ] ],
] %>
<% output.each do |main_type, tuple_typle, result,header, error, type, var| %>
void print_compact_<%= result %>( <%=tuple_typle %> <%=result%> ){
<% var.each do |t,n,_ | %>
std::set<<%= t %>> s_<%= n.gsub(' ','_') %>;
<% end %>
<%# Agreagate API Call Result and compute total time spend %>
std::unordered_map<thapi_function_name, <%= main_type %> > aggregated;
for (auto element: <%= result %>)
{
auto [ <%= var.map{ |t,n| n.gsub(' ','_')}.join(',') %>,function_name ] = element.first;
const <%= main_type %> time = element.second;
<% if type == "hpt_dsd_t" %>
s_Hostnames.insert(hostname_t{Hostnames} ) ;
s_Processes.insert(hp_t{Hostnames,Processes} ) ;
s_Threads.insert(hpt_t{Hostnames,Processes,Threads} ) ;
s_Device_pointers.insert(hp_device_t{Hostnames,Processes,Device_pointers} ) ;
<%# This is apporixmation. Maybe two sub device alias to the same pointer. We cannot post-process that. But good enought to give a name %>
s_SubDevice_pointers.insert(hp_dsd_t{Hostnames,Processes,Device_pointers,SubDevice_pointers} ) ;
<% else %>
<% var.each do | _, n,_ | %>
s_<%= n.gsub(' ','_') %>.insert(<%= n.gsub(' ','_') %>) ;
<% end %>
<% end %>
aggregated[function_name].merge(time);
}
<% if type == "hpt_dsd_t" %>
for (auto element: s_SubDevice_pointers) {
auto [ _a, _b, d1, d2 ] = element;
(void)_a; (void)_b; <%# Use un-used variable %>
if ( d1 == d2 ) {
s_SubDevice_pointers.erase(element);
}
}
<% end %>
std::ostringstream oss;
oss << "<%= header %>";
<% var.each do | _, n,_ | %>
if (s_<%= n.gsub(' ','_') %>.size() != 0)
oss << " | " << s_<%= n.gsub(' ','_') %>.size() << " <%= n %>";
<% end %>
oss << std::endl;
print_array(aggregated,oss.str(), <%= error %>);
std::cout << std::endl;
};
<% if result == 'device_id_result' %>
void print_extented_<%= result %>( <%=tuple_typle %> <%=result%>, std::unordered_map<hp_device_t, std::string> device_to_name ){
<% else %>
void print_extented_<%= result %>( <%=tuple_typle %> <%=result%> ){
<% end %>
<% args = var.map{ |t,_,n| n}.join(',') %>
std::unordered_map< <%= type %>, std::unordered_map<thapi_function_name, <%= main_type %> >> d;
for (auto [s, time]: <%= result %> ) {
auto [<%= args %>, function_name] = s;
d[ <%= type %>( <%= args %> )][function_name].merge(time);
}
std::vector<std::pair< <%= type %> , std::unordered_map<thapi_function_name, <%= main_type %> >>> array_sorted(d.begin(), d.end());
std::sort(array_sorted.begin(), array_sorted.end(), [](auto a, auto b) { return a.first > b.first ; } );
for (auto[s, aggregated]: array_sorted) {
auto [ <%= args %> ] = s;
std::ostringstream oss;
<% if type == "hpt_dsd_t" %>
if (Device != SubDevice) {