Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
darshan
darshan
Commits
ac6e8ca0
Commit
ac6e8ca0
authored
Feb 24, 2020
by
Jakob Luettgau
Browse files
Add additional documentation, simplify documentation.
parent
6ba187a4
Changes
24
Expand all
Hide whitespace changes
Inline
Side-by-side
darshan-util/pydarshan/HISTORY.rst
deleted
100644 → 0
View file @
6ba187a4
=======
History
=======
0.1.0 (2019-07-30)
------------------
* Initial python package.
darshan-util/pydarshan/MANIFEST.in
View file @
ac6e8ca0
include HISTORY.rst
include README.rst
recursive-include tests *
...
...
darshan-util/pydarshan/README.rst
View file @
ac6e8ca0
...
...
@@ -3,11 +3,12 @@ pydarshan
=========
Python utilities to interact with darshan log records of HPC applications.
pydarshan requires
that you have darshan-util
pydarshan requires
darshan-utils (3.2.0+) to be installed.
Features
--------
* Darshan Report Object Wrapper
* CFFI bindings to access darshan log files
* Plots typically found in the darshan reports (matplotlib)
* Auto-discover darshan-util.so (via darshan-parser in $PATH)
...
...
@@ -17,41 +18,29 @@ Usage
-----
For examples and a jupyter notebook to get started with pydarshan make sure
to check out the `
example
s` subdirectory.
to check out the `
notebook
s` subdirectory.
A brief example showing some of the basic functionality follows::
import darshan
log = darshan.log_open("example.darshan")
# Access Job Information
darshan.log_get_job(log)
# Example Return:
# {'jobid': 4478544,
# 'uid': 69615,
# 'start_time': 1490000867,
# 'end_time': 1490000983,
# 'metadata': {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}}
# Access available modules
darshan.log_get_modules(log)
# Returns:
# {'POSIX': {'len': 186, 'ver': 3, 'idx': 1},
# 'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2},
# 'LUSTRE': {'len': 87, 'ver': 1, 'idx': 6},
# 'STDIO': {'len': 3234, 'ver': 1, 'idx': 7}}
# Access different record types as numpy arrays, with integer and float counters separated
# Example Return: {'counters': array([...], dtype=uint64), 'fcounters': array([...])}
posix_record = darshan.log_get_posix_record(log)
mpiio_record = darshan.log_get_mpiio_record(log)
stdio_record = darshan.log_get_stdio_record(log)
# ...
# open darshan log
report = darshan.DarshanReport('example.darshan')
# load some report data
report.mod_read_all_records('POSIX')
report.mod_read_all_records('MPI-IO')
# or fetch all
report.read_all_generic_records()
darshan.log_close(log)
# ...
# generate summaries for currently loaded data
report.summarize()
# ...
# generate a timeline from dxt records
report.read_all_dxt_records()
report.create_timeline()
Installation
...
...
darshan-util/pydarshan/darshan/__init__.py
View file @
ac6e8ca0
...
...
@@ -5,5 +5,8 @@
__version__
=
'0.1.0'
from
darshan.backend.cffi_backend
import
*
from
darshan.report
import
*
options
=
{}
#from darshan.backend.cffi_backend import *
from
darshan.report
import
DarshanReport
darshan-util/pydarshan/darshan/api_def_c.py
View file @
ac6e8ca0
# -*- coding: utf-8 -*-
"""
The api_def_c carries a copy of CFFI compatible headers for libdarshan-util.so.
"""
header
=
"""/* from darshan-logutils.h */
struct darshan_mnt_info
{
...
...
@@ -135,21 +140,21 @@ struct darshan_name_record
/* counter names */
char *apxc_counter_names[];
char *bgq_counter_names[];
char *bgq_f_counter_names[];
char *hdf5_counter_names[];
char *hdf5_f_counter_names[];
char *mpiio_counter_names[];
char *mpiio_f_counter_names[];
char *pnetcdf_counter_names[];
char *pnetcdf_f_counter_names[];
char *posix_counter_names[];
char *posix_f_counter_names[];
char *stdio_counter_names[];
char *stdio_f_counter_names[];
char *decaf_counter_names[];
char *decaf_f_counter_names[];
extern
char *apxc_counter_names[];
extern
char *bgq_counter_names[];
extern
char *bgq_f_counter_names[];
extern
char *hdf5_counter_names[];
extern
char *hdf5_f_counter_names[];
extern
char *mpiio_counter_names[];
extern
char *mpiio_f_counter_names[];
extern
char *pnetcdf_counter_names[];
extern
char *pnetcdf_f_counter_names[];
extern
char *posix_counter_names[];
extern
char *posix_f_counter_names[];
extern
char *stdio_counter_names[];
extern
char *stdio_f_counter_names[];
extern
char *decaf_counter_names[];
extern
char *decaf_f_counter_names[];
/* Supported Functions */
void* darshan_log_open(char *);
...
...
darshan-util/pydarshan/darshan/backend/cffi_backend.py
View file @
ac6e8ca0
...
...
@@ -14,19 +14,18 @@ from darshan.api_def_c import load_darshan_header
DARSHAN_PATH
=
discover_darshan
()
API_def_c
=
load_darshan_header
()
# print(API_def_c)
ffi
=
cffi
.
FFI
()
ffi
.
cdef
(
API_def_c
)
libdutil
=
ffi
.
dlopen
(
DARSHAN_PATH
+
"/lib/libdarshan-util.so"
)
#libdutil = ffi.dlopen(DARSHAN_PATH + "/lib/libdarshan-util.so")
libdutil
=
ffi
.
dlopen
(
"libdarshan-util.so"
)
#libdutil = ffi.dlopen("/home/pq/p/software/darshan-cffi/darshan-util/libdarshan-util.so")
#print(DARSHAN_PATH + "/lib/libdarshan-util.so")
#modules = {}
def
log_open
(
filename
):
...
...
darshan-util/pydarshan/darshan/common.py
View file @
ac6e8ca0
# -*- coding: utf-8 -*-
"""
The darshan.common module provides common functionality required by multiple modules.
"""
darshan-util/pydarshan/darshan/plots/matplotlib.py
View file @
ac6e8ca0
...
...
@@ -5,7 +5,8 @@ import matplotlib
import
matplotlib.pyplot
as
plt
import
numpy
as
np
import
darshan
import
darshan.backend.cffi_backend
as
backend
def
plot_access_histogram
(
log
,
filter
=
None
,
data
=
None
):
...
...
@@ -28,12 +29,12 @@ def plot_access_histogram(log, filter=None, data=None):
write_vals
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]
mods
=
darshan
.
log_get_modules
(
log
)
mods
=
backend
.
log_get_modules
(
log
)
if
str
(
filter
).
upper
()
==
"POSIX"
:
posix_record
=
darshan
.
log_get_posix_record
(
log
)
posix
=
dict
(
zip
(
darshan
.
counter_names
(
"POSIX"
),
posix_record
[
'counters'
]))
posix_record
=
backend
.
log_get_posix_record
(
log
)
posix
=
dict
(
zip
(
backend
.
counter_names
(
"POSIX"
),
posix_record
[
'counters'
]))
read_vals
=
[
posix
[
'POSIX_SIZE_READ_0_100'
],
...
...
@@ -62,8 +63,8 @@ def plot_access_histogram(log, filter=None, data=None):
]
elif
str
(
filter
).
upper
()
==
"MPIIO"
:
mpiio_record
=
darshan
.
log_get_mpiio_record
(
log
)
mpiio
=
dict
(
zip
(
darshan
.
counter_names
(
"mpiio"
),
mpiio_record
[
'counters'
]))
mpiio_record
=
backend
.
log_get_mpiio_record
(
log
)
mpiio
=
dict
(
zip
(
backend
.
counter_names
(
"mpiio"
),
mpiio_record
[
'counters'
]))
read_vals
=
[
mpiio
[
'MPIIO_SIZE_READ_AGG_0_100'
],
...
...
@@ -158,12 +159,12 @@ def plot_opcounts(log=None, filter=None, data=None):
stdio_vals
=
[
0
,
0
,
0
,
0
,
0
,
0
,
0
]
mods
=
darshan
.
log_get_modules
(
log
)
mods
=
backend
.
log_get_modules
(
log
)
# Gather POSIX
if
'POSIX'
in
mods
:
posix_record
=
darshan
.
log_get_posix_record
(
log
)
posix
=
dict
(
zip
(
darshan
.
counter_names
(
"POSIX"
),
posix_record
[
'counters'
]))
posix_record
=
backend
.
log_get_posix_record
(
log
)
posix
=
dict
(
zip
(
backend
.
counter_names
(
"POSIX"
),
posix_record
[
'counters'
]))
posix_vals
=
[
posix
[
'POSIX_READS'
],
...
...
@@ -177,8 +178,8 @@ def plot_opcounts(log=None, filter=None, data=None):
# Gather MPIIO
if
'MPI-IO'
in
mods
:
mpiio_record
=
darshan
.
log_get_mpiio_record
(
log
)
mpiio
=
dict
(
zip
(
darshan
.
counter_names
(
"mpiio"
),
mpiio_record
[
'counters'
]))
mpiio_record
=
backend
.
log_get_mpiio_record
(
log
)
mpiio
=
dict
(
zip
(
backend
.
counter_names
(
"mpiio"
),
mpiio_record
[
'counters'
]))
mpiind_vals
=
[
mpiio
[
'MPIIO_INDEP_READS'
],
...
...
@@ -202,8 +203,8 @@ def plot_opcounts(log=None, filter=None, data=None):
# Gather Stdio
# if 'STDIO' in mods:
# stdio_record =
darshan
.log_get_stdio_record(log)
# stdio = dict(zip(
darshan
.counter_names("STDIO"), stdio_record['counters']))
# stdio_record =
backend
.log_get_stdio_record(log)
# stdio = dict(zip(
backend
.counter_names("STDIO"), stdio_record['counters']))
#
# stdio_vals = [
# stdio['STDIO_READS'],
...
...
darshan-util/pydarshan/darshan/report.py
View file @
ac6e8ca0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
The darshan.report module provides the DarshanReport class for convenient
interaction and aggregation of Darshan logs using Python.
"""
import
darshan.backend.cffi_backend
as
backend
import
json
import
numpy
as
np
...
...
@@ -10,7 +16,13 @@ import datetime
class
NumpyEncoder
(
json
.
JSONEncoder
):
"""
Helper class for JSON serialization if the report contains numpy
log records, which are not handled by the default json encoder.
"""
def
default
(
self
,
obj
):
if
isinstance
(
obj
,
np
.
ndarray
):
return
obj
.
tolist
()
...
...
@@ -18,6 +30,8 @@ class NumpyEncoder(json.JSONEncoder):
class
DarshanReport
(
object
):
"""
The DarshanReport class provides a convenient wrapper to access darshan
...
...
@@ -38,7 +52,7 @@ class DarshanReport(object):
self
.
log
=
backend
.
log_open
(
self
.
filename
)
# state dependent book-keeping
self
.
converted_records
=
False
# true if convert_records() was called (unnumpyf
i
y)
self
.
converted_records
=
False
# true if convert_records() was called (unnumpyfy)
# when using report algebra this log allows to untangle potentially
# unfair aggregations (e.g., double accounting)
...
...
@@ -52,19 +66,22 @@ class DarshanReport(object):
def
__add__
(
self
,
other
):
new_report
=
self
.
copy
()
#new_report = copy.deepcopy(self)
new_report
.
provenance_log
.
append
((
"add"
,
self
,
other
))
return
new_report
def
read_all
(
self
):
self
.
read_all_generic_records
()
self
.
read_all_dxt_records
()
return
def
read_all_generic_records
(
self
):
"""
Read all available information from darshan log and return
d
as dictionary.
Read all available information from darshan log and return as dictionary.
Args:
None
...
...
@@ -79,6 +96,25 @@ class DarshanReport(object):
pass
def
read_all_dxt_records
(
self
):
"""
Read all available information from darshan log and return as dictionary.
Args:
None
Return:
None
"""
for
mod
in
self
.
report
[
'modules'
]:
self
.
mod_read_all_dxt_records
(
mod
)
pass
def
read_metadata
(
self
):
"""
Read metadata such as the job, the executables and available modules.
...
...
@@ -97,9 +133,10 @@ class DarshanReport(object):
def
mod_read_all_dxt_records
(
self
,
mod
,
mode
=
'numpy'
):
def
mod_read_all_records
(
self
,
mod
,
mode
=
'numpy'
):
"""
Reads all
dxt
records for
provided
module
.
Reads all
generic
records for module
Args:
mod (str): Identifier of module to fetch all records
...
...
@@ -109,27 +146,34 @@ class DarshanReport(object):
None
"""
unsupported
=
[
'DXT_POSIX'
,
'DXT_MPIIO'
,
'LUSTRE'
]
unsupported
.
append
(
'STDIO'
)
# TODO: reenable when segfault resolved
if
mod
not
in
self
.
report
[
'modules'
]:
print
(
"Skipping. Log does not contain data for mod:"
,
mod
)
return
supported
=
[
'DXT_POSIX'
,
'DXT_MPIIO'
]
if
mod
not
in
supported
:
if
mod
in
unsupported
:
print
(
"Skipping. Currently unsupported:"
,
mod
)
# skip mod
return
structdefs
=
{
"BG/Q"
:
"struct darshan_bgq_record **"
,
"HDF5"
:
"struct darshan_hdf5_file **"
,
"MPI-IO"
:
"struct darshan_mpiio_file **"
,
"PNETCDF"
:
"struct darshan_pnetcdf_file **"
,
"POSIX"
:
"struct darshan_posix_file **"
,
"STDIO"
:
"struct darshan_stdio_file **"
,
"DECAF"
:
"struct darshan_decaf_record **"
,
"DXT_POSIX"
:
"struct dxt_file_record **"
,
"DXT_MPIIO"
:
"struct dxt_file_record **"
,
}
self
.
report
[
'records'
][
mod
]
=
[]
cn
=
backend
.
counter_names
(
mod
)
fcn
=
backend
.
fcounter_names
(
mod
)
self
.
report
[
'modules'
][
mod
][
'counters'
]
=
cn
self
.
report
[
'modules'
][
mod
][
'fcounters'
]
=
fcn
self
.
report
[
'modules'
][
mod
][
'num_records'
]
=
0
...
...
@@ -137,7 +181,7 @@ class DarshanReport(object):
rec
=
backend
.
log_get_
dxt
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
rec
=
backend
.
log_get_
generic
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
while
rec
!=
None
:
# TODO: performance hog and hacky ;)
#recs = json.dumps(rec, cls=NumpyEncoder)
...
...
@@ -146,19 +190,15 @@ class DarshanReport(object):
if
mode
==
'numpy'
:
self
.
report
[
'records'
][
mod
].
append
(
rec
)
else
:
print
(
"Not implemented."
)
exit
(
1
)
#c = dict(zip(cn, rec['counters']))
#fc = dict(zip(fcn, rec['fcounters']))
#self.report['records'][mod].append([c, fc])
pass
c
=
dict
(
zip
(
cn
,
rec
[
'counters'
]))
fc
=
dict
(
zip
(
fcn
,
rec
[
'fcounters'
]))
self
.
report
[
'records'
][
mod
].
append
([
c
,
fc
])
self
.
report
[
'modules'
][
mod
][
'num_records'
]
+=
1
# fetch next
rec
=
backend
.
log_get_
dxt
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
rec
=
backend
.
log_get_
generic
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
pass
...
...
@@ -166,10 +206,9 @@ class DarshanReport(object):
def
mod_read_all_records
(
self
,
mod
,
mode
=
'numpy'
):
def
mod_read_all_dxt_records
(
self
,
mod
,
mode
=
'numpy'
):
"""
Reads all records for module
Reads all
dxt
records for
provided
module
.
Args:
mod (str): Identifier of module to fetch all records
...
...
@@ -179,33 +218,27 @@ class DarshanReport(object):
None
"""
unsupported
=
[
'DXT_POSIX'
,
'DXT_MPIIO'
,
'LUSTRE'
,
'STDIO'
]
if
mod
in
unsupported
:
if
mod
not
in
self
.
report
[
'modules'
]:
print
(
"Skipping. Log does not contain data for mod:"
,
mod
)
return
supported
=
[
'DXT_POSIX'
,
'DXT_MPIIO'
]
if
mod
not
in
supported
:
print
(
"Skipping. Currently unsupported:"
,
mod
)
# skip mod
return
structdefs
=
{
"BG/Q"
:
"struct darshan_bgq_record **"
,
"HDF5"
:
"struct darshan_hdf5_file **"
,
"MPI-IO"
:
"struct darshan_mpiio_file **"
,
"PNETCDF"
:
"struct darshan_pnetcdf_file **"
,
"POSIX"
:
"struct darshan_posix_file **"
,
"STDIO"
:
"struct darshan_stdio_file **"
,
"DECAF"
:
"struct darshan_decaf_record **"
,
"DXT_POSIX"
:
"struct dxt_file_record **"
,
"DXT_MPIIO"
:
"struct dxt_file_record **"
,
}
self
.
report
[
'records'
][
mod
]
=
[]
cn
=
backend
.
counter_names
(
mod
)
fcn
=
backend
.
fcounter_names
(
mod
)
self
.
report
[
'modules'
][
mod
][
'counters'
]
=
cn
self
.
report
[
'modules'
][
mod
][
'fcounters'
]
=
fcn
self
.
report
[
'modules'
][
mod
][
'num_records'
]
=
0
...
...
@@ -213,7 +246,7 @@ class DarshanReport(object):
rec
=
backend
.
log_get_
generic
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
rec
=
backend
.
log_get_
dxt
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
while
rec
!=
None
:
# TODO: performance hog and hacky ;)
#recs = json.dumps(rec, cls=NumpyEncoder)
...
...
@@ -222,20 +255,26 @@ class DarshanReport(object):
if
mode
==
'numpy'
:
self
.
report
[
'records'
][
mod
].
append
(
rec
)
else
:
c
=
dict
(
zip
(
cn
,
rec
[
'counters'
]))
fc
=
dict
(
zip
(
fcn
,
rec
[
'fcounters'
]))
self
.
report
[
'records'
][
mod
].
append
([
c
,
fc
])
print
(
"Not implemented."
)
exit
(
1
)
#c = dict(zip(cn, rec['counters']))
#fc = dict(zip(fcn, rec['fcounters']))
#self.report['records'][mod].append([c, fc])
pass
self
.
report
[
'modules'
][
mod
][
'num_records'
]
+=
1
# fetch next
rec
=
backend
.
log_get_
generic
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
rec
=
backend
.
log_get_
dxt
_record
(
self
.
log
,
mod
,
structdefs
[
mod
])
pass
def
mod_agg
(
self
,
mod
,
ranks
=
None
,
files
=
None
,
preserve_rank
=
False
,
preserve_file
=
False
):
"""
Aggregate counters for a given module name and return updated dictionary.
...
...
@@ -429,7 +468,7 @@ class DarshanReport(object):
ctx
[
mod
]
=
agg
ctx
[
mod
+
'_
final
'
]
=
tmp
ctx
[
mod
+
'_
simple
'
]
=
tmp
...
...
@@ -623,12 +662,6 @@ class DarshanReport(object):
pass
def
create_sankey
(
self
):
"""
Generate a summary that shows the dataflow between ranks, files and
...
...
darshan-util/pydarshan/docs/Makefile
View file @
ac6e8ca0
...
...
@@ -3,7 +3,7 @@
# You can set these variables from the command line.
SPHINXOPTS
=
SPHINXBUILD
=
python
-msphinx
SPHINXBUILD
=
python
-m
sphinx
SPHINXPROJ
=
pydarshan
SOURCEDIR
=
.
BUILDDIR
=
_build
...
...
darshan-util/pydarshan/docs/conf.py
View file @
ac6e8ca0
...
...
@@ -121,6 +121,20 @@ html_static_path = ['static']
htmlhelp_basename
=
'darshandoc'
# -- Options for manual page output ------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages
=
[
(
master_doc
,
'pydarshan'
,
u
'pydarshan Documentation'
,
[
author
],
1
)
]
# -- Options for LaTeX output ------------------------------------------
latex_elements
=
{
...
...
@@ -151,17 +165,6 @@ latex_documents = [
]
# -- Options for manual page output ------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages
=
[
(
master_doc
,
'pydarshan'
,
u
'pydarshan Documentation'
,
[
author
],
1
)
]
# -- Options for Texinfo output ----------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
...
...
@@ -177,12 +180,6 @@ texinfo_documents = [
]
intersphinx_mapping
=
{
'https://docs.python.org/'
:
None
,
'http://matplotlib.org'
:
None
,
...
...
darshan-util/pydarshan/docs/darshan.backend.rst
View file @
ac6e8ca0
...
...
@@ -8,23 +8,23 @@ darshan.backend.ascii\_backend module
-------------------------------------
.. automodule:: darshan.backend.ascii_backend
:members:
:undoc-members:
:show-inheritance:
:members:
:undoc-members:
:show-inheritance:
darshan.backend.cffi\_backend module
------------------------------------