Commit 9c4ed094 authored by Jakob Luettgau's avatar Jakob Luettgau
Browse files

Update notebooks and part of the documentation.

parent 94d3a016
......@@ -6,7 +6,7 @@ SPHINXOPTS =
SPHINXBUILD = python -m sphinx
SPHINXPROJ = pydarshan
SOURCEDIR = .
BUILDDIR = _build
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# pydarshan documentation build configuration file, created by
# Documentation build configuration file, created by
# sphinx-quickstart on Fri Jun 9 13:47:02 2017.
#
# This file is execfile()d with the current directory set to its
......@@ -63,7 +63,7 @@ source_suffix = '.rst'
master_doc = 'index'
# General information about the project.
project = u'darshan'
project = u'Darshan'
copyright = u"2019, ANL"
author = u""
......@@ -128,9 +128,7 @@ htmlhelp_basename = 'darshandoc'
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pydarshan',
u'pydarshan Documentation',
[author], 1)
(master_doc, 'pydarshan', u'pydarshan Documentation', [author], 1)
]
......@@ -159,9 +157,7 @@ latex_elements = {
# (source start file, target name, title, author, documentclass
# [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pydarshan.tex',
u'pydarshan Documentation',
u'Argonne National Laboratory', 'manual'),
(master_doc, 'pydarshan.tex', u'pydarshan Documentation', u'Argonne National Laboratory', 'manual'),
]
......@@ -171,12 +167,7 @@ latex_documents = [
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'pydarshan',
u'pydarshan Documentation',
author,
'pydarshan',
'One line description of project.',
'Miscellaneous'),
(master_doc, 'pydarshan', u'pydarshan Documentation', author, 'pydarshan', 'Python utilities for Darshan HPC I/O instrumentation.', 'Miscellaneous'),
]
......
......@@ -4,14 +4,6 @@ darshan.backend package
Submodules
----------
darshan.backend.ascii\_backend module
-------------------------------------
.. automodule:: darshan.backend.ascii_backend
:members:
:undoc-members:
:show-inheritance:
darshan.backend.cffi\_backend module
------------------------------------
......
......@@ -7,6 +7,7 @@ Subpackages
.. toctree::
darshan.backend
darshan.cli
darshan.experimental
Submodules
......
......@@ -8,7 +8,7 @@ This documentation is only for the Darshan Python bindings, for documentation for
:caption: Contents:
readme
installation
setup
usage
.. toctree::
......
.. highlight:: shell
============
Installation
============
Stable release
--------------
To install pydarshan, run this command in your terminal:
.. code-block:: console
$ pip install pydarshan
This is the preferred method to install pydarshan, as it will always install the most recent stable release.
If you don't have `pip`_ installed, this `Python installation guide`_ can guide
you through the process.
.. _pip: https://pip.pypa.io
.. _Python installation guide: https://www.mcs.anl.gov/research/projects/darshan/
Pydarshan assumes that a recent 'darshan-util' is installed as a shared
library. If darshan-util is not installed, consult the darshan
documentation or consider using `Spack`_ to install it::
spack install darshan-util
.. _Spack: https://spack.io/
From sources
------------
The sources for pydarshan can be downloaded from the `Github repo`_.
You can clone the public repository:
.. code-block:: console
$ git clone https://xgitlab.cels.anl.gov/darshan/darshan.git
$ cd darshan/darshan-util/pydarshan
.. code-block:: console
$ python setup.py install
.. _Github repo: https://xgitlab.cels.anl.gov/darshan/darshan
%% Cell type:markdown id: tags:
# DarshanUtils for Python
This notebook gives an overview of features provided by the Python bindings for DarshanUtils.
%% Cell type:markdown id: tags:
By default all records, metadata, available modules and the name records are loaded:
%% Cell type:code id: tags:
``` python
import darshan
report = darshan.DarshanReport("example.darshan", read_all=True) # Default behavior
report = darshan.DarshanReport("example-logs/example.darshan", read_all=True) # Default behavior
report.info()
```
%%%% Output: stream
Filename: example.darshan
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 4
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140525897881584) (tmp)
DarshanReport: id(139721554559216) (tmp)
%% Cell type:code id: tags:
``` python
report.modules
```
%%%% Output: execute_result
{'POSIX': {'len': 186, 'ver': 3, 'idx': 1, 'num_records': 1},
'MPI-IO': {'len': 154, 'ver': 2, 'idx': 2, 'num_records': 1},
'LUSTRE': {'len': 87, 'ver': 1, 'idx': 7},
'STDIO': {'len': 3234, 'ver': 1, 'idx': 8, 'num_records': 129}}
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
report.mod_records
```
%%%% Output: execute_result
<bound method DarshanReport.mod_records of <darshan.report.DarshanReport object at 0x7fcebc3997f0>>
<bound method DarshanReport.mod_records of <darshan.report.DarshanReport object at 0x7f1375a080f0>>
%% Cell type:markdown id: tags:
A few of the internal data structures explained:
%% Cell type:code id: tags:
``` python
# report.metadata # dictionary with raw metadata from darshan log
# report.modules # dictionary with raw module info from darshan log (need: technical, module idx)
# report.name_records # dictionary for resolving name records: id -> path/name
# report.records # per module "dataframes"/dictionaries holding loaded records
```
%% Cell type:markdown id: tags:
The darshan report holds a variety of namespaces for report related data. All of them are also referenced in `report.data` at the moment, but reliance on this internal organization of the report object is discouraged once the API has stabilized. Currently, `report.data` references the following information:
%% Cell type:code id: tags:
``` python
report.data.keys()
```
%%%% Output: execute_result
dict_keys(['version', 'metadata', 'records', 'summary', 'modules', 'counters', 'name_records', 'mounts'])
%% Cell type:code id: tags:
``` python
report.mod_read_all_records('POSIX')
```
%% Cell type:code id: tags:
``` python
report.mod_read_all_records('STDIO')
```
%% Cell type:code id: tags:
``` python
report.update_name_records()
report.info()
```
%%%% Output: stream
POSIX
MPI-IO
STDIO
Filename: example.darshan
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'POSIX': 1, 'MPI-IO': 1, 'STDIO': 129}
Name Records: 3
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(140525897881584) (tmp)
DarshanReport: id(139721554559216) (tmp)
%% Cell type:code id: tags:
``` python
from IPython.display import display, HTML
# use: display(obj)
```
%% Cell type:markdown id: tags:
### Selectively Loading Records
For memory efficient analysis, it is possible to suppress records from being loaded automatically. This is useful, for example, when analysis considers only records of a particular layer/module.
%% Cell type:code id: tags:
``` python
report = darshan.DarshanReport("example.darshan", read_all=False) # Loads no records!
report = darshan.DarshanReport("example-logs/example.darshan", read_all=False) # Loads no records!
```
%% Cell type:code id: tags:
``` python
# expected to fail, as no records were loaded
try:
print(len(report.records['STDIO']), "records loaded for STDIO.")
except:
print("No STDIO records loaded for this report yet.")
```
%%%% Output: stream
No STDIO records loaded for this report yet.
%% Cell type:markdown id: tags:
Additional records can then be loaded selectively, for example, on a per module basis:
%% Cell type:code id: tags:
``` python
report.mod_read_all_records("STDIO")
```
%% Cell type:code id: tags:
``` python
len(report.records['STDIO'])
```
%%%% Output: execute_result
129
%% Cell type:code id: tags:
``` python
report.info()
```
%%%% Output: stream
Filename: example.darshan
Filename: example-logs/example.darshan
Times: 2017-03-20 10:07:47 to 2017-03-20 10:09:43 (Duration 0:01:56)
Executeable: /global/project/projectdirs/m888/glock/tokio-abc-results/bin.edison/vpicio_uni /scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5 32
Processes: 2048
JobID: 4478544
UID: 69615
Modules in Log: ['POSIX', 'MPI-IO', 'LUSTRE', 'STDIO']
Loaded Records: {'STDIO': 129}
Name Records: 4
Name Records: 0
Darshan/Hints: {'lib_ver': '3.1.3', 'h': 'romio_no_indep_rw=true;cb_nodes=4'}
DarshanReport: id(139750934822192) (tmp)
DarshanReport: id(139721553801904) (tmp)
%% Cell type:code id: tags:
``` python
report.records['STDIO'][0]
```
%%%% Output: execute_result
{'id': 15920181672442173319,
'rank': 0,
'counters': array([ 1, 18446744073709551615, 0,
6, 0, 0,
280, 0, 0,
279, 0, 0,
0], dtype=uint64),
'fcounters': array([ 1., -1., 0., 6., 0., 0., 280., 0., 0., 279., 0.,
0., 0.])}
'counters': array([ 1, -1, 0, 6, 0, 0, 280, 0, 0, 279, 0, 0, 0,
0]),
'fcounters': array([0.00000000e+00, 6.79492950e-05, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 7.75279999e-02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.16283583e+02, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00])}
%% Cell type:markdown id: tags:
### Aggregation and Filtering (Experimental)
Darshan log data is routinely aggregated for a quick overview. The report object offers a few methods to perform common aggregations:
%% Cell type:markdown id: tags:
Report aggregations and summarization remain **experimental** for now, mostly to allow interfaces to stabilize. But experimental features can be switched on easily by invoking `darshan.enable_experimental()`:
%% Cell type:code id: tags:
``` python
import darshan
darshan.enable_experimental(verbose=True) # Enable verbosity, listing new functionality
```
%%%% Output: stream
Added method create_time_summary to DarshanReport.
Added method print_module_records to DarshanReport.
Added method summarize to DarshanReport.
Added method merge to DarshanReport.
Added method create_timeline to DarshanReport.
Added method records_as_dict to DarshanReport.
Added method reduce to DarshanReport.
Added method agg_ioops to DarshanReport.
Added method create_sankey to DarshanReport.
Added method filter to DarshanReport.
Added method mod_agg_iohist to DarshanReport.
Added method name_records_summary to DarshanReport.
%% Cell type:code id: tags:
``` python
# Example report, which counts records in log across modules
report.name_records_summary()
```
%%%% Output: execute_result
{6301063301082038805: {'name': '/scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5',
'counts': {'POSIX': 1, 'MPI-IO': 1}},
15920181672442173319: {'name': '<STDOUT>', 'counts': {'STDIO': 1}},
7238257241479193519: {'name': '<STDERR>', 'counts': {'STDIO': 128}}}
%% Cell type:markdown id: tags:
### Chain operations like filtering and reductions
The filter and reduce operations return DarshanReports themselves, which allows operations to be chained conveniently.
%% Cell type:code id: tags:
``` python
import pprint
import darshan
darshan.enable_experimental()
report = darshan.DarshanReport("example.darshan", read_all=True)
report = darshan.DarshanReport("example-logs/example.darshan", read_all=True)
report.name_records
```
%%%% Output: execute_result
{14734109647742566553: '<STDIN>',
15920181672442173319: '<STDOUT>',
7238257241479193519: '<STDERR>',
6301063301082038805: '/scratch2/scratchdirs/glock/tokioabc-s.4478544/vpicio/vpicio.hdf5'}
%% Cell type:code id: tags:
``` python
report.filter(name_records=[6301063301082038805, 15920181672442173319]).records
```
%%%% Output: execute_result
{'POSIX': [{'id': 6301063301082038805,
'rank': -1,
'counters': array([ 2049, 18446744073709551615, 18446744073709551615,
0, 16402, 16404,
0, 0, 0,
0, 18446744073709551615, 18446744073709551615,
0, 0, 0,
2199023259968, 0, 2199023261831,
0, 0, 0,
16384, 0, 0,
8, 16401, 1048576,
0, 134217728, 0,
0, 0, 0,
0, 0, 0,
0, 0, 0,
4, 14, 0,
0, 0, 0,
0, 0, 16384,
0, 274743689216, 274743691264,
0, 0, 10240,
4096, 0, 0,
134217728, 272, 544,
328, 16384, 8,
2], dtype=uint64),
'fcounters': array([ 2.04900000e+03, -1.00000000e+00, -1.00000000e+00, 0.00000000e+00,
1.64020000e+04, 1.64040000e+04, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, -1.00000000e+00, -1.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.19902326e+12,
0.00000000e+00, 2.19902326e+12, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 0.00000000e+00,
8.00000000e+00, 1.64010000e+04, 1.04857600e+06, 0.00000000e+00,
1.34217728e+08, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.00000000e+00,
1.40000000e+01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.63840000e+04,
0.00000000e+00, 2.74743689e+11, 2.74743691e+11, 0.00000000e+00,
0.00000000e+00, 1.02400000e+04, 4.09600000e+03, 0.00000000e+00,
0.00000000e+00, 1.34217728e+08, 2.72000000e+02, 5.44000000e+02,
3.28000000e+02, 1.63840000e+04, 8.00000000e+00, 2.00000000e+00])}],
'MPI-IO': [{'id': 6301063301082038805,
'rank': -1,
'counters': array([ 0, 2048, 0, 18,
0, 16384, 0, 0,
0, 0, 0, 0,
32768, 9, 0, 2199023259968,
0, 0, 134217728, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 4, 14, 0,
0, 0, 0, 0,
0, 16384, 0, 134217728,
272, 544, 328, 16384,
8, 2, 2, 597,
1073741824, 1312, 1073741824], dtype=uint64),
'fcounters': array([0.00000000e+00, 2.04800000e+03, 0.00000000e+00, 1.80000000e+01,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
3.27680000e+04, 9.00000000e+00, 0.00000000e+00, 2.19902326e+12,
0.00000000e+00, 0.00000000e+00, 1.34217728e+08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 4.00000000e+00, 1.40000000e+01, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 1.34217728e+08,
2.72000000e+02, 5.44000000e+02, 3.28000000e+02, 1.63840000e+04,
8.00000000e+00, 2.00000000e+00, 2.00000000e+00, 5.97000000e+02,
1.07374182e+09, 1.31200000e+03, 1.07374182e+09])}],
'STDIO': [{'id': 15920181672442173319,
'rank': 0,
'counters': array([ 1, 18446744073709551615, 0,
6, 0, 0,
280, 0, 0,
279, 0, 0,
0], dtype=uint64),
'fcounters': array([ 1., -1., 0., 6., 0., 0., 280., 0., 0., 279., 0.,
0., 0.])}]}
%% Cell type:code id: tags:
``` python
# reduce all after filtering
report.filter(pattern="*.hdf5").reduce().records
```
%%%% Output: execute_result
{'POSIX': [{'id': '*',
'rank': -1,
'counters': array([ 2049, 18446744073709551615, 18446744073709551615,
0, 16402, 16404,
0, 0, 0,
0, 18446744073709551615, 18446744073709551615,
0, 0, 0,
2199023259968, 0, 2199023261831,
0, 0, 0,
16384, 0, 0,
8, 16401, 1048576,
0, 134217728, 0,
0, 0, 0,
0, 0, 0,
0, 0, 0,
4, 14, 0,
0, 0, 0,
0, 0, 16384,
0, 274743689216, 274743691264,
0, 0, 10240,
4096, 0, 0,
134217728, 272, 544,
328, 16384, 8,
2], dtype=uint64),
'fcounters': array([ 2.04900000e+03, -1.00000000e+00, -1.00000000e+00, 0.00000000e+00,
1.64020000e+04, 1.64040000e+04, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, -1.00000000e+00, -1.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.19902326e+12,
0.00000000e+00, 2.19902326e+12, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 0.00000000e+00,
8.00000000e+00, 1.64010000e+04, 1.04857600e+06, 0.00000000e+00,
1.34217728e+08, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.00000000e+00,
1.40000000e+01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.63840000e+04,
0.00000000e+00, 2.74743689e+11, 2.74743691e+11, 0.00000000e+00,
0.00000000e+00, 1.02400000e+04, 4.09600000e+03, 0.00000000e+00,
0.00000000e+00, 1.34217728e+08, 2.72000000e+02, 5.44000000e+02,
3.28000000e+02, 1.63840000e+04, 8.00000000e+00, 2.00000000e+00])}],
'MPI-IO': [{'id': '*',
'rank': -1,
'counters': array([ 0, 2048, 0, 18,
0, 16384, 0, 0,
0, 0, 0, 0,
32768, 9, 0, 2199023259968,
0, 0, 134217728, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 4, 14, 0,
0, 0, 0, 0,
0, 16384, 0, 134217728,
272, 544, 328, 16384,
8, 2, 2, 597,
1073741824, 1312, 1073741824], dtype=uint64),
'fcounters': array([0.00000000e+00, 2.04800000e+03, 0.00000000e+00, 1.80000000e+01,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
3.27680000e+04, 9.00000000e+00, 0.00000000e+00, 2.19902326e+12,
0.00000000e+00, 0.00000000e+00, 1.34217728e+08, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 4.00000000e+00, 1.40000000e+01, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 1.63840000e+04, 0.00000000e+00, 1.34217728e+08,
2.72000000e+02, 5.44000000e+02, 3.28000000e+02, 1.63840000e+04,
8.00000000e+00, 2.00000000e+00, 2.00000000e+00, 5.97000000e+02,
1.07374182e+09, 1.31200000e+03, 1.07374182e+09])}]}
%% Cell type:code id: tags:
``` python
# only preserve some
report.filter(name_records=[6301063301082038805]).reduce(mods=['POSIX', 'STDIO']).records
```
%%%% Output: execute_result