Commit 2da3a81e authored May 06, 2020 by Jakob Luettgau
Enable function chaining for filter and reduce. Remove obsolete.
Parent: 88bd7814
Changes: 20
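The headline change: filter() and reduce() now return a new DarshanReport rather than mutating the caller, so calls can be chained. A minimal sketch of the intended pattern, assuming a report is constructed from a log path and that enable_experimental() binds the aggregator functions as methods (the log filename is hypothetical):

    import darshan

    darshan.enable_experimental()

    report = darshan.DarshanReport("example.darshan")  # hypothetical log file

    # each call returns a fresh report, so steps compose left to right
    summary = report.filter(mods=["POSIX"]).reduce(operation="sum")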
darshan-util/pydarshan/Makefile
.PHONY: clean clean-test clean-pyc clean-build docs help
.DEFAULT_GOAL := help

help:
	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)

-clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
+clean: clean-build clean-pyc clean-test # remove all build, test, coverage and Python artifacts

-clean-build: ## remove build artifacts
+clean-build: # remove build artifacts
	rm -rf build/
	rm -rf dist/
	rm -rf .eggs/
...
@@ -14,34 +11,36 @@ clean-build: ## remove build artifacts
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +

-clean-pyc: ## remove Python file artifacts
+clean-pyc: # remove Python file artifacts
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +

-clean-test: ## remove test and coverage artifacts
+clean-test: # remove test and coverage artifacts
	rm -rf .tox/
	rm -f .coverage
	rm -rf htmlcov/
	rm -rf .pytest_cache

-lint: ## check style with flake8
+lint: # check style with flake8
	flake8 darshan tests

-test: ## run tests quickly with the default Python
+test: # run tests quickly with the default Python
	py.test --verbose

-test-all: ## run tests on every Python version with tox
+test-all: # run tests on every Python version with tox
	tox

-coverage: ## check code coverage quickly with the default Python
+coverage: # check code coverage quickly with the default Python
	coverage run --source darshan -m pytest
	coverage report -m
	coverage html
-	$(BROWSER) htmlcov/index.html
+	xdg-open htmlcov/index.html

-docs: ## generate Sphinx HTML documentation, including API docs
+docs: # generate Sphinx HTML documentation, including API docs
	rm -f docs/darshan.rst
	rm -f docs/darshan.backend.rst
	rm -f docs/darshan.plots.rst
...
@@ -53,16 +52,19 @@ docs: ## generate Sphinx HTML documentation, including API docs
docs-show: docs
	xdg-open docs/_build/html/index.html

-servedocs: docs ## compile the docs watching for changes
+servedocs: docs # compile the docs watching for changes
	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .

-release: dist ## package and upload a release
-	twine upload dist/*
+release: dist # package and upload a release
+	twine upload --repository testpypi dist/*
+	#twine upload dist/*

-dist: clean ## builds source and wheel package
+dist: clean # builds source and wheel package
	python setup.py sdist
	python setup.py bdist_wheel
	ls -l dist

-install: clean ## install the package to the active Python's site-packages
+install: clean # install the package to the active Python's site-packages
	python setup.py install
darshan-util/pydarshan/darshan/__init__.py
...
@@ -5,12 +5,18 @@
__version__ = '0.1.0'

options = {}

#from darshan.backend.cffi_backend import *
from darshan.report import DarshanReport

-def enable_experimental(verbose=True):
+def enable_experimental(verbose=False):
    """
    Enable experimental features such as aggregation methods for reports.
...
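Note the flipped default: enable_experimental(verbose=True) becomes verbose=False, so loading the experimental aggregators is now quiet unless verbosity is requested explicitly. A sketch:

    import darshan

    darshan.enable_experimental()              # quiet by default after this change
    darshan.enable_experimental(verbose=True)  # opt back in to per-feature messages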
darshan-util/pydarshan/darshan/experimental/aggregators/agg_ioops.py
...
@@ -103,7 +103,7 @@ def agg_ioops(self, mode='append'):
    # cleanup and prepare for json serialization?
-    tmp = json.dumps(ctx, cls=NumpyEncoder)
+    tmp = json.dumps(ctx, cls=DarshanReportJSONEncoder)
    ctx = json.loads(tmp)
...
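The aggregators switch from NumpyEncoder to DarshanReportJSONEncoder for the dumps/loads round trip that strips numpy types from the counters. The encoder itself is not shown in this diff; a minimal sketch of what such an encoder plausibly does:

    import json
    import numpy as np

    class DarshanReportJSONEncoder(json.JSONEncoder):
        # sketch only: convert numpy containers/scalars to plain Python
        def default(self, obj):
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            if isinstance(obj, (np.integer, np.floating)):
                return obj.item()
            return super().default(obj)

    print(json.dumps({"POSIX_READS": np.int64(42)}, cls=DarshanReportJSONEncoder))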
darshan-util/pydarshan/darshan/experimental/aggregators/create_time_summary.py
...
@@ -43,12 +43,8 @@ def create_time_summary(self, mode="append"):
    # overwrite existing summary entry
    if mode == "append":
        self.summary['time_summary'] = ctx

    return ctx
darshan-util/pydarshan/darshan/experimental/aggregators/filter.py
from darshan.report import *

import sys
import copy
import re


-def filter(self, mods=None, name_records=None, data_format='numpy', mode='append'):
+def filter(self, mods=None, name_records=None, pattern=None, regex=None):
    """
    Return filtered list of records.
...
@@ -15,11 +17,12 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
    None
    """

+    r = copy.deepcopy(self)

    # convienience
-    recs = self.records
+    recs = r.records
    ctx = {}
...
@@ -36,38 +39,51 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
        name_records = None

-    # change inputs to whitelists
+    # whitelist all mods
    if mods == None:
-        mods = self.records.keys()
+        mods = r.records.keys()

-    if name_records == None:
-        name_records = list(self.name_records.keys())
+    if pattern != None:
+        pattern = pattern.replace("*", "(.*?)")
+    elif regex:
+        pattern = regex

+    # whitelist name_records
+    if name_records == None and pattern == None and regex == None:
+        # allow all name records if no critirium provided
+        name_records = list(r.name_records.keys())
    else:
        resolve_table = {}
-        for key, value in self.name_records.items():
+        ids = []
+        for key, value in r.name_records.items():
            resolve_table[key] = key
            resolve_table[value] = key

-        ids = []
-        for nrec in name_records:
-            if nrec in resolve_table:
-                ids.append(resolve_table[nrec])
-
-        # TODO: decide if overwriting kargs is really a good idea.. currently considering it a sanitation step
-        name_records = ids
+            # whitelist names that match pattern
+            if pattern != None or regex != None:
+                if re.match(pattern, value):
+                    print("YES", pattern, value)
+                    ids.append(key)
+                else:
+                    print("NO", pattern, value)
+
+        # convert filenames/name_records mix into list of ids only
+        if name_records != None:
+            for nrec in name_records:
+                if nrec in resolve_table:
+                    ids.append(resolve_table[nrec])
+
+        print(mods)
+        print(name_records)
+        # TODO: decide if overwriting kargs is really a good idea.. currently considering it a sanitation step
+        name_records = ids

    if name_records != None:
        # aggragate
-        for mod, recs in self.records.items():
+        for mod, recs in r.records.items():
            if mod not in mods:
                continue
...
@@ -82,13 +98,7 @@ def filter(self, mods=None, name_records=None, data_format='numpy', mode='append
                    ctx[mod].append(rec)

+    r.records = ctx

-    if mode == 'append':
-        name = 'filter'
-        if name not in self.summary:
-            self.summary[name] = {}
-        self.data[name] = ctx
-        return ctx
+    return r
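The rewritten filter() deep-copies the report, whitelists modules and name records (now also via shell-style pattern or raw regex), and returns the copy, leaving the original untouched. A usage sketch (log filename hypothetical):

    report = darshan.DarshanReport("example.darshan")   # hypothetical

    posix_only = report.filter(mods=["POSIX"])          # keep one module
    hdf5_files = report.filter(pattern="*.h5")          # glob, rewritten to a "(.*?)" regex
    scratch    = report.filter(regex=r".*/scratch/.*")  # raw regex
    # `report` itself is unchanged; each call returns a new DarshanReport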
darshan-util/pydarshan/darshan/experimental/aggregators/merge.py
(new file, mode 100644)
from darshan.report import *

import sys
import copy


def merge(self, other, reduce_first=False):
    """
    Merge two darshan reports and return a new combined report.

    Args:
        mods: Name(s) of modules to preserve (reduced)
        name_records: Id(s)/Name(s) of name_records to preserve (reduced)

    Return:
        None
    """

    # new report
    nr = DarshanReport()

    # keep provenance?
    if self.provenance_enabled or other.provenance_enabled:
        # Currently, assume logs remain in memomry to create prov. tree on demand
        # Alternative: maintain a tree with simpler refs? (modified reports would not work then)
        #nr.provenance_reports[self.filename] = copy.copy(self)
        #nr.provenance_reports[other.filename] = copy.copy(other)
        nr.provenance_reports[self.filename] = None
        nr.provenance_reports[other.filename] = None

        nr.provenance_log.append(("add", self, other, datetime.datetime.now()))

    # update metadata helper
    def update_metadata(report, force=False):
        if force:
            nr.start_time = report.start_time
            nr.end_time = report.end_time
            return

        if report.start_time < nr.start_time:
            nr.start_time = report.start_time

        if report.end_time > nr.end_time:
            nr.end_time = report.end_time

    update_metadata(self, force=True)
    update_metadata(other)

    # copy over records (references, under assumption single records are not altered)
    for report in [self, other]:
        for key, records in report.data['records'].items():
            #print(report, key)
            if key not in nr.records:
                nr.records[key] = copy.copy(records)
            else:
                nr.records[key] += copy.copy(records)

        for key, mod in report.modules.items():
            if key not in nr.modules:
                nr.modules[key] = copy.copy(mod)
                # TODO: invalidate len/counters

        for key, counter in report.counters.items():
            if key not in nr.counters:
                nr.counters[key] = copy.copy(counter)
                # TODO: invalidate len/counters

        for key, nrec in report.name_records.items():
            if key not in nr.counters:
                nr.name_records[key] = copy.copy(nrec)
                # TODO: verify colliding name_records?

    return nr
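merge() is new in this commit: it combines two reports into a fresh DarshanReport, widening start/end times and concatenating per-module records. A usage sketch (log names hypothetical; merge is presumably bound to DarshanReport via enable_experimental()):

    import darshan

    darshan.enable_experimental()

    r1 = darshan.DarshanReport("app_run1.darshan")  # hypothetical
    r2 = darshan.DarshanReport("app_run2.darshan")  # hypothetical

    combined = r1.merge(r2)
    print(combined.start_time, combined.end_time)   # spans both runs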
darshan-util/pydarshan/darshan/experimental/aggregators/mod_agg_iohist.py
...
@@ -48,7 +48,7 @@ def mod_agg_iohist(self, mod, mode='append'):
            name = "%s_%s_%s" % (typ, name[-2], name[-1])
        return name

-    tmp = json.dumps(ctx[mod], cls=NumpyEncoder)
+    tmp = json.dumps(ctx[mod], cls=DarshanReportJSONEncoder)
    tmp = json.loads(tmp)
    cn = backend.counter_names(mod)
    c = dict(zip(cn, tmp))
...
darshan-util/pydarshan/darshan/experimental/aggregators/print_module_records.py
(new file, mode 100644)
from darshan.report import *


def print_module_records(self, mode='append'):
    """
    Compile the I/O operations summary for the current report.

    Args:
        mode (str): Whether to 'append' (default) or to 'return' aggregation.

    Return:
        None or dict: Depending on mode
    """

    descriptions = {
        'POSIX': """
# *******************************************************
# POSIX module data
# *******************************************************
# description of POSIX counters:
# POSIX_*: posix operation counts.
# READS,WRITES,OPENS,SEEKS,STATS,MMAPS,SYNCS,FILENOS,DUPS are types of operations.
# POSIX_RENAME_SOURCES/TARGETS: total count file was source or target of a rename operation
# POSIX_RENAMED_FROM: Darshan record ID of the first rename source, if file was a rename target
# POSIX_MODE: mode that file was opened in.
# POSIX_BYTES_*: total bytes read and written.
# POSIX_MAX_BYTE_*: highest offset byte read and written.
# POSIX_CONSEC_*: number of exactly adjacent reads and writes.
# POSIX_SEQ_*: number of reads and writes from increasing offsets.
# POSIX_RW_SWITCHES: number of times access alternated between read and write.
# POSIX_*_ALIGNMENT: memory and file alignment.
# POSIX_*_NOT_ALIGNED: number of reads and writes that were not aligned.
# POSIX_MAX_*_TIME_SIZE: size of the slowest read and write operations.
# POSIX_SIZE_*_*: histogram of read and write access sizes.
# POSIX_STRIDE*_STRIDE: the four most common strides detected.
# POSIX_STRIDE*_COUNT: count of the four most common strides.
# POSIX_ACCESS*_ACCESS: the four most common access sizes.
# POSIX_ACCESS*_COUNT: count of the four most common access sizes.
# POSIX_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# POSIX_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).
# POSIX_F_*_START_TIMESTAMP: timestamp of first open/read/write/close.
# POSIX_F_*_END_TIMESTAMP: timestamp of last open/read/write/close.
# POSIX_F_READ/WRITE/META_TIME: cumulative time spent in read, write, or metadata operations.
# POSIX_F_MAX_*_TIME: duration of the slowest read and write operations.
# POSIX_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# POSIX_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: POSIX module log format version <=3 has the following limitations:
# - No support for the following counters to properly instrument dup, fileno, and rename operations:
# - POSIX_FILENOS
# - POSIX_DUPS
# - POSIX_RENAME_SOURCES
# - POSIX_RENAME_TARGETS
# - POSIX_RENAMED_FROM
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
"""
,
"MPI-IO"
:
"""
# *******************************************************
# MPI-IO module data
# *******************************************************
# description of MPIIO counters:
# MPIIO_INDEP_*: MPI independent operation counts.
# MPIIO_COLL_*: MPI collective operation counts.
# MPIIO_SPLIT_*: MPI split collective operation counts.
# MPIIO_NB_*: MPI non blocking operation counts.
# READS,WRITES,and OPENS are types of operations.
# MPIIO_SYNCS: MPI file sync operation counts.
# MPIIO_HINTS: number of times MPI hints were used.
# MPIIO_VIEWS: number of times MPI file views were used.
# MPIIO_MODE: MPI-IO access mode that file was opened with.
# MPIIO_BYTES_*: total bytes read and written at MPI-IO layer.
# MPIIO_RW_SWITCHES: number of times access alternated between read and write.
# MPIIO_MAX_*_TIME_SIZE: size of the slowest read and write operations.
# MPIIO_SIZE_*_AGG_*: histogram of MPI datatype total sizes for read and write operations.
# MPIIO_ACCESS*_ACCESS: the four most common total access sizes.
# MPIIO_ACCESS*_COUNT: count of the four most common total access sizes.
# MPIIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# MPIIO_*_RANK_BYTES: total bytes transferred at MPI-IO layer by the fastest and slowest ranks (for shared files).
# MPIIO_F_*_START_TIMESTAMP: timestamp of first MPI-IO open/read/write/close.
# MPIIO_F_*_END_TIMESTAMP: timestamp of last MPI-IO open/read/write/close.
# MPIIO_F_READ/WRITE/META_TIME: cumulative time spent in MPI-IO read, write, or metadata operations.
# MPIIO_F_MAX_*_TIME: duration of the slowest MPI-IO read and write operations.
# MPIIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# MPIIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: MPIIO module log format version <=2 does not support the following counters:
# - MPIIO_F_CLOSE_START_TIMESTAMP
# - MPIIO_F_OPEN_END_TIMESTAMP
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
"""
,
"LUSTRE"
:
"""
# *******************************************************
# LUSTRE module data
# *******************************************************
# description of LUSTRE counters:
# LUSTRE_OSTS: number of OSTs across the entire file system.
# LUSTRE_MDTS: number of MDTs across the entire file system.
# LUSTRE_STRIPE_OFFSET: OST ID offset specified when the file was created.
# LUSTRE_STRIPE_SIZE: stripe size for file in bytes.
# LUSTRE_STRIPE_WIDTH: number of OSTs over which the file is striped.
# LUSTRE_OST_ID_*: indices of OSTs over which the file is striped.
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
"""
,
"STDIO"
:
"""
# *******************************************************
# STDIO module data
# *******************************************************
# description of STDIO counters:
# STDIO_{OPENS|FDOPENS|WRITES|READS|SEEKS|FLUSHES} are types of operations.
# STDIO_BYTES_*: total bytes read and written.
# STDIO_MAX_BYTE_*: highest offset byte read and written.
# STDIO_*_RANK: rank of the processes that were the fastest and slowest at I/O (for shared files).
# STDIO_*_RANK_BYTES: bytes transferred by the fastest and slowest ranks (for shared files).
# STDIO_F_*_START_TIMESTAMP: timestamp of the first call to that type of function.
# STDIO_F_*_END_TIMESTAMP: timestamp of the completion of the last call to that type of function.
# STDIO_F_*_TIME: cumulative time spent in different types of functions.
# STDIO_F_*_RANK_TIME: fastest and slowest I/O time for a single rank (for shared files).
# STDIO_F_VARIANCE_RANK_*: variance of total I/O time and bytes moved for all ranks (for shared files).
# WARNING: STDIO module log format version 1 has the following limitations:
# - No support for properly instrumenting fdopen operations (STDIO_FDOPENS)
#<module> <rank> <record id> <counter> <value> <file name> <mount pt> <fs type>
"""
,
}
pass
darshan-util/pydarshan/darshan/experimental/aggregators/records_as_dict.py
(new file, mode 100644)
from darshan.report import *


def records_as_dict(self, mode='append', recs=None):
    """
    Convert all counters to dictionaries with human-readable names.

    Args:
        mode (str): Whether to 'append' (default) or to 'return' aggregation.

    Return:
        None or dict: Depending on mode
    """

    # convienience
    if recs is None:
        recs = self.records

    ctx = {}

    # aggragate
    for mod in recs:
        if mod in ['DXT_POSIX']:
            continue

        # check records for module are present
        if mod not in ctx:
            ctx[mod] = []

        for rec in recs[mod]:
            c = None
            fc = None

            if 'counters' in rec:
                c = dict(zip(self.counters[mod]['counters'], rec['counters'].tolist()))

            if 'fcounters' in rec:
                fc = dict(zip(self.counters[mod]['fcounters'], rec['fcounters'].tolist()))

            if rec['id'] in self.name_records:
                nrec = self.name_records[rec['id']]
            else:
                nrec = None

            ctx[mod].append({'id': rec['id'], 'rank': rec['rank'], 'counters': c, 'fcounters': fc, 'name_record': nrec})

    return ctx
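records_as_dict() zips each record's raw counter arrays with the module's counter names, so downstream code can address counters by name. A sketch:

    # `report` as constructed above (hypothetical log)
    recs = report.records_as_dict()
    for rec in recs.get("POSIX", []):
        print(rec["rank"], rec["counters"]["POSIX_READS"])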
darshan-util/pydarshan/darshan/experimental/aggregators/reduce.py
...
@@ -17,8 +17,11 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
    """

+    r = copy.deepcopy(self)

    # convienience
-    recs = self.records
+    recs = r.records
    ctx = {}
...
@@ -43,15 +46,15 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
    # change inputs to whitelists
    if mods == None:
-        mods = self.records.keys()
+        mods = r.records.keys()

    if name_records == None:
-        name_records = list(self.name_records.keys())
+        name_records = list(r.name_records.keys())
    else:
        resolve_table = {}
-        for key, value in self.name_records.items():
+        for key, value in r.name_records.items():
            resolve_table[key] = key
            resolve_table[value] = key
...
@@ -72,7 +75,7 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
    if name_records != None:
        # aggragate
-        for mod, recs in self.records.items():
+        for mod, recs in r.records.items():
            if mod not in mods:
                continue
...
@@ -121,12 +124,7 @@ def reduce(self, operation="sum", mods=None, name_records=None, mode='append', d
                result[mod].append(rec)

+    r.records = result

-    if mode == 'append':
-        name = 'reduction'
-        if name not in self.summary:
-            self.summary[name] = {}
-        self.data[name] = ctx
-        return ctx
-
-    return result
+    return r
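reduce() gets the same treatment as filter(): it deep-copies the report, writes the reduced records into the copy, and returns it, so it can sit anywhere in a chain:

    reduced = report.reduce(operation="sum")  # standalone; `report` is untouched
    # or downstream of a filter, as in the commit title
    per_file = report.filter(pattern="*.dat").reduce(operation="sum")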
darshan-util/pydarshan/darshan/experimental/plots/cffi_matplotlib.py
(deleted; content shown at parent 88bd7814)
# -*- coding: utf-8 -*-

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import darshan.backend.cffi_backend as backend


def plot_access_histogram(log, filter=None, data=None):
    """
    Plots a histogram of access sizes for specified module.