report.py 31.9 KB
Newer Older
1
2
3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

4
5
6
7
8
9
"""
The darshan.repport module provides the DarshanReport class for convienient
interaction and aggregation of Darshan logs using Python.
"""


10
import darshan.backend.cffi_backend as backend
11

12
import json
13
import re
14
import copy
15
import datetime
16
import sys
17

18
19
20
import numpy as np
import pandas as pd

Jakob Luettgau's avatar
Jakob Luettgau committed
21
22
import collections.abc

23
import logging
24
logger = logging.getLogger(__name__)
25
26
27



28
class DarshanReportJSONEncoder(json.JSONEncoder):
    """
    JSON encoder aware of the extra value types a DarshanReport may hold
    (numpy arrays and datetime objects), which the default encoder can not
    serialize.
    """

    def default(self, obj):
        # Numpy arrays become plain (nested) python lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # Timestamps are rendered in ISO-8601 form.
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

        # Everything else: defer to the base class (raises TypeError).
        return json.JSONEncoder.default(self, obj)


Jakob Luettgau's avatar
Jakob Luettgau committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
class DarshanRecordCollection(collections.abc.MutableSequence):
    """
    Darshan log records may nest various properties (e.g., DXT, Lustre).
    As such they can not faithfully be represented using only a single
    Numpy array or a Pandas dataframe.

    The DarshanRecordCollection is used as a wrapper to offer
    users a stable API to DarshanReports and contained records
    in various popular formats while allowing to optimize
    memory and internal representations as necessary.
    """

    def __init__(self, mod=None, report=None):
        super(DarshanRecordCollection, self).__init__()
        self.mod = mod             # collections should be homogenous in module type
        self.report = report       # reference the report offering lookup for, e.g., counter names

        self.rank = None           # if all records in collection share rank, save memory
        self.id = None             # if all records in collection share id/nrec, save memory

        self.timebase = None       # allow fast time rebase without touching every record
        self.start_time = None
        self.end_time = None

        self._type = "collection"  # collection => list(), single => [record], nested => [[], ... ,[]]
        self._records = list()     # internal format before user conversion

    def __len__(self):
        return len(self._records)

    def __setitem__(self, key, val):
        self._records[key] = val

    def __getitem__(self, key):
        if self._type == "record":
            if isinstance(key, collections.abc.Hashable):
                #TODO: might extend this style access to collection/nested type as well
                #      but do not want to offer an access which might not be feasible to maintain
                return self._records[0][key]
            else:
                return self._records[0]

        # Wrap single record in RecordCollection to attach conversions: to_json, to_dict, to_df, ...
        # This way conversion logic can be shared.
        record = DarshanRecordCollection(mod=self.mod, report=self.report)

        if isinstance(key, slice):
            record._type = "collection"
            record._records = self._records[key]
        else:
            record._type = "record"
            record.append(self._records[key])
        return record

    def __delitem__(self, key):
        # BUG FIX: previously `del self._list[ii]` referenced two undefined
        # names, so any deletion raised NameError.
        del self._records[key]

    def insert(self, key, val):
        self._records.insert(key, val)

    def append(self, val):
        self.insert(len(self._records), val)

    def __repr__(self):
        # A single wrapped record represents itself through the record.
        if self._type == "record":
            return self._records[0].__repr__()

        return object.__repr__(self)

    def info(self, describe=False, plot=False):
        """
        Print information about the record for inspection.

        Args:
            describe (bool): show detailed summary and statistics (default: False)
            plot (bool): show plots for quick value overview for counters and fcounters (default: False)

        Return:
            None
        """
        mod = self.mod
        records = self._records

        print("Module:       ", mod, sep="")
        print("Records:      ", len(self), sep="")
        print("Coll. Type:   ", self._type, sep="")

        if mod in ['LUSTRE']:
            # no additional summary defined for Lustre records yet
            for i, rec in enumerate(records):
                pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            ids = set()
            ranks = set()
            hostnames = set()
            reads = 0
            writes = 0
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
                hostnames.add(rec['hostname'])
                reads += rec['read_count']
                writes += rec['write_count']
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")
            print("Hostnames:    ", str(hostnames), sep="")
            print("Read Events:  ", str(reads), sep="")
            print("Write Events: ", str(writes), sep="")

            if describe or plot:
                # logger.warn() is deprecated; use logger.warning()
                logger.warning("No plots/descriptions defined for DXT records info.")

        else:
            ids = set()
            ranks = set()
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")

            if describe or plot:
                df = self.to_df(attach=None)
                pd_max_rows = pd.get_option('display.max_rows')
                pd_max_columns = pd.get_option('display.max_columns')
                pd.set_option('display.max_rows', None)

                if plot:
                    figw = 7
                    lh = 0.3    # lineheight
                    # get number of counters for plot height adjustment
                    nc = self[0]['counters'].size
                    nfc = self[0]['fcounters'].size

                    # NOTE(review): display() is the IPython/Jupyter builtin;
                    # this path presumably only runs inside notebooks — confirm.
                    display(df['counters'].plot.box(vert=False, figsize=(figw, nc*lh)))
                    display(df['fcounters'].plot.box(vert=False, figsize=(figw, nfc*lh)))

                if describe:
                    display(df['counters'].describe().transpose())
                    display(df['fcounters'].describe().transpose())

                pd.set_option('display.max_rows', pd_max_rows)

    ###########################################################################
    # Export Conversions (following the pandas naming conventions)
    ###########################################################################
    def to_numpy(self):
        """Return a deep copy of the raw (numpy-backed) record list."""
        records = copy.deepcopy(self._records)
        return records

    def to_list(self):
        """
        Return records as a list of dicts with counters converted from numpy
        arrays to plain python lists. Not implemented for LUSTRE/DXT modules.
        """
        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            raise NotImplementedError
        else:
            for i, rec in enumerate(records):
                rec['counters'] = rec['counters'].tolist()
                rec['fcounters'] = rec['fcounters'].tolist()
        return records

    def to_dict(self):
        """
        Return records with counters/fcounters expanded into
        counter-name -> value dictionaries (names looked up via the report).
        """
        mod = self.mod
        records = copy.deepcopy(self._records)
        counters = self.report.counters[self.mod]
        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # format already in a dict format, but may offer switches for expansion
            logger.warning("WARNING: The output of DarshanRecordCollection.to_dict() may change in the future.")
        else:
            for i, rec in enumerate(records):
                rec['counters'] = dict(zip(counters['counters'], rec['counters']))
                rec['fcounters'] = dict(zip(counters['fcounters'], rec['fcounters']))
        return records

    def to_json(self):
        """Serialize the records to a JSON string (numpy/datetime aware)."""
        records = self.to_list()
        return json.dumps(records, cls=DarshanReportJSONEncoder)

    def to_df(self, attach="default"):
        """
        Convert the records to pandas objects.

        Args:
            attach: list of extra per-record columns ('id', 'rank') to attach,
                None for none, or "default" for both.

        Return:
            For generic modules a dict with 'counters'/'fcounters' DataFrames;
            for DXT modules the record list with segments as DataFrames.
        """
        if attach == "default":
            attach = ['id', 'rank']

        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            # no conversion defined yet; records pass through unchanged
            for i, rec in enumerate(records):
                rec = rec
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            for i, rec in enumerate(records):
                rec['read_segments'] = pd.DataFrame(rec['read_segments'])
                rec['write_segments'] = pd.DataFrame(rec['write_segments'])
        else:
            counters = []
            fcounters = []
            ids = []
            ranks = []

            for i, rec in enumerate(records):
                counters.append(rec['counters'])
                fcounters.append(rec['fcounters'])
                ids.append(rec['id'])
                ranks.append(rec['rank'])

            records = {"counters": None, "fcounters": None}
            records['counters'] = pd.DataFrame(counters, columns=self.report.counters[mod]['counters'])
            records['fcounters'] = pd.DataFrame(fcounters, columns=self.report.counters[mod]['fcounters'])

            def flip_column_order(df):
                return df[df.columns[::-1]]

            # attach ids and ranks (flipped so they end up as leading columns)
            if attach is not None:
                for counter_type in ['counters', 'fcounters']:
                    records[counter_type] = flip_column_order(records[counter_type])
                    if 'id' in attach:
                        records[counter_type]['id'] = ids
                    if 'rank' in attach:
                        records[counter_type]['rank'] = ranks
                    records[counter_type] = flip_column_order(records[counter_type])

        return records


284
class DarshanReport(object):
285
286
287
288
289
    """
    The DarshanReport class provides a convienient wrapper to access darshan
    logs, which also caches already fetched information. In addition to that
    a number of common aggregations can be performed.
    """
290

Jakob Luettgau's avatar
Jakob Luettgau committed
291
    # a way to conserve memory?
292
293
294
    #__slots__ = ['attr1', 'attr2']


295
    def __init__(self, 
            filename=None, dtype='numpy', 
            start_time=None, end_time=None,
            automatic_summary=False,
            read_all=True, lookup_name_records=True):
        """
        Args:
            filename (str): filename to open (optional)
            dtype (str): default dtype for internal structures
            automatic_summary (bool): automatically generate summary after loading
            read_all (bool): whether to read all records for log
            lookup_name_records (bool): lookup and update name_records as records are loaded

        Return:
            None

        """
        self.filename = filename

        # behavioral options
        self.dtype = dtype                              # default dtype to return when viewing records
        self.automatic_summary = automatic_summary
        self.lookup_name_records = lookup_name_records

        # state-dependent book-keeping
        self.converted_records = False  # true if convert_records() was called (unnumpyfy)

        # report metadata; start/end default to +/-inf until a log is read
        self.start_time = start_time if start_time else float('inf')
        self.end_time = end_time if end_time else float('-inf')
        self.timebase = self.start_time

        # initialize data namespaces
        self._metadata = {}
        self._modules = {}
        self._counters = {}
        self.records = {}
        self._mounts = {}
        self.name_records = {}

        # initialize report/summary namespace
        self.summary_revision = 0       # counter to check if summary needs update (see data_revision)
        self.summary = {}

        # legacy references (deprecate before 1.0?)
        self.data_revision = 0          # counter for consistency checks
        self.data = {
            'version': 1,
            'metadata': self._metadata,
            'records': self.records,
            'summary': self.summary,
            'modules': self._modules,
            'counters': self.counters,
            'name_records': self.name_records,
        }

        # when using report algebra this log allows to untangle potentially
        # unfair aggregations (e.g., double accounting)
        self.provenance_enabled = True
        self.provenance_graph = []
        self.provenance_reports = {}

        if filename:
            self.open(filename, read_all=read_all)


Jakob Luettgau's avatar
Jakob Luettgau committed
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
    @property
    def metadata(self):
        """Report metadata (e.g., 'job', 'exe') populated by read_metadata()."""
        return self._metadata

    @property
    def modules(self):
        """Module table of the open log; entries gain 'num_records' as records are read."""
        return self._modules

    @property
    def counters(self):
        """Counter/fcounter name lookup per module, populated as records are read."""
        return self._counters

#    @property
#    def counters(self):
#        return self._counters
#
#    @property
#    def name_records(self):
#        return self._name_records
#
#
#    @property
#    def summary(self):
#        return self._summary
#   
      

393
394
395
    def open(self, filename, read_all=False):
        """
        Open log file via CFFI backend.

        Args:
            filename (str): filename to open (optional)
            read_all (bool): whether to read all records for log

        Return:
            None

        """
        self.filename = filename

        if filename:
            self.log = backend.log_open(self.filename)

            # a null handle indicates the backend could not open the log
            if not bool(self.log['handle']):
                raise RuntimeError("Failed to open file.")

            self.read_metadata(read_all=read_all)

            if read_all:
                self.read_all()
417
418
419


    def __add__(self, other):
        """
        Allow reports to be merged using the addition operator.
        """
        return self.merge(other)
425

426

427
428
429
    def __deepcopy__(self, memo):
        """
        Creates a deepcopy of report.

        .. note::
            Needed to purge reference to self.log as Cdata can not be pickled:
            TypeError: can't pickle _cffi_backend.CData objects
        """
        # TODO: might consider treating self.log as list of open logs to not
        # deactivate load functions?
        cls = self.__class__
        clone = cls.__new__(cls)
        memo[id(self)] = clone

        skip = {"log"}  # members which must not be copied (CFFI handle)
        for name, value in self.__dict__.items():
            if name in skip:
                continue
            setattr(clone, name, copy.deepcopy(value, memo))

        return clone
449

450

451
    def read_metadata(self, read_all=False):
        """
        Read metadata such as the job, the executables and available modules.

        Args:
            None

        Return:
            None

        """
        job = backend.log_get_job(self.log)
        self.metadata['job'] = job
        self.metadata['exe'] = backend.log_get_exe(self.log)

        # derive the report time span from the job record
        self.start_time = datetime.datetime.fromtimestamp(job['start_time'])
        self.end_time = datetime.datetime.fromtimestamp(job['end_time'])

        self.data['mounts'] = backend.log_get_mounts(self.log)
        self.mounts = self.data['mounts']

        self.data['modules'] = backend.log_get_modules(self.log)
        self._modules = self.data['modules']

        if read_all == True:
            self.data["name_records"] = backend.log_get_name_records(self.log)
            self.name_records = self.data['name_records']
477
478


479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
    def update_name_records(self, mod=None):
        """
        Update (and prune unused) name records from resolve table.

        First reindexes all used name record identifiers and then queries 
        darshan-utils library to compile filtered list of name records.

        Args:
            None

        Return:
            None

        """
        # sanitize inputs: operate on all loaded modules unless one is given
        mods = self.records if mod is None else [mod]

        # collect every record id currently referenced by those modules
        ids = set()
        for mod in mods:
            logger.debug(f" Refreshing name_records for mod={mod}")
            for rec in self.records[mod]:
                ids.add(rec['id'])

        self.name_records.update(backend.log_lookup_name_records(self.log, ids))
511
512
        

513
    def read_all(self, dtype=None):
        """
        Read all available records from darshan log and return as dictionary.

        Args:
            None

        Return:
            None
        """
        # generic counter records, then DXT traces, then Lustre records
        self.read_all_generic_records(dtype=dtype)
        self.read_all_dxt_records(dtype=dtype)
        self.mod_read_all_lustre_records(dtype=dtype)
        return


531
    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
        """
        Read all generic records from darshan log and return as dictionary.

        Args:
            None

        Return:
            None
        """
        dtype = dtype if dtype else self.dtype

        # delegate per module; unsupported modules are skipped silently
        for mod in self.data['modules']:
            self.mod_read_all_records(mod, dtype=dtype, warnings=False)


550
    def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
        """
        Read all dxt records from darshan log and return as dictionary.

        Args:
            None

        Return:
            None
        """
        dtype = dtype if dtype else self.dtype

        # delegate per module; non-DXT modules are skipped silently
        for mod in self.data['modules']:
            self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=dtype)


569
    def mod_read_all_records(self, mod, dtype=None, warnings=True):
        """
        Reads all generic records for module

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'

        Return:
            None

        """
        unsupported = ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE']

        if mod in unsupported:
            if warnings:
                logger.warning(f" Skipping. Currently unsupported: {mod} in mod_read_all_records().")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}
            self.counters[mod]['counters'] = cn
            self.counters[mod]['fcounters'] = fcn

        # fetch records until the backend signals exhaustion with None
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None
            combined_fc = None

            for rec in self.records[mod]:
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])

                if combined_fc is None:
                    combined_fc = rec['fcounters']
                else:
                    combined_fc = pd.concat([combined_fc, rec['fcounters']])

            # collapse into a single pseudo-record (rank/id -1 marks aggregate)
            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                'fcounters': combined_fc
                }]

648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
    def mod_read_all_apmpi_records(self, mod, dtype=None, warnings=True):
        """
        Reads all APMPI records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported = ['APMPI']
        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_apmpi_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata (use self._modules consistently; it aliases
        # self.data['modules'])
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records: the header record first, then PERF records until None
        rec = backend.log_get_apmpi_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apmpi_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
    def mod_read_all_apxc_records(self, mod, dtype=None, warnings=True):
        """
        Reads all APXC records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported = ['APXC']
        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_apxc_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        # NOTE(review): cn is fetched but never stored in self.counters here,
        # unlike mod_read_all_records — presumably intentional; confirm.
        cn = backend.counter_names(mod)

        # update module metadata (use self._modules consistently; it aliases
        # self.data['modules'])
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records: the header record first, then PERF records until None
        rec = backend.log_get_apxc_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apxc_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

749
    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported = ['DXT_POSIX', 'DXT_MPIIO']

        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata (self._modules aliases self.data['modules'])
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records
        # BUG FIX: forward the reads/writes filters on the first fetch too;
        # previously only the in-loop fetches applied them, so the first
        # record ignored the caller's filter settings.
        rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()


804

805
    def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return


        supported =  ['LUSTRE']

        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return 


832
833
834
        # handling options
        dtype = dtype if dtype else self.dtype

835

Jakob Luettgau's avatar
Jakob Luettgau committed
836
        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
837
        cn = backend.counter_names(mod)
838

839
        # update module metadata
Jakob Luettgau's avatar
Jakob Luettgau committed
840
        self._modules[mod]['num_records'] = 0
841
842
        if mod not in self.counters:
            self.counters[mod] = {}
843
            self.counters[mod]['counters'] = cn 
844
845


846
        # fetch records
847
848
849
850
851
852
853
854
        rec = backend.log_get_record(self.log, mod, dtype=dtype)
        while rec != None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_record(self.log, mod, dtype=dtype)

855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None

            for rec in self.records[mod]:
                obj = rec['counters']
                #print(type(obj))
                #display(obj)
                
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])
                    

            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                }]

880
881
882
883
884
        pass




885
886
    def mod_records(self, mod, 
                    dtype='numpy', warnings=True):
        """
        Return generator for lazy record loading and traversal.

        .. warning::
            Can't be used for now when alternating between different modules.
            A temporary workaround can be to open the same log multiple times,
            as this way buffers are not shared between get_record invocations
            in the lower level library.

        Args:
            mod (str): Identifier of module to fetch records for
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        # cache counter name lookups for this module before yielding anything
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        if mod not in self.counters:
            self.counters[mod] = {}
        self.counters[mod]['counters'] = cn
        self.counters[mod]['fcounters'] = fcn

        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            yield rec

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
919
920


921
    def info(self, metadata=False):
        """
        Print information about the report for inspection.

        Args:
            metadata (bool): show detailed metadata (default: False)

        Return:
            None
        """
        print(f"Filename:       {self.filename}")

        # Duration derives from the log's recorded start/end timestamps.
        duration = self.end_time - self.start_time
        print(f"Times:          {self.start_time} to {self.end_time} (Duration {duration})")

        if 'exe' in self.metadata:
            print(f"Executeable:    {self.metadata['exe']}")

        if 'job' in self.metadata:
            job = self.metadata['job']
            print(f"Processes:      {job['nprocs']}")
            print(f"JobID:          {job['jobid']}")
            print(f"UID:            {job['uid']}")
            print(f"Modules in Log: {list(self._modules.keys())}")

        # Summarize how many records are currently loaded per module.
        loaded = {mod: len(recs) for mod, recs in self.records.items()}
        print(f"Loaded Records: {loaded}")

        print(f"Name Records:   {len(self.name_records)}")

        if 'job' in self.metadata:
            print(f"Darshan/Hints:  {self.metadata['job']['metadata']}")

        print(f"DarshanReport:  id({id(self)}) (tmp)")

        # Optionally dump the full metadata dictionary, expanding the
        # nested 'job' entry one level.
        if metadata:
            for key, val in self.metadata.items():
                if key == "job":
                    for subkey, subval in val.items():
                        print(f"metadata['{key}']['{subkey}'] = {subval}")
                else:
                    print(f"metadata['{key}'] = {val}")
987

988

989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
    ###########################################################################
    # Internal Organisation
    ###########################################################################
    def rebase_timestamps(records=None, inplace=False, timebase=False):
        """
        Updates all records in the report to use timebase (defaults: start_time).
        This might allow to conserve memory as reports are merged.

        Args:
            records (dict, list): records to rebase
            inplace (bool): whether to update the given records in place
                instead of returning a rebased copy
            timebase (datetime.datetime): new timebase to use

        Return:
            rebased_records (same type as provided to records)
        """
        # Honor inplace: mutate the caller's records directly, otherwise
        # work on (and return) a deep copy.
        # (Fixes former NameErrors: deepcopy of undefined `record`, and a
        # `rebase_records`/`rebased_records` name mismatch.)
        rebased_records = records if inplace else copy.deepcopy(records)

        # TODO: apply timestamp rebase
        # TODO: settle on format

        return rebased_records

    ###########################################################################
Jakob Luettgau's avatar
Jakob Luettgau committed
1013
    # Export Conversions
1014
    ###########################################################################
1015
    def to_dict(self):
        """
        Return dictionary representation of report data.

        Args:
            None

        Return:
            dict
        """
        # Deep-copy so conversion does not mutate the live report state.
        data = copy.deepcopy(self.data)

        # Record collections convert themselves via to_list(); record types
        # that don't support it get a placeholder (best-effort conversion).
        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                recs[mod] = "Not implemented."

        return data
1038
1039


1040
    def to_json(self):
        """
        Return JSON representation of report data as string.

        Args:
            None

        Return:
            JSON String
        """
        # Deep-copy so conversion does not mutate the live report state.
        data = copy.deepcopy(self.data)

        # Record collections convert themselves via to_list(); record types
        # that don't support it get a placeholder (best-effort conversion).
        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                recs[mod] = "Not implemented."

        # DarshanReportJSONEncoder handles numpy arrays and datetimes.
        return json.dumps(data, cls=DarshanReportJSONEncoder)
1060
1061
1062
1063




Jakob Luettgau's avatar
Jakob Luettgau committed
1064