report.py 30.4 KB
Newer Older
1
2
3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

4
5
6
7
8
9
"""
The darshan.repport module provides the DarshanReport class for convienient
interaction and aggregation of Darshan logs using Python.
"""


10
import darshan.backend.cffi_backend as backend
11

12
import json
13
import re
14
import copy
15
import datetime
16
import sys
17

18
19
20
import numpy as np
import pandas as pd

Jakob Luettgau's avatar
Jakob Luettgau committed
21
22
import collections.abc

23
import logging
24
logger = logging.getLogger(__name__)
25
26
27



28
class DarshanReportJSONEncoder(json.JSONEncoder):
    """
    Helper class for JSON serialization if the report contains, for example,
    numpy or dates records, which are not handled by the default JSON encoder.
    """

    def default(self, obj):
        # numpy arrays serialize as plain Python lists
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        # datetimes serialize as ISO-8601 strings
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()

        # anything else: defer to the base encoder (raises TypeError)
        return json.JSONEncoder.default(self, obj)


Jakob Luettgau's avatar
Jakob Luettgau committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
class DarshanRecordCollection(collections.abc.MutableSequence):
    """
    Darshan log records may nest various properties (e.g., DXT, Lustre).
    As such they can not faithfully be represented using only a single
    Numpy array or a Pandas dataframe.

    The DarshanRecordCollection is used as a wrapper to offer
    users a stable API to DarshanReports and contained records
    in various popular formats while allowing to optimize
    memory and internal representations as necessary.
    """

    def __init__(self, mod=None, report=None):
        super(DarshanRecordCollection, self).__init__()
        self.mod = mod             # collections should be homogenous in module type
        self.report = report       # reference the report offering lookup for, e.g., counter names

        self.rank = None           # if all records in collection share rank, save memory
        self.id = None             # if all records in collection share id/nrec, save memory

        self.timebase = None       # allow fast time rebase without touching every record
        self.start_time = None
        self.end_time = None

        self._type = "collection"  # collection => list(), single => [record], nested => [[], ... ,[]]
        self._records = list()     # internal format before user conversion

    def __len__(self):
        return len(self._records)

    def __setitem__(self, key, val):
        self._records[key] = val

    def __getitem__(self, key):
        if self._type == "record":
            if isinstance(key, collections.abc.Hashable):
                # TODO: might extend this style access to collection/nested type as well
                #       but do not want to offer an access which might not be feasible to maintain
                return self._records[0][key]
            else:
                return self._records[0]

        # Wrap single record in RecordCollection to attach conversions: to_json, to_dict, to_df, ...
        # This way conversion logic can be shared.
        record = DarshanRecordCollection(mod=self.mod, report=self.report)

        if isinstance(key, slice):
            record._type = "collection"
            record._records = self._records[key]
        else:
            record._type = "record"
            record.append(self._records[key])
        return record

    def __delitem__(self, key):
        # BUGFIX: previously referenced the undefined names self._list and ii,
        # which raised NameError on any deletion.
        del self._records[key]

    def insert(self, key, val):
        self._records.insert(key, val)

    def append(self, val):
        self.insert(len(self._records), val)

    def __repr__(self):
        # a single wrapped record prints as the record itself
        if self._type == "record":
            return self._records[0].__repr__()

        return object.__repr__(self)

    def info(self, describe=False, plot=False):
        """
        Print information about the record for inspection.

        Args:
            describe (bool): show detailed summary and statistics (default: False)
            plot (bool): show plots for quick value overview for counters and fcounters (default: False)

        Return:
            None
        """
        mod = self.mod
        records = self._records

        print("Module:       ", mod, sep="")
        print("Records:      ", len(self), sep="")
        print("Coll. Type:   ", self._type, sep="")

        if mod in ['LUSTRE']:
            # no per-record summary defined for Lustre records yet
            for i, rec in enumerate(records):
                pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            ids = set()
            ranks = set()
            hostnames = set()
            reads = 0
            writes = 0
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
                hostnames.add(rec['hostname'])
                reads += rec['read_count']
                writes += rec['write_count']
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")
            print("Hostnames:    ", str(hostnames), sep="")
            print("Read Events:  ", str(reads), sep="")
            print("Write Events: ", str(writes), sep="")

            if describe or plot:
                # logger.warn is deprecated since Python 3.3; use warning
                logger.warning("No plots/descriptions defined for DXT records info.")

        else:
            ids = set()
            ranks = set()
            for i, rec in enumerate(records):
                ids.add(rec['id'])
                ranks.add(rec['rank'])
            print("Ranks:        ", str(ranks), sep="")
            print("Name Records: ", str(ids), sep="")

            if describe or plot:
                df = self.to_df(attach=None)
                pd_max_rows = pd.get_option('display.max_rows')
                pd_max_columns = pd.get_option('display.max_columns')
                pd.set_option('display.max_rows', None)

                if plot:
                    figw = 7
                    lh = 0.3    # lineheight
                    # get number of counters for plot height adjustment
                    nc = self[0]['counters'].size
                    nfc = self[0]['fcounters'].size

                    # NOTE(review): display() is an IPython builtin; this path
                    # presumably only runs inside notebooks — confirm.
                    display(df['counters'].plot.box(vert=False, figsize=(figw, nc*lh)))
                    display(df['fcounters'].plot.box(vert=False, figsize=(figw, nfc*lh)))

                if describe:
                    display(df['counters'].describe().transpose())
                    display(df['fcounters'].describe().transpose())

                pd.set_option('display.max_rows', pd_max_rows)

    ###########################################################################
    # Export Conversions (following the pandas naming conventions)
    ###########################################################################
    def to_numpy(self):
        """Return a deepcopy of the raw records (counters as numpy arrays)."""
        records = copy.deepcopy(self._records)
        return records

    def to_list(self):
        """Return a deepcopy of the records with counters as Python lists."""
        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            raise NotImplementedError
        else:
            for i, rec in enumerate(records):
                rec['counters'] = rec['counters'].tolist()
                rec['fcounters'] = rec['fcounters'].tolist()
        return records

    def to_dict(self):
        """Return a deepcopy of the records with counters as name->value dicts."""
        mod = self.mod
        records = copy.deepcopy(self._records)
        counters = self.report.counters[self.mod]
        if mod in ['LUSTRE']:
            raise NotImplementedError
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            # format already in a dict format, but may offer switches for expansion
            # logger.warn is deprecated since Python 3.3; use warning
            logger.warning("WARNING: The output of DarshanRecordCollection.to_dict() may change in the future.")
            pass
        else:
            for i, rec in enumerate(records):
                rec['counters'] = dict(zip(counters['counters'], rec['counters']))
                rec['fcounters'] = dict(zip(counters['fcounters'], rec['fcounters']))
        return records

    def to_json(self):
        """Return the records serialized as a JSON string."""
        records = self.to_list()
        return json.dumps(records, cls=DarshanReportJSONEncoder)

    def to_df(self, attach="default"):
        """
        Return records as pandas dataframes.

        Args:
            attach: list of record fields ('id', 'rank') to attach as columns,
                    "default" for both, or None for none.

        Return:
            dict with 'counters'/'fcounters' dataframes (generic modules), or
            the per-record list (LUSTRE, DXT modules).
        """
        if attach == "default":
            attach = ['id', 'rank']

        mod = self.mod
        records = copy.deepcopy(self._records)

        if mod in ['LUSTRE']:
            # records are returned unchanged for now
            pass
        elif mod in ['DXT_POSIX', 'DXT_MPIIO']:
            for i, rec in enumerate(records):
                rec['read_segments'] = pd.DataFrame(rec['read_segments'])
                rec['write_segments'] = pd.DataFrame(rec['write_segments'])
        else:
            counters = []
            fcounters = []
            ids = []
            ranks = []

            for i, rec in enumerate(records):
                counters.append(rec['counters'])
                fcounters.append(rec['fcounters'])
                ids.append(rec['id'])
                ranks.append(rec['rank'])

            records = {"counters": None, "fcounters": None}
            records['counters'] = pd.DataFrame(counters, columns=self.report.counters[mod]['counters'])
            records['fcounters'] = pd.DataFrame(fcounters, columns=self.report.counters[mod]['fcounters'])

            def flip_column_order(df):
                return df[df.columns[::-1]]

            # attach ids and ranks as the leading columns
            if attach is not None:
                for counter_type in ['counters', 'fcounters']:
                    records[counter_type] = flip_column_order(records[counter_type])
                    if 'id' in attach:
                        records[counter_type]['id'] = ids
                    if 'rank' in attach:
                        records[counter_type]['rank'] = ranks
                    records[counter_type] = flip_column_order(records[counter_type])

        return records


284
class DarshanReport(object):
285
286
287
288
289
    """
    The DarshanReport class provides a convienient wrapper to access darshan
    logs, which also caches already fetched information. In addition to that
    a number of common aggregations can be performed.
    """
290

Jakob Luettgau's avatar
Jakob Luettgau committed
291
    # a way to conserve memory?
292
293
294
    #__slots__ = ['attr1', 'attr2']


295
    def __init__(self,
            filename=None, dtype='numpy',
            start_time=None, end_time=None,
            automatic_summary=False,
            read_all=True, lookup_name_records=True):
        """
        Args:
            filename (str): filename to open (optional)
            dtype (str): default dtype for internal structures
            start_time: lower bound for the report time range (optional)
            end_time: upper bound for the report time range (optional)
            automatic_summary (bool): automatically generate summary after loading
            read_all (bool): whether to read all records for log
            lookup_name_records (bool): lookup and update name_records as records are loaded

        Return:
            None

        """
        self.filename = filename

        # behavioral options
        self.dtype = dtype                                  # default dtype to return when viewing records
        self.automatic_summary = automatic_summary
        self.lookup_name_records = lookup_name_records

        # state-dependent book-keeping
        self.converted_records = False  # true if convert_records() was called (unnumpyfy)

        # report metadata; start/end default to +/- infinity so any real
        # timestamp narrows the range, timebase anchors relative times
        self.start_time = start_time if start_time else float('inf')
        self.end_time = end_time if end_time else float('-inf')
        self.timebase = self.start_time

        # initialize data namespaces
        self._metadata = {}
        self._modules = {}
        self._counters = {}
        self.records = {}
        self._mounts = {}
        self.name_records = {}

        # initialize report/summary namespace
        self.summary_revision = 0       # counter to check if summary needs update (see data_revision)
        self.summary = {}

        # legacy references (deprecate before 1.0?)
        self.data_revision = 0          # counter for consistency checks
        self.data = {'version': 1}
        self.data['metadata'] = self._metadata
        self.data['records'] = self.records
        self.data['summary'] = self.summary
        self.data['modules'] = self._modules
        self.data['counters'] = self.counters
        self.data['name_records'] = self.name_records

        # when using report algebra this log allows to untangle potentially
        # unfair aggregations (e.g., double accounting)
        self.provenance_enabled = True
        self.provenance_graph = []
        self.provenance_reports = {}

        if filename:
            self.open(filename, read_all=read_all)


Jakob Luettgau's avatar
Jakob Luettgau committed
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
    @property
    def metadata(self):
        """Job/exe metadata read from the log (read-only view of _metadata)."""
        return self._metadata

    @property
    def modules(self):
        """Modules contained in the log (read-only view of _modules)."""
        return self._modules

    @property
    def counters(self):
        """Per-module counter name listings (read-only view of _counters)."""
        return self._counters
      

393
394
395
    def open(self, filename, read_all=False):
        """
        Open log file via CFFI backend.

        Args:
            filename (str): filename to open (optional)
            read_all (bool): whether to read all records for log

        Return:
            None

        """
        self.filename = filename

        if filename:
            self.log = backend.log_open(self.filename)

            # a NULL handle signals that the backend could not open the log
            if not bool(self.log['handle']):
                raise RuntimeError("Failed to open file.")

            self.read_metadata(read_all=read_all)

            if read_all:
                self.read_all()
417
418
419


    def __add__(self, other):
        """Allow reports to be merged using the addition operator."""
        return self.merge(other)
425

426

427
428
429
    def __deepcopy__(self, memo):
        """
        Creates a deepcopy of report.

        .. note::
            Needed to purge reference to self.log as Cdata can not be pickled:

            TypeError: can't pickle _cffi_backend.CData objects
        """
        # TODO: might consider treating self.log as list of open logs to not
        # deactivate load functions?
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k in ["log"]:
                # blacklist of members not to copy
                continue
            setattr(result, k, copy.deepcopy(v, memo))
        # BUGFIX: removed an unreachable duplicate "return result" that
        # followed this statement in the original.
        return result
449

450

451
    def read_metadata(self, read_all=False):
        """
        Read metadata such as the job, the executables and available modules.

        Args:
            read_all (bool): whether to also read all name records

        Return:
            None

        """
        self.metadata['job'] = backend.log_get_job(self.log)
        self.metadata['exe'] = backend.log_get_exe(self.log)

        # job start/end are POSIX timestamps in the log header
        self.start_time = datetime.datetime.fromtimestamp(self.metadata['job']['start_time'])
        self.end_time = datetime.datetime.fromtimestamp(self.metadata['job']['end_time'])

        self.data['mounts'] = backend.log_get_mounts(self.log)
        self.mounts = self.data['mounts']

        self.data['modules'] = backend.log_get_modules(self.log)
        self._modules = self.data['modules']

        # PEP8: truthiness test instead of "== True"
        if read_all:
            self.data["name_records"] = backend.log_get_name_records(self.log)
            self.name_records = self.data['name_records']
477
478


479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
    def update_name_records(self, mod=None):
        """
        Update (and prune unused) name records from resolve table.

        First reindexes all used name record identifiers and then queries
        darshan-utils library to compile filtered list of name records.

        Args:
            mod (str): single module to refresh, or None for all loaded modules

        Return:
            None

        """
        # sanitize inputs: iterate every loaded module unless one was given
        mods = self.records if mod is None else [mod]

        # gather every record id currently in use
        ids = set()
        for mod in mods:
            logger.debug(f" Refreshing name_records for mod={mod}")
            for rec in self.records[mod]:
                ids.add(rec['id'])

        self.name_records.update(backend.log_lookup_name_records(self.log, ids))
511
512
        

513
    def read_all(self, dtype=None):
        """
        Read all available records from darshan log and return as dictionary.

        Args:
            dtype (str): dtype to use for the records (default: self.dtype)

        Return:
            None
        """
        # generic counter records, then the special-cased modules
        self.read_all_generic_records(dtype=dtype)
        self.read_all_dxt_records(dtype=dtype)
        self.mod_read_all_lustre_records(dtype=dtype)


531
    def read_all_generic_records(self, counters=True, fcounters=True, dtype=None):
        """
        Read all generic records from darshan log and return as dictionary.

        Args:
            counters (bool): currently unused (kept for interface stability)
            fcounters (bool): currently unused (kept for interface stability)
            dtype (str): dtype to use for the records (default: self.dtype)

        Return:
            None
        """
        dtype = dtype if dtype else self.dtype

        # mod_read_all_records silently skips unsupported modules
        for mod in self.data['modules']:
            self.mod_read_all_records(mod, dtype=dtype, warnings=False)


550
    def read_all_dxt_records(self, reads=True, writes=True, dtype=None):
        """
        Read all dxt records from darshan log and return as dictionary.

        Args:
            reads (bool): whether to load read segments
            writes (bool): whether to load write segments
            dtype (str): dtype to use for the records (default: self.dtype)

        Return:
            None
        """
        dtype = dtype if dtype else self.dtype

        # mod_read_all_dxt_records silently skips non-DXT modules
        for mod in self.data['modules']:
            self.mod_read_all_dxt_records(mod, warnings=False, reads=reads, writes=writes, dtype=dtype)


569
    def mod_read_all_records(self, mod, dtype=None, warnings=True):
        """
        Reads all generic records for module

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary, 'pandas'
            warnings (bool): whether to log a warning when a module is skipped

        Return:
            None

        """
        unsupported = ['DXT_POSIX', 'DXT_MPIIO', 'LUSTRE']

        if mod in unsupported:
            if warnings:
                logger.warning(f" Skipping. Currently unsupported: {mod} in mod_read_all_records().")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}
            self.counters[mod]['counters'] = cn
            self.counters[mod]['fcounters'] = fcn

        # fetch records until the backend signals exhaustion with None
        # (PEP8: identity comparison instead of "!= None")
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None
            combined_fc = None

            for rec in self.records[mod]:
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])

                if combined_fc is None:
                    combined_fc = rec['fcounters']
                else:
                    combined_fc = pd.concat([combined_fc, rec['fcounters']])

            # collapse into a single pseudo-record spanning all ranks/ids
            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                'fcounters': combined_fc
                }]

648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
    def mod_read_all_apmpi_records(self, mod, dtype=None, warnings=True):
        """
        Reads all APMPI records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
            warnings (bool): whether to log a warning when a module is skipped

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported = ['APMPI']
        if mod not in supported:
            if warnings:
                # BUGFIX: removed duplicated "in in" from the message
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_apmpi_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records: the first record is the APMPI header, all
        # subsequent records are PERF records
        rec = backend.log_get_apmpi_record(self.log, mod, "HEADER", dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            # consistency: use _modules like the sibling readers
            # (self.data['modules'] aliases the same dict)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_apmpi_record(self.log, mod, "PERF", dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()

699
    def mod_read_all_dxt_records(self, mod, dtype=None, warnings=True, reads=True, writes=True):
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary
            warnings (bool): whether to log a warning when a module is skipped
            reads (bool): whether to load read segments
            writes (bool): whether to load write segments

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f"Skipping. Log does not contain data for mod: {mod}")
            return

        supported = ['DXT_POSIX', 'DXT_MPIIO']

        if mod not in supported:
            if warnings:
                # BUGFIX: removed duplicated "in in" from the message
                logger.warning(f" Skipping. Unsupported module: {mod} in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return

        # handling options
        dtype = dtype if dtype else self.dtype

        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)

        # update module metadata
        self._modules[mod]['num_records'] = 0
        if mod not in self.counters:
            self.counters[mod] = {}

        # fetch records
        # BUGFIX: the first fetch previously omitted the reads/writes filter
        # that every subsequent fetch applied, so the first record was always
        # loaded unfiltered; pass the filter on every call.
        rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)
        while rec is not None:
            self.records[mod].append(rec)
            self._modules[mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_dxt_record(self.log, mod, reads=reads, writes=writes, dtype=dtype)

        if self.lookup_name_records:
            self.update_name_records()


754

755
    def mod_read_all_lustre_records(self, mod="LUSTRE", dtype=None, warnings=True):
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
        """
        Reads all dxt records for provided module.

        Args:
            mod (str): Identifier of module to fetch all records
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        if mod not in self.data['modules']:
            if warnings:
                logger.warning(f" Skipping. Log does not contain data for mod: {mod}")
            return


        supported =  ['LUSTRE']

        if mod not in supported:
            if warnings:
                logger.warning(f" Skipping. Unsupported module: {mod} in in mod_read_all_dxt_records(). Supported: {supported}")
            # skip mod
            return 


782
783
784
        # handling options
        dtype = dtype if dtype else self.dtype

785

Jakob Luettgau's avatar
Jakob Luettgau committed
786
        self.records[mod] = DarshanRecordCollection(mod=mod, report=self)
787
        cn = backend.counter_names(mod)
788

789
        # update module metadata
Jakob Luettgau's avatar
Jakob Luettgau committed
790
        self._modules[mod]['num_records'] = 0
791
792
        if mod not in self.counters:
            self.counters[mod] = {}
793
            self.counters[mod]['counters'] = cn 
794
795


796
        # fetch records
797
798
799
800
801
802
803
804
        rec = backend.log_get_record(self.log, mod, dtype=dtype)
        while rec != None:
            self.records[mod].append(rec)
            self.data['modules'][mod]['num_records'] += 1

            # fetch next
            rec = backend.log_get_record(self.log, mod, dtype=dtype)

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829

        if self.lookup_name_records:
            self.update_name_records()

        # process/combine records if the format dtype allows for this
        if dtype == 'pandas':
            combined_c = None

            for rec in self.records[mod]:
                obj = rec['counters']
                #print(type(obj))
                #display(obj)
                
                if combined_c is None:
                    combined_c = rec['counters']
                else:
                    combined_c = pd.concat([combined_c, rec['counters']])
                    

            self.records[mod] = [{
                'rank': -1,
                'id': -1,
                'counters': combined_c,
                }]

830
831
832
833
834
        pass




835
836
    def mod_records(self, mod,
                    dtype='numpy', warnings=True):
        """
        Return generator for lazy record loading and traversal.

        .. warning::
            Can't be used for now when alternating between different modules.
            A temporary workaround can be to open the same log multiple times,
            as this way buffers are not shared between get_record invocations
            in the lower level library.

        Args:
            mod (str): Identifier of module to fetch records for
            dtype (str): 'numpy' for ndarray (default), 'dict' for python dictionary

        Return:
            None

        """
        cn = backend.counter_names(mod)
        fcn = backend.fcounter_names(mod)

        # make sure counter name listings are available for this module
        if mod not in self.counters:
            self.counters[mod] = {}
        self.counters[mod]['counters'] = cn
        self.counters[mod]['fcounters'] = fcn

        # yield records one by one until the backend signals exhaustion
        rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
        while rec is not None:
            yield rec

            # fetch next
            rec = backend.log_get_generic_record(self.log, mod, dtype=dtype)
869
870


871
    def info(self, metadata=False):
        """
        Print information about the record for inspection.

        Args:
            metadata (bool): show detailed metadata (default: False)

        Return:
            None
        """
        print("Filename:       ", self.filename, sep="")

        tdelta = self.end_time - self.start_time
        print("Times:          ", self.start_time, " to ", self.end_time, " (Duration ", tdelta, ")", sep="")

        if 'exe' in self.metadata:
            # BUGFIX: corrected the user-facing typo "Executeable"
            print("Executable:     ", self.metadata['exe'], sep="")

        if 'job' in self.metadata:
            print("Processes:      ", self.metadata['job']['nprocs'], sep="")
            print("JobID:          ", self.metadata['job']['jobid'], sep="")
            print("UID:            ", self.metadata['job']['uid'], sep="")
            print("Modules in Log: ", list(self._modules.keys()), sep="")

        # per-module count of records that have been loaded so far
        loaded = {}
        for mod in self.records:
            loaded[mod] = len(self.records[mod])
        print("Loaded Records: ", loaded, sep="")

        print("Name Records:   ", len(self.name_records), sep="")

        if 'job' in self.metadata:
            print("Darshan/Hints:  ", self.metadata['job']['metadata'], sep="")
        print("DarshanReport:  id(", id(self), ") (tmp)", sep="")

        if metadata:
            for key, val in self.metadata.items():
                if key == "job":
                    for key2, val2 in self.metadata[key].items():
                        print("metadata['", key ,"']['", key2, "'] = ", val2, sep="")
                else:
                    print("metadata['", key, "'] = ", val, sep="")
    
    
        #def get_size(obj, seen=None):
        #    """Recursively finds size of objects"""
        #    size = sys.getsizeof(obj)
        #    if seen is None:
        #        seen = set()
        #    obj_id = id(obj)
        #    if obj_id in seen:
        #        return 0
        #    # Important mark as seen *before* entering recursion to gracefully handle
        #    # self-referential objects
        #    seen.add(obj_id)
        #    if isinstance(obj, dict):
        #        size += sum([get_size(v, seen) for v in obj.values()])
        #        size += sum([get_size(k, seen) for k in obj.keys()])
        #    elif hasattr(obj, '__dict__'):
        #        size += get_size(obj.__dict__, seen)
        #    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        #        size += sum([get_size(i, seen) for i in obj])
        #    return size
935

936
        #print("Memory:", get_size(self), 'bytes')
937

938

939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
    ###########################################################################
    # Internal Organisation
    ###########################################################################
    def rebase_timestamps(records=None, inplace=False, timebase=False):
        """
        Updates all records in the report to use timebase (default: start_time).
        This might allow to conserve memory as reports are merged.

        Args:
            records (dict, list): records to rebase
            inplace (bool): whether to merely return a copy or to update records
            timebase (datetime.datetime): new timebase to use

        Return:
            rebased_records (same type as provided to records)
        """
        # BUG FIX: the original referenced the undefined names `record` and
        # `rebase_records`, raising NameError on every call. Also honor the
        # `inplace` flag as documented: mutate in place vs. work on a copy.
        if inplace:
            rebased_records = records
        else:
            rebased_records = copy.deepcopy(records)

        # TODO: apply timestamp rebase
        # TODO: settle on format

        return rebased_records

    ###########################################################################
Jakob Luettgau's avatar
Jakob Luettgau committed
963
    # Export Conversions
964
    ###########################################################################
965
    def to_dict(self):
        """
        Return dictionary representation of report data.

        Args:
            None

        Return:
            dict
        """
        # Deep-copy so converting the record collections below does not
        # mutate the report's own data.
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            except Exception:
                recs[mod] = "Not implemented."

        return data
988
989


990
    def to_json(self):
        """
        Return JSON representation of report data as string.

        Args:
            None

        Return:
            JSON String
        """
        # Deep-copy so converting the record collections below does not
        # mutate the report's own data.
        data = copy.deepcopy(self.data)

        recs = data['records']
        for mod in recs:
            try:
                recs[mod] = recs[mod].to_list()
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            except Exception:
                recs[mod] = "Not implemented."

        # DarshanReportJSONEncoder (defined at the top of this module)
        # serializes numpy arrays and datetime objects.
        return json.dumps(data, cls=DarshanReportJSONEncoder)
1010
1011
1012
1013




Jakob Luettgau's avatar
Jakob Luettgau committed
1014