/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <assert.h>

#include "uthash.h"
#include "darshan.h"
#include "darshan-bgq-log-format.h"

#ifdef __bgq__
#include <spi/include/kernel/location.h>
#include <spi/include/kernel/process.h>
#include <firmware/include/personality.h>
#endif

/*
 * Simple module which captures BG/Q hardware specific information about 
 * the job.
 * 
 * This module does not intercept any system calls. It just pulls data
 * from the personality struct at initialization.
 */


/*
 * Global runtime struct for tracking data needed at runtime.
 *
 * The module keeps a single instance of this struct (allocated by
 * bgq_runtime_initialize() and released by bgq_shutdown()).
 */
struct bgq_runtime
{
    /* the one BG/Q record this process tracks; this file reads or writes
     * its counters, fcounters, rank, f_id and alignment members */
    struct darshan_bgq_record record;
};

static struct bgq_runtime *bgq_runtime = NULL;
static pthread_mutex_t bgq_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* the instrumentation_disabled flag is used to toggle functions on/off */
static int instrumentation_disabled = 0;

/* my_rank indicates the MPI rank of this process */
static int my_rank = -1;
52
static int darshan_mem_alignment = 1;
53 54

/* internal helper functions for the "NULL" module */
Kevin Harms's avatar
Kevin Harms committed
55
void bgq_runtime_initialize(void);
56 57 58

/* forward declaration for module functions needed to interface with darshan-core */
static void bgq_begin_shutdown(void);
59
static void bgq_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size);
60
static void bgq_shutdown(void);
Kevin Harms's avatar
Kevin Harms committed
61 62
static void bgq_setup_reduction(darshan_record_id *shared_recs,int *shared_rec_count,void **send_buf,void **recv_buf,int *rec_size);
static void bgq_record_reduction_op(void* infile_v,void* inoutfile_v,int *len,MPI_Datatype *datatype);
63 64 65 66 67 68 69 70 71 72 73 74

/* macros for obtaining/releasing the "NULL" module lock */
#define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex)
#define BGQ_UNLOCK() pthread_mutex_unlock(&bgq_runtime_mutex)

/*
 * Function which updates all the counter data of a BG/Q record.
 *
 * When built for BG/Q (__bgq__ defined) the job id, the process count and
 * the I/O node id are stored first; the torus dimensions, the torus-enabled
 * bit mask and the DDR size are then copied from the personality, but only
 * if Kernel_GetPersonality() returns 0.  On every build the record's rank
 * and timestamp are set.
 *
 * rec: record to fill in; it is dereferenced unconditionally.
 */
static void capture(struct darshan_bgq_record *rec)
{
#ifdef __bgq__
    Personality_t pers;
    int rc;

/* 1 shifted to position `bit` when every bit of `dim_flag` is set in NetFlags, else 0 */
#define BGQ_TORUS_BIT(dim_flag, bit) \
    (((pers.Network_Config.NetFlags & (dim_flag)) == (dim_flag)) << (bit))

    rec->counters[BGQ_CSJOBID] = Kernel_GetJobID();
    rec->counters[BGQ_RANKSPERNODE] = Kernel_ProcessCount();
    /* this process's I/O node id; bgq_get_output_data() later replaces it
     * on rank 0 with the number of distinct ids */
    rec->counters[BGQ_INODES] = MPIX_IO_node_id();

    rc = Kernel_GetPersonality(&pers, sizeof(pers));
    if (rc == 0)
    {
        rec->counters[BGQ_NNODES] = ND_TORUS_SIZE(pers.Network_Config);
        rec->counters[BGQ_ANODES] = pers.Network_Config.Anodes;
        rec->counters[BGQ_BNODES] = pers.Network_Config.Bnodes;
        rec->counters[BGQ_CNODES] = pers.Network_Config.Cnodes;
        rec->counters[BGQ_DNODES] = pers.Network_Config.Dnodes;
        rec->counters[BGQ_ENODES] = pers.Network_Config.Enodes;

        /* one bit per torus dimension: A is bit 0 ... E is bit 4 */
        rec->counters[BGQ_TORUSENABLED] =
            BGQ_TORUS_BIT(ND_ENABLE_TORUS_DIM_A, 0) |
            BGQ_TORUS_BIT(ND_ENABLE_TORUS_DIM_B, 1) |
            BGQ_TORUS_BIT(ND_ENABLE_TORUS_DIM_C, 2) |
            BGQ_TORUS_BIT(ND_ENABLE_TORUS_DIM_D, 3) |
            BGQ_TORUS_BIT(ND_ENABLE_TORUS_DIM_E, 4);

        rec->counters[BGQ_DDRPERNODE] = pers.DDR_Config.DDRSizeMB;
    }

#undef BGQ_TORUS_BIT
#endif

    rec->rank = my_rank;
    rec->fcounters[BGQ_F_TIMESTAMP] = darshan_core_wtime();
}

/**********************************************************
 * Internal functions for manipulating BGQ module state *
 **********************************************************/

Kevin Harms's avatar
Kevin Harms committed
111
void bgq_runtime_initialize()
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
{
    /* struct of function pointers for interfacing with darshan-core */
    struct darshan_module_funcs bgq_mod_fns =
    {
        .begin_shutdown = bgq_begin_shutdown,
        .get_output_data = bgq_get_output_data,
        .shutdown = bgq_shutdown
    };
    int mem_limit;
    char *recname = "darshan-internal-bgq";

    BGQ_LOCK();

    /* don't do anything if already initialized or instrumenation is disabled */
    if(bgq_runtime || instrumentation_disabled)
        return;

    /* register the "NULL" module with the darshan-core component */
    darshan_core_register_module(
        DARSHAN_BGQ_MOD,
        &bgq_mod_fns,
133
        &my_rank,
134
        &mem_limit,
135
        &darshan_mem_alignment);
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163

    /* return if no memory assigned by darshan-core */
    if(mem_limit == 0)
    {
        instrumentation_disabled = 1;
        return;
    }

    /* no enough memory to fit bgq module */
    if (mem_limit < sizeof(*bgq_runtime))
    {
        instrumentation_disabled = 1;
        return;
    }

    /* initialize module's global state */
    bgq_runtime = malloc(sizeof(*bgq_runtime));
    if(!bgq_runtime)
    {
        instrumentation_disabled = 1;
        return;
    }
    memset(bgq_runtime, 0, sizeof(*bgq_runtime));

    darshan_core_register_record(
        recname,
        strlen(recname),
        1,
164
        DARSHAN_BGQ_MOD,
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
        &bgq_runtime->record.f_id,
        &bgq_runtime->record.alignment);

    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);

    capture(&bgq_runtime->record);

    BGQ_UNLOCK();

    return;
}

/*
 * Perform any necessary steps prior to shutting down for the "BGQ" module.
 *
 * The only step is to set instrumentation_disabled while holding the
 * module lock.
 */
static void bgq_begin_shutdown(void)
{
    BGQ_LOCK();

    /* Wrappers that keep running while Darshan shuts down could modify
     * module state underneath it, so all of them are disabled here to
     * avoid race conditions and keep the data consistent.
     */
    instrumentation_disabled = 1;

    BGQ_UNLOCK();
}

/*
 * qsort() comparison function ordering uint64_t values ascending.
 *
 * p1, p2: pointers to the two uint64_t elements to compare.
 * Returns -1, 0 or 1 when *p1 is less than, equal to or greater than *p2.
 */
static int cmpr(const void *p1, const void *p2)
{
    /* no casts: void pointers convert implicitly and const is preserved */
    const uint64_t *a = p1;
    const uint64_t *b = p2;

    /* compare rather than subtract: a 64-bit difference does not fit in int */
    return (*a > *b) - (*a < *b);
}

201 202
/* Pass output data for the "BGQ" module back to darshan-core to log to file. */
static void bgq_get_output_data(
203 204 205
    MPI_Comm mod_comm,
    darshan_record_id *shared_recs,
    int shared_rec_count,
206 207 208 209 210 211 212 213
    void **buffer,
    int *size)
{

    /* Just set the output buffer to point at the array of the "BGQ" module's
     * I/O records, and set the output size according to the number of records
     * currently being tracked.
     */
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
    int nprocs;
    int result;
    uint64_t *ion_ids;

    if (my_rank == 0)
    {
        DARSHAN_MPI_CALL(MPI_Comm_size)(mod_comm, &nprocs);
        ion_ids = malloc(sizeof(*ion_ids)*nprocs);
        result = (ion_ids != NULL); 
    }
    DARSHAN_MPI_CALL(MPI_Bcast)(&result, 1, MPI_INT, 0, mod_comm);

    if (bgq_runtime && result)
    {
        int i, found;
        uint64_t val;

        DARSHAN_MPI_CALL(MPI_Gather)(&bgq_runtime->record.counters[BGQ_INODES],
                                     1,
Kevin Harms's avatar
Kevin Harms committed
233
                                     MPI_LONG_LONG_INT,
234 235
                                     ion_ids,
                                     1,
Kevin Harms's avatar
Kevin Harms committed
236
                                     MPI_LONG_LONG_INT,
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
                                     0,
                                     mod_comm);
        if (my_rank == 0)
        {
            qsort(ion_ids, nprocs, sizeof(*ion_ids), cmpr);
            for (i = 1, val = ion_ids[0], found = 1; i < nprocs; i++)
            {
                if (val != ion_ids[i])
                {
                    val = ion_ids[i];
                    found += 1;
                }
            }
            bgq_runtime->record.counters[BGQ_INODES] = found;
        }
    }

Kevin Harms's avatar
Kevin Harms committed
254
    if ((bgq_runtime) && (my_rank == 0))
255
    {
Kevin Harms's avatar
Kevin Harms committed
256
        *buffer = &bgq_runtime->record;
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
        *size = sizeof(struct darshan_bgq_record);
    }
    else
    {
        *buffer = NULL;
        *size   = 0;
    }

    return;
}

/*
 * Shutdown the "BGQ" module by freeing up all data structures.
 *
 * The module state pointer is reset to NULL afterwards.
 */
static void bgq_shutdown(void)
{
    /* free(NULL) is a no-op, so the pointer needs no check first */
    free(bgq_runtime);
    bgq_runtime = NULL;
}

Kevin Harms's avatar
Kevin Harms committed
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
static void bgq_record_reduction_op(
    void* infile_v,
    void* inoutfile_v,
    int* len,
    MPI_Datatype *datatype)
{
    int i;
    int j;
    struct darshan_bgq_record *infile = infile_v;
    struct darshan_bgq_record *inoutfile = inoutfile_v;

    for (i = 0; i<*len; i++)
    {
        for (j = 0; j < BGQ_NUM_INDICES; j++)
        {
            if (infile->counters[j] != inoutfile->counters[j])
            {
                // unexpected
                fprintf(stderr,
                        "%lu counter mismatch: %d [%lu] [%lu]\n",
                        infile->f_id,
                        j,
                        infile->counters[j],
                        inoutfile->counters[j]);
            }
        }
        infile++;
        inoutfile++;
    }

    return;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */