local-storage-model.c 25 KB
Newer Older
1
2
3
4
5
6
7
8
/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <assert.h>
#include <ross.h>
Jonathan Jenkins's avatar
Jonathan Jenkins committed
9
10
11
12
13
14
#include <codes/lp-io.h>
#include <codes/jenkins-hash.h>
#include <codes/codes.h>
#include <codes/codes_mapping.h>
#include <codes/lp-type-lookup.h>
#include <codes/local-storage-model.h>
15
#include <codes/quicklist.h>
16
#include <codes/rc-stack.h>
17
18
19
20

/* size in bytes of a category name buffer (see lsm_stats_t.category and
 * lsm_message_data_t.category) */
#define CATEGORY_NAME_MAX 16
/* number of per-category statistics slots kept in each LP's state */
#define CATEGORY_MAX 12

21
22
/* when non-zero, lsm_io_event adds the accumulated delay of previously
 * issued requests (lsm_msg_offset) to each new request's delay */
int lsm_in_sequence = 0;
/* running sum of request delays; only read/advanced by lsm_io_event while
 * lsm_in_sequence is set */
tw_stime lsm_msg_offset = 0.0;
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

/* holds statistics about disk traffic on each LP (one record per category) */
typedef struct lsm_stats_s
{
    char category[CATEGORY_NAME_MAX]; /* category name; an empty string marks an unused slot */
    long read_count;      /* number of read requests accounted */
    long read_bytes;      /* total bytes requested by reads */
    long read_seeks;      /* reads for which transfer_time_table charged a seek */
    tw_stime read_time;   /* sum of the per-request times computed for reads */
    long write_count;     /* number of write requests accounted */
    long write_bytes;     /* total bytes requested by writes */
    long write_seeks;     /* writes for which transfer_time_table charged a seek */
    tw_stime write_time;  /* sum of the per-request times computed for writes */
} lsm_stats_t;

/*
 * disk model parameters
 *   - the seven arrays are parallel tables with `bins` entries each
 *     (read_config asserts that every list has the same length)
 *   - transfer_time_table selects one entry by request size and uses the
 *     rate, overhead and seek values found at that index
 */
typedef struct disk_model_s
{
    unsigned int *request_sizes;  /* request-size threshold of each bin */
    double *write_rates;
    double *read_rates;
    double *write_overheads;
    double *read_overheads;
    double *write_seeks;
    double *read_seeks;
    unsigned int bins;            /* number of entries in each table */
    // sched params
    //   0  - no scheduling
    //  >0  - make scheduler with use_sched priority lanes
    int use_sched;
} disk_model_t;

57
58
59
60
61
62
63
64
65
66
67
68
69
70
/*
 * lsm_message_data_t
 *   - data used for input in transfer time calculation
 *   - data comes from the caller
 *   - object: id of byte stream which could be a file, object, etc.
 *   - offset: offset into byte stream
 *   - size: size in bytes of request
 *   - category: statistics category the request is accounted under
 *   - prio: priority lane chosen in lsm_io_event (0 when scheduling is off)
 */
typedef struct lsm_message_data_s
{
    uint64_t    object;
    uint64_t    offset;
    uint64_t    size;
    char category[CATEGORY_NAME_MAX]; /* category for traffic */
    int prio; // for scheduling
} lsm_message_data_t;

/*
 * lsm_sched_op_s - operation to be scheduled
 *   - data: copy of the request parameters taken when the op is queued
 *   - ql: link used to chain the op into one of the scheduler's queues
 */
typedef struct lsm_sched_op_s
{
    lsm_message_data_t data;
    struct qlist_head ql;
} lsm_sched_op_t;

/*
 * lsm_sched_s - data structure for implementing scheduling loop
 *   - num_prios: number of priority lanes (length of `queues`)
 *   - queues: one list of pending lsm_sched_op_t per lane; the completion
 *     handler scans them starting at index 0
 */
typedef struct lsm_sched_s
{
    int num_prios;
    // number of pending requests, incremented on new and decremented on
    // complete
    int active_count;
    // scheduler mallocs data per-request - hold onto and free later
    struct rc_stack *freelist;
    struct qlist_head *queues;
} lsm_sched_t;

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/*
 * lsm_state_s
 *   - state tracking structure for each LP node
 *   - next_idle: next point in time the disk will be idle
 *   - model: disk parameters
 *   - current_offset: last offset the disk operated on
 *   - current_object: last object id that operated on
 */
typedef struct lsm_state_s
{
    tw_stime next_idle;
    disk_model_t *model;
    int64_t  current_offset;
    uint64_t current_object;
    lsm_stats_t lsm_stats_array[CATEGORY_MAX];
112
113
114
    /* scheduling state */
    int use_sched;
    lsm_sched_t sched;
115
116
117
118
119
120
} lsm_state_t;

/*
 * lsm_message_t
 *   - holds event data
 *   - event: event type
121
 *   - data: IO request data
122
123
124
125
126
127
 *   - wrap: wrapped event data of caller
 */
typedef struct lsm_message_s
{
    int magic; /* magic number */
    lsm_event_t event;
128
    int prio; // op priority (user-set on request, used by LSM in rc for complete)
129
130
131
132
    tw_stime    prev_idle;
    lsm_stats_t prev_stat;
    int64_t     prev_offset;
    uint64_t    prev_object;
133
    lsm_message_data_t data;
134
    struct codes_cb_params cb;
135
136
137
138
139
140
141
142
143
} lsm_message_t;

/*
 * Prototypes
 */

/* ROSS LP callbacks */
static void lsm_lp_init (lsm_state_t *ns, tw_lp *lp);
static void lsm_event (lsm_state_t *ns, tw_bf *b, lsm_message_t *m, tw_lp *lp);
static void lsm_rev_event (lsm_state_t *ns, tw_bf *b, lsm_message_t *m, tw_lp *lp);
static void lsm_finalize (lsm_state_t *ns, tw_lp *lp);

/* forward handlers and their reverse counterparts */
static void handle_io_sched_new(lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);
static void handle_rev_io_sched_new(lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);
static void handle_io_request(lsm_state_t *ns, tw_bf *b, lsm_message_data_t *data, lsm_message_t *m_in, tw_lp *lp);
static void handle_rev_io_request(lsm_state_t *ns, tw_bf *b, lsm_message_data_t *data, lsm_message_t *m_in, tw_lp *lp);
static void handle_io_sched_compl(lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);
static void handle_rev_io_sched_compl(lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);
static void handle_io_completion (lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);
static void handle_rev_io_completion (lsm_state_t *ns, tw_bf *b, lsm_message_t *m_in, tw_lp *lp);

/* statistics helpers */
static lsm_stats_t *find_stats(const char* category, lsm_state_t *ns);
static void write_stats(tw_lp* lp, lsm_stats_t* stat);

/*
 * Globals
 */

/* message tag: computed in lsm_register, stamped on outgoing requests by
 * lsm_io_event and asserted by lsm_event / lsm_rev_event */
static int lsm_magic = 0;

161
162
163
164
/* configuration parameters (by annotation):
 *   - model_unanno: model for LPs without an annotation
 *   - models_anno: array allocated in lsm_configure, one model per entry of
 *     anno_map->annotations (same index)
 *   - anno_map: annotation map for LSM_NAME, set in lsm_configure */
static disk_model_t model_unanno, *models_anno = NULL;
static const config_anno_map_t *anno_map = NULL;

165
166
167
/* sched temporary for lsm_set_event_priority: priority to apply to the next
 * lsm_io_event call; a negative value means "none supplied". lsm_io_event
 * resets it to -1 after every request. */
static int temp_prio = -1;

168
169
170
171
172
173
174
/*
 * lsm_lp
 *   - implements ROSS callback interfaces
 *   - registered under LSM_NAME by lsm_register
 */
tw_lptype lsm_lp =
{
    (init_f) lsm_lp_init,
    (pre_run_f) NULL,
    (event_f) lsm_event,
    (revent_f) lsm_rev_event,
    (final_f) lsm_finalize,
    (map_f) codes_mapping,
    sizeof(lsm_state_t)
};

/*
 * transfer_time_table
 *   - computes the service time of one request from the LP's disk model and
 *     records the request in `stat`
 *   - rw: non-zero selects the read tables, zero the write tables
 *   - the table row is the last bin whose request size does not exceed
 *     `size` (row 0 when even the first bin is larger)
 *   - a seek is charged when the request targets another object or does not
 *     start within [current_offset, current_offset + 512]
 *   - returns transfer + overhead (+ seek) time
 */
static tw_stime transfer_time_table (lsm_state_t *ns,
                                     lsm_stats_t *stat,
                                     int rw,
                                     uint64_t object,
                                     int64_t offset,
                                     uint64_t size)
{
    const disk_model_t *model = ns->model;
    int bin = 0;

    /* advance past every bin that fits, then step back onto the last one */
    while (bin < model->bins && model->request_sizes[bin] <= size)
        bin++;
    if (bin > 0)
        bin--;

    const double rate     = rw ? model->read_rates[bin]     : model->write_rates[bin];
    const double seek     = rw ? model->read_seeks[bin]     : model->write_seeks[bin];
    const double overhead = rw ? model->read_overheads[bin] : model->write_overheads[bin];

    /* transfer time */
    const double mib = ((double)size) / (1024.0 * 1024.0);
    double cost = 0.0;
    cost += (mib / rate) * 1000.0 * 1000.0 * 1000.0;

    /* request overhead */
    cost += (overhead * 1000.0);

    /* seek, unless the request continues where the previous one stopped */
    const int sequential = (object == ns->current_object) &&
                           (offset >= ns->current_offset) &&
                           (offset <= (ns->current_offset+512));
    if (!sequential)
    {
        if (rw)
            stat->read_seeks++;
        else
            stat->write_seeks++;
        cost += (seek * 1000.0);
    }

    /* update statistics */
    if (rw)
    {
        stat->read_count += 1;
        stat->read_bytes += size;
        stat->read_time  += cost;
    }
    else
    {
        stat->write_count += 1;
        stat->write_bytes += size;
        stat->write_time  += cost;
    }

    return cost;
}

257
/*
 * lsm_io_event_rc
 *   - reverse of lsm_io_event as seen by the sender: undoes the
 *     codes_local_latency call made on the sender LP
 */
void lsm_io_event_rc(tw_lp *sender)
{
    codes_local_latency_reverse(sender);
}
261

262
/*
 * lsm_find_local_device
 *   - resolves the LSM LP that `sender_gid` maps to under `map_ctx`
 *   - returns the result of codes_mctx_to_lpid for LSM_NAME
 */
tw_lpid lsm_find_local_device(
        struct codes_mctx const * map_ctx,
        tw_lpid sender_gid)
{
    return codes_mctx_to_lpid(map_ctx, LSM_NAME, sender_gid);
}

269
270
void lsm_io_event(
        const char * lp_io_category,
271
272
273
274
275
        uint64_t io_object,
        int64_t  io_offset,
        uint64_t io_size_bytes,
        int      io_type,
        tw_stime delay,
276
277
278
279
280
        tw_lp *sender,
        struct codes_mctx const * map_ctx,
        int return_tag,
        msg_header const * return_header,
        struct codes_cb_info const * cb)
281
{
282
283
284
    assert(strlen(lp_io_category) < CATEGORY_NAME_MAX-1);
    assert(strlen(lp_io_category) > 0);
    SANITY_CHECK_CB(cb, lsm_return_t);
285

286
    tw_lpid lsm_id = codes_mctx_to_lpid(map_ctx, LSM_NAME, sender->gid);
287

288
    tw_stime delta = delay + codes_local_latency(sender);
289
    if (lsm_in_sequence) {
290
291
292
293
        tw_stime tmp = lsm_msg_offset;
        lsm_msg_offset += delta;
        delta += tmp;
    }
294
295
296

    tw_event *e = tw_event_new(lsm_id, delta, sender);
    lsm_message_t *m = tw_event_data(e);
297
    m->magic = lsm_magic;
298
    m->event = (lsm_event_t) io_type;
299
300
301
    m->data.object = io_object;
    m->data.offset = io_offset;
    m->data.size   = io_size_bytes;
302
    strcpy(m->data.category, lp_io_category);
303
304

    // get the priority count for checking
305
    int num_prios = lsm_get_num_priorities(map_ctx, sender->gid);
306
307
308
309
310
311
312
313
314
315
    // prio checks and sets
    if (num_prios <= 0) // disabled scheduler - ignore
        m->data.prio = 0;
    else if (temp_prio < 0) // unprovided priority - defer to max possible
        m->data.prio = num_prios-1;
    else if (temp_prio < num_prios) // valid priority
        m->data.prio = temp_prio;
    else
        tw_error(TW_LOC,
                "LP %lu, LSM LP %lu: Bad priority (%d supplied, %d lanes)\n",
316
                sender->gid, lsm_id, temp_prio, num_prios);
317
318
    // reset temp_prio
    temp_prio = -1;
319

320
321
322
    m->cb.info = *cb;
    m->cb.h = *return_header;
    m->cb.tag = return_tag;
323

324
    tw_event_send(e);
325
326
}

327
/*
 * lsm_get_num_priorities
 *   - returns the use_sched setting (number of priority lanes, 0 when
 *     scheduling is off) of the LSM model that `sender_id` maps to
 *   - asserts if the annotation is unknown; returns -1 in that case when
 *     asserts are compiled out
 */
int lsm_get_num_priorities(
        struct codes_mctx const * map_ctx,
        tw_lpid sender_id)
{
    char const * annotation =
        codes_mctx_get_annotation(map_ctx, LSM_NAME, sender_id);

    if (annotation == NULL) {
        assert(anno_map->has_unanno_lp);
        return model_unanno.use_sched;
    }

    /* models_anno shares its indexing with anno_map->annotations */
    for (int i = 0; i < anno_map->num_annos; i++) {
        if (strcmp(anno_map->annotations[i].ptr, annotation) == 0)
            return models_anno[i].use_sched;
    }
    assert(0);
    return -1;
}

/*
 * lsm_set_event_priority
 *   - stores `prio` in temp_prio; the next lsm_io_event call reads it to
 *     pick the request's priority and then resets it to -1
 */
void lsm_set_event_priority(int prio)
{
    temp_prio = prio;
}

353
354
355
356
357
358
359
360
361
362
363
/*
 * lsm_lp_init
 *   - initialize the lsm model
 *   - sets the disk to be idle now
 *   - binds the LP to the disk model matching its annotation
 *   - when the model enables scheduling, allocates one empty queue per
 *     priority lane and the rc_stack used to defer freeing dispatched ops
 */
static void lsm_lp_init (lsm_state_t *ns, tw_lp *lp)
{
    memset(ns, 0, sizeof(*ns));

    ns->next_idle = tw_now(lp);

    // set the correct model
    const char *anno = codes_mapping_get_annotation_by_lpid(lp->gid);
    if (anno == NULL)
        ns->model = &model_unanno;
    else {
        int id = configuration_get_annotation_index(anno, anno_map);
        // a negative index can never address models_anno
        assert(id >= 0);
        ns->model = &models_anno[id];
    }

    // initialize the scheduler if need be
    ns->use_sched = ns->model->use_sched > 0;
    if (ns->use_sched) {
        ns->sched.num_prios = ns->model->use_sched;
        ns->sched.active_count = 0;
        rc_stack_create(&ns->sched.freelist);
        ns->sched.queues =
            malloc(ns->sched.num_prios * sizeof(*ns->sched.queues));
        // same allocation-failure policy as read_config: stop right here
        assert(ns->sched.queues);
        for (int i = 0; i < ns->sched.num_prios; i++)
            INIT_QLIST_HEAD(&ns->sched.queues[i]);
    }
}

/*
 * lsm_event
 *   - event handler callback
 *   - dispatches the events to the appropriate handlers
 *   - requests go through the scheduler when the LP uses one, otherwise
 *     straight to handle_io_request
 *   - unknown event types are reported on stdout and otherwise ignored
 */
static void lsm_event (lsm_state_t *ns, tw_bf *b, lsm_message_t *m, tw_lp *lp)
{
    assert(m->magic == lsm_magic);

    switch (m->event)
    {
        case LSM_WRITE_REQUEST:
        case LSM_READ_REQUEST:
            if (LSM_DEBUG)
                printf("svr(%llu): REQUEST obj:%llu off:%llu size:%llu\n",
                    (unsigned long long)lp->gid,
                    (unsigned long long)m->data.object,
                    (unsigned long long)m->data.offset,
                    (unsigned long long)m->data.size);
            assert(ns->model);
            if (ns->use_sched)
                handle_io_sched_new(ns, b, m, lp);
            else
                handle_io_request(ns, b, &m->data, m, lp);
            break;
        case LSM_WRITE_COMPLETION:
        case LSM_READ_COMPLETION:
            if (LSM_DEBUG)
                printf("svr(%llu): COMPLETION\n",
                    (unsigned long long)lp->gid);
            handle_io_completion(ns, b, m, lp);
            break;
        default:
            printf("svr(%llu): Unknown Event:%d\n",
                (unsigned long long)lp->gid,
                m->event);
            break;
    }
}

/*
 * lsm_rev_event
 *   - callback to reverse an event
 *   - mirrors lsm_event: each event type is routed to the reverse handler
 *     of the handler that processed it
 */
static void lsm_rev_event(lsm_state_t *ns,
                          tw_bf *b,
                          lsm_message_t *m,
                          tw_lp *lp)
{
    assert(m->magic == lsm_magic);

    switch (m->event)
    {
        case LSM_WRITE_REQUEST:
        case LSM_READ_REQUEST:
            if (LSM_DEBUG)
                printf("svr(%llu): reverse REQUEST obj:%llu off:%llu size:%llu\n",
                    (unsigned long long)lp->gid,
                    (unsigned long long)m->data.object,
                    (unsigned long long)m->data.offset,
                    (unsigned long long)m->data.size);
            if (ns->use_sched)
                handle_rev_io_sched_new(ns, b, m, lp);
            else
                handle_rev_io_request(ns, b, &m->data, m, lp);
            break;
        case LSM_WRITE_COMPLETION:
        case LSM_READ_COMPLETION:
            if (LSM_DEBUG)
                printf("svr(%llu): reverse COMPLETION\n",
                    (unsigned long long)lp->gid);
            handle_rev_io_completion(ns, b, m, lp);
            break;
        default:
            printf("svr(%llu): reverse Unknown Event:%d\n",
                (unsigned long long)lp->gid,
                m->event);
            break;
    }
}
  
/*
 * lsm_finalize
 *   - end-of-run callback: writes one statistics record per category in use
 *     on this LP, followed by an "all" record holding their sums
 */
static void lsm_finalize(lsm_state_t *ns,
                         tw_lp *lp)
{
    lsm_stats_t all;

    memset(&all, 0, sizeof(all));
    sprintf(all.category, "all");

    for (int slot = 0; slot < CATEGORY_MAX; slot++)
    {
        lsm_stats_t *cur = &ns->lsm_stats_array[slot];

        /* slots that never received a name hold no data */
        if (cur->category[0] == '\0')
            continue;

        all.write_count += cur->write_count;
        all.write_bytes += cur->write_bytes;
        all.write_time  += cur->write_time;
        all.write_seeks += cur->write_seeks;
        all.read_count  += cur->read_count;
        all.read_bytes  += cur->read_bytes;
        all.read_seeks  += cur->read_seeks;
        all.read_time   += cur->read_time;

        write_stats(lp, cur);
    }

    write_stats(lp, &all);
}

509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/*
 * handle_io_sched_new
 *   - entry point for a new request when the scheduler is enabled
 *   - with no request outstanding the request is issued immediately;
 *     otherwise a copy of its data is appended to the queue of its priority
 *     lane (m_in->prio) until a completion dispatches it
 *   - active_count is incremented in both cases
 */
static void handle_io_sched_new(
        lsm_state_t *ns,
        tw_bf *b,
        lsm_message_t *m_in,
        tw_lp *lp)
{
    if (LSM_DEBUG)
        printf("handle_io_sched_new called\n");
    // if nothing else is going on, then issue directly
    if (!ns->sched.active_count)
        handle_io_request(ns, b, &m_in->data, m_in, lp);
    else {
        // an out-of-range lane would index past the queue array
        assert(m_in->prio >= 0 && m_in->prio < ns->sched.num_prios);
        lsm_sched_op_t *op = malloc(sizeof(*op));
        assert(op);
        op->data = m_in->data;
        qlist_add_tail(&op->ql, &ns->sched.queues[m_in->prio]);
    }
    ns->sched.active_count++;
}

/*
 * handle_rev_io_sched_new
 *   - reverse of handle_io_sched_new
 *   - after undoing the counter, a count of zero means the request had been
 *     issued directly, so the issue is reversed; otherwise the op that was
 *     appended to the lane's queue is removed from its tail and freed
 */
static void handle_rev_io_sched_new(
        lsm_state_t *ns,
        tw_bf *b,
        lsm_message_t *m_in,
        tw_lp *lp)
{
    if (LSM_DEBUG)
        printf("handle_rev_io_sched_new called\n");

    ns->sched.active_count--;
    if (ns->sched.active_count == 0) {
        handle_rev_io_request(ns, b, &m_in->data, m_in, lp);
        return;
    }

    struct qlist_head *tail = qlist_pop_back(&ns->sched.queues[m_in->prio]);
    assert(tail);
    lsm_sched_op_t *queued = qlist_entry(tail, lsm_sched_op_t, ql);
    free(queued);
}

/*
 * handle_io_sched_compl
 *   - scheduler step run when a request completes
 *   - decrements active_count; if requests remain, takes the head of the
 *     first non-empty queue (scanning from lane 0), records that lane in
 *     m_in->prio for the reverse handler and issues the op
 *   - the dequeued op is handed to the rc_stack instead of being freed here,
 *     so that a rollback can put it back
 */
static void handle_io_sched_compl(
        lsm_state_t *ns,
        tw_bf *b,
        lsm_message_t *m_in,
        tw_lp *lp)
{
    if (LSM_DEBUG)
        printf("handle_io_sched_compl called\n");

    ns->sched.active_count--;
    if (ns->sched.active_count == 0)
        return;

    lsm_sched_op_t *op = NULL;
    for (int lane = 0; lane < ns->sched.num_prios && op == NULL; lane++) {
        struct qlist_head *node = qlist_pop(&ns->sched.queues[lane]);
        if (node != NULL) {
            op = qlist_entry(node, lsm_sched_op_t, ql);
            m_in->prio = lane;
        }
    }
    assert(op);

    handle_io_request(ns, b, &op->data, m_in, lp);
    // now done with this request metadata
    rc_stack_push(lp, op, free, ns->sched.freelist);
}

/*
 * handle_rev_io_sched_compl
 *   - reverse of handle_io_sched_compl
 *   - if the forward handler dispatched a queued op (active_count still
 *     non-zero), recovers that op from the rc_stack, reverses its issue and
 *     puts it back into the lane recorded in m_in->prio
 *   - finally restores active_count
 */
static void handle_rev_io_sched_compl(
        lsm_state_t *ns,
        tw_bf *b,
        lsm_message_t *m_in,
        tw_lp *lp)
{
    if (LSM_DEBUG)
        printf("handle_rev_io_sched_compl called\n");
    if (ns->sched.active_count) {
        lsm_sched_op_t *prev = rc_stack_pop(ns->sched.freelist);
        assert(prev);
        handle_rev_io_request(ns, b, &prev->data, m_in, lp);
        // the forward handler removed the op from the FRONT of its queue
        // (qlist_pop), so it has to return to the front; appending it at the
        // tail would reorder the queue across a rollback
        qlist_add(&prev->ql, &ns->sched.queues[m_in->prio]);
    }
    ns->sched.active_count++;
}


591
592
593
594
595
596
597
598
/*
 * handle_io_request
 *   - handles the IO request events
 *   - computes the next_idle time
 *   - fires disk completion event at computed time
 */
static void handle_io_request(lsm_state_t *ns,
                              tw_bf *b,
599
                              lsm_message_data_t *data,
600
601
602
603
604
605
606
607
608
609
610
611
612
                              lsm_message_t *m_in,
                              tw_lp *lp)
{
    tw_stime queue_time, t_time;
    tw_event *e;
    lsm_message_t *m_out;
    lsm_stats_t *stat;
    int rw = (m_in->event == LSM_READ_REQUEST) ? 1 : 0;

    tw_stime (*transfer_time) (lsm_state_t *, lsm_stats_t *, int, uint64_t, int64_t, uint64_t);

    transfer_time = transfer_time_table;

613
    stat = find_stats(data->category, ns);
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633

    /* save history for reverse operation */
    m_in->prev_idle   = ns->next_idle;
    m_in->prev_stat   = *stat;
    m_in->prev_object = ns->current_object;
    m_in->prev_offset = ns->current_offset;

    if (ns->next_idle > tw_now(lp))
    {
        queue_time = ns->next_idle - tw_now(lp);
    }
    else
    {
        queue_time = 0;
    }


    t_time = transfer_time(ns,
                           stat,
                           rw,
634
635
636
                           data->object,
                           data->offset,
                           data->size);
637
638
    queue_time += t_time;
    ns->next_idle = queue_time + tw_now(lp); 
639
640
    ns->current_offset = data->offset + data->size;
    ns->current_object = data->object;
641

642
    e = tw_event_new(lp->gid, queue_time, lp);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
643
    m_out = (lsm_message_t*)tw_event_data(e);
644

645
    memcpy(m_out, m_in, sizeof(*m_in));
646
647
648
649
650
651
652
653
654
    if (m_out->event == LSM_WRITE_REQUEST)
    {
        m_out->event = LSM_WRITE_COMPLETION;
    }
    else
    {
        m_out->event = LSM_READ_COMPLETION;
    }

655
656
    m_out->prio = m_in->prio;

657
658
659
660
661
662
663
664
665
666
667
668
    tw_event_send(e);

    return;
}


/*
 * handle_rev_io_request
 *   - handle reversing the io request
 */
static void handle_rev_io_request(lsm_state_t *ns,
                                  tw_bf *b,
669
                                  lsm_message_data_t *data,
670
671
672
673
674
                                  lsm_message_t *m_in,
                                  tw_lp *lp)
{
    lsm_stats_t *stat;
    
675
    stat = find_stats(data->category, ns);
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694

    ns->next_idle = m_in->prev_idle;
    *stat = m_in->prev_stat;
    ns->current_object = m_in->prev_object;
    ns->current_offset = m_in->prev_offset;

    return;
}

/*
 * handle_io_completion
 *   - handle IO completion events
 *   - invoke the callers original completion event: a new event is sent to
 *     the LP recorded in the callback header, with rc set to 0
 *   - when the scheduler is enabled, lets it dispatch the next queued op
 */
static void handle_io_completion (lsm_state_t *ns,
                                  tw_bf *b,
                                  lsm_message_t *m_in,
                                  tw_lp *lp)
{
    SANITY_CHECK_CB(&m_in->cb.info, lsm_return_t);

    tw_event * e = tw_event_new(m_in->cb.h.src, codes_local_latency(lp), lp);
    void * m = tw_event_data(e);

    GET_INIT_CB_PTRS(&m_in->cb, m, lp->gid, h, tag, rc, lsm_return_t);

    /* no failures to speak of yet */
    rc->rc = 0;

    tw_event_send(e);

    // continue the loop
    if (ns->use_sched)
        handle_io_sched_compl(ns, b, m_in, lp);
}

/*
 * handle_rev_io_completion
 *   - reverse io completion event
 *   - undoes the scheduler step first (when the scheduler is enabled), then
 *     the codes_local_latency call made by handle_io_completion
 */
static void handle_rev_io_completion (lsm_state_t *ns,
                                      tw_bf *b,
                                      lsm_message_t *m_in,
                                      tw_lp *lp)
{
    if (ns->use_sched)
        handle_rev_io_sched_compl(ns, b, m_in, lp);

    codes_local_latency_reverse(lp);
}

/*
 * find_stats
 *   - returns the statistics record for `category` in this LP's state
 *   - slots are scanned in order; the first unnamed slot reached is claimed
 *     for the category, a slot with a matching name is reused
 *   - asserts when all CATEGORY_MAX slots belong to other categories
 */
static lsm_stats_t *find_stats(const char* category, lsm_state_t *ns)
{
    int slot = 0;
    int claim = 0;

    while (slot < CATEGORY_MAX)
    {
        const char *name = ns->lsm_stats_array[slot].category;

        if (name[0] == '\0')
        {
            /* free slot: this category has not been seen before */
            claim = 1;
            break;
        }
        if (strcmp(category, name) == 0)
            break;
        slot++;
    }
    assert(slot < CATEGORY_MAX);

    if (claim)
        strcpy(ns->lsm_stats_array[slot].category, category);

    return &ns->lsm_stats_array[slot];
}

/*
 * write_stats
 *   - formats one statistics record as a tab-separated line and hands it to
 *     lp_io_write under the identifier "lsm-category-<category>"
 *   - asserts that lp_io_write reports success (0)
 */
static void write_stats(tw_lp* lp, lsm_stats_t* stat)
{
    char key[32];
    char line[1024];

    sprintf(key, "lsm-category-%s", stat->category);
    sprintf(line, "lp:%ld\twrite_count:%ld\twrite_bytes:%ld\twrite_seeks:%ld\twrite_time:%f\t"
        "read_count:%ld\tread_bytes:%ld\tread_seeks:%ld\tread_time:%f\n",
        (long)lp->gid,
        stat->write_count, stat->write_bytes, stat->write_seeks, stat->write_time,
        stat->read_count, stat->read_bytes, stat->read_seeks, stat->read_time);

    int status = lp_io_write(lp->gid, key, strlen(line), line);
    assert(status == 0);
}

788
void lsm_register(void)
789
790
791
792
793
794
{
    uint32_t h1=0, h2=0;

    bj_hashlittle2("localstorage", strlen("localstorage"), &h1, &h2);
    lsm_magic = h1+h2;

795
796
797
798
    lp_type_register(LSM_NAME, &lsm_lp);
}

// read the configuration file for a given annotation
799
static void read_config(ConfigHandle *ch, char const * anno, disk_model_t *model)
800
801
802
803
804
805
806
807
{
    char       **values;
    size_t       length; 
    int          rc;
    // request sizes
    rc = configuration_get_multivalue(ch, LSM_NAME, "request_sizes", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
808
    model->request_sizes = (unsigned int*)malloc(sizeof(int)*length);
809
810
811
812
813
814
815
816
817
818
819
820
    assert(model->request_sizes);
    model->bins = length;
    for (int i = 0; i < length; i++)
    {
        model->request_sizes[i] = atoi(values[i]);
    }
    free(values);

    // write rates
    rc = configuration_get_multivalue(ch, LSM_NAME, "write_rates", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
821
    model->write_rates = (double*)malloc(sizeof(double)*length);
822
823
824
825
826
827
828
829
830
831
832
833
    assert(model->write_rates);
    assert(length == model->bins);
    for (int i = 0; i < length; i++)
    {
        model->write_rates[i] = strtod(values[i], NULL);
    }
    free(values);

    // read rates
    rc = configuration_get_multivalue(ch, LSM_NAME, "read_rates", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
834
    model->read_rates = (double*)malloc(sizeof(double)*length);
835
836
837
838
839
840
841
842
843
844
845
846
    assert(model->read_rates);
    assert(model->bins == length);
    for (int i = 0; i < length; i++)
    {
        model->read_rates[i] = strtod(values[i], NULL);
    }
    free(values);

    // write overheads
    rc = configuration_get_multivalue(ch, LSM_NAME, "write_overheads", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
847
    model->write_overheads = (double*)malloc(sizeof(double)*length);
848
849
850
851
852
853
854
855
856
857
858
859
    assert(model->write_overheads);
    assert(model->bins == length);
    for (int i = 0; i < length; i++)
    {
        model->write_overheads[i] = strtod(values[i], NULL);
    }
    free(values);

    // read overheades
    rc = configuration_get_multivalue(ch, LSM_NAME, "read_overheads", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
860
    model->read_overheads = (double*)malloc(sizeof(double)*length);
861
862
863
864
865
866
867
868
869
870
871
872
    assert(model->read_overheads);
    assert(model->bins == length);
    for (int i = 0; i < length; i++)
    {
        model->read_overheads[i] = strtod(values[i], NULL);
    }
    free(values);

    // write seek latency
    rc = configuration_get_multivalue(ch, LSM_NAME, "write_seeks", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
873
    model->write_seeks = (double*)malloc(sizeof(double)*length);
874
875
876
877
878
879
880
881
882
883
884
885
    assert(model->write_seeks);
    assert(model->bins == length);
    for (int i = 0; i < length; i++)
    {
        model->write_seeks[i] = strtod(values[i], NULL);
    }
    free(values);

    // read seek latency
    rc = configuration_get_multivalue(ch, LSM_NAME, "read_seeks", anno,
            &values,&length);
    assert(rc == 1);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
886
    model->read_seeks = (double*)malloc(sizeof(double)*length);
887
888
889
890
891
892
893
    assert(model->read_seeks);
    assert(model->bins == length);
    for (int i = 0; i < length; i++)
    {
        model->read_seeks[i] = strtod(values[i], NULL);
    }
    free(values);
894
895
896
897
898

    // scheduling parameters (this can fail)
    configuration_get_value_int(ch, LSM_NAME, "enable_scheduler", anno,
            &model->use_sched);
    assert(model->use_sched >= 0);
899
900
901
902
}

void lsm_configure(void)
{
903
904
905
906
907
    /* check and see if any lsm LPs are being used - otherwise,
     * skip the config */
    if (0 == codes_mapping_get_lp_count(NULL, 0, LSM_NAME, NULL, 1))
        return;

908
909
    anno_map = codes_mapping_get_lp_anno_map(LSM_NAME);
    assert(anno_map);
Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
910
    models_anno = (disk_model_t*)malloc(anno_map->num_annos * sizeof(*models_anno));
911
912

    // read the configuration for unannotated entries 
913
    if (anno_map->has_unanno_lp > 0){
914
915
916
917
        read_config(&config, NULL, &model_unanno);
    }

    for (uint64_t i = 0; i < anno_map->num_annos; i++){
918
        char const * anno = anno_map->annotations[i].ptr;
919
920
        read_config(&config, anno, &models_anno[i]);
    }
921
922
923
924
925
926
927
928
929
930
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */