Commit 6d96c9ba authored by Dries Kimpe's avatar Dries Kimpe
Browse files

Fix BMI memcache issues

Patch from Jingwang Zhang.

Here is our use case, and we are using the BMI code in OrangeFS 2.8.6:

1.       Malloc a buffer and write something to it.

2.       Send the buffer using BMI routines.

3.       Free the buffer and goto step 1

And here we found the following problem: We found that the messages
captured using ibdump is corrupt in iteration 2. It became a mixture of
data from iteration 1 and iteration 2.

Here is some analysis we did: We noticed that when the data corruption
occurs, the buffer always point to the same virtual address. And after
checking with the BMI code, we found that the memcache code will keep
the buffer registered(to ibverbs) and use virtual address to determine
whether a registered buffer could be reused or not later.

However I think the memcache shouldn't keep the buffer registered,
because that the user might free this buffer, and when the user did free
and re-allocate the buffer, there might be a false match which might
lead to data corruption.

So at first, we tested the code with "define ENABLE_MEMCACHE 0" to
disable the memcache. And then the test passed, so it is proven that the
data corruption is caused by memcache. However, performance will be
affected if the memcache is disabled completely.

Finally we formatted the attached patch to solve the problem. It fixes
the broken code in the clauses when memcache is disabled. And it
deregister the buffer whenever its use-count drops to 0 and register it
when it is used again.
parent f602ff4b
......@@ -144,6 +144,7 @@ memcache_memalloc(void *md, bmi_size_t len, int eager_limit)
qlist_del(&c->list);
qlist_add_tail(&c->list, &memcache_device->list);
++c->count;
if (c->count == 1) memcache_device->mem_register(c);
buf = c->buf;
gen_mutex_unlock(&memcache_device->mutex);
goto out;
......@@ -166,6 +167,7 @@ memcache_memalloc(void *md, bmi_size_t len, int eager_limit)
c = memcache_lookup_cover(memcache_device, buf, len);
if (c) {
++c->count;
if (c->count == 1) memcache_device->mem_register(c);
debug(4, "%s: reuse reg, buf %p, count %d", __func__, c->buf,
c->count);
} else {
......@@ -207,6 +209,7 @@ memcache_memfree(void *md, void *buf, bmi_size_t len)
__func__, c->buf, lld(c->len), c->count);
/* cache it */
--c->count;
if (c->count == 0) memcache_device->mem_deregister(c);
qlist_del(&c->list);
qlist_add(&c->list, &memcache_device->free_chunk_list);
gen_mutex_unlock(&memcache_device->mutex);
......@@ -238,6 +241,7 @@ memcache_register(void *md, ib_buflist_t *buflist)
buflist->len[i]);
if (c) {
++c->count;
if (c->count == 1) memcache_device->mem_register(c);
debug(2, "%s: hit [%d] %p len %lld (via %p len %lld) refcnt now %d",
__func__, i, buflist->buf.send[i], lld(buflist->len[i]), c->buf,
lld(c->len), c->count);
......@@ -257,10 +261,9 @@ memcache_register(void *md, ib_buflist_t *buflist)
}
buflist->memcache[i] = c;
#else
memcache_entry_t cp = bmi_ib_malloc(sizeof(*cp));
memcache_entry_t *cp = bmi_ib_malloc(sizeof(*cp));
cp->buf = buflist->buf.recv[i];
cp->len = buflist->len[i];
cp->type = type;
ret = memcache_device->mem_register(cp);
if (ret) {
free(cp);
......@@ -284,6 +287,7 @@ void memcache_preregister(void *md, const void *buf, bmi_size_t len,
memcache_device_t *memcache_device = md;
memcache_entry_t *c;
return ; // Can not do this any more.
gen_mutex_lock(&memcache_device->mutex);
c = memcache_lookup_cover(memcache_device, buf, len);
if (c) {
......@@ -316,6 +320,7 @@ memcache_deregister(void *md, ib_buflist_t *buflist)
#if ENABLE_MEMCACHE
memcache_entry_t *c = buflist->memcache[i];
--c->count;
if (c->count == 0) memcache_device->mem_deregister(c);
debug(2,
"%s: dec refcount [%d] %p len %lld (via %p len %lld) refcnt now %d",
__func__, i, buflist->buf.send[i], lld(buflist->len[i]),
......@@ -357,12 +362,12 @@ void memcache_shutdown(void *md)
gen_mutex_lock(&memcache_device->mutex);
qlist_for_each_entry_safe(c, cn, &memcache_device->list, list) {
memcache_device->mem_deregister(c);
if (c->count > 0) memcache_device->mem_deregister(c);
qlist_del(&c->list);
free(c);
}
qlist_for_each_entry_safe(c, cn, &memcache_device->free_chunk_list, list) {
memcache_device->mem_deregister(c);
if (c->count > 0) memcache_device->mem_deregister(c);
qlist_del(&c->list);
free(c->buf);
free(c);
......@@ -384,7 +389,6 @@ void memcache_cache_flush(void *md)
qlist_for_each_entry_safe(c, cn, &memcache_device->list, list) {
debug(4, "%s: list c->count %x c->buf %p", __func__, c->count, c->buf);
if (c->count == 0) {
memcache_device->mem_deregister(c);
qlist_del(&c->list);
free(c);
}
......@@ -393,7 +397,6 @@ void memcache_cache_flush(void *md)
debug(4, "%s: free list c->count %x c->buf %p", __func__,
c->count, c->buf);
if (c->count == 0) {
memcache_device->mem_deregister(c);
qlist_del(&c->list);
free(c->buf);
free(c);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment