remi-client.cpp 15.2 KB
Newer Older
Matthieu Dorier's avatar
Matthieu Dorier committed
1 2 3 4 5
/*
 * (C) 2018 The University of Chicago
 * 
 * See COPYRIGHT in top-level directory.
 */
6 7 8 9
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
Matthieu Dorier's avatar
Matthieu Dorier committed
10
#include <sys/mman.h>
11 12 13
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
14 15
#include <abt-io.h>
#include <uuid/uuid.h>
Matthieu Dorier's avatar
Matthieu Dorier committed
16
#include <thallium.hpp>
17
#include <thallium/serialization/stl/pair.hpp>
18
#include "uuid-util.hpp"
19
#include "fs-util.hpp"
Matthieu Dorier's avatar
Matthieu Dorier committed
20
#include "remi/remi-client.h"
Matthieu Dorier's avatar
Matthieu Dorier committed
21
#include "remi-fileset.hpp"
22

Matthieu Dorier's avatar
Matthieu Dorier committed
23 24 25 26 27 28 29
namespace tl = thallium;

struct remi_client {

    margo_instance_id    m_mid = MARGO_INSTANCE_NULL;
    tl::engine*          m_engine = nullptr;
    uint64_t             m_num_providers = 0;
30 31 32 33 34
    tl::remote_procedure m_migrate_start_rpc;
    tl::remote_procedure m_migrate_mmap_rpc;
    tl::remote_procedure m_migrate_write_rpc;
    tl::remote_procedure m_migrate_end_rpc;
    abt_io_instance_id   m_abtio = ABT_IO_INSTANCE_NULL;
Matthieu Dorier's avatar
Matthieu Dorier committed
35

36
    remi_client(tl::engine* e, abt_io_instance_id abtio)
Matthieu Dorier's avatar
Matthieu Dorier committed
37
    : m_engine(e)
38 39 40 41 42
    , m_migrate_start_rpc(m_engine->define("remi_migrate_start"))
    , m_migrate_mmap_rpc(m_engine->define("remi_migrate_mmap"))
    , m_migrate_write_rpc(m_engine->define("remi_migrate_write"))
    , m_migrate_end_rpc(m_engine->define("remi_migrate_end"))
    , m_abtio(abtio) {}
Matthieu Dorier's avatar
Matthieu Dorier committed
43 44 45 46 47 48 49 50 51 52 53 54 55

};

struct remi_provider_handle : public tl::provider_handle {

    remi_client_t m_client    = nullptr;
    uint64_t      m_ref_count = 0;
    
    template<typename ... Args>
    remi_provider_handle(Args&&... args)
    : tl::provider_handle(std::forward<Args>(args)...) {}
};

56 57 58 59
/**
 * Creates a REMI client on top of an existing margo instance.
 *
 * @param mid    margo instance wrapped by the client's thallium engine.
 * @param abtio  ABT-IO instance stored in the client (may be ABT_IO_INSTANCE_NULL).
 * @param client output; receives the newly created client.
 *
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if client is a null pointer.
 */
extern "C" int remi_client_init(
        margo_instance_id mid,
        abt_io_instance_id abtio,
        remi_client_t* client)
{
    // nothing can be returned to the caller without an output location
    if(client == nullptr)
        return REMI_ERR_INVALID_ARG;

    auto theEngine = new tl::engine(mid);
    remi_client_t theClient = nullptr;
    try {
        theClient = new remi_client(theEngine, abtio);
    } catch(...) {
        // do not leak the engine if the RPCs could not be defined
        delete theEngine;
        throw;
    }
    theClient->m_mid = mid;
    *client = theClient;
    return REMI_SUCCESS;
}

/**
 * Destroys a client and the thallium engine it owns.
 *
 * @param client client to destroy; REMI_CLIENT_NULL is accepted and ignored.
 *
 * @return REMI_SUCCESS in all cases.
 */
extern "C" int remi_client_finalize(remi_client_t client)
{
    if(client != REMI_CLIENT_NULL) {
        // the engine is released first, then the client object itself
        delete client->m_engine;
        delete client;
    }
    return REMI_SUCCESS;
}

77 78 79 80 81 82 83 84 85 86 87 88
/**
 * Replaces the ABT-IO instance stored in a client.
 *
 * @param client client to update.
 * @param abtio  ABT-IO instance to store.
 *
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if client is null.
 */
extern "C" int remi_client_set_abt_io_instance(
        remi_client_t client,
        abt_io_instance_id abtio)
{
    if(!client)
        return REMI_ERR_INVALID_ARG;

    client->m_abtio = abtio;
    return REMI_SUCCESS;
}

89 90 91 92 93 94
/**
 * Creates a provider handle targeting a given address and provider id.
 *
 * The new handle starts with a reference count of 1 and is counted in the
 * client's m_num_providers.
 *
 * @param client      client the handle is attached to.
 * @param addr        address of the target; wrapped in a tl::endpoint.
 * @param provider_id id of the target provider.
 * @param handle      output; receives the new handle.
 *
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if client or handle is null.
 */
extern "C" int remi_provider_handle_create(
        remi_client_t client,
        hg_addr_t addr,
        uint16_t provider_id,
        remi_provider_handle_t* handle)
{
    if(client == REMI_CLIENT_NULL)
        return REMI_ERR_INVALID_ARG;
    // check the output pointer before allocating, so nothing leaks
    if(handle == nullptr)
        return REMI_ERR_INVALID_ARG;

    auto theHandle = new remi_provider_handle(
            tl::endpoint(*(client->m_engine), addr, false), provider_id);
    theHandle->m_client    = client;
    theHandle->m_ref_count = 1;
    *handle = theHandle;
    client->m_num_providers += 1;
    return REMI_SUCCESS;
}

/**
 * Adds one reference to a provider handle.
 *
 * @param handle handle whose reference count is incremented.
 *
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if handle is null.
 */
extern "C" int remi_provider_handle_ref_incr(remi_provider_handle_t handle)
{
    if(handle != REMI_PROVIDER_HANDLE_NULL) {
        ++handle->m_ref_count;
        return REMI_SUCCESS;
    }
    return REMI_ERR_INVALID_ARG;
}

/**
 * Drops one reference to a provider handle.
 *
 * When the count reaches zero the handle is removed from its client's
 * provider count and destroyed.
 *
 * @param handle handle to release; REMI_PROVIDER_HANDLE_NULL is ignored.
 *
 * @return REMI_SUCCESS in all cases.
 */
extern "C" int remi_provider_handle_release(remi_provider_handle_t handle)
{
    if(handle != REMI_PROVIDER_HANDLE_NULL) {
        const bool lastReference = (--handle->m_ref_count == 0);
        if(lastReference) {
            --handle->m_client->m_num_providers;
            delete handle;
        }
    }
    return REMI_SUCCESS;
}

/**
 * Asks the margo instance at the given address to shut down.
 *
 * @param client client whose margo instance issues the request.
 * @param addr   address of the remote instance.
 *
 * @return REMI_ERR_INVALID_ARG if client is null; otherwise the value
 *         returned by margo_shutdown_remote_instance.
 */
extern "C" int remi_shutdown_service(remi_client_t client, hg_addr_t addr)
{
    // a null client would otherwise be dereferenced below
    if(client == REMI_CLIENT_NULL)
        return REMI_ERR_INVALID_ARG;
    return margo_shutdown_remote_instance(client->m_mid, addr);
}
Matthieu Dorier's avatar
Matthieu Dorier committed
130

131 132 133 134 135
/**
 * Fileset walkthrough callback: records one file name.
 *
 * @param filename name to record.
 * @param uargs    pointer to the std::set<std::string> receiving the name.
 */
static void list_existing_files(const char* filename, void* uargs) {
    auto& collected = *static_cast<std::set<std::string>*>(uargs);
    collected.insert(std::string(filename));
}

136 137 138 139 140 141 142 143 144 145 146 147 148 149
// Forward declaration; defined later in this file.
// Migrates the given files by memory-mapping them and exposing the mappings
// for a bulk transfer. `files` holds names relative to the fileset's root,
// `remote_root` is the root to use on the destination, and `status` is an
// output for the user status reported by the remote side.
static int migrate_using_mmap(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status);

// Forward declaration; defined later in this file.
// Migrates the given files by reading them in chunks and sending each chunk
// through the migrate_write RPC, using ABT-IO for the reads when the client
// has an ABT-IO instance. Parameters have the same meaning as for
// migrate_using_mmap.
static int migrate_using_abtio(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status);

Matthieu Dorier's avatar
Matthieu Dorier committed
150
extern "C" int remi_fileset_migrate(
151
        remi_provider_handle_t ph,
Matthieu Dorier's avatar
Matthieu Dorier committed
152
        remi_fileset_t fileset,
153
        const char* remote_root,
154 155
        int remove_source,
        int mode,
156
        int* status)
Matthieu Dorier's avatar
Matthieu Dorier committed
157
{
158
    int ret;
159 160 161 162 163 164 165 166 167 168 169 170

    if(ph == REMI_PROVIDER_HANDLE_NULL
    || fileset == REMI_FILESET_NULL
    || remote_root == NULL)
        return REMI_ERR_INVALID_ARG;
    if(remote_root[0] != '/')
        return REMI_ERR_INVALID_ARG;

    std::string theRemoteRoot(remote_root);
    if(theRemoteRoot[theRemoteRoot.size()-1] != '/')
        theRemoteRoot += "/";

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
    // find the set of files to migrate from the fileset
    std::set<std::string> files;
    remi_fileset_walkthrough(fileset, list_existing_files,
            static_cast<void*>(&files));

    if(mode == REMI_USE_MMAP) {
        ret = migrate_using_mmap(ph, fileset, files, theRemoteRoot.c_str(), status);
    } else {
        ret = migrate_using_abtio(ph, fileset, files, theRemoteRoot.c_str(), status);
    }

    if(ret != REMI_SUCCESS) {
        return ret;
    }

    if(*status != 0) {
        return REMI_ERR_USER;
    }

    if(remove_source == REMI_REMOVE_SOURCE) {
        for(auto& filename : files) {
            auto theFilename = fileset->m_root + filename;
            remove(theFilename.c_str());
        }
        for(auto& dirname : fileset->m_directories) {
            auto theDirname = fileset->m_root + dirname;
            removeRec(theDirname);
        }
    }

    return REMI_SUCCESS;
}

/**
 * Migrates files by memory-mapping them and exposing the mappings as a
 * single bulk handle that is sent with the migrate_mmap RPC.
 *
 * Sequence: map every non-empty file, send migrate_start with the file
 * sizes and modes, send migrate_mmap with the bulk handle, then send
 * migrate_end.
 *
 * @param ph          handle of the destination provider.
 * @param fileset     fileset being migrated; its root, files and directories
 *                    are temporarily replaced while migrate_start is sent and
 *                    restored right after, whatever the RPC returned.
 * @param files       names of the files to send, relative to fileset->m_root.
 * @param remote_root root to use on the destination.
 * @param status      output (must not be null); receives the user status when
 *                    an RPC reports REMI_ERR_USER, and 0 when migrate_end
 *                    reports anything else.
 *
 * @return REMI_ERR_UNKNOWN_FILE if a file cannot be opened, REMI_ERR_IO if
 *         fstat fails, REMI_ERR_ALLOCATION if mmap fails, otherwise the error
 *         code of the first failing RPC or of migrate_end.
 */
int migrate_using_mmap(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status)
{
    // mapped segments (non-empty files only), plus size and mode of every file
    std::vector<std::pair<void*,std::size_t>> theData;
    std::vector<std::size_t> theSizes;
    std::vector<mode_t> theModes;

    // unmaps everything mapped so far
    auto cleanup = [&theData]() {
        for(auto& seg : theData) {
            munmap(seg.first, seg.second);
        }
    };

    for(auto& filename : files) {
        // compose full name
        auto theFilename = fileset->m_root + filename;
        // open file
        int fd = open(theFilename.c_str(), O_RDONLY, 0);
        if(fd == -1) {
            cleanup();
            return REMI_ERR_UNKNOWN_FILE;
        }
        // get file size and mode
        struct stat st;
        if(0 != fstat(fd, &st)) {
            close(fd);
            cleanup();
            return REMI_ERR_IO;
        }
        auto size = st.st_size;
        theSizes.push_back(size);
        auto mode = st.st_mode;
        theModes.push_back(mode);
        // empty files cannot be mapped; they are described by size/mode only
        if(size == 0) {
            close(fd);
            continue;
        }
        // map the file; the mapping stays valid after the descriptor is closed
        void* segment = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, 0);
        close(fd);
        // mmap reports failure with MAP_FAILED, not NULL
        if(segment == MAP_FAILED) {
            cleanup();
            return REMI_ERR_ALLOCATION;
        }
        // indicate sequential access
        madvise(segment, size, MADV_SEQUENTIAL);
        // insert the segment
        theData.emplace_back(segment, size);
    }

    // expose the segments for bulk operations
    tl::bulk localBulk;
    if(theData.size() != 0)
        localBulk = ph->m_client->m_engine->expose(theData, tl::bulk_mode::read_only);

    // temporarily turn the fileset into one where m_directories is empty,
    // the file names are the resolved ones, and the root is the remote root
    auto tmp_files = std::move(fileset->m_files);
    auto tmp_dirs  = std::move(fileset->m_directories);
    auto tmp_root  = std::move(fileset->m_root);
    fileset->m_files = files;
    fileset->m_directories = decltype(fileset->m_directories)();
    fileset->m_root = remote_root;

    // call migrate_start RPC
    // the response is in the form <errorcode, userstatus, uuid>
    std::tuple<int32_t, int32_t, uuid> start_call_result
        = ph->m_client->m_migrate_start_rpc.on(*ph)(*fileset, theSizes, theModes);

    // put back the fileset's original members before looking at the result,
    // so that a failed start does not leave the caller's fileset modified
    fileset->m_root        = std::move(tmp_root);
    fileset->m_files       = std::move(tmp_files);
    fileset->m_directories = std::move(tmp_dirs);

    int ret = std::get<0>(start_call_result);
    if(ret != REMI_SUCCESS) {
        cleanup();
        if(ret == REMI_ERR_USER)
            *status = std::get<1>(start_call_result);
        return ret;
    }
    auto& operation_id = std::get<2>(start_call_result);

    // send the migrate_mmap RPC
    ret = ph->m_client->m_migrate_mmap_rpc.on(*ph)(operation_id, localBulk);
    if(ret != REMI_SUCCESS) {
        cleanup();
        return ret;
    }

    // xfer went ok, now send migrate_end rpc.
    // the response is in the form <errorcode, userstatus>
    std::pair<int32_t, int32_t> end_call_result =
        ph->m_client->m_migrate_end_rpc.on(*ph)(operation_id);

    cleanup();

    // report the outcome of migrate_end, as migrate_using_abtio does
    ret = end_call_result.first;
    if(ret == REMI_ERR_USER) {
        *status = end_call_result.second;
    } else {
        *status = 0;
    }

    return ret;
}

/**
 * Migrates files by reading them in chunks of at most fileset->m_xfer_size
 * bytes and sending each chunk with the migrate_write RPC.
 *
 * Sequence: open and stat every file, send migrate_start with the file sizes
 * and modes, send the chunks, then send migrate_end. When the client has no
 * ABT-IO instance, chunks are read with read() and sent synchronously.
 * Otherwise the read of chunk k+1 (abt_io_pread) is overlapped with the
 * asynchronous RPC sending chunk k.
 *
 * The transfer stops at the first failing read or RPC; migrate_end is not
 * sent in that case.
 *
 * @param ph          handle of the destination provider.
 * @param fileset     fileset being migrated; its root, files and directories
 *                    are temporarily replaced while migrate_start is sent and
 *                    restored right after, whatever the RPC returned.
 * @param files       names of the files to send, relative to fileset->m_root.
 * @param remote_root root to use on the destination.
 * @param status      output (must not be null); receives the user status when
 *                    an RPC reports REMI_ERR_USER, and 0 when migrate_end
 *                    reports anything else.
 *
 * @return REMI_ERR_UNKNOWN_FILE if a file cannot be opened, REMI_ERR_IO if
 *         fstat fails or a read fails or returns fewer bytes than requested,
 *         otherwise the error code of the first failing RPC or of migrate_end.
 */
int migrate_using_abtio(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status)
{
    // descriptors, sizes and modes, indexed in the iteration order of `files`
    std::vector<int> openedFileDescriptors;
    std::vector<std::size_t> theSizes;
    std::vector<mode_t> theModes;

    // closes every descriptor opened so far
    auto cleanup = [&openedFileDescriptors]() {
        for(auto& fd : openedFileDescriptors) {
            close(fd);
        }
    };

    for(auto& filename : files) {
        // compose full name
        auto theFilename = fileset->m_root + filename;
        // open file
        int fd = open(theFilename.c_str(), O_RDONLY, 0);
        if(fd == -1) {
            cleanup();
            return REMI_ERR_UNKNOWN_FILE;
        }
        openedFileDescriptors.push_back(fd);
        // get file size and mode
        struct stat st;
        if(0 != fstat(fd, &st)) {
            cleanup();
            return REMI_ERR_IO;
        }
        auto size = st.st_size;
        theSizes.push_back(size);
        auto mode = st.st_mode;
        theModes.push_back(mode);
    }

    // temporarily turn the fileset into one where m_directories is empty,
    // the file names are the resolved ones, and the root is the remote root
    auto tmp_files = std::move(fileset->m_files);
    auto tmp_dirs  = std::move(fileset->m_directories);
    auto tmp_root  = std::move(fileset->m_root);
    fileset->m_files = files;
    fileset->m_directories = decltype(fileset->m_directories)();
    fileset->m_root = remote_root;

    // call migrate_start RPC
    // the response is in the form <errorcode, userstatus, uuid>
    std::tuple<int32_t, int32_t, uuid> start_call_result
        = ph->m_client->m_migrate_start_rpc.on(*ph)(*fileset, theSizes, theModes);

    // put back the fileset's original members before looking at the result,
    // so that a failed start does not leave the caller's fileset modified
    fileset->m_root        = std::move(tmp_root);
    fileset->m_files       = std::move(tmp_files);
    fileset->m_directories = std::move(tmp_dirs);

    int ret = std::get<0>(start_call_result);
    if(ret != REMI_SUCCESS) {
        cleanup();
        if(ret == REMI_ERR_USER)
            *status = std::get<1>(start_call_result);
        return ret;
    }

    auto& operation_id = std::get<2>(start_call_result);

    auto abtio = ph->m_client->m_abtio;

    // send a series of migrate_write RPC, pipelined with abt-io pread calls
    size_t max_chunk_size = fileset->m_xfer_size;

    if(abtio == ABT_IO_INSTANCE_NULL) {

        // no ABT-IO instance: read and send each chunk synchronously
        std::vector<char> buffer(max_chunk_size);
        for(uint32_t i = 0; i < files.size() && ret == REMI_SUCCESS; i++) {
            size_t remaining_size = theSizes[i];
            int fd = openedFileDescriptors[i];
            size_t current_offset = 0;
            while(remaining_size != 0) {
                size_t chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
                buffer.resize(chunk_size);
                auto sizeRead = read(fd, buffer.data(), chunk_size);
                // a failed or short read would send garbage to the destination
                if(sizeRead < 0 || static_cast<size_t>(sizeRead) != chunk_size) {
                    ret = REMI_ERR_IO;
                    break;
                }
                ret = ph->m_client->m_migrate_write_rpc.on(*ph)(operation_id, i, current_offset, buffer);
                // stop at the first failed write instead of sending the rest
                if(ret != REMI_SUCCESS)
                    break;
                current_offset += chunk_size;
                remaining_size -= chunk_size;
            }
        }

    } else {

        std::vector<char> current_buffer(max_chunk_size);  // buffer for ABT-IO to place data
        std::vector<char> previous_buffer(max_chunk_size); // buffer to be sent

        for(uint32_t i = 0; i < files.size(); i++) {
            size_t current_chunk_offset  = 0; // offset of the chunk ABT-IO reads in this iteration
            size_t previous_chunk_offset = 0; // offset of the chunk sent in this iteration
            size_t current_chunk_size    = 0; // size of the chunk ABT-IO reads in this iteration

            size_t remaining_size = theSizes[i];
            int fd = openedFileDescriptors[i];

            // read first chunk
            current_chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
            current_buffer.resize(current_chunk_size);
            auto firstRead = abt_io_pread(abtio, fd, current_buffer.data(), current_chunk_size, current_chunk_offset);
            if(firstRead < 0 || static_cast<size_t>(firstRead) != current_chunk_size) {
                ret = REMI_ERR_IO;
                break;
            }
            // the chunk just read becomes the one to send
            previous_chunk_offset = current_chunk_offset;
            std::swap(current_buffer, previous_buffer);
            current_chunk_offset += current_chunk_size;
            remaining_size -= current_chunk_size;

            // read remaining chunks and send in parallel
            bool can_stop = false;
            while(!can_stop) {
                // issue RPC for the previous chunk
                auto async_req = ph->m_client->m_migrate_write_rpc.on(*ph).async(operation_id, i, previous_chunk_offset, previous_buffer);
                // read the current chunk while the RPC is in flight
                bool read_failed = false;
                if(remaining_size == 0) {
                    can_stop = true;
                } else {
                    current_chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
                    current_buffer.resize(current_chunk_size);
                    auto nextRead = abt_io_pread(abtio, fd, current_buffer.data(), current_chunk_size, current_chunk_offset);
                    read_failed = (nextRead < 0 || static_cast<size_t>(nextRead) != current_chunk_size);
                }
                // always wait for the in-flight RPC, even if the read failed
                ret = async_req.wait();
                if(ret == REMI_SUCCESS && read_failed)
                    ret = REMI_ERR_IO;
                if(ret != REMI_SUCCESS)
                    can_stop = true;
                if(!can_stop) {
                    previous_chunk_offset = current_chunk_offset;
                    current_chunk_offset += current_chunk_size;
                    remaining_size -= current_chunk_size;
                    std::swap(current_buffer, previous_buffer);
                }
            }

            if(ret != REMI_SUCCESS) {
                break;
            }
        }

    }

    if(ret != REMI_SUCCESS) {
        cleanup();
        return ret;
    }

    // xfer went ok, now send migrate_end rpc.
    // the response is in the form <errorcode, userstatus>
    std::pair<int32_t, int32_t> end_call_result =
        ph->m_client->m_migrate_end_rpc.on(*ph)(operation_id);

    cleanup();

    ret = end_call_result.first;
    if(ret == REMI_ERR_USER) {
        *status = end_call_result.second;
    } else {
        *status = 0;
    }

    return ret;
}