remi-client.cpp 15 KB
Newer Older
Matthieu Dorier's avatar
Matthieu Dorier committed
1 2 3 4 5
/*
 * (C) 2018 The University of Chicago
 * 
 * See COPYRIGHT in top-level directory.
 */
6 7 8 9
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
Matthieu Dorier's avatar
Matthieu Dorier committed
10
#include <sys/mman.h>
11 12 13
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
14 15
#include <abt-io.h>
#include <uuid/uuid.h>
Matthieu Dorier's avatar
Matthieu Dorier committed
16
#include <thallium.hpp>
17
#include <thallium/serialization/stl/pair.hpp>
18
#include "uuid-util.hpp"
19
#include "fs-util.hpp"
Matthieu Dorier's avatar
Matthieu Dorier committed
20
#include "remi/remi-client.h"
Matthieu Dorier's avatar
Matthieu Dorier committed
21
#include "remi-fileset.hpp"
22

Matthieu Dorier's avatar
Matthieu Dorier committed
23 24 25 26 27 28 29
namespace tl = thallium;

struct remi_client {

    margo_instance_id    m_mid = MARGO_INSTANCE_NULL;
    tl::engine*          m_engine = nullptr;
    uint64_t             m_num_providers = 0;
30 31 32 33 34
    tl::remote_procedure m_migrate_start_rpc;
    tl::remote_procedure m_migrate_mmap_rpc;
    tl::remote_procedure m_migrate_write_rpc;
    tl::remote_procedure m_migrate_end_rpc;
    abt_io_instance_id   m_abtio = ABT_IO_INSTANCE_NULL;
Matthieu Dorier's avatar
Matthieu Dorier committed
35

36
    remi_client(tl::engine* e, abt_io_instance_id abtio)
Matthieu Dorier's avatar
Matthieu Dorier committed
37
    : m_engine(e)
38 39 40 41 42
    , m_migrate_start_rpc(m_engine->define("remi_migrate_start"))
    , m_migrate_mmap_rpc(m_engine->define("remi_migrate_mmap"))
    , m_migrate_write_rpc(m_engine->define("remi_migrate_write"))
    , m_migrate_end_rpc(m_engine->define("remi_migrate_end"))
    , m_abtio(abtio) {}
Matthieu Dorier's avatar
Matthieu Dorier committed
43 44 45 46 47 48 49 50 51 52 53 54 55

};

struct remi_provider_handle : public tl::provider_handle {

    remi_client_t m_client    = nullptr;
    uint64_t      m_ref_count = 0;
    
    template<typename ... Args>
    remi_provider_handle(Args&&... args)
    : tl::provider_handle(std::forward<Args>(args)...) {}
};

56 57 58 59
/**
 * Creates a REMI client.
 *
 * Builds a client-mode Thallium engine on top of the given Margo
 * instance, allocates the client object and records the Margo instance
 * in it.
 *
 * @param mid    Margo instance to wrap
 * @param abtio  ABT-IO instance (may be ABT_IO_INSTANCE_NULL)
 * @param client [out] receives the newly created client
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if client is null
 */
extern "C" int remi_client_init(
        margo_instance_id mid,
        abt_io_instance_id abtio,
        remi_client_t* client)
{
    // reject a null output pointer before allocating anything
    if(client == nullptr)
        return REMI_ERR_INVALID_ARG;
    auto theEngine           = new tl::engine(mid, THALLIUM_CLIENT_MODE);
    remi_client_t theClient  = new remi_client(theEngine, abtio);
    theClient->m_mid         = mid;
    *client = theClient;
    return REMI_SUCCESS;
}

/**
 * Destroys a REMI client.
 *
 * Deletes the client's engine and then the client itself. Passing
 * REMI_CLIENT_NULL is a no-op.
 *
 * @param client client to destroy
 * @return always REMI_SUCCESS
 */
extern "C" int remi_client_finalize(remi_client_t client)
{
    if(client != REMI_CLIENT_NULL) {
        // the engine is released first, then the object that referenced it
        tl::engine* engine = client->m_engine;
        delete engine;
        delete client;
    }
    return REMI_SUCCESS;
}

/**
 * Creates a provider handle for the provider identified by
 * (addr, provider_id).
 *
 * The new handle starts with a reference count of 1 and the client's
 * provider counter is incremented.
 *
 * @param client      client issuing the RPCs
 * @param addr        Mercury address of the provider
 * @param provider_id id of the provider at that address
 * @param handle      [out] receives the newly created handle
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG if client or handle is null
 */
extern "C" int remi_provider_handle_create(
        remi_client_t client,
        hg_addr_t addr,
        uint16_t provider_id,
        remi_provider_handle_t* handle)
{
    if(client == REMI_CLIENT_NULL || handle == nullptr)
        return REMI_ERR_INVALID_ARG;
    auto theHandle = new remi_provider_handle(
            tl::endpoint(*(client->m_engine), addr, false), provider_id);
    theHandle->m_client = client;
    theHandle->m_ref_count = 1;
    *handle = theHandle;
    client->m_num_providers += 1;
    return REMI_SUCCESS;
}

/**
 * Increments the reference count of a provider handle.
 *
 * @param handle handle to retain
 * @return REMI_SUCCESS, or REMI_ERR_INVALID_ARG for a null handle
 */
extern "C" int remi_provider_handle_ref_incr(remi_provider_handle_t handle)
{
    if(handle != REMI_PROVIDER_HANDLE_NULL) {
        ++handle->m_ref_count;
        return REMI_SUCCESS;
    }
    return REMI_ERR_INVALID_ARG;
}

/**
 * Drops one reference to a provider handle.
 *
 * When the count reaches zero the owning client's provider counter is
 * decremented and the handle is deleted. A null handle is accepted and
 * ignored.
 *
 * @param handle handle to release
 * @return always REMI_SUCCESS
 */
extern "C" int remi_provider_handle_release(remi_provider_handle_t handle)
{
    if(handle == REMI_PROVIDER_HANDLE_NULL)
        return REMI_SUCCESS;

    --handle->m_ref_count;
    if(handle->m_ref_count != 0)
        return REMI_SUCCESS;

    // last reference gone: detach from the client, then destroy
    remi_client_t owner = handle->m_client;
    --owner->m_num_providers;
    delete handle;
    return REMI_SUCCESS;
}

/**
 * Asks the Margo instance at the given address to shut down.
 *
 * @param client client whose Margo instance is used to send the request
 * @param addr   address of the remote instance
 * @return REMI_ERR_INVALID_ARG if client is null, otherwise the value
 *         returned by margo_shutdown_remote_instance
 */
extern "C" int remi_shutdown_service(remi_client_t client, hg_addr_t addr)
{
    // same guard as the other entry points taking a client
    if(client == REMI_CLIENT_NULL)
        return REMI_ERR_INVALID_ARG;
    return margo_shutdown_remote_instance(client->m_mid, addr);
}
Matthieu Dorier's avatar
Matthieu Dorier committed
118

119 120 121 122 123
// Callback handed to remi_fileset_walkthrough: records each visited
// file name in the std::set<std::string> that uargs points to.
static void list_existing_files(const char* filename, void* uargs) {
    std::set<std::string>& collected = *static_cast<std::set<std::string>*>(uargs);
    collected.insert(std::string(filename));
}

124 125 126 127 128 129 130 131 132 133 134 135 136 137
// Forward declaration: migrates the given files by memory-mapping them
// and exposing the mappings for a bulk transfer (definition further down).
//   ph          provider handle of the destination
//   fileset     fileset being migrated (its root is used to resolve names)
//   files       file names, relative to the fileset root
//   remote_root destination root sent to the provider
//   status      receives the user status reported by the provider
static int migrate_using_mmap(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status);

// Forward declaration: migrates the given files by reading them in chunks
// (through ABT-IO when the client has an instance, plain read() otherwise)
// and sending each chunk in a write RPC (definition further down).
// Parameters have the same meaning as for migrate_using_mmap.
static int migrate_using_abtio(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status);

Matthieu Dorier's avatar
Matthieu Dorier committed
138
extern "C" int remi_fileset_migrate(
139
        remi_provider_handle_t ph,
Matthieu Dorier's avatar
Matthieu Dorier committed
140
        remi_fileset_t fileset,
141
        const char* remote_root,
142 143
        int remove_source,
        int mode,
144
        int* status)
Matthieu Dorier's avatar
Matthieu Dorier committed
145
{
146
    int ret;
147 148 149 150 151 152 153 154 155 156 157 158

    if(ph == REMI_PROVIDER_HANDLE_NULL
    || fileset == REMI_FILESET_NULL
    || remote_root == NULL)
        return REMI_ERR_INVALID_ARG;
    if(remote_root[0] != '/')
        return REMI_ERR_INVALID_ARG;

    std::string theRemoteRoot(remote_root);
    if(theRemoteRoot[theRemoteRoot.size()-1] != '/')
        theRemoteRoot += "/";

159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
    // find the set of files to migrate from the fileset
    std::set<std::string> files;
    remi_fileset_walkthrough(fileset, list_existing_files,
            static_cast<void*>(&files));

    if(mode == REMI_USE_MMAP) {
        ret = migrate_using_mmap(ph, fileset, files, theRemoteRoot.c_str(), status);
    } else {
        ret = migrate_using_abtio(ph, fileset, files, theRemoteRoot.c_str(), status);
    }

    if(ret != REMI_SUCCESS) {
        return ret;
    }

    if(*status != 0) {
        return REMI_ERR_USER;
    }

    if(remove_source == REMI_REMOVE_SOURCE) {
        for(auto& filename : files) {
            auto theFilename = fileset->m_root + filename;
            remove(theFilename.c_str());
        }
        for(auto& dirname : fileset->m_directories) {
            auto theDirname = fileset->m_root + dirname;
            removeRec(theDirname);
        }
    }

    return REMI_SUCCESS;
}

/**
 * Migrates files by memory-mapping them and exposing the mappings for a
 * single bulk transfer.
 *
 * Steps: map every non-empty file read-only, expose the mappings as one
 * bulk handle, send migrate_start (with the fileset temporarily rewritten
 * to carry the resolved file list and the remote root), send
 * migrate_mmap, then migrate_end. All mappings are released before
 * returning.
 *
 * @param ph          destination provider handle
 * @param fileset     fileset being migrated; restored to its original
 *                    content before this function returns
 * @param files       file names relative to the fileset root
 * @param remote_root destination root sent to the provider
 * @param status      [out] user status from the provider (ignored if null)
 * @return REMI_SUCCESS, REMI_ERR_UNKNOWN_FILE if a file cannot be opened,
 *         REMI_ERR_IO if fstat fails, REMI_ERR_ALLOCATION if mmap fails,
 *         or the error code returned by one of the RPCs
 */
int migrate_using_mmap(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status)
{
    // segments to expose (non-empty files only), plus per-file size and mode
    std::vector<std::pair<void*,std::size_t>> theData;
    std::vector<std::size_t> theSizes;
    std::vector<mode_t> theModes;

    // unmaps everything mapped so far
    auto cleanup = [&theData]() {
        for(auto& seg : theData) {
            munmap(seg.first, seg.second);
        }
    };

    for(auto& filename : files) {
        // compose full name
        auto theFilename = fileset->m_root + filename;
        // open file
        int fd = open(theFilename.c_str(), O_RDONLY, 0);
        if(fd == -1) {
            cleanup();
            return REMI_ERR_UNKNOWN_FILE;
        }
        // get file size and mode
        struct stat st;
        if(0 != fstat(fd, &st)) {
            close(fd);
            cleanup();
            return REMI_ERR_IO;
        }
        auto size = st.st_size;
        theSizes.push_back(size);
        auto mode = st.st_mode;
        theModes.push_back(mode);
        // empty files have nothing to map
        if(size == 0) {
            close(fd);
            continue;
        }
        // map the file; the descriptor is not needed once mmap has returned,
        // whether it succeeded or not
        void* segment = mmap(nullptr, size, PROT_READ, MAP_PRIVATE, fd, 0);
        close(fd);
        // mmap reports failure with MAP_FAILED, not with a null pointer
        if(segment == MAP_FAILED) {
            cleanup();
            return REMI_ERR_ALLOCATION;
        }
        // indicate sequential access (advisory only)
        madvise(segment, size, MADV_SEQUENTIAL);
        // insert the segment
        theData.emplace_back(segment, size);
    }

    // expose the segments for bulk operations
    tl::bulk localBulk;
    if(theData.size() != 0)
        localBulk = ph->m_client->m_engine->expose(theData, tl::bulk_mode::read_only);

    // temporarily turn the fileset into one whose m_directories is empty,
    // whose files are the resolved list and whose root is the remote root
    auto tmp_files = std::move(fileset->m_files);
    auto tmp_dirs  = std::move(fileset->m_directories);
    auto tmp_root  = std::move(fileset->m_root);
    fileset->m_files = files;
    fileset->m_directories = decltype(fileset->m_directories)();
    fileset->m_root = remote_root;

    // call migrate_start RPC
    // the response is in the form <errorcode, userstatus, uuid>
    std::tuple<int32_t, int32_t, uuid> start_call_result
        = ph->m_client->m_migrate_start_rpc.on(*ph)(*fileset, theSizes, theModes);

    // put back the fileset's original members right away so that no
    // return path below can leave the caller's fileset modified
    fileset->m_root        = std::move(tmp_root);
    fileset->m_files       = std::move(tmp_files);
    fileset->m_directories = std::move(tmp_dirs);

    int ret = std::get<0>(start_call_result);
    if(ret != REMI_SUCCESS) {
        cleanup();
        if(ret == REMI_ERR_USER && status != nullptr)
            *status = std::get<1>(start_call_result);
        return ret;
    }
    auto& operation_id = std::get<2>(start_call_result);

    // send the migrate_mmap RPC
    ret = ph->m_client->m_migrate_mmap_rpc.on(*ph)(operation_id, localBulk);
    if(ret != REMI_SUCCESS) {
        cleanup();
        return ret;
    }

    // xfer went ok, now send migrate_end rpc.
    // the response is in the form <errorcode, userstatus>
    std::pair<int32_t, int32_t> end_call_result =
        ph->m_client->m_migrate_end_rpc.on(*ph)(operation_id);

    cleanup();

    // report the outcome of migrate_end, as the chunked-read path does
    ret = end_call_result.first;
    if(status != nullptr) {
        if(ret == REMI_ERR_USER) {
            *status = end_call_result.second;
        } else {
            *status = 0;
        }
    }

    return ret;
}

/**
 * Migrates files by reading them in chunks and sending each chunk in a
 * migrate_write RPC.
 *
 * Steps: open and stat every file, send migrate_start (with the fileset
 * temporarily rewritten to carry the resolved file list and the remote
 * root), stream the content, then send migrate_end. When the client has
 * no ABT-IO instance the files are read with read() and sent
 * synchronously; otherwise the read of chunk k+1 is issued through
 * abt_io_pread while the RPC carrying chunk k is in flight. The chunk
 * size is fileset->m_xfer_size. All descriptors are closed before
 * returning.
 *
 * @param ph          destination provider handle
 * @param fileset     fileset being migrated; restored to its original
 *                    content before this function returns
 * @param files       file names relative to the fileset root
 * @param remote_root destination root sent to the provider
 * @param status      [out] user status from the provider (ignored if null)
 * @return REMI_SUCCESS, REMI_ERR_UNKNOWN_FILE if a file cannot be opened,
 *         REMI_ERR_IO if fstat or a read fails or returns no data while
 *         data is still expected, or the error code returned by one of
 *         the RPCs
 */
int migrate_using_abtio(
        remi_provider_handle_t ph,
        remi_fileset_t fileset,
        const std::set<std::string>& files,
        const std::string& remote_root,
        int* status)
{
    // one descriptor, size and mode per file, in iteration order of `files`
    std::vector<int> openedFileDescriptors;
    std::vector<std::size_t> theSizes;
    std::vector<mode_t> theModes;

    // closes every descriptor opened so far
    auto cleanup = [&openedFileDescriptors]() {
        for(auto& fd : openedFileDescriptors) {
            close(fd);
        }
    };

    for(auto& filename : files) {
        // compose full name
        auto theFilename = fileset->m_root + filename;
        // open file
        int fd = open(theFilename.c_str(), O_RDONLY, 0);
        if(fd == -1) {
            cleanup();
            return REMI_ERR_UNKNOWN_FILE;
        }
        openedFileDescriptors.push_back(fd);
        // get file size and mode
        struct stat st;
        if(0 != fstat(fd, &st)) {
            // fd is already tracked, so cleanup() closes it exactly once
            cleanup();
            return REMI_ERR_IO;
        }
        auto size = st.st_size;
        theSizes.push_back(size);
        auto mode = st.st_mode;
        theModes.push_back(mode);
    }

    // temporarily turn the fileset into one whose m_directories is empty,
    // whose files are the resolved list and whose root is the remote root
    auto tmp_files = std::move(fileset->m_files);
    auto tmp_dirs  = std::move(fileset->m_directories);
    auto tmp_root  = std::move(fileset->m_root);
    fileset->m_files = files;
    fileset->m_directories = decltype(fileset->m_directories)();
    fileset->m_root = remote_root;

    // call migrate_start RPC
    // the response is in the form <errorcode, userstatus, uuid>
    std::tuple<int32_t, int32_t, uuid> start_call_result
        = ph->m_client->m_migrate_start_rpc.on(*ph)(*fileset, theSizes, theModes);

    // put back the fileset's original members right away so that no
    // return path below can leave the caller's fileset modified
    fileset->m_root        = std::move(tmp_root);
    fileset->m_files       = std::move(tmp_files);
    fileset->m_directories = std::move(tmp_dirs);

    int ret = std::get<0>(start_call_result);
    if(ret != REMI_SUCCESS) {
        cleanup();
        if(ret == REMI_ERR_USER && status != nullptr)
            *status = std::get<1>(start_call_result);
        return ret;
    }

    auto& operation_id = std::get<2>(start_call_result);

    auto abtio = ph->m_client->m_abtio;

    // send a series of migrate_write RPC, pipelined with abt-io pread calls
    size_t max_chunk_size = fileset->m_xfer_size;

    if(abtio == ABT_IO_INSTANCE_NULL) {

        std::vector<char> buffer(max_chunk_size);
        for(uint32_t i = 0; i < files.size() && ret == REMI_SUCCESS; i++) {
            size_t remaining_size = theSizes[i];
            int fd = openedFileDescriptors[i];
            size_t current_offset = 0;
            while(remaining_size != 0) {
                size_t chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
                buffer.resize(chunk_size);
                auto sizeRead = read(fd, buffer.data(), chunk_size);
                // an error, or no data while some is still expected (file
                // shrank, or the transfer size is zero), aborts the transfer
                if(sizeRead <= 0) {
                    ret = REMI_ERR_IO;
                    break;
                }
                // only send what was actually read (short reads are legal)
                buffer.resize(static_cast<size_t>(sizeRead));
                ret = ph->m_client->m_migrate_write_rpc.on(*ph)(operation_id, i, current_offset, buffer);
                if(ret != REMI_SUCCESS)
                    break;
                current_offset += static_cast<size_t>(sizeRead);
                remaining_size -= static_cast<size_t>(sizeRead);
            }
        }

    } else {

        std::vector<char> current_buffer(max_chunk_size);  // buffer for ABT-IO to place data
        std::vector<char> previous_buffer(max_chunk_size); // buffer to be sent

        for(uint32_t i = 0; i < files.size(); i++) {
            size_t current_chunk_offset  = 0; // offset of the chunk ABT-IO reads in this iteration
            size_t previous_chunk_offset = 0; // offset of the chunk sent in this iteration

            size_t remaining_size = theSizes[i];
            int fd = openedFileDescriptors[i];
            // read first chunk (an empty file yields an empty chunk, which
            // is still sent once, as before)
            size_t current_chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
            current_buffer.resize(current_chunk_size);
            auto sizeRead = abt_io_pread(abtio, fd, current_buffer.data(), current_chunk_size, current_chunk_offset);
            if(sizeRead < 0 || (sizeRead == 0 && remaining_size != 0)) {
                ret = REMI_ERR_IO;
                break;
            }
            // keep only what was actually read
            current_chunk_size = static_cast<size_t>(sizeRead);
            current_buffer.resize(current_chunk_size);
            // the chunk just read becomes the chunk to send
            previous_chunk_offset = current_chunk_offset;
            std::swap(current_buffer, previous_buffer);
            current_chunk_offset += current_chunk_size;
            remaining_size -= current_chunk_size;

            // read remaining chunks and send in parallel
            bool can_stop = false;
            while(!can_stop) {
                // issue RPC for the previous chunk
                auto async_req = ph->m_client->m_migrate_write_rpc.on(*ph).async(operation_id, i, previous_chunk_offset, previous_buffer);
                // read the current chunk
                bool read_failed = false;
                if(remaining_size == 0) {
                    can_stop = true;
                } else {
                    current_chunk_size = remaining_size < max_chunk_size ? remaining_size : max_chunk_size;
                    current_buffer.resize(current_chunk_size);
                    sizeRead = abt_io_pread(abtio, fd, current_buffer.data(), current_chunk_size, current_chunk_offset);
                    if(sizeRead <= 0) {
                        // do not leave yet: the RPC issued above must be waited on
                        read_failed = true;
                        can_stop = true;
                    } else {
                        current_chunk_size = static_cast<size_t>(sizeRead);
                        current_buffer.resize(current_chunk_size);
                    }
                }
                // wait for the RPC to finish
                ret = async_req.wait();
                if(ret != REMI_SUCCESS)
                    can_stop = true;
                else if(read_failed)
                    ret = REMI_ERR_IO;
                if(!can_stop) {
                    previous_chunk_offset = current_chunk_offset;
                    current_chunk_offset += current_chunk_size;
                    remaining_size -= current_chunk_size;
                    std::swap(current_buffer, previous_buffer);
                }
            }

            if(ret != REMI_SUCCESS) {
                break;
            }
        }

    }

    if(ret != REMI_SUCCESS) {
        cleanup();
        return ret;
    }

    // xfer went ok, now send migrate_end rpc.
    // the response is in the form <errorcode, userstatus>
    std::pair<int32_t, int32_t> end_call_result =
        ph->m_client->m_migrate_end_rpc.on(*ph)(operation_id);

    cleanup();

    ret = end_call_result.first;
    if(status != nullptr) {
        if(ret == REMI_ERR_USER) {
            *status = end_call_result.second;
        } else {
            *status = 0;
        }
    }

    return ret;
}