Commit d47d2c7c authored by Adrian Pope's avatar Adrian Pope
Browse files

Merge branch 'master' into 'master'

python interface updates and balancing option

See merge request hacc/genericio!2
parents 78dd749c 5ad54984
......@@ -11,8 +11,6 @@ endif()
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g")
# PIC for everything (can be made target specific)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
......
......@@ -57,6 +57,7 @@ extern "C" {
#include <cassert>
#include <cstddef>
#include <cstring>
#include <tuple>
#ifndef GENERICIO_NO_MPI
#include <ctime>
......@@ -1787,6 +1788,39 @@ void GenericIO::readData(int EffRank, size_t RowOffset, int Rank,
}
}
#ifndef GENERICIO_NO_MPI
// Reassign which file ranks ("source ranks") this MPI rank reads so that the
// number of elements per MPI rank is evened out.
//
// File ranks are visited from largest to smallest (by readNumElems) and each
// one is handed to the currently least-loaded bin; there is one bin per rank
// of Comm. Afterwards the bin at index `Rank` (this process's rank in Comm)
// replaces SourceRanks.
//
// Has no effect unless Redistributing is set: in that case a notice is written
// to stderr and SourceRanks is left untouched.
void GenericIO::rebalanceSourceRanks() {
  if (!Redistributing) {
    std::cerr << "rebalancing source ranks has no effect when Redistributing==false" << std::endl;
    return;
  }

  int NRanks, Rank;
  MPI_Comm_rank(Comm, &Rank);
  MPI_Comm_size(Comm, &NRanks);

  // (file rank, number of elements stored for that file rank)
  std::vector<std::pair<int, size_t>> rank_sizes;
  const int n_file_ranks = readNRanks();
  for (int i = 0; i < n_file_ranks; ++i) {
    rank_sizes.emplace_back(i, readNumElems(i));
  }
  // Largest file ranks first, so the big chunks get placed before the small ones.
  std::sort(rank_sizes.begin(), rank_sizes.end(),
            [](const std::pair<int, size_t>& a, const std::pair<int, size_t>& b) {
              return a.second > b.second;
            });

  // One bin per MPI rank: (accumulated element count, assigned file ranks).
  // Invariant: bins is sorted by ascending load, so bins.front() is always the
  // least-loaded bin.
  using Bin = std::pair<size_t, std::vector<int>>;
  std::vector<Bin> bins(NRanks);

  for (const auto& file_rank : rank_sizes) {
    bins.front().second.push_back(file_rank.first);
    bins.front().first += file_rank.second;

    // Only the front bin grew, so bins[1..] is still sorted. Slide the front
    // bin behind every strictly lighter bin but ahead of equally loaded ones;
    // this is exactly the position a full stable sort by load would give it,
    // without re-sorting the whole array on every step.
    const size_t load = bins.front().first;
    const auto insert_pos = std::lower_bound(
        bins.begin() + 1, bins.end(), load,
        [](const Bin& bin, size_t value) { return bin.first < value; });
    std::rotate(bins.begin(), bins.begin() + 1, insert_pos);
  }

  // Take over the bin that belongs to this rank's index.
  SourceRanks.assign(bins[Rank].second.begin(), bins[Rank].second.end());
}
#endif
void GenericIO::getVariableInfo(vector<VariableInfo> &VI) {
if (FH.isBigEndian())
getVariableInfo<true>(VI);
......
......@@ -448,7 +448,9 @@ public:
int readGlobalRankNumber(int EffRank = -1);
void readData(int EffRank = -1, bool PrintStats = true, bool CollStats = true);
#ifndef GENERICIO_NO_MPI
// Redistribute the file ranks read by each MPI rank (SourceRanks) so that the
// element counts per rank are evened out. Only acts when redistributing is
// enabled; otherwise it prints a notice to stderr and changes nothing.
void rebalanceSourceRanks();
#endif
void getSourceRanks(std::vector<int> &SR);
void close() {
......
......@@ -5,7 +5,7 @@ include(FetchContent)
FetchContent_Declare(
pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.5
GIT_TAG v2.6.1
)
FetchContent_GetProperties(pybind11)
......
......@@ -25,6 +25,9 @@ The library can then be imported in python. Here is a small example script:
import numpy as np
import pygio
# inspect file
pygio.inspect_genericio("generic_io_file")
# read all variables
data = pygio.read_genericio("generic_io_file")
......
......@@ -17,14 +17,14 @@ namespace py = pybind11;
class PyGenericIO : public gio::GenericIO {
public:
PyGenericIO(const std::string& filename, unsigned method=gio::GenericIO::FileIOPOSIX)
PyGenericIO(const std::string& filename, gio::GenericIO::FileIO method=gio::GenericIO::FileIOPOSIX, gio::GenericIO::MismatchBehavior redistribute=gio::GenericIO::MismatchRedistribute)
#ifdef GENERICIO_NO_MPI
: gio::GenericIO(filename, method), num_ranks(0) {
#else
: gio::GenericIO(MPI_COMM_WORLD, filename, method), num_ranks(0) {
#endif
// open headers and rank info
openAndReadHeader(gio::GenericIO::MismatchRedistribute);
openAndReadHeader(redistribute);
num_ranks = readNRanks();
// read variable info
getVariableInfo(variables);
......@@ -84,21 +84,21 @@ public:
);
if (varp != variables.end()) {
// extra space
size_t readsize = num_elem + requestedExtraSpace()/(*varp).ElementSize;
py::ssize_t readsize = num_elem + requestedExtraSpace()/(*varp).ElementSize;
if((*varp).IsFloat && (*varp).ElementSize == 4) {
result[var_name] = py::array_t<float>(size_t(readsize));
result[var_name] = py::array_t<float>(readsize);
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if((*varp).IsFloat && (*varp).ElementSize == 8) {
result[var_name] = py::array_t<double>(size_t(readsize));
result[var_name] = py::array_t<double>(readsize);
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 4) {
result[var_name] = py::array_t<int32_t>(size_t(readsize));
result[var_name] = py::array_t<int32_t>(readsize);
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 8) {
result[var_name] = py::array_t<int64_t>(size_t(readsize));
result[var_name] = py::array_t<int64_t>(readsize);
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 2) {
result[var_name] = py::array_t<uint16_t>(size_t(readsize));
result[var_name] = py::array_t<uint16_t>(readsize);
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
}
}
......@@ -136,11 +136,18 @@ private:
std::vector<gio::GenericIO::VariableInfo> variables;
};
std::map<std::string, py::array> read_genericio(std::string filename, std::optional<std::vector<std::string>> var_names, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX) {
PyGenericIO reader(filename, method);
// Convenience wrapper: open `filename` with a PyGenericIO reader and return the
// result of PyGenericIO::read(var_names).
//
//   filename               path of the GenericIO file to open
//   var_names              forwarded unchanged to PyGenericIO::read
//   method                 file I/O backend passed to the reader
//   redistribute           mismatch behaviour passed to the reader
//   rebalance_source_ranks if true, call rebalanceSourceRanks() on the reader
//                          before reading (MPI builds only)
std::map<std::string, py::array> read_genericio(std::string filename, std::optional<std::vector<std::string>> var_names, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute, bool rebalance_source_ranks=false) {
  PyGenericIO reader(filename, method, redistribute);
#ifndef GENERICIO_NO_MPI
  if (rebalance_source_ranks)
    reader.rebalanceSourceRanks();
#else
  // rebalanceSourceRanks() is only declared when MPI is enabled, so the flag
  // is accepted but ignored in non-MPI builds to keep them compiling.
  (void)rebalance_source_ranks;
#endif
  return reader.read(var_names);
}
void inspect_genericio(std::string filename, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX, PyGenericIO::MismatchBehavior redistribute=PyGenericIO::MismatchBehavior::MismatchRedistribute) {
PyGenericIO reader(filename, method, redistribute);
reader.inspect();
}
#ifndef GENERICIO_NO_MPI
void write_genericio(std::string filename, std::map<std::string, py::array> variables, std::array<double, 3> phys_scale, std::array<double, 3> phys_origin, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX) {
// check data integrity, find particle count
......@@ -189,23 +196,38 @@ void write_genericio(std::string filename, std::map<std::string, py::array> vari
PYBIND11_MODULE(pygio, m) {
m.doc() = "genericio python module";
#ifndef GENERICIO_NO_MPI
m.def("_init_mpi", [](){MPI_Init(nullptr, nullptr); });
m.def("_init_mpi", [](){
int initialized;
MPI_Initialized(&initialized);
if(!initialized) {
int level_provided;
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_SINGLE, &level_provided);
}
});
#endif
py::class_<PyGenericIO> pyGenericIO(m, "PyGenericIO");
pyGenericIO.def(py::init<std::string>())
py::enum_<PyGenericIO::FileIO>(pyGenericIO, "FileIO")
.value("FileIOMPI", PyGenericIO::FileIO::FileIOMPI)
.value("FileIOPOSIX", PyGenericIO::FileIO::FileIOPOSIX)
.value("FileIOMPICollective", PyGenericIO::FileIO::FileIOMPICollective);
py::enum_<PyGenericIO::MismatchBehavior>(pyGenericIO, "MismatchBehavior")
.value("MismatchAllowed", PyGenericIO::MismatchBehavior::MismatchAllowed)
.value("MismatchDisallowed", PyGenericIO::MismatchBehavior::MismatchDisallowed)
.value("MismatchRedistribute", PyGenericIO::MismatchBehavior::MismatchRedistribute);
pyGenericIO.def(py::init<std::string, PyGenericIO::FileIO, PyGenericIO::MismatchBehavior>(), py::arg("filename"), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute)
.def("inspect", &PyGenericIO::inspect, "Print variable infos and size of GenericIO file")
.def("get_variables", &PyGenericIO::get_variables, "Get a list of VariableInformations defined in the GenericIO file")
.def("read_num_elems", (size_t (PyGenericIO::*)(int))(&PyGenericIO::readNumElems), py::arg("eff_rank")=-1)
.def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems))
.def("read_phys_origin", &PyGenericIO::read_phys_origin)
.def("read_phys_scale", &PyGenericIO::read_phys_scale)
.def("read", &PyGenericIO::read, py::arg("variables")=nullptr);
py::enum_<PyGenericIO::FileIO>(pyGenericIO, "FileIO")
.value("FileIOMPI", PyGenericIO::FileIO::FileIOMPI)
.value("FileIOPOSIX", PyGenericIO::FileIO::FileIOPOSIX)
.value("FileIOMPICollective", PyGenericIO::FileIO::FileIOMPICollective);
.def("read", &PyGenericIO::read, py::arg("variables")=nullptr)
.def("get_source_ranks", &PyGenericIO::getSourceRanks)
.def("rebalance_source_ranks", &PyGenericIO::rebalanceSourceRanks);
py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo")
.def_readonly("name", &gio::GenericIO::VariableInfo::Name)
......@@ -216,8 +238,9 @@ PYBIND11_MODULE(pygio, m) {
return std::string("<PyGenericIO.VariableInfo type=") +
(vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>";
});
m.def("read_genericio", &read_genericio, py::arg("filename"), py::arg("variables")=nullptr, py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::return_value_policy::move);
m.def("read_genericio", &read_genericio, py::arg("filename"), py::arg("variables")=nullptr, py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute, py::arg("rebalance_sourceranks")=false, py::return_value_policy::move);
m.def("inspect_genericio", &inspect_genericio, py::arg("filename"), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::arg("redistribute")=PyGenericIO::MismatchBehavior::MismatchRedistribute);
#ifndef GENERICIO_NO_MPI
m.def("write_genericio", &write_genericio, py::arg("filename"), py::arg("variables"), py::arg("phys_scale"), py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX);
#endif
}
\ No newline at end of file
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment