Commit f0da113e authored by Adrian Pope's avatar Adrian Pope
Browse files

Merge branch 'master' into 'master'

CMakeLists and new python interface

See merge request hacc/genericio!1
parents 285c5b73 4bbd42c3
build/
mpi/
frontend/
*.o
python/genericio.pyc
new_python/pygio.egg-info
new_python/build
\ No newline at end of file
cmake_minimum_required(VERSION 3.10)
project(GenericIO LANGUAGES C CXX)
# Are we the main project or in a nested directory?
set(GENERICIO_MASTER_PROJECT OFF)
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
set(GENERICIO_MASTER_PROJECT ON)
endif()
# Use C++14
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g")
# PIC for everything (can be made target specific)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# MPI (if found: MPI_FOUND)
find_package(MPI)
# OpenMP (if found: OpenMP_FOUND)
find_package(OpenMP)
# Options
# executables default: on if master project
if(MPI_FOUND)
option(GENERICIO_MPI_EXECUTABLES "build MPI executables?" ${GENERICIO_MASTER_PROJECT})
else()
set(GENERICIO_MPI_EXECUTABLES OFF)
endif()
option(GENERICIO_FRONTEND_EXECUTABLES "build frontend executables?" ${GENERICIO_MASTER_PROJECT})
option(GENERICIO_PYTHON_LIBRARY "build python library?" ${GENERICIO_MASTER_PROJECT})
# new python target: only if cmake at least version 3.11
if(NOT (CMAKE_VERSION VERSION_LESS 3.11.0))
option(GENERICIO_NEW_PYTHON_LIBRARY "build (new) python library with pybind11?" OFF)
else()
message(WANRING " cmake version < 3.11.0, cannot build new python library")
set(GENERICIO_NEW_PYTHON_LIBRARY OFF)
endif()
###############################################################################
# BLOSC
add_subdirectory(thirdparty)
###############################################################################
# GenericIO sources, libraries, and executables
set(GenericIO_Sources
GenericIO.h GenericIO.cxx
)
# Libraries: MPI and no MPI
add_library(genericio ${GenericIO_Sources})
add_library(genericio::genericio ALIAS genericio)
# for consumers: make GenericIO.h available (in current source dir)
target_include_directories(genericio INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_definitions(genericio PUBLIC GENERICIO_NO_MPI)
target_link_libraries(genericio PRIVATE blosc)
if(OpenMP_FOUND)
target_link_libraries(genericio PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
endif()
# change output to lib directory
set_target_properties(genericio PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
if(MPI_FOUND)
add_library(genericio_mpi ${GenericIO_Sources})
add_library(genericio::genericio_mpi ALIAS genericio_mpi)
# for consumers: make GenericIO.h available (in current source dir)
target_include_directories(genericio_mpi INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(genericio_mpi PUBLIC MPI::MPI_CXX)
target_link_libraries(genericio_mpi PRIVATE blosc)
# change output to lib directory
set_target_properties(genericio_mpi PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
if(OpenMP_FOUND)
target_link_libraries(genericio_mpi PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
endif()
endif()
# MPI Executables
if(GENERICIO_MPI_EXECUTABLES)
set(MPI_Executables
GenericIOPrint
GenericIOVerify
GenericIOBenchmarkRead
GenericIOBenchmarkWrite
GenericIORewrite
)
foreach(executable ${MPI_Executables})
add_executable("${executable}_MPI" "${executable}.cxx")
target_link_libraries("${executable}_MPI" PRIVATE genericio_mpi)
set_target_properties("${executable}_MPI" PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/mpi"
OUTPUT_NAME ${executable})
endforeach()
endif()
# Frontend Executables
if(GENERICIO_FRONTEND_EXECUTABLES)
set(Frontend_Executables
GenericIOPrint
GenericIOVerify
)
foreach(executable ${Frontend_Executables})
add_executable(${executable} "${executable}.cxx")
target_link_libraries(${executable} PRIVATE genericio)
set_target_properties(${executable} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/frontend")
endforeach()
endif()
# Old python library
if(GENERICIO_PYTHON_LIBRARY)
add_library(pygio SHARED python/lib/gio.cxx python/lib/gio.h)
target_link_libraries(pygio PRIVATE genericio)
# GNUmakefile compatibility: also move to frontend (could be improved)
set_target_properties(pygio PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/frontend")
# GNUmakefile compatibility: copy python files to build directory so that relative paths are correct
file(COPY python/genericio.py python/example.py DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/python)
endif()
# New python library
if(GENERICIO_NEW_PYTHON_LIBRARY)
add_subdirectory(new_python)
endif()
\ No newline at end of file
GenericIO - For more information, please visit the wiki: https://xgitlab.cels.anl.gov/hacc/genericio/-/wikis/home
\ No newline at end of file
GenericIO - For more information, please visit the wiki: https://xgitlab.cels.anl.gov/hacc/genericio/-/wikis/home
[Click here to go to the README for the alternative python interface](new_python/README.md)
\ No newline at end of file
cmake_minimum_required(VERSION 3.11)
set(CMAKE_CXX_STANDARD 17)
include(FetchContent)
FetchContent_Declare(
pybind11
GIT_REPOSITORY https://github.com/pybind/pybind11.git
GIT_TAG v2.5
)
FetchContent_GetProperties(pybind11)
if(NOT pybind11_POPULATED)
FetchContent_Populate(pybind11)
add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR})
endif()
pybind11_add_module(pygio_new genericio.cpp)
set_target_properties(pygio_new PROPERTIES OUTPUT_NAME pygio)
if(MPI_FOUND)
target_link_libraries(pygio_new PRIVATE genericio_mpi)
else()
target_link_libraries(pygio_new PRIVATE genericio)
endif()
\ No newline at end of file
# Python module
This version of `pygio` is pip-installable and works with `mpi4py`.
## Requirements
Currently, a CMake version >= 3.11.0 is required to fetch dependencies during configuration.
The `pygio` module also requires MPI libraries to be findable by CMake's FindMPI. The compiler
needs to support C++17 (make sure that `CC` and `CXX` point to the correct compiler)
## Install
The python library can be installed by running pip in the **main folder**:
```bash
pip install .
```
It will use the compiler referred by the `CC` and `CXX` environment variable. If the compiler
supports OpenMP, the library will be threaded. Make sure to set `OMP_NUM_THREADS` to an
appropriate variable, in particluar when using multiple MPI ranks per node.
## Usage
The library can then be imported in python. Here is a small example script:
```python
import numpy as np
import pygio
# read all variables
data = pygio.read_genericio("generic_io_file")
# read only a subset of variables
data_partial = pygio.read_genericio("generic_io_file", ["x", "y", "z"])
data_x = data_partial["x"]
# write data to file
pygio.write_genericio("new_generic_io_file",
variables = {"x": np.ones(100), "y": np.ones(100)},
phys_scale = [1, 1, 1],
phys_origin = [0, 0, 0],
method = PyGenericIO.FileIO.FileIOPOSIX
)
### USING THE CLASS BASED INTERFACE ###
# instantiate a GenericIO class
gio_file = pygio.PyGenericIO("generic_io_file")
# inspect (prints to python stdout, also works in notebook)
gio_file.inspect()
# get variables
gio_vars = gio_file.get_variables()
# print variable names
for var in gio_vars:
print(var.name, var.size, var.element_size, var.is_float)
# read data
data_all = gio_file.read()
data_partial = gio_file.read(["x", "y", "z"])
```
Further methods and members of GenericIO can easly be interfaced by editing `python_new/genericio.cpp`.
### Using MPI
The `pygio` library is working with mpi4py. Here is an example file:
```python
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
ranks = comm.Get_size()
import numpy as np
import pygio
# read locally
data = pygio.read_genericio("generic_io_file")
# get local number of elements from the first element in dictionary
num_elems = len(next(iter(data.values())))
# reduce total number of elements
num_elems_total = comm.allreduce(num_elems)
if rank == 0:
print(f"Reading file with {ranks} ranks")
print(f"Total number of particles: {num_elems_total}")
print("The data contains the following variables:")
for k, d in data.items():
print(f"\t{k:5s}, dtype={d.dtype}")
for i in range(ranks):
if i == rank:
print(f"rank {rank} read {num_elems} elements")
comm.Barrier()
```
It can be executed with `mpirun`:
```bash
mpirun -n 8 python testpygio_mpi.py
```
Here is an output for 1 and 8 ranks:
```
Reading file with 1 ranks
Total number of particles: 562500
The data contains the following variables:
id , dtype=int64
vx , dtype=float32
vy , dtype=float32
vz , dtype=float32
x , dtype=float32
y , dtype=float32
z , dtype=float32
rank 0 read 562500 elements
```
```
Reading file with 8 ranks
Total number of particles: 562500
The data contains the following variables:
id , dtype=int64
vx , dtype=float32
vy , dtype=float32
vz , dtype=float32
x , dtype=float32
y , dtype=float32
z , dtype=float32
rank 0 read 70000 elements
rank 1 read 70000 elements
rank 2 read 70000 elements
rank 3 read 70000 elements
rank 4 read 70625 elements
rank 5 read 70625 elements
rank 6 read 70625 elements
rank 7 read 70625 elements
```
\ No newline at end of file
#include "GenericIO.h"
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <cstdint>
#include <optional>
#ifndef GENERICIO_NO_MPI
#include <mpi.h>
#endif
namespace py = pybind11;
class PyGenericIO : public gio::GenericIO {
public:
PyGenericIO(const std::string& filename, unsigned method=gio::GenericIO::FileIOPOSIX)
#ifdef GENERICIO_NO_MPI
: gio::GenericIO(filename, method), num_ranks(0) {
#else
: gio::GenericIO(MPI_COMM_WORLD, filename, method), num_ranks(0) {
#endif
// open headers and rank info
openAndReadHeader(gio::GenericIO::MismatchRedistribute);
num_ranks = readNRanks();
// read variable info
getVariableInfo(variables);
}
void inspect() {
int rank;
#ifdef GENERICIO_NO_MPI
rank = 0;
#else
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
if(rank == 0) {
std::stringstream s;
s << "Number of Elements: " << readNumElems() << "\n";
s << "Total number of Elements: " << readTotalNumElems() << "\n";
s << "[data type] Variable name\n";
s << "---------------------------------------------\n";
for (int i = 0; i < variables.size(); ++i) {
gio::GenericIO::VariableInfo vinfo = variables[i];
if (vinfo.IsFloat)
s << "[f";
else
s << "[i";
int NumElements = vinfo.Size / vinfo.ElementSize;
s << " " << vinfo.ElementSize * 8;
if (NumElements > 1)
s << "x" << NumElements;
s << "] ";
s << vinfo.Name << "\n";
}
s << "\n(i=integer,f=floating point, number bits size)\n";
py::print(s.str());
}
}
std::map<std::string, py::array> read(std::optional<std::vector<std::string>> var_names) {
// read number of elements
int64_t num_elem = readNumElems();
// if no argument, read all
if(!var_names.has_value()) {
var_names.emplace(std::vector<std::string>());
for(const auto& v: variables) {
var_names->push_back(v.Name);
}
}
clearVariables();
std::map<std::string, py::array> result;
for(const std::string& var_name: *var_names) {
auto varp = std::find_if(
variables.begin(),
variables.end(),
[&var_name](const auto& v){ return v.Name == var_name; }
);
if (varp != variables.end()) {
// extra space
size_t readsize = num_elem + requestedExtraSpace()/(*varp).ElementSize;
if((*varp).IsFloat && (*varp).ElementSize == 4) {
result[var_name] = py::array_t<float>(size_t(readsize));
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if((*varp).IsFloat && (*varp).ElementSize == 8) {
result[var_name] = py::array_t<double>(size_t(readsize));
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 4) {
result[var_name] = py::array_t<int32_t>(size_t(readsize));
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 8) {
result[var_name] = py::array_t<int64_t>(size_t(readsize));
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
} else if(!(*varp).IsFloat && (*varp).ElementSize == 2) {
result[var_name] = py::array_t<uint16_t>(size_t(readsize));
addVariable(*varp, result[var_name].mutable_data(), gio::GenericIO::VarHasExtraSpace);
}
}
}
readData();
#ifndef GENERICIO_NO_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif
// get rid of extraspace
std::for_each(result.begin(), result.end(), [&](auto& item){ item.second.resize({num_elem}); });
return result;
}
const std::vector<gio::GenericIO::VariableInfo> &get_variables() {
return variables;
}
std::array<double, 3> read_phys_origin() {
std::array<double, 3> origin;
readPhysOrigin(origin.data());
return origin;
}
std::array<double, 3> read_phys_scale() {
std::array<double, 3> scale;
readPhysScale(scale.data());
return scale;
}
private:
int num_ranks;
std::vector<gio::GenericIO::VariableInfo> variables;
};
std::map<std::string, py::array> read_genericio(std::string filename, std::optional<std::vector<std::string>> var_names, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX) {
PyGenericIO reader(filename, method);
return reader.read(var_names);
}
#ifndef GENERICIO_NO_MPI
void write_genericio(std::string filename, std::map<std::string, py::array> variables, std::array<double, 3> phys_scale, std::array<double, 3> phys_origin, PyGenericIO::FileIO method=PyGenericIO::FileIO::FileIOPOSIX) {
// check data integrity, find particle count
int64_t particle_count = -1;
for(auto const& [name, data]: variables) {
if(data.ndim() != 1) {
throw std::runtime_error("dimension of array must be 1 (" + name + ")");
}
if(particle_count == -1) {
particle_count = data.size();
} else if(particle_count != data.size()) {
throw std::runtime_error("arrays do not have same length (" + name + ")");
}
}
gio::GenericIO writer(MPI_COMM_WORLD, filename, method);
writer.setNumElems(particle_count);
// set size
for (int d = 0; d < 3; ++d) {
writer.setPhysOrigin(phys_origin[d], d);
writer.setPhysScale(phys_scale[d], d);
}
for(auto& [name, data]: variables) {
if(py::isinstance<py::array_t<float>>(data))
writer.addVariable(name.c_str(), reinterpret_cast<float*>(data.mutable_data()));
else if(py::isinstance<py::array_t<double>>(data))
writer.addVariable(name.c_str(), reinterpret_cast<double*>(data.mutable_data()));
else if(py::isinstance<py::array_t<int32_t>>(data))
writer.addVariable(name.c_str(), reinterpret_cast<int32_t*>(data.mutable_data()));
else if(py::isinstance<py::array_t<int64_t>>(data))
writer.addVariable(name.c_str(), reinterpret_cast<int64_t*>(data.mutable_data()));
else if(py::isinstance<py::array_t<uint16_t>>(data))
writer.addVariable(name.c_str(), reinterpret_cast<uint16_t*>(data.mutable_data()));
else
throw std::runtime_error("array dtype not supported for " + name);
}
writer.write();
MPI_Barrier(MPI_COMM_WORLD);
}
#endif
PYBIND11_MODULE(pygio, m) {
m.doc() = "genericio python module";
#ifndef GENERICIO_NO_MPI
m.def("_init_mpi", [](){MPI_Init(nullptr, nullptr); });
#endif
py::class_<PyGenericIO> pyGenericIO(m, "PyGenericIO");
pyGenericIO.def(py::init<std::string>())
.def("inspect", &PyGenericIO::inspect, "Print variable infos and size of GenericIO file")
.def("get_variables", &PyGenericIO::get_variables, "Get a list of VariableInformations defined in the GenericIO file")
.def("read_num_elems", (size_t (PyGenericIO::*)(int))(&PyGenericIO::readNumElems), py::arg("eff_rank")=-1)
.def("read_total_num_elems", (uint64_t (PyGenericIO::*)(void))(&PyGenericIO::readTotalNumElems))
.def("read_phys_origin", &PyGenericIO::read_phys_origin)
.def("read_phys_scale", &PyGenericIO::read_phys_scale)
.def("read", &PyGenericIO::read, py::arg("variables")=nullptr);
py::enum_<PyGenericIO::FileIO>(pyGenericIO, "FileIO")
.value("FileIOMPI", PyGenericIO::FileIO::FileIOMPI)
.value("FileIOPOSIX", PyGenericIO::FileIO::FileIOPOSIX)
.value("FileIOMPICollective", PyGenericIO::FileIO::FileIOMPICollective);
py::class_<gio::GenericIO::VariableInfo>(pyGenericIO, "VariableInfo")
.def_readonly("name", &gio::GenericIO::VariableInfo::Name)
.def_readonly("size", &gio::GenericIO::VariableInfo::Size)
.def_readonly("element_size", &gio::GenericIO::VariableInfo::ElementSize)
.def_readonly("is_float", &gio::GenericIO::VariableInfo::IsFloat)
.def("__repr__", [](const gio::GenericIO::VariableInfo &vi) {
return std::string("<PyGenericIO.VariableInfo type=") +
(vi.IsFloat ? "float" : "int") + " name='" + vi.Name + "'>";
});
m.def("read_genericio", &read_genericio, py::arg("filename"), py::arg("variables")=nullptr, py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX, py::return_value_policy::move);
#ifndef GENERICIO_NO_MPI
m.def("write_genericio", &write_genericio, py::arg("filename"), py::arg("variables"), py::arg("phys_scale"), py::arg("phys_origin") = std::array<double, 3>({0., 0., 0.}), py::arg("method")=PyGenericIO::FileIO::FileIOPOSIX);
#endif
}
\ No newline at end of file
from __future__ import print_function
from . import pygio as _pygio
try:
_pygio._init_mpi()
except:
print("WARNING: The pygio module has been compiled without MPI support. Writing GenericIO files not supported.")
from .pygio import *
\ No newline at end of file
import os
import re
import sys
import subprocess
import platform
#import versioneer
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext
from distutils.version import LooseVersion
class CMakeExtension(Extension):
def __init__(self, name, sourcedir=''):
Extension.__init__(self, name, sources=[])
self.sourcedir = os.path.abspath(sourcedir)
class CMakeBuild(build_ext):
def run(self):
try:
out = subprocess.check_output(['cmake', '--version'])
except OSError:
raise RuntimeError("CMake must be installed to build the following extensions: " +
", ".join(e.name for e in self.extensions))
cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
if cmake_version < '3.11.0':
raise RuntimeError("CMake >= 3.11.0 is required")
for ext in self.extensions:
self.build_extension(ext)
def build_extension(self, ext):
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
'-DGENERICIO_NEW_PYTHON_LIBRARY=ON',
'-DGENERICIO_PYTHON_LIBRARY=OFF',
'-DGENERICIO_MPI_EXECUTABLES=OFF',
'-DGENERICIO_FRONTEND_EXECUTABLES=OFF',
'-DPYTHON_EXECUTABLE=' + sys.executable]
cfg = 'Debug' if self.debug else 'Release'
build_args = ['--config', cfg]
if platform.system() == "Windows":
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)]
if sys.maxsize > 2 ** 32:
cmake_args += ['-A', 'x64']
build_args += ['--', '/m']
else:
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
build_args += ['--', '-j']
env = os.environ.copy()
env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), self.distribution.get_version())
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env)
subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp)
#def get_cmdclass():
# cmdclass = versioneer.get_cmdclass()
# cmdclass.update({"build_ext": CMakeBuild})
# return cmdclass
setup(
name='pygio',
#version=versioneer.get_version(),
version=0.1,
author='',
author_email='',
ext_package='pygio',
packages=['pygio'],
package_dir={"": "new_python"},
ext_modules=[CMakeExtension('pygio')],
#cmdclass=get_cmdclass(),