From 35a1372f0a6e327a8ef2401134d93908b0ebc8a3 Mon Sep 17 00:00:00 2001 From: Rob Latham Date: Tue, 7 Jan 2020 16:47:20 -0600 Subject: [PATCH] first draft of summit onboarding --- ecp-am-2020/sessions/hands-on/README.md | 194 ++++++++++++++++++ .../sessions/hands-on/bashrc.mochi.summit | 6 + ecp-am-2020/sessions/hands-on/packages.yaml | 60 ++++++ ecp-am-2020/sessions/hands-on/sum/Makefile | 19 ++ ecp-am-2020/sessions/hands-on/sum/client.c | 44 ++++ ecp-am-2020/sessions/hands-on/sum/server.c | 72 +++++++ ecp-am-2020/sessions/hands-on/sum/types.h | 17 ++ 7 files changed, 412 insertions(+) create mode 100644 ecp-am-2020/sessions/hands-on/README.md create mode 100644 ecp-am-2020/sessions/hands-on/bashrc.mochi.summit create mode 100644 ecp-am-2020/sessions/hands-on/packages.yaml create mode 100644 ecp-am-2020/sessions/hands-on/sum/Makefile create mode 100644 ecp-am-2020/sessions/hands-on/sum/client.c create mode 100644 ecp-am-2020/sessions/hands-on/sum/server.c create mode 100644 ecp-am-2020/sessions/hands-on/sum/types.h diff --git a/ecp-am-2020/sessions/hands-on/README.md b/ecp-am-2020/sessions/hands-on/README.md new file mode 100644 index 0000000..b68bbf4 --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/README.md @@ -0,0 +1,194 @@ +# Mochi Boot Camp: Hands-on + +## Logging on to Summit + +```ssh username@summit.olcf.ornl.gov``` + +Summit requires two-factor authentication. Hopefully you have a token from Oak +Ridge. If you do not, and you are reading this on 6 February at the boot camp +for the first time, you will have to find another machine to work on. + +You'll log in with your pin + one-time pass code. + +Make a directory for this bootcamp: + +``` +mkdir ~/bootcamp +cd ~/bootcamp +git clone https://xgitlab.cels.anl.gov/sds/mochi-boot-camp.git +``` +## Installing spack + +The easiest way to download and compile Mochi components is via the [Spack +package manager](https://spack.io/). 
Begin by cloning the spack repository +from GitHub, and then adding spack to your environment: + +``` +cd ~/bootcamp +git clone https://github.com/spack/spack.git +cd spack +. ~/bootcamp/spack/share/spack/setup-env.sh +``` + +At this point, the `spack` command line tool is available to you, but we +need just a few more steps to configure it ideally for this environment. + +Summit has a minimal base environment, and relies on `modules` +(http://lmod.readthedocs.org) to populate the environment. + +We'll load a recent gcc compiler and teach spack about it: + +``` +[robl@login1]~% module load gcc/9.1.0 +[robl@login1]~% spack compiler find +==> Added 1 new compiler to /home/robl/.spack/linux/compilers.yaml + gcc@9.1.0 +==> Compilers are defined in the following files: + /ccs/home/robl/.spack/linux/compilers.yaml +``` + +Other compilers, such as Intel, IBM's XL, or PGI compilers might work for some +or all of the Mochi components, but we know gcc-9 supports the language +features used by any of our components. + +## Adding the Mochi software repository to Spack + +Many of the Mochi software components have not yet been upstreamed to the +Spack package manager. They are available as a separate software repository +that can be added to spack with the `spack repo add` command: + +``` +[robl@login1]~% cd ~/bootcamp +[robl@login1]~/bootcamp% git clone https://xgitlab.cels.anl.gov/sds/sds-repo.git +... +[robl@login1]~/bootcamp% cd sds-repo +[robl@login1]~/bootcamp/sds-repo% spack repo add . +[carns@jlselogin2 bootcamp]$ +==> Added repo with namespace 'sds'. +``` +## Customizing Spack for ORNL/Summit environment + +At this point you can install any Mochi software component, but it will +likely download and compile more packages than are strictly necessary, which +is time consuming.
At this point we will install a `packages.yaml` file +that customizes Spack by informing it of system packages that it should +reuse, and specifies a subset of network transports to use for Mochi. + +We have provided a pre-configured `packages.yaml` file for this purpose in +the Summit environment. It primarily does the following: + +* tells Spack to use already available software for certain common system + packages (Spack by default will build these packages itself) +* configures the Mercury RPC package to use the InfiniBand 'verbs' interface for our + external network fabric package 'libfabric' + +You can activate this configuration for your account by doing the following: + +``` +cp ~/bootcamp/mochi-boot-camp/ecp-am-2020/sessions/hands-on/packages.yaml ~/.spack/linux/ +``` + +At this point you are ready to install and run Mochi software! + +## General spack usage for package management + +The following are the most important commands to know: + +* `spack spec <package>` to see what will be installed if you were to + install it (including dependencies and version numbers) +* `spack install <package>` to install a package +* `spack load -r <package>` to load the package into your environment +* `module list` to observe what modules you have loaded + +## Setting up your profile to retain Spack and compiler settings + +There are two critical commands that you will want to either run every time +you log into a Summit node, or else add to your ~/.bashrc file so that they +are performed automatically. We recommend the latter to save time: + +``` +cat ~/bootcamp/mochi-boot-camp/ecp-am-2020/sessions/hands-on/bashrc.mochi.summit >> ~/.bashrc +``` + +Now when you log into Summit moving forward you will have the correct compiler +and Spack command line tools available in your environment.
+ +## Installing your first Mochi components + +Run the following to download, compile, and install Margo: + +``` +spack install margo +``` + +This will take a few minutes and will install all of the necessary +dependencies, including Mercury and Argobots. You can now load these +packages by running: + +``` +spack load -r margo +``` + +... and inspect to confirm that they are present in your environment with: + +``` +module list +``` + +## Compiling an example Mochi code + +``` +cd ~/bootcamp/mochi-boot-camp/ecp-am-2020/sessions/hands-on/sum +make +``` + +The above example is a very slightly modified (to use InfiniBand instead +of TCP for communication) copy of the [Sending arguments, returning +values](https://mochi.readthedocs.io/en/latest/margo/03_sum.html#) example +from the [Mochi Readthedocs +page](https://mochi.readthedocs.io/en/latest/index.html). + +This will compile a simple client and server program, linked +against margo. If you inspect the Makefile you will see pkg-config commands +that are used to find the correct CFLAGS and LDFLAGS for the build. + +## Running an interactive job on Summit compute nodes + +Summit uses the LSF job scheduler plus some job management utilities specific to +Summit. One requests an allocation of nodes with `bsub` and runs a program in +that allocation with `jsrun`. + +To get an interactive allocation for 15 minutes: +``` +bsub -Is -W 0:15 -nnodes 2 -P CSC332 $SHELL +``` + +You'll need to replace `-P CSC332` with the name of whichever project you're using. + +The job scheduler will drop you into an interactive login on one of the +"monitor" nodes. Don't run jobs here directly. Instead, we'll use `jsrun`. + +Note that we asked for two nodes from the scheduler. We'll run one service on +one node in the background, then run the client on the other. + +First, start the server. The job will inherit your environment, so if you +loaded modules and built the client and server, things should work ok.
+ + +``` +cd ~/bootcamp/mochi-boot-camp/ecp-am-2020/sessions/hands-on/sum +jsrun -n 1 -r 1 -g ALL_GPUS ./server & + +``` + +Next, run the client, passing it the address string printed by the server: + +``` +jsrun -n 1 -r 1 -g ALL_GPUS ./client ofi+verbs://blah +``` + + +## Additional resources + +- https://docs.olcf.ornl.gov/systems/summit_user_guide.html + diff --git a/ecp-am-2020/sessions/hands-on/bashrc.mochi.summit b/ecp-am-2020/sessions/hands-on/bashrc.mochi.summit new file mode 100644 index 0000000..ee8d161 --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/bashrc.mochi.summit @@ -0,0 +1,6 @@ +# Mochi bootcamp 2020 environment for summit.ornl.gov + +# no need to set compiler paths: spack extracted path information from the +# environment when user ran 'spack compiler find' + +. ~/bootcamp/spack/share/spack/setup-env.sh diff --git a/ecp-am-2020/sessions/hands-on/packages.yaml b/ecp-am-2020/sessions/hands-on/packages.yaml new file mode 100644 index 0000000..596530e --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/packages.yaml @@ -0,0 +1,60 @@ +packages: + all: + compiler: [gcc@9.1.0, xl] + providers: + mpi: [spectrum-mpi, mpich] + pkgconfig: [pkg-config] + spectrum-mpi: + modules: + spectrum-mpi@10.3.0.1%gcc: spectrum-mpi/10.3.0.1-20190611 + buildable: False + openssl: + paths: + openssl@1.0.2k: /usr + buildable: False + # spack issue https://github.com/spack/spack/issues/11955 : system cmake + # (also built by spack) will confuse other cmake-using packages + #cmake: + # buildable: false + # modules: + # cmake@3.15.2: cmake/3.15.2 + autoconf: + paths: + autoconf@2.69: /usr + buildable: False + automake: + modules: + automake@1.16.1: automake/1.16.1 + buildable: False + ucx: + paths: + ucx@1.5.1: /usr + buildable: False + libnl: + modules: + libnl@3.3.0: libnl/3.3.0 + buildable: False + rdma-core: + paths: + rdma-core@20: /usr + buildable: False + findutils: + paths: + findutils@4.5.11: /usr + buildable: False + libxml2: + paths: + libxml2@2.9.1: /usr + buildable: False + ssg: + variants: +mpi +
mercury: + variants: ~boostsys + # the 'mlx' provider for libfabric is not maintained + # https://github.com/ofiwg/libfabric/issues/4806#issuecomment-461678426 + libfabric: + variants: fabrics=verbs,rxm,mrail + thallium: + variants: +cereal + mpich: + variants: netmod=ucx device=ch4 diff --git a/ecp-am-2020/sessions/hands-on/sum/Makefile b/ecp-am-2020/sessions/hands-on/sum/Makefile new file mode 100644 index 0000000..8ec05d5 --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/sum/Makefile @@ -0,0 +1,19 @@ +CFLAGS += `pkg-config --cflags margo` +LDFLAGS += `pkg-config --libs margo` + +all:: client server + +client.o: client.c types.h + $(CC) $(CFLAGS) -c client.c + +client: client.o + $(CC) client.o -o client $(LDFLAGS) + +server.o: server.c types.h + $(CC) $(CFLAGS) -c server.c + +server: server.o + $(CC) server.o -o server $(LDFLAGS) + +clean:: + rm -f client server client.o server.o diff --git a/ecp-am-2020/sessions/hands-on/sum/client.c b/ecp-am-2020/sessions/hands-on/sum/client.c new file mode 100644 index 0000000..66fe61c --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/sum/client.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include "types.h" + +int main(int argc, char** argv) +{ + if(argc != 2) { + fprintf(stderr,"Usage: %s \n", argv[0]); + exit(0); + } + + margo_instance_id mid = margo_init("verbs:", MARGO_CLIENT_MODE, 0, 0); + + hg_id_t sum_rpc_id = MARGO_REGISTER(mid, "sum", sum_in_t, sum_out_t, NULL); + + hg_addr_t svr_addr; + margo_addr_lookup(mid, argv[1], &svr_addr); + + int i; + sum_in_t args; + for(i=0; i<4; i++) { + args.x = 42+i*2; + args.y = 42+i*2+1; + + hg_handle_t h; + margo_create(mid, svr_addr, sum_rpc_id, &h); + margo_forward(h, &args); + + sum_out_t resp; + margo_get_output(h, &resp); + + printf("Got response: %d+%d = %d\n", args.x, args.y, resp.ret); + + margo_free_output(h,&resp); + margo_destroy(h); + } + + margo_addr_free(mid, svr_addr); + + margo_finalize(mid); + + return 0; +} diff --git 
a/ecp-am-2020/sessions/hands-on/sum/server.c b/ecp-am-2020/sessions/hands-on/sum/server.c new file mode 100644 index 0000000..a908602 --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/sum/server.c @@ -0,0 +1,72 @@ +#include <assert.h> +#include <stdio.h> +#include <margo.h> +#include "types.h" + +typedef struct { + int max_rpcs; /* the handler finalizes margo once this many RPCs have been served */ + int num_rpcs; /* RPCs served so far */ +} server_data; + +static void sum(hg_handle_t h); +DECLARE_MARGO_RPC_HANDLER(sum) +/* Server: initializes margo in server mode on "verbs:", prints its own address, registers the "sum" RPC with svr_data attached, then waits until the instance is finalized. */ +int main(int argc, char** argv) +{ + margo_instance_id mid = margo_init("verbs:", MARGO_SERVER_MODE, 0, 0); + assert(mid); + + server_data svr_data = { + .max_rpcs = 4, + .num_rpcs = 0 + }; + + hg_addr_t my_address; + margo_addr_self(mid, &my_address); + char addr_str[128]; + size_t addr_str_size = sizeof(addr_str); + margo_addr_to_string(mid, addr_str, &addr_str_size, my_address); + margo_addr_free(mid,my_address); + printf("Server running at address %s\n", addr_str); + fflush(stdout); /* stdout is block-buffered when it is not a terminal; flush so the address is visible before we block below */ + hg_id_t rpc_id = MARGO_REGISTER(mid, "sum", sum_in_t, sum_out_t, sum); + margo_register_data(mid, rpc_id, &svr_data, NULL); + + margo_wait_for_finalize(mid); + + return 0; +} +/* RPC handler for "sum": decodes the input, responds with x + y, and finalizes the margo instance once num_rpcs reaches max_rpcs. */ +static void sum(hg_handle_t h) +{ + hg_return_t ret; + + sum_in_t in; + sum_out_t out; + + margo_instance_id mid = margo_hg_handle_get_instance(h); + + const struct hg_info* info = margo_get_info(h); + server_data* svr_data = (server_data*)margo_registered_data(mid, info->id); + + ret = margo_get_input(h, &in); + assert(ret == HG_SUCCESS); + + out.ret = in.x + in.y; + printf("Computed %d + %d = %d\n",in.x,in.y,out.ret); + + ret = margo_respond(h, &out); + assert(ret == HG_SUCCESS); + + ret = margo_free_input(h, &in); + assert(ret == HG_SUCCESS); + + ret = margo_destroy(h); + assert(ret == HG_SUCCESS); + + svr_data->num_rpcs += 1; + if(svr_data->num_rpcs == svr_data->max_rpcs) { + margo_finalize(mid); + } +} +DEFINE_MARGO_RPC_HANDLER(sum) diff --git a/ecp-am-2020/sessions/hands-on/sum/types.h b/ecp-am-2020/sessions/hands-on/sum/types.h new file mode 100644 index 0000000..5671390 --- /dev/null +++ b/ecp-am-2020/sessions/hands-on/sum/types.h
@@ -0,0 +1,17 @@ +#ifndef PARAM_H +#define PARAM_H + +#include <mercury.h> +#include <mercury_macros.h> + +/* The Mercury macros below generate the sum_in_t (x, y) and + * sum_out_t (ret) structures together with their + * serialization functions. + */ +MERCURY_GEN_PROC(sum_in_t, + ((int32_t)(x))\ + ((int32_t)(y))) + +MERCURY_GEN_PROC(sum_out_t, ((int32_t)(ret))) + +#endif -- 2.26.2