codes-best-practices.tex 30 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31

\documentclass[conference,10pt,compsocconf,onecolumn]{IEEEtran}
%\documentclass{acm_proc_article-sp}
%\documentclass{sig-alternate}
% Add the compsoc option for Computer Society conferences.
%
% If IEEEtran.cls has not been installed into the LaTeX system files,
% manually specify the path to it like:
% \documentclass[conference]{../sty/IEEEtran}

% *** CITATION PACKAGES ***
%
\usepackage{cite}
% cite.sty was written by Donald Arseneau
% V1.6 and later of IEEEtran pre-defines the format of the cite.sty package
% \cite{} output to follow that of IEEE. Loading the cite package will
% result in citation numbers being automatically sorted and properly
% "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using
% cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. cite.sty's
% \cite will automatically add leading space, if needed. Use cite.sty's
% noadjust option (cite.sty V3.8 and later) if you want to turn this off.
% cite.sty is already installed on most LaTeX systems. Be sure and use
% version 4.0 (2003-05-27) and later if using hyperref.sty. cite.sty does
% not currently provide for hyperlinked citations.
% The latest version can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/cite/
% The documentation is contained in the cite.sty file itself.
%
\usepackage{setspace}
\usepackage{wrapfig}
\usepackage{color}
32 33
\usepackage{listing}
\usepackage{listings}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
34
\usepackage{verbatim}
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

\lstset{ %
frame=single,
language=C,
captionpos=b,
columns=fullflexible,
morekeywords={aesop,pwait,pbranch,pbreak},
numbers=left,
basicstyle=\scriptsize\ttfamily,
breaklines=true,
framexleftmargin=0em,
boxpos=c,
resetmargins=true,
xleftmargin=6ex
%basicstyle=\footnotesize
}

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152

\usepackage[pdftex]{graphicx}
%\usepackage{graphicx}
% declare the path(s) where your graphic files are
\graphicspath{{./}{data/beagle/}}
% and their extensions so you won't have to specify these with
% every instance of \includegraphics
\DeclareGraphicsExtensions{.pdf,.jpeg,.png}

% *** GRAPHICS RELATED PACKAGES ***
%
\ifCLASSINFOpdf
 % \usepackage[pdftex]{graphicx}
 % declare the path(s) where your graphic files are
 % \graphicspath{{../pdf/}{../jpeg/}}
 % and their extensions so you won't have to specify these with
 % every instance of \includegraphics
 % \DeclareGraphicsExtensions{.pdf,.jpeg,.png}
\else
 % or other class option (dvipsone, dvipdf, if not using dvips). graphicx
 % will default to the driver specified in the system graphics.cfg if no
 % driver is specified.
 % \usepackage[dvips]{graphicx}
 % declare the path(s) where your graphic files are
 % \graphicspath{{../eps/}}
 % and their extensions so you won't have to specify these with
 % every instance of \includegraphics
 % \DeclareGraphicsExtensions{.eps}
\fi

% *** ALIGNMENT PACKAGES ***
%
%\usepackage{array}
% Frank Mittelbach's and David Carlisle's array.sty patches and improves
% the standard LaTeX2e array and tabular environments to provide better
% appearance and additional user controls. As the default LaTeX2e table
% generation code is lacking to the point of almost being broken with
% respect to the quality of the end results, all users are strongly
% advised to use an enhanced (at the very least that provided by array.sty)
% set of table tools. array.sty is already installed on most systems. The
% latest version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/required/tools/

%\usepackage{eqparbox}
% Also of notable interest is Scott Pakin's eqparbox package for creating
% (automatically sized) equal width boxes - aka "natural width parboxes".
% Available at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/eqparbox/

\usepackage{algorithm}
\usepackage[noend]{algorithmic}

% *** SUBFIGURE PACKAGES ***
\usepackage[tight,footnotesize]{subfigure}
% \usepackage{subfigure}
% subfigure.sty was written by Steven Douglas Cochran. This package makes it
% easy to put subfigures in your figures. e.g., "Figure 1a and 1b". For IEEE
% work, it is a good idea to load it with the tight package option to reduce
% the amount of white space around the subfigures. subfigure.sty is already
% installed on most LaTeX systems. The latest version and documentation can
% be obtained at:
% http://www.ctan.org/tex-archive/obsolete/macros/latex/contrib/subfigure/
% subfigure.sty has been superceeded by subfig.sty.

%\usepackage[caption=false]{caption}
%\usepackage[font=footnotesize]{subfig}
% subfig.sty, also written by Steven Douglas Cochran, is the modern
% replacement for subfigure.sty. However, subfig.sty requires and
% automatically loads Axel Sommerfeldt's caption.sty which will override
% IEEEtran.cls handling of captions and this will result in nonIEEE style
% figure/table captions. To prevent this problem, be sure and preload
% caption.sty with its "caption=false" package option. This is will preserve
% IEEEtran.cls handing of captions. Version 1.3 (2005/06/28) and later 
% (recommended due to many improvements over 1.2) of subfig.sty supports
% the caption=false option directly:
%\usepackage[caption=false,font=footnotesize]{subfig}
%
% The latest version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/subfig/
% The latest version and documentation of caption.sty can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/caption/

% *** PDF, URL AND HYPERLINK PACKAGES ***
%
\usepackage{url}
% url.sty was written by Donald Arseneau. It provides better support for
% handling and breaking URLs. url.sty is already installed on most LaTeX
% systems. The latest version can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/misc/
% Read the url.sty source comments for usage information. Basically,
% \url{my_url_here}.

% *** Do not adjust lengths that control margins, column widths, etc. ***
% *** Do not use packages that alter fonts (such as pslatex).         ***
% There should be no need to do such things with IEEEtran.cls V1.6 and later.
% (Unless specifically asked to do so by the journal or conference you plan
% to submit to, of course. )

% correct bad hyphenation here
\hyphenation{op-tical net-works semi-conduc-tor}

153 154 155 156 157
\newcommand{\codesmapping}[1]{\texttt{codes\_mapping}}
\newcommand{\codesconfig}[1]{\texttt{codes\_config}}
\newcommand{\codesmodelnet}[1]{\texttt{model\_net}}


158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
\begin{document}
\title{CODES Best Practices}

%\author{\IEEEauthorblockN{Someone\IEEEauthorrefmark{1}} \\
%\IEEEauthorblockA{\IEEEauthorrefmark{1}Somewhere}
%}

%\numberofauthors{6} %  in this sample file, there are a *total*


% use for special paper notices
%\IEEEspecialpapernotice{(Invited Paper)}

% use arabic rather than roman numerals for table references
\renewcommand{\thetable}{\arabic{table}}

% make the title area
\maketitle

\begin{abstract}
178 179
This document outlines best practices for developing models in the
CODES/ROSS framework.  The reader should already be familiar with ROSS
Philip Carns's avatar
Philip Carns committed
180
and discrete event simulation in general; those topics are covered in the primary
181 182 183
ROSS documentation.
%
The main purpose of this document is to help the reader produce
184
CODES models in a consistent, modular style so that components can be more
185 186
easily shared and reused.  It also includes a few tips to help avoid common
simulation bugs.
187 188
\end{abstract}

189
\section{CODES: modularizing models}
190

191 192 193
This section covers some of the basic principles of how to organize model
components to be more modular and easier to reuse across CODES models.

194
\subsection{Units of time}
195

196
ROSS does not dictate the units to be used in simulation timestamps.
Philip Carns's avatar
Philip Carns committed
197
The \texttt{tw\_stime} type could represent any time unit
198
(e.g. days, hours, seconds, nanoseconds, etc.).  When building CODES
Philip Carns's avatar
Philip Carns committed
199 200
models you should \emph{always treat timestamps as double precision floating
point numbers representing nanoseconds}, however.
201 202 203
All components within a model must agree on the time units in order to
advance simulation time consistently.  Several common utilities in the
CODES project expect to operate in terms of nanoseconds.
204

205 206
\subsection{Organizing models by LP types}

207 208 209 210 211 212 213 214 215 216 217
ROSS allows you to use as many different LP types as you would like to
construct your models.  Try to take advantage of this as much as possible by
organizing your simulation so that each component of the system that you are
modeling is implemented within its own LP type.  For example, a storage
system model might use different LPs for hard disks, clients, network
adapters, and servers.  There are multiple reasons for dividing up models
like this:

\begin{itemize}
\item General modularity: makes it easier to pull out particular components
(for example, a disk model) for use in other models.
218
\item Simplicity: if each LP type is only handling a limited set of
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
events, then the event structure, state structure, and event handler
functions will all be much smaller and easier to understand.
\item Reverse computation: it makes it easier to implement reverse
computation, not only because the code is simpler, but also because you can
implement and test reverse computation per component rather than having to
apply it to an entire model all at once before testing.
\end{itemize}

It is also important to note that you can divide up models not just by
hardware components, but also by functionality, just as
you would modularize the implementation of a distributed file system.  For
example, a storage daemon might include separate LPs for replication, failure
detection, and reconstruction.  Each of those LPs can share the same network
card and disk resources for accurate modeling of resource usage.  They key
reason for splitting them up is to simplify the model and to encourage
reuse.

Philip Carns's avatar
Philip Carns committed
236 237 238 239 240 241 242 243 244 245 246 247
One hypothetical downside to splitting up models into multiple LP types is that it likely
means that your model will generate more events than a monolithic model
would have.  Remember that \emph{ROSS is really efficient at generating and
processing events}, though!  It is usually a premature optimization to try to optimize a model by
replacing events with function calls in cases where you know the necessary
data is available on the local MPI process.  Also recall that any information
exchanged via event automatically benefits by shifting burden for
tracking/retaining event data and event ordering into ROSS rather than your
model.  This can help simplify reverse computation by breaking complex
operations into smaller, easier to understand (and reverse) event units with
deterministic ordering.

248 249
\subsection{Protecting data structures}

Philip Carns's avatar
Philip Carns committed
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
ROSS operates by exchanging events between LPs.  If an LP is sending
an event to another LP of the same type, then in general it can do so
by allocating an event structure (e.g. \texttt{tw\_event\_new()}),
populating the event structure, and transmitting it
(e.g. \texttt{tw\_event\_send()}).  If an LP is sending an event to
another LP of a \emph{different} type, however, then it should use an
explicit API to do so without exposing the other LP's event structure
definition.  Event structures are not a robust API for exchanging data
across different LP types.  If one LP type accesses the event (or state)
structure of another LP type, then it entangles the two components such
that one LP is dependent upon the internal architecture of another LP.
This not only makes it difficult to reuse components, but also makes it
difficult to check for incompatibilities at compile time.  The compiler
has no way to know which fields in a struct must be set before sending
an event.

For these reasons we encourage that a) each LP be implemented in a separate
source file and b) all event structs and state structs
be defined only within those source files.  They should not be exposed in external
269 270
headers.  If the definitions are placed in a header then it makes it
possible for those event and state structs to be used as an ad-hoc interface
Philip Carns's avatar
Philip Carns committed
271
between LPs of different types.
272

273 274 275 276
\section{CODES: common utilities}

\subsection{modelnet}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
277 278 279 280 281 282 283
Modelnet is a network abstraction layer for use in CODES models. It provides a
consistent API that can be used to send messages between nodes using a variety
of different network transport models. Note that modelnet requires the use of
the codes-mapping API, described in previous section.

modelnet can be found in the codes-net repository. See the example program for
general usage.
284

285 286
\subsection{lp-io}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
287 288
% TODO: flesh out further
lp-io is a simple API for storing modest-sized
289
simulation results (not continuous traces).  It handles reverse computation
Jonathan Jenkins's avatar
Jonathan Jenkins committed
290 291
and avoids doing any disk I/O until the simulation is complete. All data is
written with collective I/O into a unified output directory. lp-io is
Philip Carns's avatar
Philip Carns committed
292 293 294
mostly useful for cases in which you would like each LP instance to report
statistics, but for scalability and data management reasons those results
should be aggregated into a single file rather than producing a separate
Jonathan Jenkins's avatar
Jonathan Jenkins committed
295 296 297 298
file per LP. It is not recommended that lp-io be used for data intensive,
streaming output.

The API for lp-io can be found in codes/lp-io.h
Philip Carns's avatar
Philip Carns committed
299

Jonathan Jenkins's avatar
Jonathan Jenkins committed
300
% TODO: look at ross/IO code and determine how it relates to this.
301

302 303
\subsection{codes-workload generator}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
304 305 306 307
% TODO: fill in further
codes-workload is an abstraction layer for feeding I/O / network
workloads into a simulation. It supports multiple back-ends for generating
I/O and network events; data could come from a trace file, from Darshan, or from a
308 309
synthetic description.

Jonathan Jenkins's avatar
Jonathan Jenkins committed
310 311 312 313
This component is under active development right now and not complete yet.  If
you are interested in using it, a minimal example of the I/O API can be seen in
the codes-workload-dump utility and in
tests/workload/codes-workload-test-cn-lp.c
314

Jonathan Jenkins's avatar
Jonathan Jenkins committed
315
The API for the workload generator can be found in codes/codes-(nw-)workload.h.
316

Jonathan Jenkins's avatar
Jonathan Jenkins committed
317
\subsection{codes\_event\_new}
318

Jonathan Jenkins's avatar
Jonathan Jenkins committed
319 320 321 322 323
Defined in codes/codes.h, codes\_event\_new is a small convenience wrapper to
tw\_event\_new that errors out if an event exceeds the global end timestamp for
ROSS. The assumption is that CODES models will normally run to a completion
condition rather than until simulation time runs out, see later section for
more information on this approach.
324

Jonathan Jenkins's avatar
Jonathan Jenkins committed
325
\section{CODES/ROSS: general tips and tricks}
326

327 328
\subsection{Event magic numbers}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
329
Put magic numbers at the top of each event struct and
330
check them in event handler.  This makes sure that you don't accidentally
Jonathan Jenkins's avatar
Jonathan Jenkins committed
331
send the wrong event type to an LP, and aids debugging.
332

Jonathan Jenkins's avatar
Jonathan Jenkins committed
333
\subsection{Avoiding event timestamp ties}
334

Jonathan Jenkins's avatar
Jonathan Jenkins committed
335 336 337 338 339 340
Event timestamp ties in ROSS occur when two or more events have the same
timestamp. These have a variety of unintended consequences, most significant of
which is hampering both reproducability and determinism in simulations. To
avoid this, use codes\_local\_latency for events with small or zero time deltas
to add some random noise. codes\_local\_latency must be reversed, so use
codes\_local\_latency\_reverse in reverse event handlers.
341

Jonathan Jenkins's avatar
Jonathan Jenkins committed
342 343 344 345 346 347
One example of this usage is exchanging events between LPs without really
consuming significant time (for example, to transfer information from a server
to its locally attached network card). It is tempting to use a timestamp of 0,
but this would cause timestamp ties in ROSS. Use of codes\_local\_latency for
timing of local event transitions in this case can be thought of as bus
overhead or context switch overhead.
348 349 350

\subsection{Organizing event structures}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
351 352 353 354
Since a single event structure contains data for all of the different types of
events processed by the LP, use a type enum + unions as an organizational
strategy. Keeps the event size down and makes it a little clearer what
variables are used by which event types.
355 356 357

\subsection{Validating across simulation modes}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
358 359 360
During development, you should do test runs with serial, parallel conservative,
and parallel optimistic runs to make sure that you get consistent results.
These modes stress different aspects of the model.
361

362 363 364 365 366 367 368 369
\subsection{Working with floating-point data}

Floating point variables are particularly tricky to use in optimistic
simulations, as rounding errors prevent rolling back to a consistent state by
merely performing the inverse operations (e.g., $a+b-b \neq a$). Hence, it is
instead preferable to simply store the local floating-point state in the event
structure and perform assignment on rollback.

370 371
\subsection{How to complete a simulation}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
372
Most core ROSS examples are design to intentionally hit
373
the end timestamp for the simulation (i.e. they are modeling a continuous,
Jonathan Jenkins's avatar
Jonathan Jenkins committed
374 375 376
steady state system). This isn't necessarily true for other models. Quite
simply, set g\_tw\_ts\_end to an arbitrary large number when running simulations
that have a well-defined end-point in terms of events processed. 
377

Jonathan Jenkins's avatar
Jonathan Jenkins committed
378
\begin{comment} ROSS takes care of this
Philip Carns's avatar
Philip Carns committed
379
\subsection{Kicking off a simulation}
380
\label{sec_kickoff}
Philip Carns's avatar
Philip Carns committed
381 382 383 384

TOOD: fill this in.  Each LP needs to send an event to itself at the
beginning of the simulation (explain why).  We usually skew these with
random numbers to help break ties right off the bat (explain why).
Jonathan Jenkins's avatar
Jonathan Jenkins committed
385
\end{comment}
Philip Carns's avatar
Philip Carns committed
386

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
\subsection{Handling non-trivial event dependencies}

In storage system simulations, it will often be the case that clients, servers,
or both issue multiple asynchronous (parallel) operations, performing some
action upon the completion of them. More generally, the problem is: an event
issuance (an ack to the client) is based on the completion of more than one
asynchronous/parallel events (local write on primary server, forwarding write to
replica server). Further complicating the matter for storage simulations, there
can be any number of outstanding requests, each waiting on multiple events. 

In ROSS's sequential and conservative parallel modes, the necessary state can
easily be stored in the LP as a queue of statuses for each set of events,
enqueuing upon asynchronous event issuances and updating/dequeuing upon each
completion. Each LP can assign unique IDs to each queue item and propagate the
IDs through the asynchronous events for lookup purposes. However, in optimistic
mode we may remove an item from the queue and then be forced to re-insert it
during reverse computation.

Naively, one could simply never remove queue items, but of course memory will
quickly be consumed.

An elegant solution to this is to \emph{cache the status state in the event
structure that causes the dequeue}. ROSS's reverse computation semantics ensures
that this event will be reversed before the completion events of any of the
other asynchronous events, allowing us to easily recover the state. Furthermore,
events are garbage-collected as the GVT, reducing memory management complexity.
However, this strategy has the disadvantage of increasing event size
accordingly.

416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
\section{Best practices quick reference}

NOTE: these may be integrated with the remaining notes or used as a summary of
section(s).

\subsection{ROSS simulation development}

\begin{enumerate}

    \item prefer fine-grained, simple LPs to coarse-grained, complex LPs
    \begin{enumerate}
        \item can simplify both LP state and reverse computation implementation
        \item ROSS is very good at event processing, likely small difference in
            performance
    \end{enumerate}

    \item consider separating single-source generation of concurrent events with
        "feedback" events or "continue" events to self
    \begin{enumerate}
        \item generating multiple concurrent events makes rollback more difficult
    \end{enumerate}

    \item use dummy events to work around "event-less" advancement of simulation time 

    \item add a small amount of time "noise" to events to prevent ties

    \item prefer more and smaller events to fewer and larger events
    \begin{enumerate}
        \item simplifies individual event processing
        \item ROSS uses bounded event structure size in communication, so
            smaller bound $\rightarrow$  less communication overhead
    \end{enumerate}

    \item prefer placing state in event structure to LP state structure
    \begin{enumerate}
        \item simplifies reverse computation -- less persistent state
Jonathan Jenkins's avatar
Jonathan Jenkins committed
452
        \item NOTE: tradeoff with previous point - consider efficiency vs.\
453 454 455 456 457 458 459 460
            complexity
    \end{enumerate}

    \item try to implement event processing with only LP-local information
    \begin{enumerate}
        \item reverse computation with collective knowledge is difficult
    \end{enumerate}

461 462 463 464
    \item for optimistic-mode-capable tracking of multiple asynchronous event
        dependencies, cache status in the event state signifying the last
        satisfied dependency to ease reverse computation

465 466
\end{enumerate}

467
\section{CODES Example Model}
468 469 470

TODO: Standardize the namings for codes configuration, mapping, and model-net.

Jonathan Jenkins's avatar
Jonathan Jenkins committed
471 472
An example model representing most of the functionality present in CODES is
available in doc/example. In
473 474 475 476 477 478 479 480 481 482 483 484 485
this scenario, we have a certain number of storage servers, identified
through indices $0,\ldots, n-1$ where each server has a network interface card
(NIC) associated with it. The servers exchange messages with their neighboring
server via their NIC card (i.e., server $i$ pings server $i+1$, rolling over the
index if necessary). When the neighboring server receives the message, it sends
an acknowledgement message to the sending server in response. Upon receiving the
acknowledgement, the sending server issues another message. This process continues until
some number of messages have been sent. For simplicity, it is assumed that each
server has a direct link to its neighbor, and no network congestion occurs due
to concurrent messages being sent.

The model is relatively simple to simulate through the usage of ROSS. There are
two distinct LP types in the simulation: the server and the NIC. Refer to
Jonathan Jenkins's avatar
Jonathan Jenkins committed
486
example.c for data structure definitions. The server LPs
487 488 489 490 491
are in charge of issuing/acknowledging the messages, while the NIC LPs
(implemented via CODES's model-net) transmit the data and inform their
corresponding servers upon completion. This LP decomposition strategy is
generally preferred for ROSS-based simulations: have single-purpose, simple LPs
representing logical system components.
492

493
In this program, CODES is used in the following four ways: to provide
Jonathan Jenkins's avatar
Jonathan Jenkins committed
494
configuration utilities for the program (example.conf), to logically separate and provide
495
lookup functionality for multiple LP types, to automate LP placement on KPs/PEs,
496 497 498
and to simplify/modularize the underlying network structure. The \codesconfig{}
API is used for the first use-case, the \codesmapping{} API is used for
the second and third use-cases, and the \codesmodelnet{} API is used for the
499 500 501
fourth use-case. The following sections discuss these while covering necessary
ROSS-specific information.

502
\subsection{\codesconfig{}}
503

Jonathan Jenkins's avatar
Jonathan Jenkins committed
504 505 506 507 508 509 510 511
The configuration format allows categories, and optionally subgroups within the
category, of key-value pairs for configuration. The LPGROUPS category defines
the LP configuration. The PARAMS category is currently used for
\codesmodelnet{} and ROSS-specific parameters. For instance, the
\texttt{message\_size} field defines the maximum event size used in ROSS for
memory management. Of course, user-defined categories can be used as well,
which are used in this case to define the rounds of communication and the size
of each message.
512

513
\subsection{\codesmapping{}}
514 515
\label{subsec:codes_mapping}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
516 517 518 519 520
The \codesmapping{} API transparently maps user LPs to global LP IDs and MPI
ranks (Aka ROSS PE's). The LP type and count can be specified through
\codesconfig{}. In this section, we focus on the \codesmapping{} API as well as
configuration. Multiple LP types are specified in a single LP group (there can
also be multiple LP groups in a config file).
521

522 523 524 525 526 527 528 529 530
In Listing~\ref{snippet2}, there is 1 server LP and 1
\texttt{modelnet\_simplenet} LP type in a group and this combination is repeated
16 time (repetitions="16").  ROSS will assign the LPs to the PEs (PEs is an
abstraction for MPI rank in ROSS) by placing 1 server LP then 1
\texttt{modelnet\_simplenet} LP a total of 16 times. This configuration is
useful if there is heavy communication involved between the server and
\texttt{modelnet\_simplenet} LP types, in which case ROSS will place them on the
same PE so that the communication between server and
\texttt{modelnet\_simplenet} LPs will not involve remote messages. 
531 532

An important consideration when defining the configuration file is the way
533
\codesmodelnet{} maps the network-layer LPs (the NICs in this example) and the upper
534 535 536 537
level LPs (e.g., the servers). Specifically, each NIC is mapped in a one-to-one
manner with the calling LP through the calling LP's group name, repetition
number, and number within the repetition.

Jonathan Jenkins's avatar
Jonathan Jenkins committed
538 539 540 541
After the initialization function calls of ROSS (\texttt{tw\_init}), the
configuration file can be loaded in the example program (see the main function
in example.c). Each LP type must register itself using
\texttt{lp\_type\_register} before setting up the mapping.
542

543
The \codesmapping{} API provides ways to query information like number of LPs of
544 545
a particular LP types, group to which a LP type belongs, repetitions in the
group (For details see codes-base/codes/codes-mapping.h file).  Figure
546 547
\ref{snippet3} shows how to setup the \codesmapping{} API with our CODES example
and computes basic information by querying the number of servers in a particular
548 549 550 551 552 553 554 555 556
group. 

\subsection{Event Handlers}
In this example, we have two LP types i.e. a server LP and a model-net LP.
Since the servers only send and receive messages to each other, the server LP state
maintains a count of the number of remote messages it has sent and received as
well as the number of local completion messages.   

For the server event message, we have four message types KICKOFF, REQ, ACK and
Jonathan Jenkins's avatar
Jonathan Jenkins committed
557 558 559 560 561
LOCAL. With a KICKOFF event, each LP sends a message to itself to begin the
simulation proper. To avoid event ties, we add a small noise using
codes\_local\_latency. The ``REQ'' message is sent by a server to its
neighboring server and when received, neighboring server sends back a message
of type ``ACK''.
562

563 564
\subsection{\codesmodelnet{}}
\codesmodelnet{} is an abstraction layer that allow models to send messages
Jonathan Jenkins's avatar
Jonathan Jenkins committed
565 566 567
across components using different network transports. This is a consistent API
that can send messages across both simple and complex network models without
changing the higher level model code.
568

569
In the CODES example, we use \emph{simple-net} as the underlying plug-in for
Jonathan Jenkins's avatar
Jonathan Jenkins committed
570 571
\codesmodelnet{}. The simple-net parameters are specified by the user in the
example.conf config file and loaded via model\_net\_configure.
572

573
\codesmodelnet{} assumes that the caller already knows what LP it wants to
Jonathan Jenkins's avatar
Jonathan Jenkins committed
574 575 576 577 578 579 580
deliver the message to (e.g.\ by using the codes-mapping API) and how large the
simulated message is. It carries two types of events (1) a remote event to be
delivered to a higher level model LP (In the example, the \codesmodelnet{} LPs
carry the remote event to the server LPs) and (2) a local event to be delivered
to the caller once the message has been transmitted from the node (In the
example, a local completion message is delivered to the server LP once the
\codesmodelnet{} LP sends the message).
581 582

\subsection{Reverse computation}
583 584 585 586 587 588 589 590 591 592

ROSS has the capability for optimistic parallel simulation, but instead of
saving the state of each LP, they instead require users to perform \emph{reverse
computation}. That is, while the event messages are themselves preserved (until
the Global Virtual Time (GVT) algorithm renders the messages unneeded), the LP
state is not preserved. Hence, it is up to the simulation developer to provide
functionality to reverse the LP state, given the event to be reversed. ROSS
makes this simpler in that events will always be rolled back in exactly the
order they were applied. Note that ROSS also has both serial and parallel
conservative modes, so reverse computation may not be necessary if the
Jonathan Jenkins's avatar
Jonathan Jenkins committed
593
simulation is not compute- or memory-intensive.
594 595 596

For our example program, recall the ``forward'' event handlers. They perform the
following: 
597 598
\begin{enumerate}
    \item Kickoff: send a message to the peer server, and increment sender LP's
599
        count of sent messages.
600
    \item Request (received from peer server): increment receiver count of
601
        received messages, and send an acknowledgement to the sender.
602
    \item Acknowledgement (received from message receiver): send the next
603 604
        message to the receiver and increment messages sent count. Set a flag
        indicating whether a message has been sent.  
605
    \item Local \codesmodelnet{} callback: increment the local model-net
606
        received messages count.
607
\end{enumerate}
608 609 610

In terms of LP state, the four operations are simply modifying counts. Hence,
the ``reverse'' event handlers need to merely roll back those changes: 
611 612 613
\begin{enumerate}
    \item  Kickoff: decrement sender LP's count of sent messages.
    \item Request (received from peer server): decrement receiver count of
614
        received messages.
615
    \item Acknowledgement (received from message receiver): decrement messages
616 617
        sent count if flag indicating a message has been sent has not been
        set.
618
    \item Local \codesmodelnet{} callback: decrement the local model-net
619
        received messages count.
620
\end{enumerate}
621 622 623 624 625 626 627 628 629 630 631

For more complex LP states (such as maintaining queues), reverse event
processing becomes similarly more complex. Other sections of this document
highlight strategies of dealing with those.

Note that ROSS maintains the ``lineage'' of events currently stored, which
enables ROSS to roll back the messages in the order they were originally
processed. This greatly simplifies the reverse computation process: the LP state
when reversing the effects of a particular event is exactly the state that
resulted from processing the event in the first place (of course, unless the
event handlers are buggy).
632

633 634 635
\section{TODO}

\begin{itemize}
Jonathan Jenkins's avatar
Jonathan Jenkins committed
636 637 638 639 640 641 642 643
    \item reference to ROSS user's guide, airport model, etc.
    \item add code examples?
    \item techniques for exchanging events across LP types (API tips)
    \item add codes-mapping overview
    \item add more content on reverse computation. Specifically, development
        strategies using it, tips on testing, common issues that come up, etc.
    \item put a pdf or latex2html version of this document on the codes web page
        when it's ready
644 645
\end{itemize}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
646
\begin{comment} ==== SCRATCH MATERIAL ====
647 648 649 650 651 652 653 654 655 656 657 658
\begin{figure}
\begin{lstlisting}[caption=Example code snippet., label=snippet-example]
for (i=0; i<n; i++) {
    for (j=0; j<i; j++) {
        /* do something */
    }
}
\end{lstlisting}
\end{figure}

Figure ~\ref{fig:snippet-example} shows an example of how to show a code
snippet in latex.  We can use this format as needed throughout the document.
Jonathan Jenkins's avatar
Jonathan Jenkins committed
659
\end{comment}
660

661
\end{document}