/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/* 
 *
 *   Copyright (C) 1997 University of Chicago. 
 *   See COPYRIGHT notice in top-level directory.
 */


/* contains general definitions, declarations, and macros internal to
   the ADIO implementation, though independent of the underlying file
   system. This file is included in adio.h */ 

/* Functions and datatypes that are "internal" to the ADIO implementation
   are prefixed ADIOI_. Functions and datatypes that are part of the
   "externally visible" (documented) ADIO interface are prefixed ADIO_.

   An implementation of MPI-IO, or any other high-level interface, should
   not need to use any of the ADIOI_ functions/datatypes.
   Only someone implementing ADIO on a new file system, or modifying
   an existing ADIO implementation, would need to use the ADIOI_
   functions/datatypes. */


#ifndef ADIOI_INCLUDE
#define ADIOI_INCLUDE
/* Used to keep track of hint/info values.
 *
 * Note that there are a lot of int-sized values in here...they are
 * used as int-sized entities other places as well.  This would be a
 * problem on 32-bit systems using > 2GB files in some cases...
 *
 * cb_read, cb_write, ds_read, ds_write and some of the file-system
 * specific hints hold one of the ADIOI_HINT_* values declared in this
 * header.
 */
struct ADIOI_Hints_struct {
    int initialized;
    int striping_factor;
    int striping_unit;

    /* collective buffering hints */
    int cb_read;
    int cb_write;
    int cb_nodes;
    int cb_buffer_size;
    int cb_pfr;
    int cb_fr_type;
    int cb_fr_alignment;
    int cb_ds_threshold;
    int cb_alltoall;

    /* data sieving / independent I/O hints */
    int ds_read;
    int ds_write;
    int no_indep_rw;
    int ind_rd_buffer_size;
    int ind_wr_buffer_size;
    int deferred_open;
    int min_fdomain_size;

    char *cb_config_list;
    int *ranklist;

    /* File-system specific hints.  This is a union: the sub-structures
     * share storage, so only the one that matches the file system in use
     * is meaningful. */
    union {
        struct {
            int listio_read;
            int listio_write;
        } pvfs;
        struct {
            int debugmask;
            int posix_read;
            int posix_write;
            int listio_read;
            int listio_write;
            int dtype_read;
            int dtype_write;
        } pvfs2;
        struct {
            int start_iodevice;
            int co_ratio;
            int coll_threshold;
            int ds_in_coll;
        } lustre;
        struct {
            unsigned read_chunk_sz;  /* chunk size for direct reads */
            unsigned write_chunk_sz; /* chunk size for direct writes */
        } xfs;
        struct {
            int *bridgelist;    /* list of all bridge ranks */
            int *bridgelistnum; /* each entry here is the number of
                                   aggregators associated with the bridge
                                   rank of the same index in bridgelist */
            int numbridges;     /* total number of bridges */
        } bg;
    } fs_hints;

};

/* One entry of a singly linked list of data representations.
 * NOTE(review): the function-pointer types are the ones MPI uses for
 * MPI_Register_datarep, so this presumably records one registered
 * datarep per node -- confirm against the code that fills the list. */
typedef struct ADIOI_Datarep {
    char *name;  /* data representation name */
    void *state; /* opaque state; presumably handed back to the callbacks */
    MPI_Datarep_extent_function *extent_fn;
    MPI_Datarep_conversion_function *read_conv_fn;
    MPI_Datarep_conversion_function *write_conv_fn;
    struct ADIOI_Datarep *next; /* pointer to next datarep */
} ADIOI_Datarep;

/* Tri-state settings stored in cb_read, cb_write, ds_read, ds_write and
 * in some of the file-system specific hints.
 *
 * The last enumerator deliberately has no trailing comma: the IBM xlc and
 * Compaq Tru64 compilers reject one.
 */
enum {
    ADIOI_HINT_AUTO,    /* 0 */
    ADIOI_HINT_ENABLE,  /* 1 */
    ADIOI_HINT_DISABLE  /* 2 */
};

/* flattened datatypes. Each datatype is stored as a node of a
   globally accessible linked list. Once attribute caching on a
   datatype is available (in MPI-2), that should be used instead. */  

typedef struct ADIOI_Fl_node {  
    MPI_Datatype type;
114
    MPI_Count count;                   /* no. of contiguous blocks */
115
    ADIO_Offset *blocklens;      /* array of contiguous block lengths (bytes)*/
116
117
118
119
120
121
122
123
124
    ADIO_Offset *indices;        /* array of byte offsets of each block */
    struct ADIOI_Fl_node *next;  /* pointer to next node */
} ADIOI_Flatlist_node;

#ifdef ROMIO_PVFS2
#include <pvfs2.h>
#endif
typedef struct ADIOI_AIO_req_str {
	/* very wierd: if this MPI_Request is a pointer, some C++ compilers
125
	 * will clobber it when the MPICH C++ bindings are used */
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
	MPI_Request req;
	MPI_Offset nbytes;
	/* should probably make this a union */
#ifdef ROMIO_HAVE_WORKING_AIO
	struct aiocb *aiocbp;
#endif
#ifdef ROMIO_PVFS2
	PVFS_sys_op_id op_id;
	PVFS_sysresp_io resp_io;
	PVFS_Request file_req;
	PVFS_Request mem_req;
#endif
#ifdef ROMIO_NTFS
    /* Ptr to Overlapped struct */
    LPOVERLAPPED    lpOvl;
    /* Ptr to file handle */
	HANDLE fd;
#endif
} ADIOI_AIO_Request;

struct ADIOI_Fns_struct {
    void (*ADIOI_xxx_Open) (ADIO_File fd, int *error_code);
148
149
    void (*ADIOI_xxx_OpenColl) (ADIO_File fd, int rank, 
		    int access_mode, int *error_code);
150
151
    void (*ADIOI_xxx_ReadContig) (ADIO_File fd, void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
152
                   ADIO_Offset offset, ADIO_Status *status, int *error_code);
153
154
    void (*ADIOI_xxx_WriteContig) (ADIO_File fd, const void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
155
156
157
158
                   ADIO_Offset offset, ADIO_Status *status, int *error_code);  
    void (*ADIOI_xxx_ReadStridedColl) (ADIO_File fd, void *buf, int count,
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Status *status, int *error_code);
159
    void (*ADIOI_xxx_WriteStridedColl) (ADIO_File fd, const void *buf, int count,
160
161
162
163
164
165
166
167
168
169
170
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Status *status, int *error_code);
    ADIO_Offset (*ADIOI_xxx_SeekIndividual) (ADIO_File fd, ADIO_Offset offset, 
                       int whence, int *error_code);
    void (*ADIOI_xxx_Fcntl) (ADIO_File fd, int flag, 
                             ADIO_Fcntl_t *fcntl_struct, int *error_code); 
    void (*ADIOI_xxx_SetInfo) (ADIO_File fd, MPI_Info users_info, 
                               int *error_code);
    void (*ADIOI_xxx_ReadStrided) (ADIO_File fd, void *buf, int count,
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Status *status, int *error_code);
171
    void (*ADIOI_xxx_WriteStrided) (ADIO_File fd, const void *buf, int count,
172
173
174
175
176
177
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Status *status, int *error_code);
    void (*ADIOI_xxx_Close) (ADIO_File fd, int *error_code);
    void (*ADIOI_xxx_IreadContig) (ADIO_File fd, void *buf, int count, 
                   MPI_Datatype datatype, int file_ptr_type, 
                   ADIO_Offset offset, ADIO_Request *request, int *error_code);
178
179
    void (*ADIOI_xxx_IwriteContig) (ADIO_File fd, const void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
180
181
182
183
184
185
186
187
188
189
190
191
	           ADIO_Offset offset, ADIO_Request *request, int *error_code);
    int (*ADIOI_xxx_ReadDone) (ADIO_Request *request, ADIO_Status *status, 
               int *error_code); 
    int (*ADIOI_xxx_WriteDone) (ADIO_Request *request, ADIO_Status *status, 
               int *error_code);
    void (*ADIOI_xxx_ReadComplete) (ADIO_Request *request, ADIO_Status *status, 
               int *error_code); 
    void (*ADIOI_xxx_WriteComplete) (ADIO_Request *request, ADIO_Status *status,
	       int *error_code); 
    void (*ADIOI_xxx_IreadStrided) (ADIO_File fd, void *buf, int count,
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Request *request, int *error_code);
192
    void (*ADIOI_xxx_IwriteStrided) (ADIO_File fd, const void *buf, int count,
193
194
195
196
	       MPI_Datatype datatype, int file_ptr_type,
	       ADIO_Offset offset, ADIO_Request *request, int *error_code);
    void (*ADIOI_xxx_Flush) (ADIO_File fd, int *error_code); 
    void (*ADIOI_xxx_Resize) (ADIO_File fd, ADIO_Offset size, int *error_code);
197
    void (*ADIOI_xxx_Delete) (const char *filename, int *error_code);
198
    int  (*ADIOI_xxx_Feature) (ADIO_File fd, int flag);
199
    const char *fsname;
200
201
202
203
204
205
206
207
};

/* optypes for ADIO_RequestD */
#define ADIOI_READ                26
#define ADIOI_WRITE               27

/* Smaller / larger of two values.  Each argument appears twice in the
 * expansion, so an argument with side effects (i++, a function call) may
 * be evaluated twice -- pass plain values only. */
#define ADIOI_MIN(a, b) ((a) < (b) ? (a) : (b))
#define ADIOI_MAX(a, b) ((a) > (b) ? (a) : (b))
Rob Latham's avatar
Rob Latham committed
208
209
210
/* thanks stackoverflow:
 * http://stackoverflow.com/questions/3982348/implement-generic-swap-macro-in-c
 *
 * Swap the values of x and y, both of type T.
 *
 * x and y must be plain identifiers: their names are token-pasted to form
 * the name of the temporary.
 *
 * The expansion intentionally does NOT end in a semicolon; the caller
 * supplies it.  That keeps `if (c) ADIOI_SWAP(a, b, int); else ...` legal,
 * which a trailing `;` inside the macro would break. */
#define ADIOI_SWAP(x, y, T) do { T temp##x##y = x; x = y; y = temp##x##y; } while (0)
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231

/* buffer size used to preallocate disk space (16777216 bytes = 16 MB);
   unlike the hint defaults below this is an integer constant */
#define ADIOI_PREALLOC_BUFSZ      16777216


/* default values for some hints; these are string literals, not integers */
    /* buffer size for collective I/O = 16 MB */
#define ADIOI_CB_BUFFER_SIZE_DFLT         "16777216"
    /* buffer size for data sieving in independent reads = 4MB */
#define ADIOI_IND_RD_BUFFER_SIZE_DFLT     "4194304"
    /* buffer size for data sieving in independent writes = 512KB. default is
       smaller than for reads, because write requires read-modify-write
       with file locking. If buffer size is large there is more contention 
       for locks. */
#define ADIOI_IND_WR_BUFFER_SIZE_DFLT     "524288"
    /* use one process per processor name by default */
#define ADIOI_CB_CONFIG_LIST_DFLT "*:1"


/* some of the ADIO functions are macro-replaced */

232
233
234
/* Dispatch a collective open through the file's function table.
 * fd is parenthesized in the member access so that an expression argument
 * such as *fdp expands correctly; note that fd is evaluated twice. */
#define ADIOI_OpenColl(fd, rank, access_mode, error_code) \
	(*((fd)->fns->ADIOI_xxx_OpenColl))(fd, rank, access_mode, error_code)

235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/* Each macro below forwards to the matching entry of fd->fns.
 * fd is parenthesized in the member access so that an expression argument
 * such as *fdp expands correctly.  fd is evaluated twice, so it must not
 * have side effects. */
#define ADIO_ReadContig(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_ReadContig))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_WriteContig(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_WriteContig))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_SeekIndividual(fd,offset,whence,error_code) \
        (*((fd)->fns->ADIOI_xxx_SeekIndividual))(fd,offset,whence,error_code)

#define ADIO_Fcntl(fd,flag,fcntl_struct,error_code) \
        (*((fd)->fns->ADIOI_xxx_Fcntl))(fd,flag,fcntl_struct,error_code)

#define ADIO_IreadContig(fd,buf,count,datatype,file_ptr_type,offset,request,error_code) \
        (*((fd)->fns->ADIOI_xxx_IreadContig))(fd,buf,count,datatype,file_ptr_type,offset,request,error_code)

#define ADIO_IwriteContig(fd,buf,count,datatype,file_ptr_type,offset,request,error_code) \
        (*((fd)->fns->ADIOI_xxx_IwriteContig))(fd,buf,count,datatype,file_ptr_type,offset,request,error_code)

/* in these routines a pointer to request is passed: the macro
   dereferences it once to reach the request object and dispatches
   through that object's fd->fns table */
#define ADIO_ReadDone(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_ReadDone))(request,status,error_code)

#define ADIO_WriteDone(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_WriteDone))(request,status,error_code)

/* NOTE(review): the ADIOI_Fns_struct declaration in this header has no
   ADIOI_xxx_ReadIcomplete / ADIOI_xxx_WriteIcomplete members, so the two
   macros below would not compile if expanded -- presumably stale; confirm
   whether they still have users before removing them */
#define ADIO_ReadIcomplete(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_ReadIcomplete))(request,status,error_code)

#define ADIO_WriteIcomplete(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_WriteIcomplete))(request,status,error_code)

#define ADIO_ReadComplete(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_ReadComplete))(request,status,error_code)

#define ADIO_WriteComplete(request,status,error_code) \
        (*((*(request))->fd->fns->ADIOI_xxx_WriteComplete))(request,status,error_code)

/* Each macro below forwards to the matching entry of fd->fns.
 * fd is parenthesized in the member access so that an expression argument
 * such as *fdp expands correctly.  fd is evaluated twice, so it must not
 * have side effects. */
#define ADIO_ReadStrided(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_ReadStrided))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_WriteStrided(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_WriteStrided))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_ReadStridedColl(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_ReadStridedColl))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_WriteStridedColl(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
        (*((fd)->fns->ADIOI_xxx_WriteStridedColl))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)

#define ADIO_IreadStrided(fd,buf,count,datatype,file_ptr_type,offset,request,error_code) \
        (*((fd)->fns->ADIOI_xxx_IreadStrided))(fd,buf,count,datatype,file_ptr_type,offset,request,error_code)

#define ADIO_IwriteStrided(fd,buf,count,datatype,file_ptr_type,offset,request,error_code) \
        (*((fd)->fns->ADIOI_xxx_IwriteStrided))(fd,buf,count,datatype,file_ptr_type,offset,request,error_code)

#define ADIO_Flush(fd,error_code) (*((fd)->fns->ADIOI_xxx_Flush))(fd,error_code)

#define ADIO_Resize(fd,size,error_code) \
        (*((fd)->fns->ADIOI_xxx_Resize))(fd,size,error_code)

/* NOTE(review): fd is not a parameter of this macro; the expansion picks
   up whatever variable named fd is in scope at the call site.  Left as is
   because adding a parameter would change the macro's interface. */
#define ADIO_Delete(filename,error_code) \
        (*(fd->fns->ADIOI_xxx_Delete))(filename,error_code)

#define ADIO_SetInfo(fd, users_info, error_code) \
        (*((fd)->fns->ADIOI_xxx_SetInfo))(fd, users_info, error_code)

#define ADIO_Feature(fd, flag) \
	(*((fd)->fns->ADIOI_xxx_Feature))(fd, flag)

304
305
306
307
308
309

/* structure for storing access info of this process's request 
   from the file domain of other processes, and vice-versa. used 
   as array of structures indexed by process number. */
typedef struct {
    ADIO_Offset *offsets;   /* array of offsets */
310
311
312
    int *lens;              /* array of lengths */ 
    /* consider aints or offsets for lens? Seems to be used as in-memory
       buffer lengths, so it should be < 2G and ok as an int          */
313
314
315
316
317
318
    MPI_Aint *mem_ptrs;     /* array of pointers. used in the read/write
			       phase to indicate where the data
			       is stored in memory */
    int count;             /* size of above arrays */
} ADIOI_Access;

319
320
321
322
323
324
325
/* structure for storing generic offset/length pairs.  used to describe
   file realms among other things.  offsets and lens are parallel arrays:
   entry i of each together forms one pair */
typedef struct {
    ADIO_Offset *offsets; /* array of offsets */
    int *lens;           /* array of lengths */
    int count;            /* size of above arrays */
} ADIOI_Offlen;
326
327
328
329
330
331

/* prototypes for ADIO internal functions */

void ADIOI_SetFunctions(ADIO_File fd);
void ADIOI_Flatten_datatype(MPI_Datatype type);
void ADIOI_Flatten(MPI_Datatype type, ADIOI_Flatlist_node *flat,
332
		  ADIO_Offset st_offset, MPI_Count *curr_index);
333
void ADIOI_Delete_flattened(MPI_Datatype datatype);
334
MPI_Count ADIOI_Count_contiguous_blocks(MPI_Datatype type, MPI_Count *curr_index);
335
void ADIOI_Complete_async(int *error_code);
336
337
338
339
void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname);
void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname);
void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname);
void ADIOI_Free_fn(void *ptr, int lineno, const char *fname);
340
341
342
343
344
void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag);
void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset);
void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset);
void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset,
			   ADIO_Offset *disp);
Rob Latham's avatar
Rob Latham committed
345
void ADIOI_process_system_hints(MPI_Info info);
346
347
void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo, 
		MPI_Info *new_info);
Rob Latham's avatar
Rob Latham committed
348

349
350
351
352

/* Generic (file-system independent) implementations.  Their signatures
   match the corresponding ADIOI_Fns_struct members. */
void ADIOI_GEN_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
                     int *error_code);
void ADIOI_GEN_Flush(ADIO_File fd, int *error_code);

/* three alternative collective-open implementations, same signature */
void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
                        int access_mode, int *error_code);
void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank,
                              int access_mode, int *error_code);
void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank,
                             int access_mode, int *error_code);

void ADIOI_GEN_Delete(const char *filename, int *error_code);
360
361
362
363
364
365
366
367
368
369
/* Generic contiguous / strided, blocking / nonblocking I/O routines.
   Write variants take a const buffer; read variants take a writable one. */
void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count,
                          MPI_Datatype datatype, int file_ptr_type,
                          ADIO_Offset offset, ADIO_Status *status,
                          int *error_code);
int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
                  int wr, MPI_Request *request);
void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Request *request,
                           int *error_code);
void ADIOI_GEN_WriteContig(ADIO_File fd, const void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status,
                           int *error_code);
void ADIOI_GEN_IwriteContig(ADIO_File fd, const void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request,
                            int *error_code);
void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status,
                           int *error_code);
void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request,
                            int *error_code);
void ADIOI_GEN_IwriteStrided(ADIO_File fd, const void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Request *request,
                             int *error_code);

/* completion of nonblocking requests */
int ADIOI_GEN_IODone(ADIO_Request *request, ADIO_Status *status,
                     int *error_code);
void ADIOI_GEN_IOComplete(ADIO_Request *request, ADIO_Status *status,
                          int *error_code);

/* callbacks for aio-backed requests */
int ADIOI_GEN_aio_poll_fn(void *extra_state, ADIO_Status *status);
int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout,
                          ADIO_Status *status);
int ADIOI_GEN_aio_query_fn(void *extra_state, ADIO_Status *status);
int ADIOI_GEN_aio_free_fn(void *extra_state);

int ADIOI_GEN_Feature(ADIO_File fd, int feature);

401
402
403
404
/* Further strided and collective I/O implementations.  All share the
   parameter list of the ReadStrided / WriteStrided table entries. */
void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
                                 MPI_Datatype buftype, int file_ptr_type,
                                 ADIO_Offset offset, ADIO_Status *status,
                                 int *error_code);
void ADIOI_GEN_WriteStrided(ADIO_File fd, const void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status *status,
                            int *error_code);
void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, const void *buf, int count,
                                  MPI_Datatype datatype, int file_ptr_type,
                                  ADIO_Offset offset, ADIO_Status *status,
                                  int *error_code);
void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, const void *buf, int count,
                               MPI_Datatype datatype, int file_ptr_type,
                               ADIO_Offset offset, ADIO_Status *status,
                               int *error_code);
void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count,
                               MPI_Datatype datatype, int file_ptr_type,
                               ADIO_Offset offset, ADIO_Status *status,
                               int *error_code);
void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count,
                                MPI_Datatype datatype, int file_ptr_type,
                                ADIO_Offset offset, ADIO_Status *status,
                                int *error_code);
/* Helper routines used by collective I/O.  Parameters whose names end
   in _ptr are outputs written by the callee. */
void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset,
                           ADIO_Offset **offset_list_ptr,
                           ADIO_Offset **len_list_ptr,
                           ADIO_Offset *start_offset_ptr,
                           ADIO_Offset *end_offset_ptr,
                           int *contig_access_count_ptr);
void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets,
                             ADIO_Offset *end_offsets,
                             int nprocs, int nprocs_for_coll,
                             ADIO_Offset *min_st_offset_ptr,
                             ADIO_Offset **fd_start_ptr,
                             ADIO_Offset **fd_end_ptr,
                             int min_fd_size,
                             ADIO_Offset *fd_size_ptr,
                             int striping_unit);
int ADIOI_Calc_aggregator(ADIO_File fd,
                          ADIO_Offset off,
                          ADIO_Offset min_off,
                          ADIO_Offset *len,
                          ADIO_Offset fd_size,
                          ADIO_Offset *fd_start,
                          ADIO_Offset *fd_end);
void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
                       ADIO_Offset *len_list,
                       int contig_access_count,
                       ADIO_Offset min_st_offset,
                       ADIO_Offset *fd_start,
                       ADIO_Offset *fd_end,
                       ADIO_Offset fd_size,
                       int nprocs,
                       int *count_my_req_procs_ptr,
                       int **count_my_req_per_proc_ptr,
                       ADIOI_Access **my_req_ptr,
                       int **buf_idx_ptr);
void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs,
                           int *count_my_req_per_proc,
                           ADIOI_Access *my_req,
                           int nprocs, int myrank,
                           int *count_others_req_procs_ptr,
                           ADIOI_Access **others_req_ptr);
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530

/* KC && AC - New Collective I/O internals*/

/* NOTE(review): presumably selectors for the two flatten_state members of
   view_state (tmp_state / cur_state); MAX_OFF_TYPE is the number of
   selectors -- confirm against the users */
#define TEMP_OFF 0
#define REAL_OFF 1
#define MAX_OFF_TYPE 2

/* Communication Tags */
#define DATA_TAG 30
#define AMT_TAG 31

/* Special values of the cb_fr_type hint.
   NOTE(review): the original comment reads "cb_fr_type user size is
   non-zero", i.e. presumably any other non-zero value is a user-supplied
   size -- confirm */
#define ADIOI_FR_AAR 0
#define ADIOI_FR_FSZ -1
#define ADIOI_FR_USR_REALMS -2

/* Cursor into a flattened datatype; view_state embeds two of these
   (cur_state and tmp_state).
   NOTE(review): the per-field notes are inferred from the field names
   only -- confirm against the code that advances the cursor. */
typedef struct flatten_state
{
    ADIO_Offset abs_off;     /* presumably the absolute offset reached */
    ADIO_Offset cur_sz;      /* presumably the amount of data covered so far */
    ADIO_Offset idx;         /* presumably the index of the current block */
    ADIO_Offset cur_reg_off; /* presumably the offset inside that block */
} flatten_state;

/* Description of one memory or file view together with the progress made
   through it.  The collective I/O routines declared in this header take
   arrays of these (for example ADIOI_Exch_file_views). */
typedef struct view_state
{
    ADIO_Offset fp_ind;    /* file view params*/
    ADIO_Offset disp;      /* file view params*/
    ADIO_Offset byte_off;
    ADIO_Offset sz;
    ADIO_Offset ext;       /* preserved extent from MPI_Type_extent */
    ADIO_Offset type_sz;

    /* Current state */
    flatten_state cur_state;
    /* Scratch state for counting up ol pairs */
    flatten_state tmp_state;

    /* Preprocessed data amount and ol pairs */
    ADIO_Offset pre_sz;
    int pre_ol_ct;          /* number of preprocessed ol pairs */
    MPI_Aint *pre_disp_arr; /* presumably the pairs' displacements -- confirm */
    int *pre_blk_arr;       /* presumably the pairs' block lengths -- confirm */
    
    /* flattened representation of the view's datatype */
    ADIOI_Flatlist_node *flat_type_p;
} view_state;

/* Entry points of the view-based collective I/O code.
   ADIOI_Calc_bounds reports its results through st_offset / end_offset. */
void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype,
			int file_ptr_type, ADIO_Offset offset,
			ADIO_Offset *st_offset, ADIO_Offset *end_offset);
int ADIOI_Agg_idx (int rank, ADIO_File fd);
void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset,
			     ADIO_Offset max_end_offset);
/* rdwr selects the direction of the transfer; NOTE(review): presumably
   ADIOI_READ or ADIOI_WRITE -- confirm against the callers */
void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count,
		      MPI_Datatype datatype, int file_ptr_type,
		      ADIO_Offset offset, MPI_Datatype custom_ftype,
		      int rdwr, ADIO_Status *status, int
		      *error_code);
void ADIOI_IOStridedColl(ADIO_File fd, void *buf, int count, int rdwr,
                       MPI_Datatype datatype, int file_ptr_type,
                       ADIO_Offset offset, ADIO_Status *status, int
                       *error_code);
void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p);
ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type);
void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
			   ADIO_File fd, int count,
			   MPI_Datatype datatype, ADIO_Offset off,
			   view_state *my_mem_view_state_arr,
			   view_state *agg_file_view_state_arr,
			   view_state *client_file_view_state_arr);
531
/* Initialize the nprocs entries of view_state_arr.
   NOTE(review): op_type presumably selects which flatten_state is reset
   (TEMP_OFF / REAL_OFF) -- confirm against the definition. */
int ADIOI_init_view_state(int file_ptr_type,
                          int nprocs,
                          view_state *view_state_arr,
                          int op_type);
/* Request builders for the view-based collective I/O code.  They take
   view_state descriptions and fill in datatypes / sizes through the
   pointer arguments. */
int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
			 view_state *client_file_view_state_arr,
			 MPI_Datatype *client_comm_dtype_arr,
			 ADIO_Offset *client_comm_sz_arr,
			 ADIO_Offset *agg_dtype_offset_p,
			 MPI_Datatype *agg_dtype_p);
int ADIOI_Build_client_reqs(ADIO_File fd, 
			    int nprocs,
			    view_state *my_mem_view_state_arr,
			    view_state *agg_file_view_state_arr,
			    ADIO_Offset *agg_comm_sz_arr,
			    MPI_Datatype *agg_comm_dtype_arr);
int ADIOI_Build_client_pre_req(ADIO_File fd,
                               int agg_rank,
			       int agg_idx,
                               view_state *my_mem_view_state_p,
                               view_state *agg_file_view_state_p,
                               ADIO_Offset max_pre_req_sz,
                               int max_ol_ct);
int ADIOI_Build_client_req(ADIO_File fd,
			   int agg_rank,
			   int agg_idx,
			   view_state *my_mem_view_state_p,
			   view_state *agg_file_view_state_p,
			   ADIO_Offset agg_comm_sz,
			   MPI_Datatype *agg_comm_dtype_p);

Rob Latham's avatar
Rob Latham committed
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
/* Point-to-point aggregation for contiguous collective writes. */
void ADIOI_P2PContigWriteAggregation(ADIO_File fd,
	                             const void *buf,
				     int *error_code,
				     ADIO_Offset *st_offsets,
				     ADIO_Offset *end_offset,
				     ADIO_Offset *fd_start,
				     ADIO_Offset *fd_end);

/* Point-to-point aggregation for contiguous collective reads.
   NOTE(review): buf is declared const although a read presumably stores
   into it; the definition is not visible here, so the prototype is left
   unchanged -- confirm and fix both together. */
void ADIOI_P2PContigReadAggregation(ADIO_File fd,
	                             const void *buf,
				     int *error_code,
				     ADIO_Offset *st_offsets,
				     ADIO_Offset *end_offset,
				     ADIO_Offset *fd_start,
				     ADIO_Offset *fd_end);

578
579
580
581
582
583
584
585
586
587
ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset, 
				     int whence, int *error_code);
void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
void ADIOI_GEN_Close(ADIO_File fd, int *error_code);
void ADIOI_Shfp_fname(ADIO_File fd, int rank);
void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset size, int *error_code);
int ADIOI_Error(ADIO_File fd, int error_code, char *string);
int MPIR_Err_setmsg( int, int, const char *, const char *, const char *, ... );
int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void *extra_state);
588
int MPIR_Status_set_bytes(MPI_Status *status, MPI_Datatype datatype, MPI_Count nbytes);
589
590
int ADIOI_Uses_generic_read(ADIO_File fd);
int ADIOI_Uses_generic_write(ADIO_File fd);
591
int ADIOI_Err_create_code(const char *myname, const char *filename, int my_errno);
592
593
594
595
596
597
598
599
600
601
602
603


/* "FAKE" nonblocking routines.  Their signatures match the nonblocking
   ADIOI_Fns_struct entries.
   NOTE(review): presumably implemented on top of the blocking calls for
   file systems without real asynchronous I/O -- confirm. */
int ADIOI_FAKE_IODone(ADIO_Request *request, ADIO_Status *status,
                      int *error_code);
void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request,
                            int *error_code);
void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Request *request,
                             int *error_code);
void ADIOI_FAKE_IwriteContig(ADIO_File fd, const void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Request *request,
                             int *error_code);
void ADIOI_FAKE_IwriteStrided(ADIO_File fd, const void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Request *request,
                              int *error_code);
void ADIOI_FAKE_IOComplete(ADIO_Request *request, ADIO_Status *status,
                           int *error_code);


/* File I/O common functionality.
   Each routine takes the file handle, the transfer description and
   myname, a string naming the calling routine.  Write variants take a
   const buffer. */
int MPIOI_File_read(MPI_File fh,
                    MPI_Offset offset,
                    int file_ptr_type,
                    void *buf,
                    int count,
                    MPI_Datatype datatype,
                    char *myname,
                    MPI_Status *status);
int MPIOI_File_write(MPI_File fh,
                     MPI_Offset offset,
                     int file_ptr_type,
                     const void *buf,
                     int count,
                     MPI_Datatype datatype,
                     char *myname,
                     MPI_Status *status);
int MPIOI_File_read_all(MPI_File fh,
                        MPI_Offset offset,
                        int file_ptr_type,
                        void *buf,
                        int count,
                        MPI_Datatype datatype,
                        char *myname,
                        MPI_Status *status);
int MPIOI_File_write_all(MPI_File fh,
                         MPI_Offset offset,
                         int file_ptr_type,
                         const void *buf,
                         int count,
                         MPI_Datatype datatype,
                         char *myname,
                         MPI_Status *status);
int MPIOI_File_read_all_begin(MPI_File fh,
                              MPI_Offset offset,
                              int file_ptr_type,
                              void *buf,
                              int count,
                              MPI_Datatype datatype,
                              char *myname);
int MPIOI_File_write_all_begin(MPI_File fh,
                               MPI_Offset offset,
                               int file_ptr_type,
                               const void *buf,
                               int count,
                               MPI_Datatype datatype,
                               char *myname);
int MPIOI_File_read_all_end(MPI_File fh,
                            void *buf,
                            char *myname,
                            MPI_Status *status);
int MPIOI_File_write_all_end(MPI_File fh,
                             const void *buf,
                             char *myname,
                             MPI_Status *status);
int MPIOI_File_iwrite(MPI_File fh,
                      MPI_Offset offset,
                      int file_ptr_type,
                      const void *buf,
                      int count,
                      MPI_Datatype datatype,
                      char *myname,
                      MPI_Request *request);
int MPIOI_File_iread(MPI_File fh,
                     MPI_Offset offset,
                     int file_ptr_type,
                     void *buf,
                     int count,
                     MPI_Datatype datatype,
                     char *myname,
                     MPI_Request *request);



/* Unix-style file locking */

#if (defined(ROMIO_HFS) || defined(ROMIO_XFS))

/* Take a write lock (F_WRLCK) on the byte range given by offset, whence and
 * len, using the waiting fcntl command (F_SETLKW / F_SETLKW64).
 * Files whose file_system is ADIO_XFS or ADIO_HFS go through
 * ADIOI_Set_lock64; all others go through ADIOI_Set_lock.
 * The do { } while (0) wrapper makes the macro a single statement; note that
 * `fd` is evaluated more than once. */
# define ADIOI_WRITE_LOCK(fd, offset, whence, len) \
   do { \
     if (((fd)->file_system == ADIO_XFS) || ((fd)->file_system == ADIO_HFS)) \
       ADIOI_Set_lock64((fd)->fd_sys, F_SETLKW64, F_WRLCK, offset, whence, len); \
     else \
       ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_WRLCK, offset, whence, len); \
   } while (0)
698
699

/* Take a read lock (F_RDLCK) on the byte range given by offset, whence and
 * len, using the waiting fcntl command (F_SETLKW / F_SETLKW64).
 * Files whose file_system is ADIO_XFS or ADIO_HFS go through
 * ADIOI_Set_lock64; all others go through ADIOI_Set_lock.
 * The do { } while (0) wrapper makes the macro a single statement; note that
 * `fd` is evaluated more than once. */
# define ADIOI_READ_LOCK(fd, offset, whence, len) \
   do { \
     if (((fd)->file_system == ADIO_XFS) || ((fd)->file_system == ADIO_HFS)) \
       ADIOI_Set_lock64((fd)->fd_sys, F_SETLKW64, F_RDLCK, offset, whence, len); \
     else \
       ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_RDLCK, offset, whence, len); \
   } while (0)
703
704

/* Release (F_UNLCK) the lock on the byte range given by offset, whence and
 * len, using the non-waiting fcntl command (F_SETLK / F_SETLK64).
 * Files whose file_system is ADIO_XFS or ADIO_HFS go through
 * ADIOI_Set_lock64; all others go through ADIOI_Set_lock.
 * The do { } while (0) wrapper makes the macro a single statement; note that
 * `fd` is evaluated more than once. */
# define ADIOI_UNLOCK(fd, offset, whence, len) \
   do { \
     if (((fd)->file_system == ADIO_XFS) || ((fd)->file_system == ADIO_HFS)) \
       ADIOI_Set_lock64((fd)->fd_sys, F_SETLK64, F_UNLCK, offset, whence, len); \
     else \
       ADIOI_Set_lock((fd)->fd_sys, F_SETLK, F_UNLCK, offset, whence, len); \
   } while (0)
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758

#elif (defined(ROMIO_NTFS))

/* NTFS branch: ADIOI_Set_lock receives one of these two command selectors
 * instead of the fcntl-style F_SETLK* commands used by the other branches. */
#define ADIOI_LOCK_CMD   0
#define ADIOI_UNLOCK_CMD 1

/* Each macro below expands to a single ADIOI_Set_lock() call expression.
 * The third argument is a LOCKFILE_* flag value (0 for the read lock). */
#   define ADIOI_WRITE_LOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, ADIOI_LOCK_CMD, LOCKFILE_EXCLUSIVE_LOCK, \
                         (offset), (whence), (len))
#   define ADIOI_READ_LOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, ADIOI_LOCK_CMD, 0, \
                         (offset), (whence), (len))
#   define ADIOI_UNLOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, ADIOI_UNLOCK_CMD, LOCKFILE_FAIL_IMMEDIATELY, \
                         (offset), (whence), (len))

#else

#ifdef ADIOI_MPE_LOGGING
/* MPE-logging write lock: logs ADIOI_MPE_writelock_a, takes the F_WRLCK lock
 * with the waiting F_SETLKW command, then logs ADIOI_MPE_writelock_b.
 * All three calls live inside one do { } while (0) statement. */
#   define ADIOI_WRITE_LOCK(fd, offset, whence, len) \
        do { \
            MPE_Log_event(ADIOI_MPE_writelock_a, 0, NULL); \
            ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_WRLCK, (offset), (whence), (len)); \
            MPE_Log_event(ADIOI_MPE_writelock_b, 0, NULL); \
        } while (0)
/* MPE-logging read lock: logs ADIOI_MPE_readlock_a, takes the F_RDLCK lock
 * with the waiting F_SETLKW command, then logs ADIOI_MPE_readlock_b.
 * The opening log call must sit INSIDE the do { } while (0): with it outside,
 * `if (c) ADIOI_READ_LOCK(...)` guarded only the first log call and took the
 * lock unconditionally. */
#   define ADIOI_READ_LOCK(fd, offset, whence, len) do { \
        MPE_Log_event( ADIOI_MPE_readlock_a, 0, NULL ); \
        ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_RDLCK, offset, whence, len); \
        MPE_Log_event( ADIOI_MPE_readlock_b, 0, NULL ); } while( 0 )
/* MPE-logging unlock: logs ADIOI_MPE_unlock_a, releases the range (F_UNLCK)
 * with the non-waiting F_SETLK command, then logs ADIOI_MPE_unlock_b.
 * All three calls live inside one do { } while (0) statement. */
#   define ADIOI_UNLOCK(fd, offset, whence, len) \
        do { \
            MPE_Log_event(ADIOI_MPE_unlock_a, 0, NULL); \
            ADIOI_Set_lock((fd)->fd_sys, F_SETLK, F_UNLCK, (offset), (whence), (len)); \
            MPE_Log_event(ADIOI_MPE_unlock_b, 0, NULL); \
        } while (0)
#else
/* Default (no MPE logging) fcntl-style locking.  Each macro expands to one
 * ADIOI_Set_lock() call expression: the write and read locks use the waiting
 * F_SETLKW command, the unlock uses F_SETLK with F_UNLCK. */
#   define ADIOI_WRITE_LOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_WRLCK, (offset), (whence), (len))
#   define ADIOI_READ_LOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, F_SETLKW, F_RDLCK, (offset), (whence), (len))
#   define ADIOI_UNLOCK(fd, offset, whence, len) \
          ADIOI_Set_lock((fd)->fd_sys, F_SETLK, F_UNLCK, (offset), (whence), (len))
#endif

#endif

/* Lock primitives behind the ADIOI_WRITE_LOCK / ADIOI_READ_LOCK /
 * ADIOI_UNLOCK macros.  The macros pass (fd)->fd_sys as fd_sys, a lock
 * command as cmd (F_SETLK*, or ADIOI_LOCK_CMD / ADIOI_UNLOCK_CMD on NTFS), a
 * lock type or flag as type, and forward offset, whence and len unchanged.
 * ADIOI_Set_lock64 is the variant the XFS/HFS branch calls with the *64
 * fcntl commands.
 * NOTE(review): the meaning of the int return value is not visible here. */
int ADIOI_Set_lock(FDTYPE fd_sys, int cmd, int type,
                   ADIO_Offset offset, int whence, ADIO_Offset len);
int ADIOI_Set_lock64(FDTYPE fd_sys, int cmd, int type,
                     ADIO_Offset offset, int whence, ADIO_Offset len);

/* Allocation wrappers: forward their arguments to the matching ADIOI_*_fn
 * routine and append the call site's __LINE__ and __FILE__. */
#define ADIOI_Malloc(a)     ADIOI_Malloc_fn((a), __LINE__, __FILE__)
#define ADIOI_Calloc(a,b)   ADIOI_Calloc_fn((a), (b), __LINE__, __FILE__)
#define ADIOI_Realloc(a,b)  ADIOI_Realloc_fn((a), (b), __LINE__, __FILE__)
#define ADIOI_Free(a)       ADIOI_Free_fn((a), __LINE__, __FILE__)

/* String helpers.
 * NOTE(review): the definitions are not in this header, so the notes below
 * are inferred from the names and signatures only -- confirm against them.
 *   ADIOI_Strncpy - presumably copies instr into outstr, bounded by maxlen
 *   ADIOI_Strnapp - presumably appends to the first string, bounded by the
 *                   size_t argument
 *   ADIOI_Strdup  - presumably returns a newly allocated copy of its argument
 */
int ADIOI_Strncpy(char *outstr, const char *instr, size_t maxlen);
int ADIOI_Strnapp(char *, const char *, size_t);
char *ADIOI_Strdup(const char *);

759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
/* the current MPI standard is not const-correct, and modern compilers warn
 * about the following sort of code:
 *
 *   MPI_Info_set(info, "key", "val");
 *
 * reminding us that "key" and "val" are const.  We use the following macros to
 * cast away the const and suppress the warning.
 *
 * Every argument is parenthesized before the cast is applied, so the cast
 * covers the whole argument expression (e.g. `cond ? key_a : key_b`) and not
 * just its first operand. */
#define ADIOI_Info_set(info_,key_str_,val_) \
    MPI_Info_set((info_),((char*)(key_str_)),(char*)(val_))
#define ADIOI_Info_get(info_,key_str_,val_len_,val_,flag_) \
    MPI_Info_get((info_),((char*)(key_str_)),(val_len_),(val_),(flag_))
#define ADIOI_Info_get_valuelen(info_,key_str_,val_len_,flag_) \
    MPI_Info_get_valuelen((info_),((char*)(key_str_)),(val_len_),(flag_))
#define ADIOI_Info_delete(info_,key_str_) \
    MPI_Info_delete((info_),((char*)(key_str_)))


776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
/* Provide a fallback snprintf for systems that do not have one */
/* ATTRIBUTE(a): expands to __attribute__(a) when HAVE_GCC_ATTRIBUTE is
 * defined and to nothing otherwise.  An ATTRIBUTE definition that already
 * exists is left alone. */
#if !defined(ATTRIBUTE)
#  if defined(HAVE_GCC_ATTRIBUTE)
#    define ATTRIBUTE(a) __attribute__(a)
#  else
#    define ATTRIBUTE(a)
#  endif
#endif

/* style: allow:snprintf:1 sig:0 */

/* ADIOI_Snprintf: an alias for the system snprintf when HAVE_SNPRINTF is
 * defined; otherwise a function declared here with the same signature.
 * Both declarations carry the printf format attribute (format string is
 * argument 3, variadic arguments start at 4) via ATTRIBUTE. */
#if defined(HAVE_SNPRINTF)
#  define ADIOI_Snprintf snprintf
   /* Sometimes systems don't provide prototypes for snprintf */
#  if defined(NEEDS_SNPRINTF_DECL)
extern int snprintf(char *, size_t, const char *, ...)
     ATTRIBUTE((format(printf,3,4)));
#  endif
#else
int ADIOI_Snprintf(char *str, size_t size, const char *format, ...)
     ATTRIBUTE((format(printf,3,4)));
#endif /* HAVE_SNPRINTF */

/* FPRINTF is a plain alias for the standard fprintf. */
#define FPRINTF fprintf

/* strerror fallback: when HAVE_STRERROR is not defined, map strerror(n) onto
 * the sys_errlist[] table if HAVE_SYSERRLIST is defined; with neither
 * available, define PRINT_ERR_MSG instead.
 * NOTE(review): the consumers of PRINT_ERR_MSG are not visible in this part
 * of the header. */
#if !defined(HAVE_STRERROR)
#  if defined(HAVE_SYSERRLIST)
      extern char *sys_errlist[];
#     define strerror(n) sys_errlist[n]
#  else
#     define PRINT_ERR_MSG
#  endif
#endif

#include "adioi_error.h"

/*  MPE logging variables  */

#ifdef ADIOI_MPE_LOGGING
#include "mpe.h"

/* MPE event identifiers, one _a/_b pair per logged operation.  The
 * writelock, readlock and unlock pairs are the ones the MPE variants of the
 * ADIOI_*_LOCK macros in this header pass to MPE_Log_event.
 *
 * NOTE(review): these are definitions, not `extern` declarations, so every
 * translation unit that includes this header gets its own tentative
 * definition.  That links only where common symbols are merged (it fails
 * under -fno-common).  Switching to `extern` here needs a single defining
 * .c file, which is not visible from this header, so it is left unchanged. */
int  ADIOI_MPE_open_a;
int  ADIOI_MPE_open_b;
int  ADIOI_MPE_read_a;
int  ADIOI_MPE_read_b;
int  ADIOI_MPE_write_a;
int  ADIOI_MPE_write_b;
int  ADIOI_MPE_lseek_a;
int  ADIOI_MPE_lseek_b;
int  ADIOI_MPE_close_a;
int  ADIOI_MPE_close_b;
int  ADIOI_MPE_writelock_a;
int  ADIOI_MPE_writelock_b;
int  ADIOI_MPE_readlock_a;
int  ADIOI_MPE_readlock_b;
int  ADIOI_MPE_unlock_a;
int  ADIOI_MPE_unlock_b;
int  ADIOI_MPE_postwrite_a;
int  ADIOI_MPE_postwrite_b;
int  ADIOI_MPE_openinternal_a;
int  ADIOI_MPE_openinternal_b;
int  ADIOI_MPE_stat_a;
int  ADIOI_MPE_stat_b;
int  ADIOI_MPE_iread_a;
int  ADIOI_MPE_iread_b;
int  ADIOI_MPE_iwrite_a;
int  ADIOI_MPE_iwrite_b;
#endif

845
#ifdef ROMIO_INSIDE_MPICH
/* Assert that this MPI_Aint value can be cast to a ptr value without problem.*/
/* Basic idea is the value should be unchanged after casting
   (no loss of (meaningful) high order bytes in 8 byte MPI_Aint
      to (possible) 4 byte ptr cast)                              */
/* Should work even on 64bit or old 32bit configs                 */
  /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and
         MPI_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
  #include "glue_romio.h"

  /* Cast prefixes: written directly in front of the value to convert. */
  #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(MPIR_Pint)
  /* The next two casts are only used when you don't want sign extension
     when casting a (possible 4 byte) aint to a (8 byte) long long or offset */
  #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
  #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG

  #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) MPIR_Ext_ensure_Aint_fits_in_pointer(aint_value)
  #define ADIOI_Assert MPIR_Ext_assert
#else
  /* Outside MPICH: plain casts, the standard assert, and a no-op fit check. */
  #include <assert.h>
  #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)
  #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
  #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG
  #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value)
  #define ADIOI_Assert assert
  #define MPIR_Upint unsigned long
  #define MPIU_THREADPRIV_DECL
#endif

#ifdef USE_DBG_LOGGING    /*todo fix dependency on mpich?*/
/* The DBG*_FPRINTF macros are called like fprintf:
 *     DBG_FPRINTF(stderr, "fmt", args...);
 * When MPIR_Ext_dbg_romio_verbose_enabled is set, a "file:line:" prefix is
 * written to stderr first; the message itself is printed only when the
 * macro's own level flag is set.
 *
 * Each macro expands to ONE expression (comma + conditional operators)
 * rather than to `if (...) fprintf(...); if (...) fprintf`.  With the
 * two-statement form, `if (c) DBG_FPRINTF(...);` guarded only the prefix,
 * and a following `else` attached to the macro's hidden `if`. */
/* DBGT_FPRINTF terse level printing */
#define DBGT_FPRINTF \
    ((MPIR_Ext_dbg_romio_verbose_enabled) \
        ? (void) fprintf(stderr,"%s:%d:",__FILE__,__LINE__) : (void) 0), \
    !(MPIR_Ext_dbg_romio_terse_enabled) ? (void) 0 : (void) fprintf
/* DBG_FPRINTF default (typical level) printing */
#define DBG_FPRINTF \
    ((MPIR_Ext_dbg_romio_verbose_enabled) \
        ? (void) fprintf(stderr,"%s:%d:",__FILE__,__LINE__) : (void) 0), \
    !(MPIR_Ext_dbg_romio_typical_enabled) ? (void) 0 : (void) fprintf
/* DBGV_FPRINTF verbose level printing */
#define DBGV_FPRINTF \
    ((MPIR_Ext_dbg_romio_verbose_enabled) \
        ? (void) fprintf(stderr,"%s:%d:",__FILE__,__LINE__) : (void) 0), \
    !(MPIR_Ext_dbg_romio_verbose_enabled) ? (void) 0 : (void) fprintf
#else /* compile it out */
/* The fprintf call is still parsed (so its arguments stay type-checked) but
 * is never evaluated.  A single expression keeps a following `else` bound
 * to the caller's own `if`. */
#define DBGT_FPRINTF 1 ? (void) 0 : (void) fprintf
#define DBG_FPRINTF 1 ? (void) 0 : (void) fprintf
#define DBGV_FPRINTF 1 ? (void) 0 : (void) fprintf
#endif
Rob Latham's avatar
Rob Latham committed
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906

/* declarations for threaded I/O */
/* i/o thread data structure (bgmpio_pthreadwc) */
/* NOTE(review): the field notes below are inferred from names and types
 * only; confirm against ADIOI_IO_Thread_Func and its callers. */
typedef struct wcThreadFuncData {
    ADIO_File fd;           /* file the I/O applies to */
    int io_kind;            /* selects the kind of I/O; valid values not visible here */
    char *buf;              /* data buffer */
    int size;               /* presumably the amount of data in buf (an int) -- TODO confirm units */
    ADIO_Offset offset;     /* presumably the file offset of the operation */
    ADIO_Status status;     /* status slot; presumably filled in by the thread */
    int error_code;         /* error code slot; presumably set by the thread */
} ADIOI_IO_ThreadFuncData;

/* I/O thread routine with the void *(*)(void *) shape of a thread start
 * function.
 * NOTE(review): vptr_args presumably points to an ADIOI_IO_ThreadFuncData;
 * the meaning of the returned pointer is not visible here -- confirm against
 * the definition. */
void *ADIOI_IO_Thread_Func(void *vptr_args);




907
908
#endif