scratch_par.c 9.08 KB
Newer Older
Swann Perarnau's avatar
Swann Perarnau committed
1 2 3 4 5 6 7 8 9 10
/*******************************************************************************
 * Copyright 2019 UChicago Argonne, LLC.
 * (c.f. AUTHORS, LICENSE)
 *
 * This file is part of the AML project.
 * For more info, see https://xgitlab.cels.anl.gov/argo/aml
 *
 * SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/

11
#include "aml.h"
12
#include "aml/layout/dense.h"
13
#include "aml/scratch/par.h"
14 15 16
#include <assert.h>

/*******************************************************************************
 * Parallel scratchpad
 * The scratch itself is organized into several different components
 * - request types: push and pull
 * - implementation of the request
 * - user API (i.e. generic request creation and call)
 * - how to init the scratch
 ******************************************************************************/

/*******************************************************************************
 * Requests:
 ******************************************************************************/

29 30 31 32
int aml_scratch_request_par_init(struct aml_scratch_request_par *req, int type,
				 struct aml_scratch_par *scratch,
				 void *dstptr, int dstid, void *srcptr,
				 int srcid)
33 34 35

{
	assert(req != NULL);
36 37 38
	void *dp, *sp;
	size_t size;

39
	req->type = type;
40
	req->scratch = scratch;
41 42
	req->srcid = srcid;
	req->dstid = dstid;
43 44 45 46 47
	dp = aml_tiling_tilestart(scratch->data.tiling, dstptr, dstid);
	sp = aml_tiling_tilestart(scratch->data.tiling, srcptr, srcid);
	size = aml_tiling_tilesize(scratch->data.tiling, srcid);
	aml_layout_dense_create(&req->dst, dp, 0, 1, 1, &size, NULL, NULL);
	aml_layout_dense_create(&req->src, sp, 0, 1, 1, &size, NULL, NULL);
48 49 50
	return 0;
}

51 52 53
/**
 * Release the two layouts held by a request.
 *
 * @param r: request whose `dst` and `src` layouts are destroyed; must not be
 * NULL. The other fields of the request are left untouched.
 * @return always 0.
 */
int aml_scratch_request_par_destroy(struct aml_scratch_request_par *r)
{
	assert(r != NULL);

	/* destination first, then source */
	aml_layout_dense_destroy(&(r->dst));
	aml_layout_dense_destroy(&(r->src));

	return 0;
}

59 60 61 62 63
/*******************************************************************************
 * Internal functions
 ******************************************************************************/
void *aml_scratch_par_do_thread(void *arg)
{
64 65
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)arg;
66 67
	struct aml_scratch_par *scratch = req->scratch;

68
	aml_dma_copy(scratch->data.dma, req->dst, req->src);
69
	return NULL;
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
}

struct aml_scratch_par_ops aml_scratch_par_inner_ops = {
	aml_scratch_par_do_thread,
};

/*******************************************************************************
 * Public API
 ******************************************************************************/

int aml_scratch_par_create_request(struct aml_scratch_data *d,
				   struct aml_scratch_request **r,
				   int type, va_list ap)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;
88 89

	struct aml_scratch_request_par *req;
90

91
	pthread_mutex_lock(&scratch->data.lock);
92
	req = aml_vector_add(scratch->data.requests);
93
	/* init the request */
94
	if (type == AML_SCRATCH_REQUEST_TYPE_PUSH) {
95 96 97 98 99 100 101 102 103 104 105
		int scratchid;
		int *srcid;
		void *srcptr;
		void *scratchptr;

		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int *);
		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int);

		/* find destination tile */
106
		int *slot = aml_vector_get(scratch->data.tilemap, scratchid);
107

108 109 110 111
		assert(slot != NULL);
		*srcid = *slot;

		/* init request */
112 113
		aml_scratch_request_par_init(req, type, scratch, srcptr, *srcid,
					     scratchptr, scratchid);
114
	} else if (type == AML_SCRATCH_REQUEST_TYPE_PULL) {
115 116 117 118
		int *scratchid;
		int srcid;
		void *srcptr;
		void *scratchptr;
119
		int slot, *tile;
120 121 122 123 124 125 126 127 128

		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int *);
		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int);

		/* find destination tile
		 * We don't use add here because adding a tile means allocating
		 * new tiles on the sch_area too. */
129
		slot = aml_vector_find(scratch->data.tilemap, srcid);
130
		if (slot == -1) {
131
			slot = aml_vector_find(scratch->data.tilemap, -1);
132
			assert(slot != -1);
133
			tile = aml_vector_get(scratch->data.tilemap, slot);
134
			*tile = srcid;
135
		} else
136 137 138
			type = AML_SCRATCH_REQUEST_TYPE_NOOP;

		/* save the key */
139 140 141
		*scratchid = slot;

		/* init request */
142 143 144
		aml_scratch_request_par_init(req, type, scratch,
					     scratchptr, *scratchid,
					     srcptr, srcid);
145
	}
146
	pthread_mutex_unlock(&scratch->data.lock);
147
	/* thread creation */
148
	if (req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
149
		pthread_create(&req->thread, NULL, scratch->ops.do_thread, req);
150
	*r = (struct aml_scratch_request *)req;
151 152 153 154
	return 0;
}

int aml_scratch_par_destroy_request(struct aml_scratch_data *d,
155
					 struct aml_scratch_request *r)
156 157 158 159 160
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;
161

162 163 164 165
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)r;
	int *tile;

166 167 168
	if (req->type != AML_SCRATCH_REQUEST_TYPE_NOOP) {
		pthread_cancel(req->thread);
		pthread_join(req->thread, NULL);
169
	}
170

171
	aml_scratch_request_par_destroy(req);
172 173

	/* destroy removes the tile from the scratch */
174
	pthread_mutex_lock(&scratch->data.lock);
175 176 177 178
	if (req->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
		tile = aml_vector_get(scratch->data.tilemap, req->srcid);
	else if (req->type == AML_SCRATCH_REQUEST_TYPE_PULL)
		tile = aml_vector_get(scratch->data.tilemap, req->dstid);
179
	aml_vector_remove(scratch->data.tilemap, tile);
180
	aml_vector_remove(scratch->data.requests, req);
181
	pthread_mutex_unlock(&scratch->data.lock);
182 183 184 185 186 187 188 189 190 191 192 193 194 195
	return 0;
}

int aml_scratch_par_wait_request(struct aml_scratch_data *d,
				   struct aml_scratch_request *r)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d;
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)r;
	int *tile;

	/* wait for completion of the request */
196 197
	if (req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
		pthread_join(req->thread, NULL);
198 199

	/* cleanup a completed request. In case of push, free up the tile */
200
	aml_scratch_request_par_destroy(req);
201
	pthread_mutex_lock(&scratch->data.lock);
202 203
	if (req->type == AML_SCRATCH_REQUEST_TYPE_PUSH) {
		tile = aml_vector_get(scratch->data.tilemap, req->srcid);
204
		aml_vector_remove(scratch->data.tilemap, tile);
205
	}
206
	aml_vector_remove(scratch->data.requests, req);
207
	pthread_mutex_unlock(&scratch->data.lock);
208 209 210
	return 0;
}

211
void *aml_scratch_par_baseptr(const struct aml_scratch_data *d)
212 213
{
	assert(d != NULL);
214 215 216
	const struct aml_scratch_par *scratch =
		(const struct aml_scratch_par *)d;

217 218 219
	return scratch->data.sch_ptr;
}

220 221 222 223 224 225 226
/**
 * Give a scratch tile back to the scratchpad.
 *
 * @param d: scratchpad data (a struct aml_scratch_par); must not be NULL.
 * @param scratchid: tilemap index of the tile to release. If the tilemap
 * has no entry for it, nothing is removed.
 * @return always 0.
 */
int aml_scratch_par_release(struct aml_scratch_data *d, int scratchid)
{
	assert(d != NULL);
	struct aml_scratch_par *par = (struct aml_scratch_par *)d;
	int *entry;

	pthread_mutex_lock(&par->data.lock);
	entry = aml_vector_get(par->data.tilemap, scratchid);
	if (entry)
		aml_vector_remove(par->data.tilemap, entry);
	pthread_mutex_unlock(&par->data.lock);

	return 0;
}

234 235 236 237 238
struct aml_scratch_ops aml_scratch_par_ops = {
	aml_scratch_par_create_request,
	aml_scratch_par_destroy_request,
	aml_scratch_par_wait_request,
	aml_scratch_par_baseptr,
239
	aml_scratch_par_release,
240 241 242 243 244 245
};

/*******************************************************************************
 * Init functions:
 ******************************************************************************/

246
int aml_scratch_par_create(struct aml_scratch **scratch,
247 248 249 250
			   struct aml_area *scratch_area,
			   struct aml_area *src_area,
			   struct aml_dma *dma, struct aml_tiling *tiling,
			   size_t nbtiles, size_t nbreqs)
251 252
{
	struct aml_scratch *ret = NULL;
253
	struct aml_scratch_par *s;
254

255 256 257
	if (scratch == NULL
	    || scratch_area == NULL || src_area == NULL
	    || dma == NULL || tiling == NULL)
258
		return -AML_EINVAL;
259

260 261
	*scratch = NULL;

262
	ret = AML_INNER_MALLOC_2(struct aml_scratch, struct aml_scratch_par);
263
	if (ret == NULL)
264
		return -AML_ENOMEM;
265

266
	ret->ops = &aml_scratch_par_ops;
267 268
	ret->data = AML_INNER_MALLOC_NEXTPTR(ret, struct aml_scratch,
					     struct aml_scratch_par);
269 270
	s = (struct aml_scratch_par *)ret->data;
	s->ops = aml_scratch_par_inner_ops;
271

272 273 274 275
	s->data.sch_area = scratch_area;
	s->data.src_area = src_area;
	s->data.dma = dma;
	s->data.tiling = tiling;
276 277

	/* allocate request array */
278
	aml_vector_create(&s->data.requests, nbreqs,
279 280
			  sizeof(struct aml_scratch_request_par),
			  offsetof(struct aml_scratch_request_par, type),
281
			  AML_SCRATCH_REQUEST_TYPE_INVALID);
282

283 284 285
	/* s init */
	aml_vector_create(&s->data.tilemap, nbtiles, sizeof(int), 0, -1);
	size_t tilesize = aml_tiling_tilesize(s->data.tiling, 0);
286

287 288
	s->data.scratch_size = nbtiles * tilesize;
	s->data.sch_ptr = aml_area_mmap(s->data.sch_area,
Nicolas Denoyelle's avatar
Nicolas Denoyelle committed
289
					      NULL,
290 291 292 293
					      s->data.scratch_size);
	pthread_mutex_init(&s->data.lock, NULL);

	*scratch = ret;
294 295 296
	return 0;
}

297
void aml_scratch_par_destroy(struct aml_scratch **scratch)
298
{
299 300
	struct aml_scratch *s;
	struct aml_scratch_par *inner;
301

302
	if (scratch == NULL)
303
		return;
304
	s = *scratch;
305
	if (s == NULL)
306
		return;
307 308

	assert(s->data != NULL);
309 310 311 312 313 314 315 316 317
	inner = (struct aml_scratch_par *)s->data;
	aml_vector_destroy(&inner->data.requests);
	aml_vector_destroy(&inner->data.tilemap);
	aml_area_munmap(inner->data.sch_area,
			inner->data.sch_ptr,
			inner->data.scratch_size);
	pthread_mutex_destroy(&inner->data.lock);
	free(s);
	*scratch = NULL;
318
}