/*******************************************************************************
 * Copyright 2019 UChicago Argonne, LLC.
 * (c.f. AUTHORS, LICENSE)
 *
 * This file is part of the AML project.
 * For more info, see https://xgitlab.cels.anl.gov/argo/aml
 *
 * SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/

#include "aml.h"
#include <assert.h>

/*******************************************************************************
 * Parallel scratchpad
 * This scratchpad is organized into several components:
 * - the request types: push and pull
 * - the request implementation (one thread per request)
 * - the user API (i.e. generic request creation and handling)
 * - the scratch initialization
 ******************************************************************************/
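
/*
 * Usage sketch (illustrative, not part of this file): create a parallel
 * scratchpad and pull one tile into it. The creation varargs follow
 * aml_scratch_par_vinit() below (scratch area, source area, dma, tiling,
 * number of tiles, number of requests); the pull varargs follow the PULL
 * branch of aml_scratch_par_create_request(). The generic aml_scratch_pull()
 * and aml_scratch_baseptr() front-ends from aml.h are assumed here, and
 * fast, slow, dma, tiling, a, i, ntiles, nreqs are placeholders.
 *
 *	struct aml_scratch *sa;
 *	void *sabase;
 *	int sai;
 *
 *	aml_scratch_par_create(&sa, fast, slow, dma, tiling,
 *			       (size_t)ntiles, (size_t)nreqs);
 *	sabase = aml_scratch_baseptr(sa);
 *	// copy tile i of buffer a (in the source area) into scratch slot sai
 *	aml_scratch_pull(sa, sabase, &sai, a, i);
 *	aml_scratch_par_destroy(sa);
 *	free(sa);	// aml_scratch_par_create() allocates with calloc()
 */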

/*******************************************************************************
 * Requests:
 ******************************************************************************/

int aml_scratch_request_par_init(struct aml_scratch_request_par *req, int type,
				 struct aml_scratch_par *scratch,
				 void *dstptr, int dstid, void *srcptr, int srcid)
{
	assert(req != NULL);
	req->type = type;
	req->scratch = scratch;
	req->srcptr = srcptr;
	req->srcid = srcid;
	req->dstptr = dstptr;
	req->dstid = dstid;
	return 0;
}

int aml_scratch_request_par_destroy(struct aml_scratch_request_par *r)
{
	assert(r != NULL);
	return 0;
}

/*******************************************************************************
 * Internal functions
 ******************************************************************************/
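/* Each request is serviced by its own pthread running the function below,
 * which performs a single blocking tile copy through the scratch's dma.
 * The thread is spawned in aml_scratch_par_create_request() and joined in
 * aml_scratch_par_wait_request() (or cancelled on destroy). */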
void *aml_scratch_par_do_thread(void *arg)
{
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)arg;
	struct aml_scratch_par *scratch = req->scratch;

	aml_dma_copy(scratch->data.dma, scratch->data.tiling, req->dstptr,
		     req->dstid, scratch->data.tiling, req->srcptr, req->srcid);
	return NULL;
}

struct aml_scratch_par_ops aml_scratch_par_inner_ops = {
	aml_scratch_par_do_thread,
};

/*******************************************************************************
 * Public API
 ******************************************************************************/

/* TODO: not thread-safe */

int aml_scratch_par_create_request(struct aml_scratch_data *d,
				   struct aml_scratch_request **r,
				   int type, va_list ap)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;

	struct aml_scratch_request_par *req;

	pthread_mutex_lock(&scratch->data.lock);
	req = aml_vector_add(&scratch->data.requests);
	/* init the request */
	if(type == AML_SCRATCH_REQUEST_TYPE_PUSH)
	{
		int scratchid;
		int *srcid;
		void *srcptr;
		void *scratchptr;

		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int *);
		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int);

		/* find destination tile */
		int *slot = aml_vector_get(&scratch->data.tilemap, scratchid);
		assert(slot != NULL);
		*srcid = *slot;

		/* init request */
		aml_scratch_request_par_init(req, type, scratch, srcptr, *srcid,
					     scratchptr, scratchid);
	}
	else if(type == AML_SCRATCH_REQUEST_TYPE_PULL)
	{
		int *scratchid;
		int srcid;
		void *srcptr;
		void *scratchptr;

		scratchptr = va_arg(ap, void *);
		scratchid = va_arg(ap, int *);
		srcptr = va_arg(ap, void *);
		srcid = va_arg(ap, int);

		/* find the destination tile
		 * We don't use aml_vector_add() here because adding a tile
		 * would also mean allocating new tiles on the sch_area. */
		int slot = aml_vector_find(&scratch->data.tilemap, srcid);
		if(slot == -1)
		{
			slot = aml_vector_find(&scratch->data.tilemap, -1);
			assert(slot != -1);
			int *tile = aml_vector_get(&scratch->data.tilemap, slot);
			*tile = srcid;
		}
		else
			type = AML_SCRATCH_REQUEST_TYPE_NOOP;

		/* save the key */
		*scratchid = slot;

		/* init request */
		aml_scratch_request_par_init(req, type, scratch,
					     scratchptr, *scratchid,
					     srcptr, srcid);
	}
	pthread_mutex_unlock(&scratch->data.lock);
	/* thread creation */
	if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
		pthread_create(&req->thread, NULL, scratch->ops.do_thread, req);
	*r = (struct aml_scratch_request *)req;
	return 0;
}

int aml_scratch_par_destroy_request(struct aml_scratch_data *d,
					 struct aml_scratch_request *r)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch =
		(struct aml_scratch_par *)d;

	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)r;
	int *tile = NULL;

	if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
	{
		pthread_cancel(req->thread);
		pthread_join(req->thread, NULL);
	}

	aml_scratch_request_par_destroy(req);

	/* destroy removes the tile from the scratch */
	pthread_mutex_lock(&scratch->data.lock);
	if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
		tile = aml_vector_get(&scratch->data.tilemap, req->srcid);
	else if(req->type == AML_SCRATCH_REQUEST_TYPE_PULL)
		tile = aml_vector_get(&scratch->data.tilemap, req->dstid);
	if(tile != NULL)
		aml_vector_remove(&scratch->data.tilemap, tile);
	aml_vector_remove(&scratch->data.requests, req);
	pthread_mutex_unlock(&scratch->data.lock);
	return 0;
}

int aml_scratch_par_wait_request(struct aml_scratch_data *d,
				   struct aml_scratch_request *r)
{
	assert(d != NULL);
	assert(r != NULL);
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d;
	struct aml_scratch_request_par *req =
		(struct aml_scratch_request_par *)r;
	int *tile;

	/* wait for completion of the request */
	if(req->type != AML_SCRATCH_REQUEST_TYPE_NOOP)
		pthread_join(req->thread, NULL);

	/* cleanup a completed request. In case of push, free up the tile */
	aml_scratch_request_par_destroy(req);
	pthread_mutex_lock(&scratch->data.lock);
	if(req->type == AML_SCRATCH_REQUEST_TYPE_PUSH)
	{
		tile = aml_vector_get(&scratch->data.tilemap, req->srcid);
		aml_vector_remove(&scratch->data.tilemap, tile);
	}
	aml_vector_remove(&scratch->data.requests, req);
	pthread_mutex_unlock(&scratch->data.lock);
	return 0;
}

void *aml_scratch_par_baseptr(const struct aml_scratch_data *d)
{
	assert(d != NULL);
	const struct aml_scratch_par *scratch =
		(const struct aml_scratch_par *)d;
	return scratch->data.sch_ptr;
}

int aml_scratch_par_release(struct aml_scratch_data *d, int scratchid)
{
	assert(d != NULL);
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d;
	int *tile;

	pthread_mutex_lock(&scratch->data.lock);
	tile = aml_vector_get(&scratch->data.tilemap, scratchid);
	if(tile != NULL)
		aml_vector_remove(&scratch->data.tilemap, tile);
	pthread_mutex_unlock(&scratch->data.lock);
	return 0;
}

struct aml_scratch_ops aml_scratch_par_ops = {
	aml_scratch_par_create_request,
	aml_scratch_par_destroy_request,
	aml_scratch_par_wait_request,
	aml_scratch_par_baseptr,
	aml_scratch_par_release,
};

/*******************************************************************************
 * Init functions:
 ******************************************************************************/

int aml_scratch_par_create(struct aml_scratch **d, ...)
{
	va_list ap;
	struct aml_scratch *ret = NULL;
	intptr_t baseptr, dataptr;
	va_start(ap, d);

	/* alloc */
	baseptr = (intptr_t) calloc(1, AML_SCRATCH_PAR_ALLOCSIZE);
	dataptr = baseptr + sizeof(struct aml_scratch);

	ret = (struct aml_scratch *)baseptr;
	ret->data = (struct aml_scratch_data *)dataptr;

	aml_scratch_par_vinit(ret, ap);

	va_end(ap);
	*d = ret;
	return 0;
}
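
/* vararg order consumed below (and forwarded by aml_scratch_par_create()):
 * scratch area, source area, dma, tiling, number of tiles in the scratch,
 * size of the request array. */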
int aml_scratch_par_vinit(struct aml_scratch *d, va_list ap)
{
	d->ops = &aml_scratch_par_ops;
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d->data;

	scratch->ops = aml_scratch_par_inner_ops;

	scratch->data.sch_area = va_arg(ap, struct aml_area *);
	scratch->data.src_area = va_arg(ap, struct aml_area *);
	scratch->data.dma = va_arg(ap, struct aml_dma *);
	scratch->data.tiling = va_arg(ap, struct aml_tiling *);
	size_t nbtiles = va_arg(ap, size_t);
	size_t nbreqs = va_arg(ap, size_t);

	/* allocate request array */
	aml_vector_init(&scratch->data.requests, nbreqs,
			sizeof(struct aml_scratch_request_par),
			offsetof(struct aml_scratch_request_par, type),
			AML_SCRATCH_REQUEST_TYPE_INVALID);

	/* scratch init */
	aml_vector_init(&scratch->data.tilemap, nbtiles, sizeof(int), 0, -1);
	size_t tilesize = aml_tiling_tilesize(scratch->data.tiling, 0);
	scratch->data.sch_ptr = aml_area_calloc(scratch->data.sch_area,
						nbtiles, tilesize);
	pthread_mutex_init(&scratch->data.lock, NULL);
	return 0;
}
int aml_scratch_par_init(struct aml_scratch *d, ...)
{
	int err;
	va_list ap;
	va_start(ap, d);
	err = aml_scratch_par_vinit(d, ap);
	va_end(ap);
	return err;
}

int aml_scratch_par_destroy(struct aml_scratch *d)
{
	struct aml_scratch_par *scratch = (struct aml_scratch_par *)d->data;
	aml_vector_destroy(&scratch->data.requests);
	aml_vector_destroy(&scratch->data.tilemap);
	aml_area_free(scratch->data.sch_area, scratch->data.sch_ptr);
	pthread_mutex_destroy(&scratch->data.lock);
	return 0;
}