Commit deeb7638 authored by Kazutomo Yoshii's avatar Kazutomo Yoshii
Browse files

updated README

parent 836e4bce
clwrap.hpp is a convenient header file for wrapping OpenCL host
boilerplate procedure.
After checking out this repository, please source a proper OpenCL
environment first, then make and run it using the OpenCL emulator for
a quick functional verification.
a quick functional verification. The following examples are targeted
to the JLSE environment, but with small modifications this clwrap
demo works in other environments.
$ source /soft/fpga/altera/pro/ # e.g., for JLSE users
$ make
$ ./
login $ git clone && cd clwrap
login $ source /soft/fpga/altera/pro/
login $ make
login $ ./
Platform0: Altera SDK for OpenCL
Device0: EmulatorDevice : Emulated Device
login $ cp /soft/fpga/altera/clwrap/dummy.aocx . # use pre-compiled binary
login $ qsub -q fpga_385a -n 1 -t 30 -I
ruth $ source /soft/fpga/altera/pro/
ruth $ ./testclwrap
- This only works with Intel FPGA SDK for
......@@ -3,20 +3,21 @@
__kernel void dummy(ulong n,
__global float *restrict d,
__global int *restrict g1,
__global int *restrict g2)
__global int *restrict a0,
__global int *restrict a1,
__global float *restrict inout)
int g_idx = get_global_id(0);
int l_idx = get_local_id(0);
g1[g_idx] = g_idx;
g2[g_idx] = l_idx;
a0[g_idx] = g_idx;
a1[g_idx] = l_idx;
printf("d: ");
for (int i=0; i < n; i++) printf("%.1f ", d[i]);
if (g_idx == 0) {
for (int i=0; i < n; i++) *inout += d[i];
printf("dummy: dim=%d global_size=%2lu global_id=%2lu local_size=%2lu local_id=%2lu num_groups=%2lu group_id=%2lu\n",
get_global_size(0), get_global_id(0),
......@@ -27,6 +27,8 @@ static void test_clwrap(const char *kfile)
cw.prepKernel(kfile, "dummy");
float inout = 1.0;
/* input to kernel by value. no flag is needed */
cw.appendArg(sizeof(cl_ulong), &n);
/* input to kernel by reference; data is copied from host to dev*/
......@@ -34,31 +36,16 @@ static void test_clwrap(const char *kfile)
/* output from kernel by reference; data is copied from dev to host */
cw.appendArg(sizeof(int)*gsiz, a0, cw.DEV2HOST);
cw.appendArg(sizeof(int)*gsiz, a1, cw.DEV2HOST);
/* bi-directional type */
cw.appendArg(sizeof(float), &inout, cw.DUPLEX);
/* Other available flags:
cw.DUPLEX : bi-directional type
NULL : kernel local type
cw.runKernel(gsiz, lsiz); /* blocking call */
cout << "inout = " << inout << endl;
for (int i = 0; i < gsiz; i++)
cout << i << "," << a0[i] << "," << a1[i] << " ";
cout << endl;
To measure the elapsed time of a run on an actual FPGA board,
call cw.getKernelElapsedNanoSec() after cw.runKernel().
To read the power consumption, uncomment the following line
in clwrap.hpp, call cw.readboardpower() from the host, and
recompile.
Note: you need to write your version of cw.runKernel() if
you want to sample power while your kernel is running.
int main(int argc, char *argv[])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment