Commit bf51080e authored by Kazutomo Yoshii's avatar Kazutomo Yoshii
Browse files

Work with Intel GPUs

parent 98f2aee3
CC=gcc
#PLATFORM = intelfpga
PLATFORM = intelgpu
ifeq ($(PLATFORM),intelfpga)
CXX=g++
CFLAGS = -Wall -O2 -g -Wno-unknown-pragmas
CFLAGS += $(shell aocl compile-config)
#CFLAGS += -I${ALTERAOCLSDKROOT}/board/custom_platform_toolkit/mmd
CXXFLAGS = $(CFLAGS) -std=c++11 -DENABLE_INTELFPGA
LDFLAGS = $(shell aocl link-config)
endif
CXXFLAGS = $(CFLAGS) -std=c++11
ifeq ($(PLATFORM),intelgpu)
CXX=g++
CXXFLAGS = -Wall -O2 -g -std=gnu++0x -DENABLE_INTELGPU
LDFLAGS = -lOpenCL
endif
#LDFLAGS = $(shell aocl link-config) -lnalla_pcie_mmd
LDFLAGS = $(shell aocl link-config)
#LIBC214 = -L/opt/glibc-2.14/lib
#LDFLAGS = $(shell aocl link-config) $(LIBC214)
INSTALL_PATH ?= $$HOME/local
all: testclwrap dummy-emulator.aocx
all: testclwrap
testclwrap : testclwrap.cpp clwrap.hpp
$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
dummy-emulator.aocx : dummy.cl
aoc -o $@ -v --report -march=emulator -DEMULATOR $<
# target a real FPGA board. this may take hours
dummy.aocx : dummy.cl
aoc -o $@ -v --report --profile $<
aoc -march=emulator -DEMULATOR $<
install: clwrap.hpp
mkdir -p $(INSTALL_PATH)/include
......@@ -29,7 +36,6 @@ install: clwrap.hpp
clean:
rm -f testclwrap *.o
rm -rf dummy-emulator dummy-emulator.aocx dummy-emulator.aoco
distclean: clean
rm -f *~
......
......@@ -6,7 +6,10 @@
//
// Written by Kaz Yoshii <ky@anl.gov>
//
// See COPYRIGHT for licensing
// Tested platform:
// Intel OpenCL SDK for embedded GPUs
// Intel OpenCL SDK for FPGAs (e.g., Nallatech 385A)
//
#include <sys/stat.h>
#include <iostream>
......@@ -19,7 +22,7 @@
// uncomment below for reading the board power. only tested this on Nallatech385A
//#define ENABLE_AOCL_MMD_HACK
#define ENABLE_AOCL_MMD_HACK
#ifdef ENABLE_AOCL_MMD_HACK
extern "C" void aocl_mmd_card_info(const char *name , int id,
size_t sz,
......@@ -45,7 +48,6 @@ private:
vector<cl::Device> devs;
vector<cl::Program> prgs;
cl::Context ctx;
int platform_id, device_id, program_id;
cl::Event kernel_event;
......@@ -54,14 +56,15 @@ private:
vector<struct arg_struct> kargs;
public:
bool buildprgsFromBinary(string fn) {
char *loadfile(string fn)
{
struct stat st;
stat(fn.c_str(), &st);
if (!S_ISREG(st.st_mode)) {
cout << fn << " is not a regular file!" << endl;
return false;
return NULL;
}
// load binary and build prgs
......@@ -69,25 +72,70 @@ public:
if (! f0.good()) {
cout << "Unable to load " << fn << endl;
return false;
return NULL;
}
f0.seekg(0, f0.end);
cl_ulong sz = f0.tellg();
// cout << "size: " << sz << endl;
f0.seekg(0, f0.beg);
char *f0c = new char [sz];
char *f0c = new char [sz+1];
f0.read(f0c, sz);
f0c[sz] = 0;
return f0c;
}
cl::Program::Binaries bin0;
bin0.push_back({f0c,sz});
prgs.push_back(cl::Program(ctx,devs,bin0));
#ifdef ENABLE_INTELFPGA
bool loadprog(string fn) {
char *binfile = loadfile(fn);
if (! binfile)
return false;
cl::Program::Binaries bin;
bin.push_back({binfile,strlen(binfile)});
prgs.push_back(cl::Program(ctx,devs,bin));
return true;
}
#elif ENABLE_INTELGPU
bool loadprog(string fn) {
char *srcfile = loadfile(fn);
cl_int err = CL_SUCCESS;
if (! srcfile)
return false;
cl::Program::Sources src;
src.push_back({srcfile,strlen(srcfile)});
cl::Program p(ctx, src, &err);
if (err != CL_SUCCESS) {
cout << "Program failed" << err << endl;
return false;
}
p.build(devs);
prgs.push_back(p);
#if 0
vector<size_t> bszs = p.getInfo<CL_PROGRAM_BINARY_SIZES>();
int bindatasz = *bszs.begin();
char *bindata = new char [bindatasz+1];
vector<char*> bins;
bins.push_back(bindata);
p.getInfo(CL_PROGRAM_BINARIES, &bindata);
FILE *fp;
fp = fopen("kernel.isa", "w+b");
if(fp) {
fwrite(bindata, 1, bindatasz, fp);
fclose(fp);
} else {
runtime_error("Failed to write kernel.isa\n");
return false;
}
delete bindata;
#endif
return true;
}
#else
#error "Add -DENABLE_INTELFPGA or -DENABLE_INTELGPU to compiler options"
#endif
#ifdef AOCL_MMD_HACK
// technically this function should be called in other thread context
......@@ -115,7 +163,18 @@ public:
return;
}
pfs[pid].getDevices(CL_DEVICE_TYPE_ALL, &devs);
platform_id = pid;
#ifdef ENABLE_INTELGPU
for (int i = 0; i < (int)pfs.size(); i++) {
string pn = pfs[i].getInfo<CL_PLATFORM_NAME>();
if (pn.find("Intel Gen OCL") != string::npos) {
platform_id = i;
break;
}
}
#endif
pfs[platform_id].getDevices(CL_DEVICE_TYPE_ALL, &devs);
if (devs.size() == 0) {
cout << "No device found" << endl;
return;
......@@ -123,31 +182,60 @@ public:
ctx = devs;
platform_id = pid;
device_id = did;
program_id = 0; //
}
void listPlatforms(void) {
for (unsigned i = 0; i < pfs.size(); i++)
cout << "Platform" << i << ": " << pfs[i].getInfo<CL_PLATFORM_NAME>() << endl;
cout << "[Platforms]\n";
for (int i = 0; i < (int)pfs.size(); i++) {
cout << i << ": " << pfs[i].getInfo<CL_PLATFORM_NAME>();
if (i == platform_id) cout << " [selected]";
cout << endl;
}
}
void listDevices(void) {
for (unsigned i = 0; i < devs.size(); i++)
cout << "Device" << i << ": " << devs[i].getInfo<CL_DEVICE_NAME>() << endl;
cout << "[Devices]\n";
for (int i = 0; i < (int)devs.size(); i++) {
cout << "Device" << i << ": " << devs[i].getInfo<CL_DEVICE_NAME>();
if (i == device_id) cout << " [selected]";
cout << endl;
}
}
bool prepKernel(const char *filename, const char *funcname) {
cl_int err = CL_SUCCESS;
if (! buildprgsFromBinary(filename)) {
string fn = filename;
size_t pos = fn.find_last_of(".");
if (pos == std::string::npos) {
#ifdef ENABLE_INTELFPGA
fn = fn + ".aocx";
#elif ENABLE_INTELGPU
fn = fn + ".cl";
#endif
}
if (! loadprog(fn)) {
return false;
}
queue = cl::CommandQueue(ctx, devs[device_id], 0, &err);
kernel = cl::Kernel(prgs[program_id], funcname, &err);
// check err
if (err != CL_SUCCESS) {
switch(err) {
case CL_INVALID_PROGRAM: cout << "CL_INVALID_PROGRAM\n"; break;
case CL_INVALID_PROGRAM_EXECUTABLE: cout << "CL_INVALID_PROGRAM_EXECUTABLE\n"; break;
case CL_INVALID_KERNEL_NAME: cout << "CL_INVALID_KERNEL_NAME\n"; break;
case CL_INVALID_KERNEL_DEFINITION: cout << "CL_INVALID_KERNEL_DEFINITION\n"; break;
default:
cout << "cl::Kernel() failed:" << err << endl;
}
return false;
}
return true;
}
......
// just a dummy OpenCL kernel for testing
// Kaz Yoshii <ky@anl.gov>
__kernel void dummy(ulong n,
__global float *restrict d,
__global int *restrict a0,
__global int *restrict a1,
__global float *restrict inout)
__kernel void dummy(__global int *restrict g1,
__global int *restrict g2)
{
int g_idx = get_global_id(0);
int l_idx = get_local_id(0);
a0[g_idx] = g_idx;
a1[g_idx] = l_idx;
if (g_idx == 0) {
for (int i=0; i < n; i++) *inout += d[i];
}
g1[g_idx] = g_idx;
g2[g_idx] = l_idx;
#ifdef EMULATOR
printf("dummy: dim=%d global_size=%2lu global_id=%2lu local_size=%2lu local_id=%2lu num_groups=%2lu group_id=%2lu\n",
printf("dummy: dim=%d global_size=%2lu global_id=%2lu local_size=%2lu local_id=%2lu num_groups=%2lu group_id=%2lu\n",
get_work_dim(),
get_global_size(0), get_global_id(0),
get_local_size(0), get_local_id(0),
......
......@@ -6,59 +6,36 @@
// source $OPENCLENV
// g++ -I. -Wall -O2 -g -Wno-unknown-pragmas `aocl compile-config` -std=c++11 -o testclwrap testclwrap.cpp `aocl link-config`
// aocl
static void test_clwrap(const char *kfile)
static void test_clwrap()
{
clWrap cw;
cw.listPlatforms();
cw.listDevices();
cl_ulong n = 4;
float *d = new float[n];
for (int i = 0; i < (int)n; i++) d[i] = (float)i;
int gsiz = 8;
int lsiz = 2;
cl_int *a0 = new cl_int[gsiz];
cl_int *a1 = new cl_int[gsiz];
int *a0 = new int[gsiz];
int *a1 = new int[gsiz];
cw.prepKernel(kfile, "dummy");
cw.prepKernel("dummy", "dummy"); // filename w/ ext, kernelname
float inout = 1.0;
/* input to kernel by value. no flag is needed */
cw.appendArg(sizeof(cl_ulong), &n);
/* input to kernel by reference; data is copied from host to dev*/
cw.appendArg(sizeof(float)*n, d, cw.HOST2DEV);
/* output from kernel by reference; data is copied from dec to host */
cw.appendArg(sizeof(int)*gsiz, a0, cw.DEV2HOST);
cw.appendArg(sizeof(int)*gsiz, a1, cw.DEV2HOST);
/* bi-directional type */
cw.appendArg(sizeof(float), &inout, cw.DUPLEX);
cw.runKernel(gsiz, lsiz); /* blocking call */
cw.runKernel(gsiz, lsiz);
cout << "inout = " << inout << endl;
for (int i = 0; i < gsiz; i++)
cout << "(" << a0[i] << "," << a1[i] << ") ";
cout << i << ":" << a0[i] << "," << a1[i] << " ";
cout << endl;
};
int main(int argc, char *argv[])
int main()
{
char *kfile = (char *)"dummy.aocx";
if (argc >= 2) {
kfile = argv[1];
}
printf("Kernel: %s\n", kfile);
test_clwrap(kfile);
test_clwrap();
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment