00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _VIENNACL_KERNEL_HPP_
00016 #define _VIENNACL_KERNEL_HPP_
00017
00022 #ifdef __APPLE__
00023 #include <OpenCL/cl.h>
00024 #else
00025 #include <CL/cl.h>
00026 #endif
00027
00028 #include "viennacl/ocl/forwards.h"
00029 #include "viennacl/ocl/backend.hpp"
00030 #include "viennacl/ocl/handle.hpp"
00031 #include "viennacl/ocl/program.hpp"
00032 #include "viennacl/ocl/device.hpp"
00033 #include "viennacl/ocl/local_mem.hpp"
00034
00035 namespace viennacl
00036 {
00037 namespace ocl
00038 {
00039
00041 class kernel
00042 {
00043 template <typename KernelType>
00044 friend void enqueue(KernelType & k, viennacl::ocl::command_queue const & queue);
00045
00046
00047 public:
00048 kernel() : handle_(0)
00049 {
00050 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00051 std::cout << "ViennaCL: Creating kernel object (default CTOR)" << std::endl;
00052 #endif
00053 set_work_size_defaults();
00054 }
00055
00056 kernel(viennacl::ocl::handle<cl_program> const & prog, std::string const & name)
00057 : handle_(0), program_(prog), name_(name), init_done_(false)
00058 {
00059 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00060 std::cout << "ViennaCL: Creating kernel object (full CTOR)" << std::endl;
00061 #endif
00062 set_work_size_defaults();
00063 }
00064
00065 kernel(kernel const & other)
00066 : handle_(other.handle_), program_(other.program_), name_(other.name_), init_done_(other.init_done_)
00067 {
00068 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00069 std::cout << "ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
00070 #endif
00071 local_work_size_[0] = other.local_work_size_[0];
00072 local_work_size_[1] = other.local_work_size_[1];
00073
00074 global_work_size_[0] = other.global_work_size_[0];
00075 global_work_size_[1] = other.global_work_size_[1];
00076 }
00077
00078 viennacl::ocl::kernel & operator=(const kernel & other)
00079 {
00080 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00081 std::cout << "ViennaCL: Assigning kernel object" << std::endl;
00082 #endif
00083 handle_ = other.handle_;
00084 program_ = other.program_;
00085 name_ = other.name_;
00086 init_done_ = other.init_done_;
00087 local_work_size_[0] = other.local_work_size_[0];
00088 local_work_size_[1] = other.local_work_size_[1];
00089 global_work_size_[0] = other.global_work_size_[0];
00090 global_work_size_[1] = other.global_work_size_[1];
00091 return *this;
00092 }
00093
00094
00096 void arg(unsigned int pos, cl_uint val)
00097 {
00098 init();
00099 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00100 std::cout << "ViennaCL: Setting unsigned long kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00101 #endif
00102 cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_uint), (void*)&val);
00103 VIENNACL_ERR_CHECK(err);
00104 }
00105
00107 void arg(unsigned int pos, float val)
00108 {
00109 init();
00110 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00111 std::cout << "ViennaCL: Setting floating point kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00112 #endif
00113 cl_int err = clSetKernelArg(handle_, pos, sizeof(float), (void*)&val);
00114 VIENNACL_ERR_CHECK(err);
00115 }
00116
00118 void arg(unsigned int pos, double val)
00119 {
00120 init();
00121 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00122 std::cout << "ViennaCL: Setting double precision kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00123 #endif
00124 cl_int err = clSetKernelArg(handle_, pos, sizeof(double), (void*)&val);
00125 VIENNACL_ERR_CHECK(err);
00126 }
00127
00128
00130 template<class VCL_TYPE>
00131 void arg(unsigned int pos, VCL_TYPE const & val)
00132 {
00133 init();
00134 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00135 std::cout << "ViennaCL: Setting generic kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00136 #endif
00137 cl_mem temp = val.handle();
00138 cl_int err = clSetKernelArg(handle_, pos, sizeof(cl_mem), (void*)&temp);
00139 VIENNACL_ERR_CHECK(err);
00140 }
00141
00142
00144 template<class CL_TYPE>
00145 void arg(unsigned int pos, viennacl::ocl::handle<CL_TYPE> const & h)
00146 {
00147
00148 init();
00149 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00150 std::cout << "ViennaCL: Setting handle kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00151 #endif
00152 CL_TYPE temp = h;
00153 cl_int err = clSetKernelArg(handle_, pos, sizeof(CL_TYPE), (void*)&temp);
00154 VIENNACL_ERR_CHECK(err);
00155 }
00156
00157
00158
00160 void arg(unsigned int pos, const local_mem & mem)
00161 {
00162 unsigned int size = mem.size();
00163 init();
00164 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00165 std::cout << "ViennaCL: Setting local memory kernel argument at pos " << pos << " for kernel " << name_ << std::endl;
00166 #endif
00167 cl_int err = clSetKernelArg(handle_, pos, size, 0);
00168 VIENNACL_ERR_CHECK(err);
00169 }
00170
00171
00172
00174 template <typename T0>
00175 kernel & operator()(T0 const & t0)
00176 {
00177 arg(0, t0);
00178 return *this;
00179 }
00180
00182 template <typename T0, typename T1>
00183 kernel & operator()(T0 const & t0, T1 const & t1)
00184 {
00185 arg(0, t0); arg(1, t1);
00186 return *this;
00187 }
00188
00190 template <typename T0, typename T1, typename T2>
00191 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2)
00192 {
00193 arg(0, t0); arg(1, t1); arg(2, t2);
00194 return *this;
00195 }
00196
00198 template <typename T0, typename T1, typename T2, typename T3>
00199 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3)
00200 {
00201 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3);
00202 return *this;
00203 }
00204
00206 template <typename T0, typename T1, typename T2, typename T3, typename T4>
00207 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4)
00208 {
00209 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4);
00210 return *this;
00211 }
00212
00214 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
00215 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5)
00216 {
00217 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00218 return *this;
00219 }
00220
00222 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
00223 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6)
00224 {
00225 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6);
00226 return *this;
00227 }
00228
00230 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
00231 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7)
00232 {
00233 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7);
00234 return *this;
00235 }
00236
00238 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
00239 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8)
00240 {
00241 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8);
00242 return *this;
00243 }
00244
00246 template <typename T0, typename T1, typename T2, typename T3, typename T4,
00247 typename T5, typename T6, typename T7, typename T8, typename T9>
00248 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4,
00249 T5 const & t5, T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9)
00250 {
00251 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9);
00252 return *this;
00253 }
00254
00256 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00257 typename T6, typename T7, typename T8, typename T9, typename T10>
00258 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00259 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10)
00260 {
00261 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5); arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10);
00262 return *this;
00263 }
00264
00266 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00267 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11>
00268 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00269 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11)
00270 {
00271 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00272 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00273 return *this;
00274 }
00275
00277 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00278 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11, typename T12>
00279 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00280 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11, T12 const & t12)
00281 {
00282 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00283 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11); arg(12, t12);
00284 return *this;
00285 }
00286
00288 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00289 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00290 typename T12, typename T13>
00291 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00292 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00293 T12 const & t12, T13 const & t13)
00294 {
00295 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00296 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00297 arg(12, t12); arg(13, t13);
00298 return *this;
00299 }
00300
00302 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00303 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00304 typename T12, typename T13, typename T14>
00305 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00306 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00307 T12 const & t12, T13 const & t13, T14 const & t14)
00308 {
00309 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00310 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00311 arg(12, t12); arg(13, t13); arg(14, t14);
00312 return *this;
00313 }
00314
00316 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00317 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00318 typename T12, typename T13, typename T14, typename T15>
00319 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00320 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00321 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15)
00322 {
00323 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00324 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00325 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15);
00326 return *this;
00327 }
00328
00330 template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5,
00331 typename T6, typename T7, typename T8, typename T9, typename T10, typename T11,
00332 typename T12, typename T13, typename T14, typename T15, typename T16>
00333 kernel & operator()(T0 const & t0, T1 const & t1, T2 const & t2, T3 const & t3, T4 const & t4, T5 const & t5,
00334 T6 const & t6, T7 const & t7, T8 const & t8, T9 const & t9, T10 const & t10, T11 const & t11,
00335 T12 const & t12, T13 const & t13, T14 const & t14, T15 const & t15, T16 const & t16)
00336 {
00337 arg(0, t0); arg(1, t1); arg(2, t2); arg(3, t3); arg(4, t4); arg(5, t5);
00338 arg(6, t6); arg(7, t7); arg(8, t8); arg(9, t9); arg(10, t10); arg(11, t11);
00339 arg(12, t12); arg(13, t13); arg(14, t14); arg(15, t15); arg(16, t16);
00340 return *this;
00341 }
00342
00347 size_t local_work_size(int index = 0) const
00348 {
00349 assert(index == 0 || index == 1);
00350 return local_work_size_[index];
00351 }
00356 size_t global_work_size(int index = 0) const
00357 {
00358 assert(index == 0 || index == 1);
00359 return global_work_size_[index];
00360 }
00361
00367 void local_work_size(int index, size_t s)
00368 {
00369 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00370 std::cout << "ViennaCL: Setting local work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00371 #endif
00372 assert(index == 0 || index == 1);
00373 local_work_size_[index] = s;
00374 }
00380 void global_work_size(int index, size_t s)
00381 {
00382 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00383 std::cout << "ViennaCL: Setting global work size to " << s << " at index " << index << " for kernel " << name_ << std::endl;
00384 #endif
00385 assert(index == 0 || index == 1);
00386 global_work_size_[index] = s;
00387 }
00388
00389 std::string const & name() const { return name_; }
00390
00391 private:
00392 void create_kernel()
00393 {
00394 cl_int err;
00395 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00396 std::cout << "ViennaCL: Building kernel " << name_ << std::endl;
00397 #endif
00398 handle_ = clCreateKernel(program_, name_.c_str(), &err);
00399
00400 if (err != CL_SUCCESS)
00401 {
00402 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
00403 std::cout << "ViennaCL: Could not build kernel '" << name_ << "'." << std::endl;
00404 #endif
00405 std::cerr << "Could not build kernel '" << name_ << "'." << std::endl;
00406 }
00407 VIENNACL_ERR_CHECK(err);
00408 }
00409
00410 void set_work_size_defaults()
00411 {
00412 if (viennacl::ocl::current_device().type() == CL_DEVICE_TYPE_GPU)
00413 {
00414 local_work_size_[0] = 128; local_work_size_[1] = 0;
00415 global_work_size_[0] = 128*128; global_work_size_[1] = 0;
00416 }
00417 else
00418 {
00419
00420 local_work_size_[0] = 1; local_work_size_[1] = 0;
00421 global_work_size_[0] = viennacl::ocl::current_device().max_compute_units(); global_work_size_[1] = 0;
00422 }
00423 }
00424
00425 void init()
00426 {
00427 if (!init_done_)
00428 {
00429 create_kernel();
00430 init_done_ = true;
00431 }
00432 }
00433
00434 viennacl::ocl::handle<cl_kernel> const & handle() const { return handle_; }
00435
00436 viennacl::ocl::handle<cl_kernel> handle_;
00437 viennacl::ocl::handle<cl_program> program_;
00438 std::string name_;
00439 bool init_done_;
00440 size_t local_work_size_[2];
00441 size_t global_work_size_[2];
00442 };
00443
00444 }
00445 }
00446
00447 #endif