#ifndef _VIENNACL_VECTOR_OPERATIONS_HPP_
#define _VIENNACL_VECTOR_OPERATIONS_HPP_

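/** @file vector_operations.hpp
*   @brief Implementations of the basic vector operations: addition, subtraction, scaling, inner products, norms, and plane rotations, each dispatched to an OpenCL kernel.
*/
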
#include <algorithm>   //std::min
#include <cassert>

#include "viennacl/forwards.h"
#include "viennacl/ocl/device.hpp"
#include "viennacl/ocl/handle.hpp"
#include "viennacl/ocl/kernel.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/linalg/kernels/vector_kernels.h"

namespace viennacl
{
  namespace linalg
  {
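    /** @brief Addition of two vectors: result = vec1 + vec2. Prefer the overloaded operators on viennacl::vector; this backend function is for fine-tuned use.
    *
    * A minimal usage sketch (assumes an initialized OpenCL context and vectors of matching size):
    * @code
    * viennacl::vector<float> x(100), y(100), z(100);
    * viennacl::linalg::add(x, y, z);   // z = x + y
    * @endcode
    *
    * @param vec1   The first addend
    * @param vec2   The second addend
    * @param result The result vector, which must already have the size of the operands
    */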
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void add(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
             const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
             viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec1.size() == vec2.size() && vec1.size() == result.size());

      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "add");

      viennacl::ocl::enqueue(k(vec1, vec2, result, size));
    }

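    /** @brief In-place addition of two vectors: vec1 += vec2.
    *
    * @param vec1 The vector to which vec2 is added (updated in place)
    * @param vec2 The vector to add
    */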
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_add(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                     const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2)
    {
      assert(vec1.size() == vec2.size());
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_add");

      viennacl::ocl::enqueue(k(vec1, vec2, size));
    }

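    /** @brief Subtraction of two vectors: result = vec1 - vec2.
    *
    * @param vec1   The minuend
    * @param vec2   The subtrahend
    * @param result The result vector, resized to vec1.size() if necessary
    */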
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void sub(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
             const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
             viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec1.size() == vec2.size());
      result.resize(vec1.size());
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "sub");

      viennacl::ocl::enqueue(k(vec1, vec2, result, size));
    }

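    /** @brief In-place subtraction of two vectors: vec1 -= vec2.
    *
    * @param vec1 The vector from which vec2 is subtracted (updated in place)
    * @param vec2 The vector to subtract
    */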
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_sub(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                     const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2)
    {
      assert(vec1.size() == vec2.size());
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_sub");

      viennacl::ocl::enqueue(k(vec1, vec2, size));
    }

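    /** @brief Scaling of a vector by a scalar residing on the GPU: result = alpha * vec.
    *
    * @param vec    The vector to be scaled
    * @param alpha  The scaling factor (a viennacl::scalar on the device)
    * @param result The result vector, resized to vec.size() if necessary
    */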
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void mult(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
              scalar<SCALARTYPE> const & alpha,
              viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      result.resize(vec.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "mult");

      viennacl::ocl::enqueue(k(vec, alpha, result, static_cast<cl_uint>(vec.internal_size())));
    }

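    /** @brief Scaling of a vector by a scalar residing on the CPU: result = alpha * vec. The factor is passed directly as a kernel argument, so no viennacl::scalar needs to be created on the device.
    *
    * @param vec    The vector to be scaled
    * @param alpha  The scaling factor (a plain host value)
    * @param result The result vector, resized to vec.size() if necessary
    */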
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void mult(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
              SCALARTYPE alpha,
              viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      result.resize(vec.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "cpu_mult");

      viennacl::ocl::enqueue(k(vec, alpha, result, static_cast<cl_uint>(vec.internal_size())));
    }

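    /** @brief In-place scaling by a GPU scalar: vec *= alpha.
    *
    * @param vec   The vector to be scaled (updated in place)
    * @param alpha The scaling factor (a viennacl::scalar on the device)
    */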
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_mult(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
                      scalar<SCALARTYPE> const & alpha)
    {
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_mult");

      viennacl::ocl::enqueue(k(vec, alpha, static_cast<cl_uint>(vec.internal_size())));
    }

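    /** @brief In-place scaling by a CPU scalar: vec *= alpha.
    *
    * @param vec   The vector to be scaled (updated in place)
    * @param alpha The scaling factor (a plain host value)
    */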
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_mult(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
                      SCALARTYPE alpha)
    {
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "cpu_inplace_mult");

      viennacl::ocl::enqueue(k(vec, alpha, static_cast<cl_uint>(vec.internal_size())));
    }

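    /** @brief Division of a vector by a GPU scalar: result = vec / alpha.
    *
    * @param vec    The vector to be divided
    * @param alpha  The divisor (a viennacl::scalar on the device)
    * @param result The result vector
    */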
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void divide(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
                scalar<SCALARTYPE> const & alpha,
                viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec.size() == result.size());
      result.resize(vec.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "divide");

      viennacl::ocl::enqueue(k(vec, alpha, result, static_cast<cl_uint>(vec.internal_size())));
    }

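    /** @brief In-place division by a GPU scalar: vec /= alpha.
    *
    * @param vec   The vector to be divided (updated in place)
    * @param alpha The divisor (a viennacl::scalar on the device)
    */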
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_divide(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec,
                        scalar<SCALARTYPE> const & alpha)
    {
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_divide");

      viennacl::ocl::enqueue(k(vec, alpha, static_cast<cl_uint>(vec.internal_size())));
    }

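    /** @brief Multiply-add with a GPU scalar: result = alpha * vec1 + vec2.
    *
    * @param vec1   The vector to be scaled
    * @param alpha  The scaling factor (a viennacl::scalar on the device)
    * @param vec2   The addend
    * @param result The result vector, which must already have the size of the operands
    */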
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void mul_add(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                 scalar<SCALARTYPE> const & alpha,
                 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                 viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec1.size() == vec2.size() && result.size() == vec1.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "mul_add");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, alpha, vec2, result, size));
    }

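    /** @brief Multiply-add with a CPU scalar: result = alpha * vec1 + vec2.
    *
    * @param vec1   The vector to be scaled
    * @param alpha  The scaling factor (a plain host value)
    * @param vec2   The addend
    * @param result The result vector, which must already have the size of the operands
    */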
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void mul_add(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                 SCALARTYPE alpha,
                 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                 viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec1.size() == vec2.size() && result.size() == vec1.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "cpu_mul_add");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, alpha, vec2, result, size));
    }

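    /** @brief In-place multiply-add with a GPU scalar: vec1 += alpha * vec2.
    *
    * @param vec1  The vector to be updated in place
    * @param vec2  The vector to be scaled and added
    * @param alpha The scaling factor (a viennacl::scalar on the device)
    */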
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_mul_add(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         scalar<SCALARTYPE> const & alpha)
    {
      assert(vec1.size() == vec2.size());
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_mul_add");

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, size));
    }

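    /** @brief In-place multiply-add with a CPU scalar: vec1 += alpha * vec2.
    *
    * @param vec1  The vector to be updated in place
    * @param vec2  The vector to be scaled and added
    * @param alpha The scaling factor (a plain host value)
    */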
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_mul_add(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         SCALARTYPE alpha)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "cpu_inplace_mul_add");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, size));
    }

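    /** @brief Multiply-subtract with a GPU scalar: result = alpha * vec1 - vec2.
    *
    * @param vec1   The vector to be scaled
    * @param alpha  The scaling factor (a viennacl::scalar on the device)
    * @param vec2   The vector to subtract
    * @param result The result vector, which must already have the size of the operands
    */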
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void mul_sub(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                 scalar<SCALARTYPE> const & alpha,
                 const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                 viennacl::vector<SCALARTYPE, ALIGNMENT> & result)
    {
      assert(vec1.size() == vec2.size() && result.size() == vec1.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "mul_sub");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, alpha, vec2, result, size));
    }

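    /** @brief In-place multiply-subtract with a GPU scalar: vec1 -= alpha * vec2.
    *
    * @param vec1  The vector to be updated in place
    * @param vec2  The vector to be scaled and subtracted
    * @param alpha The scaling factor (a viennacl::scalar on the device)
    */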
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_mul_sub(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         scalar<SCALARTYPE> const & alpha)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_mul_sub");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, size));
    }

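    /** @brief In-place divide-add with a GPU scalar: vec1 += vec2 / alpha.
    *
    * @param vec1  The vector to be updated in place
    * @param vec2  The vector to be divided and added
    * @param alpha The divisor (a viennacl::scalar on the device)
    */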
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_div_add(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         scalar<SCALARTYPE> const & alpha)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_div_add");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, size));
    }

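    /** @brief In-place divide-subtract with a GPU scalar: vec1 -= vec2 / alpha.
    *
    * @param vec1  The vector to be updated in place
    * @param vec2  The vector to be divided and subtracted
    * @param alpha The divisor (a viennacl::scalar on the device)
    */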
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inplace_div_sub(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         scalar<SCALARTYPE> const & alpha)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inplace_div_sub");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, size));
    }

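    /** @brief Computes the inner product of two vectors via a two-stage parallel reduction and writes it to a GPU scalar.
    *
    * Stage one computes one partial result per work group; stage two sums the partial results within a single work group.
    *
    * A minimal usage sketch (assumes an initialized OpenCL context):
    * @code
    * viennacl::vector<float> x(100), y(100);
    * viennacl::scalar<float> s = 0.0f;
    * viennacl::linalg::inner_prod_impl(x, y, s);   // s = <x, y>
    * @endcode
    *
    * @param vec1   The first vector
    * @param vec2   The second vector
    * @param result The GPU scalar receiving the result
    */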
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void inner_prod_impl(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                         const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                         scalar<SCALARTYPE> & result)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "inner_prod");
      cl_uint size = static_cast<cl_uint>(std::min(vec1.internal_size(), vec2.internal_size()));
      unsigned int work_groups = k.global_work_size() / k.local_work_size();

      //buffer holding one partial result per work group; static, so it is reused across calls
      static viennacl::vector<SCALARTYPE> temp(work_groups);

      //the second-stage summation kernel requires a power-of-two number of work groups:
      assert( work_groups * k.local_work_size() == k.global_work_size() );
      assert( work_groups == 1 || work_groups == 2 || work_groups == 4 || work_groups == 8
           || work_groups == 16 || work_groups == 32 || work_groups == 64 || work_groups == 128
           || work_groups == 256 || work_groups == 512 );

      //first stage: one partial inner product per work group
      viennacl::ocl::enqueue(k(vec1, vec2, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));

      //second stage: sum the partial results within a single work group
      viennacl::ocl::kernel & ksum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "sum");
      ksum.local_work_size(0, work_groups);
      ksum.global_work_size(0, work_groups);
      viennacl::ocl::enqueue(ksum(temp, result));
    }

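    /** @brief Returns a proxy object for the inner product of two vectors; evaluation is deferred until the expression is assigned to a scalar.
    *
    * @param vec1 The first vector
    * @param vec2 The second vector
    * @return A scalar_expression encoding the inner product
    */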
    template<class SCALARTYPE, unsigned int ALIGNMENT1, unsigned int ALIGNMENT2>
    viennacl::scalar_expression< const viennacl::vector<SCALARTYPE, ALIGNMENT1>,
                                 const viennacl::vector<SCALARTYPE, ALIGNMENT2>,
                                 viennacl::op_inner_prod >
    inner_prod_impl(const viennacl::vector<SCALARTYPE, ALIGNMENT1> & vec1,
                    const viennacl::vector<SCALARTYPE, ALIGNMENT2> & vec2)
    {
      return viennacl::scalar_expression< const viennacl::vector<SCALARTYPE, ALIGNMENT1>,
                                          const viennacl::vector<SCALARTYPE, ALIGNMENT2>,
                                          viennacl::op_inner_prod >(vec1, vec2);
    }

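    /** @brief Computes the l^1-norm (sum of absolute values) of a vector via a two-stage parallel reduction.
    *
    * @param vcl_vec The vector
    * @param result  The GPU scalar receiving the norm
    */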
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void norm_1_impl(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vcl_vec,
                     scalar<SCALARTYPE> & result)
    {
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "norm_1");
      cl_uint size = static_cast<cl_uint>(vcl_vec.internal_size());

      if (k.local_work_size() != k.global_work_size())
      {
        //use a single work group with as many threads as the device allows:
        k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
        k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
      }

      unsigned int work_groups = k.global_work_size() / k.local_work_size();
      viennacl::vector<SCALARTYPE> temp(work_groups);

      //the second-stage summation kernel requires a power-of-two number of work groups:
      assert( work_groups * k.local_work_size() == k.global_work_size() );
      assert( work_groups == 1 || work_groups == 2 || work_groups == 4 || work_groups == 8
           || work_groups == 16 || work_groups == 32 || work_groups == 64 || work_groups == 128
           || work_groups == 256 || work_groups == 512 );

      //first stage: one partial sum of absolute values per work group
      viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));

      //second stage: sum the partial results within a single work group
      viennacl::ocl::kernel & ksum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "sum");
      ksum.local_work_size(0, work_groups);
      ksum.global_work_size(0, work_groups);
      viennacl::ocl::enqueue(ksum(temp, result));
    }

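    /** @brief Computes the l^2-norm (Euclidean norm) of a vector via a two-stage parallel reduction.
    *
    * A minimal usage sketch (assumes an initialized OpenCL context):
    * @code
    * viennacl::vector<float> x(100);
    * viennacl::scalar<float> nrm = 0.0f;
    * viennacl::linalg::norm_2_impl(x, nrm);   // nrm = sqrt(sum_i x_i^2)
    * @endcode
    *
    * @param vcl_vec The vector
    * @param result  The GPU scalar receiving the norm
    */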
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void norm_2_impl(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vcl_vec,
                     scalar<SCALARTYPE> & result)
    {
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "norm_2");
      cl_uint size = static_cast<cl_uint>(vcl_vec.internal_size());

      if (k.local_work_size() != k.global_work_size())
      {
        //use a single work group with as many threads as the device allows:
        k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
        k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
      }

      unsigned int work_groups = k.global_work_size() / k.local_work_size();
      viennacl::vector<SCALARTYPE> temp(work_groups);

      //the second-stage kernel requires a power-of-two number of work groups:
      assert( work_groups * k.local_work_size() == k.global_work_size() );
      assert( work_groups == 1 || work_groups == 2 || work_groups == 4 || work_groups == 8
           || work_groups == 16 || work_groups == 32 || work_groups == 64 || work_groups == 128
           || work_groups == 256 || work_groups == 512 );

      //first stage: one partial sum of squares per work group
      viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));

      //second stage: sum the partial results and take the square root within a single work group
      viennacl::ocl::kernel & sqrt_sum = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "sqrt_sum");
      sqrt_sum.local_work_size(0, work_groups);
      sqrt_sum.global_work_size(0, work_groups);
      viennacl::ocl::enqueue(sqrt_sum(temp, result, work_groups));
    }

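    /** @brief Computes the supremum norm (largest absolute value) of a vector via a two-stage parallel reduction.
    *
    * @param vcl_vec The vector
    * @param result  The GPU scalar receiving the norm
    */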
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void norm_inf_impl(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vcl_vec,
                       scalar<SCALARTYPE> & result)
    {
      cl_uint size = static_cast<cl_uint>(vcl_vec.internal_size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "norm_inf");

      if (k.local_work_size() != k.global_work_size())
      {
        //use a single work group with as many threads as the device allows:
        k.local_work_size(0, viennacl::ocl::current_device().max_work_group_size());
        k.global_work_size(0, viennacl::ocl::current_device().max_work_group_size());
      }

      unsigned int work_groups = k.global_work_size() / k.local_work_size();
      viennacl::vector<SCALARTYPE> temp(work_groups);

      //the second-stage maximum kernel requires a power-of-two number of work groups:
      assert( work_groups * k.local_work_size() == k.global_work_size() );
      assert( work_groups == 1 || work_groups == 2 || work_groups == 4 || work_groups == 8
           || work_groups == 16 || work_groups == 32 || work_groups == 64 || work_groups == 128
           || work_groups == 256 || work_groups == 512 );

      //first stage: one partial maximum per work group
      viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));

      //second stage: take the maximum of the partial results within a single work group
      viennacl::ocl::kernel & max_kernel = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "vmax");
      max_kernel.local_work_size(0, work_groups);
      max_kernel.global_work_size(0, work_groups);

      viennacl::ocl::enqueue(max_kernel(temp, result, work_groups));
    }

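    /** @brief Returns the index of the first entry that attains the supremum norm, i.e. the entry with the largest absolute value.
    *
    * The kernel writes the index to a one-element GPU buffer, which is then read back with a blocking transfer.
    *
    * @param vcl_vec The vector
    * @return The index of the entry with the largest absolute value
    */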
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    cl_uint index_norm_inf(const viennacl::vector<SCALARTYPE, ALIGNMENT> & vcl_vec)
    {
      //a one-element GPU buffer receiving the index:
      viennacl::ocl::handle<cl_mem> h = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint));

      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "index_norm_inf");
      cl_uint size = static_cast<cl_uint>(vcl_vec.internal_size());

      //use a single work group, so that the kernel can complete the reduction on its own:
      k.global_work_size(0, k.local_work_size());
      viennacl::ocl::enqueue(k(vcl_vec,
                               size,
                               viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()),
                               viennacl::ocl::local_mem(sizeof(cl_uint) * k.local_work_size()),
                               h));

      //read the index back to the host with a blocking transfer:
      cl_uint result;
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), h, CL_TRUE, 0, sizeof(cl_uint), &result, 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      return result;
    }

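    /** @brief Applies a plane (Givens) rotation to two vectors: (x, y) <- (alpha * x + beta * y, alpha * y - beta * x).
    *
    * Both vectors are updated in place, hence neither argument is const. A minimal usage sketch
    * (assumes an initialized OpenCL context; alpha = cos(theta), beta = sin(theta) for a rotation by theta):
    * @code
    * viennacl::vector<float> x(100), y(100);
    * viennacl::linalg::plane_rotation(x, y, 0.8f, 0.6f);
    * @endcode
    *
    * @param vec1  The first vector (updated in place)
    * @param vec2  The second vector (updated in place)
    * @param alpha The first rotation coefficient
    * @param beta  The second rotation coefficient
    */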
    template<class SCALARTYPE, unsigned int ALIGNMENT>
    void plane_rotation(viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1,
                        viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2,
                        SCALARTYPE alpha,
                        SCALARTYPE beta)
    {
      assert(vec1.size() == vec2.size());
      viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(viennacl::linalg::kernels::vector<SCALARTYPE, ALIGNMENT>::program_name(), "plane_rotation");

      viennacl::ocl::enqueue(k(vec1, vec2, alpha, beta, static_cast<cl_uint>(vec1.size())));
    }

  } //namespace linalg
} //namespace viennacl

#endif