#ifndef _VIENNACL_MATRIX_HPP_
#define _VIENNACL_MATRIX_HPP_

#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/tools/matrix_size_deducer.hpp"
#include "viennacl/tools/matrix_kernel_class_deducer.hpp"

namespace viennacl
{
  /** @brief Tag class for row-major storage of a dense matrix */
  struct row_major
  {
    /** @brief Returns the linear buffer index of entry (i, j) */
    static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols)
    {
      return i * num_cols + j;
    }

    /** @brief Returns the number of rows after padding to a multiple of the alignment */
    static unsigned int internal_size1(unsigned int rows, unsigned int alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment);
    }

    /** @brief Returns the number of columns after padding to a multiple of the alignment */
    static unsigned int internal_size2(unsigned int cols, unsigned int alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment);
    }
  };

  /** @brief Tag class for column-major storage of a dense matrix */
  struct column_major
  {
    /** @brief Returns the linear buffer index of entry (i, j) */
    static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols)
    {
      return i + j * num_rows;
    }

    /** @brief Returns the number of rows after padding to a multiple of the alignment */
    static unsigned int internal_size1(unsigned int rows, unsigned int alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment);
    }

    /** @brief Returns the number of columns after padding to a multiple of the alignment */
    static unsigned int internal_size2(unsigned int cols, unsigned int alignment)
    {
      return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment);
    }
  };
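
  // Illustrative only: for a matrix with internal (padded) size 2x3, the two
  // layout tags map a logical entry (i, j) to different buffer offsets:
  //
  //   row_major::mem_index(1, 2, 2, 3)    == 1 * 3 + 2 == 5
  //   column_major::mem_index(1, 2, 2, 3) == 1 + 2 * 2 == 5   (coincides for the last entry)
  //   row_major::mem_index(0, 1, 2, 3)    == 0 * 3 + 1 == 1
  //   column_major::mem_index(0, 1, 2, 3) == 0 + 1 * 2 == 2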


  /** @brief A proxy class holding an unevaluated binary matrix operation (expression template) */
  template <typename LHS, typename RHS, typename OP>
  class matrix_expression
  {
    public:
      matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}

      /** @brief Returns the left-hand side operand */
      LHS & lhs() const { return _lhs; }
      /** @brief Returns the right-hand side operand */
      RHS & rhs() const { return _rhs; }

      /** @brief Returns the number of rows of the result */
      unsigned int size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); }
      /** @brief Returns the number of columns of the result */
      unsigned int size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); }

    private:
      LHS & _lhs;
      RHS & _rhs;
  };


  /** @brief Tag for row-wise iteration over a matrix */
  struct row_iteration {};

  /** @brief Tag for column-wise iteration over a matrix */
  struct col_iteration {};

  /** @brief A (rather slow) iterator over the entries of a dense matrix */
  template <typename ROWCOL, typename MATRIXTYPE>
  class matrix_iterator
  {
    typedef matrix_iterator<ROWCOL, MATRIXTYPE>   self_type;
    public:
      typedef typename MATRIXTYPE::value_type   value_type;

      matrix_iterator(MATRIXTYPE & mat, unsigned int start_row, unsigned int start_col) : _mat(mat), _row(start_row), _col(start_col) {}

      value_type operator*(void) { return _mat(_row, _col); }
      self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(_mat, _row, _col); return *this; }
      self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }   //note: postfix increment must return by value, not by reference

      bool operator==(self_type const & other) { return (_row == other._row) && (_col == other._col); }
      bool operator!=(self_type const & other) { return !(*this == other); }

      unsigned int index1() { return _row; }
      unsigned int index2() { return _col; }

      MATRIXTYPE & operator()(void) const { return _mat; }

    private:
      MATRIXTYPE & _mat;
      unsigned int _row;
      unsigned int _col;
  };

  /** @brief A dense matrix. Entries reside in OpenCL memory.
  *
  * @tparam SCALARTYPE  The underlying scalar type (either float or double)
  * @tparam F           Storage layout tag: row_major or column_major
  * @tparam ALIGNMENT   The internal buffer is padded to multiples of this value in each dimension
  */
  template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  class matrix
  {

    public:

      typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator1;
      typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator2;
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

      /** @brief Creates an empty matrix (no OpenCL memory is allocated) */
      matrix() : _rows(0), _columns(0)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType   KernelClass;
        KernelClass::init();
      }

      /** @brief Creates an uninitialized matrix with the given number of rows and columns */
      explicit matrix(unsigned int rows, unsigned int columns) :
        _rows(rows), _columns(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType   KernelClass;
        KernelClass::init();
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
      }

      /** @brief Wraps an existing OpenCL buffer (the reference count of the buffer is increased) */
      explicit matrix(cl_mem mem, unsigned int rows, unsigned int columns) :
        _rows(rows), _columns(columns)
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType   KernelClass;
        KernelClass::init();
        _elements = mem;
        _elements.inc();    //prevents the handle from deleting memory it does not own
      }

      /** @brief Creates a matrix from an expression template and evaluates it */
      template <typename LHS, typename RHS, typename OP>
      matrix(matrix_expression< LHS, RHS, OP> const & proxy) : _rows(proxy.size1()), _columns(proxy.size2())
      {
        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType   KernelClass;
        KernelClass::init();
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());

        *this = proxy;
      }

      /** @brief Copy constructor: the new matrix gets its own OpenCL buffer */
      matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) :
        _rows(mat.size1()), _columns(mat.size2()),
        _elements(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()))
      {
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
      {
        resize(mat.size1(), mat.size2(), false);
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        return *this;
      }

      /** @brief Assigns the transpose of another matrix. The transposition is carried out on the host. */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                            op_trans> & proxy)
      {
        assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
        assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
        assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");

        resize(proxy.lhs().size2(), proxy.lhs().size1(), false);

        //bring the source matrix to the host:
        std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());

        cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                         proxy.lhs().handle(), CL_TRUE, 0,
                                         sizeof(SCALARTYPE)*proxy.lhs().internal_size(),
                                         &(temp[0]), 0, NULL, NULL);
        VIENNACL_ERR_CHECK(err);
        viennacl::ocl::get_queue().finish();

        //transpose on the host:
        std::vector<SCALARTYPE> temp_trans(internal_size());

        for (size_t i=0; i<proxy.lhs().size1(); ++i)
          for (size_t j=0; j<proxy.lhs().size2(); ++j)
            temp_trans[F::mem_index(j,i, internal_size1(), internal_size2())]
             = temp[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];

        //write the result to a new OpenCL buffer:
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                   sizeof(SCALARTYPE)*internal_size(),
                                                                   &(temp_trans[0]));

        return *this;
      }


      /** @brief Resizes the matrix.
      *
      * @param rows      New number of rows
      * @param columns   New number of columns
      * @param preserve  If true, the old entries are copied into the resized matrix (via a host round-trip)
      */
      void resize(unsigned int rows, unsigned int columns, bool preserve = true)
      {
        assert(rows > 0 && columns > 0);
        if (preserve)
        {
          //get the old entries:
          std::vector< SCALARTYPE > old_entries(internal_size());
          cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                           handle(),
                                           CL_TRUE,
                                           0,
                                           sizeof(SCALARTYPE)*internal_size(),
                                           &(old_entries[0]),
                                           0, NULL, NULL);
          VIENNACL_ERR_CHECK(err);

          //set up the entries of the resized matrix:
          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          for (unsigned int i=0; i<rows; ++i)
          {
            if (i >= _rows)
              continue;

            for (unsigned int j=0; j<columns; ++j)
            {
              if (j >= _columns)
                continue;
              new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))]
                 = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
            }
          }

          //copy the entries to a new buffer:
          _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
          _rows = rows;
          _columns = columns;
        }
        else
        {
          _rows = rows;
          _columns = columns;

          std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
          _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
        }
      }
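
      // Usage sketch (illustrative only):
      //
      //   viennacl::matrix<float> m(2, 2);
      //   m.resize(3, 3);        // old 2x2 block preserved, new entries zero
      //   m.resize(4, 4, false); // contents discarded, no host round-trip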

      /** @brief Returns a write-enabled proxy for the entry (row_index, col_index) */
      entry_proxy<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index)
      {
        return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), _elements);
      }

      /** @brief Returns a read-only copy of the entry (row_index, col_index) */
      scalar<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index) const
      {
        scalar<SCALARTYPE> tmp;
        cl_int err;
        err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
                                  _elements,
                                  tmp.handle(),
                                  sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()),
                                  0,
                                  sizeof(SCALARTYPE),
                                  0,
                                  NULL,
                                  NULL);
        VIENNACL_ERR_CHECK(err);
        return tmp;
      }


      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_add >
      operator + (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_add > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_add(*this, other);
        return *this;
      }

      matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                         const matrix<SCALARTYPE, F, ALIGNMENT>,
                         op_sub >
      operator - (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  const matrix<SCALARTYPE, F, ALIGNMENT>,
                                  op_sub > (*this, other);
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix< SCALARTYPE, F, ALIGNMENT> & other)
      {
        viennacl::linalg::inplace_sub(*this, other);
        return *this;
      }

      //rank-1 updates:  A += x * y^T  and  A -= x * y^T
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                               const vector<SCALARTYPE, A2>,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs());
        return *this;
      }

      //scaled rank-1 updates:  A += alpha * x * y^T  and  A -= alpha * x * y^T
      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }

      template <unsigned int A1, unsigned int A2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
                                                                                                         const vector<SCALARTYPE, A2>,
                                                                                                         op_prod >,
                                                                               const SCALARTYPE,
                                                                               op_prod > & proxy)
      {
        viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
        return *this;
      }


      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_mult(*this, val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val)
      {
        viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val);
        return *this;
      }

      matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val)
      {
        viennacl::linalg::inplace_divide(*this, val);
        return *this;
      }


      /** @brief Assigns the result of a matrix product */
      template <typename MatrixType1, typename MatrixType2>
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< MatrixType1,
                                                                              MatrixType2,
                                                                              op_prod > & proxy)
      {
        viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Assigns the result of a matrix addition */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_add > & proxy)
      {
        viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }

      /** @brief Assigns the result of a matrix subtraction */
      matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                                                                              op_sub > & proxy)
      {
        viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
        return *this;
      }


      /** @brief Returns the number of rows */
      const unsigned int & size1() const { return _rows; }
      /** @brief Returns the number of columns */
      const unsigned int & size2() const { return _columns; }

      /** @brief Resets all entries to zero. Does not change the size of the matrix. */
      void clear()
      {
        unsigned int internal_size = internal_size1() * internal_size2();

        typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType   KernelClass;

        viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear");
        viennacl::ocl::enqueue(k(_elements, internal_size));
      }

      /** @brief Returns the number of rows including the alignment padding */
      const unsigned int internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); }
      /** @brief Returns the number of columns including the alignment padding */
      const unsigned int internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); }
      /** @brief Returns the total number of entries in the padded buffer */
      const unsigned int internal_size() const { return internal_size1() * internal_size2(); }

      /** @brief Returns the OpenCL memory handle */
      const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix & gpu_matrix );

      template <typename SCALARTYPE2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix & gpu_matrix);

 #ifdef VIENNACL_HAVE_EIGEN
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix & gpu_matrix);

      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix & gpu_matrix);
 #endif

 #ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix & gpu_matrix);
 #endif
#else
      template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const CPU_MATRIX & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );

      template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
      friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
                            SCALARTYPE2 * cpu_matrix_end,
                            matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);

 #ifdef VIENNACL_HAVE_EIGEN
      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXf & cpu_matrix,
                       matrix<float, F2, ALIGNMENT2> & gpu_matrix);

      template <typename F2, unsigned int ALIGNMENT2>
      friend void copy(const Eigen::MatrixXd & cpu_matrix,
                       matrix<double, F2, ALIGNMENT2> & gpu_matrix);
 #endif

 #ifdef VIENNACL_HAVE_MTL4
      template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
      friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
 #endif
#endif

    private:
      unsigned int _rows;
      unsigned int _columns;
      viennacl::ocl::handle<cl_mem> _elements;
  }; //matrix
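
  // Usage sketch (illustrative only; assumes an OpenCL context is already set up):
  //
  //   viennacl::matrix<float> A(4, 4), B(4, 4), C(4, 4);
  //   A(0, 0) = 1.0f;          //write through entry_proxy (slow: one transfer per entry)
  //   B = A;                   //deep copy on the device
  //   B += A;                  //in-place addition, runs as an OpenCL kernel
  //   C = A + B;               //expression template, evaluated on assignment
  //   B *= 2.0f;               //in-place scaling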

  /** @brief Prints the matrix in a format compatible with boost::numeric::ublas. Reads the entries back to the host first. */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
    cl_int err;
    err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
    viennacl::ocl::get_queue().finish();

    s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";

    s << "(";
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      s << "(";
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
      {
        //use the layout tag for indexing, so that column-major matrices print correctly as well:
        s << tmp[ F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2()) ];
        if (j < gpu_matrix.size2() - 1)
          s << ",";
      }
      s << ")";
      if (i < gpu_matrix.size1() - 1)
        s << ",";
    }
    s << ")";
    return s;
  }

  /** @brief Prints a matrix expression by evaluating it into a temporary matrix first */
  template<typename LHS, typename RHS, typename OP>
  std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
  {
    typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType   ScalarType;

    matrix<ScalarType> temp = expr;
    s << temp;
    return s;
  }

  /** @brief Returns an expression template proxy for the transpose of the matrix */
  template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
  matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                     const matrix<SCALARTYPE, F, ALIGNMENT>,
                     op_trans> trans(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
  {
    return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
                              const matrix<SCALARTYPE, F, ALIGNMENT>,
                              op_trans>(mat, mat);
  }
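
  // Usage sketch (illustrative only): trans() is lazy and only returns a
  // proxy; the actual transposition happens when the proxy is assigned:
  //
  //   viennacl::matrix<float> A(3, 4), B(4, 3);
  //   B = viennacl::trans(A);   //host round-trip, see operator= above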


  /////////////////////// transfer operations: cpu to gpu ///////////////////////

  /** @brief Copies a dense matrix from the host (any type providing size1(), size2(), and operator()(i, j)) to the OpenCL device
  *
  * @param cpu_matrix  The host matrix to read from
  * @param gpu_matrix  The device matrix to fill (resized to match)
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()),
                      static_cast<unsigned int>(cpu_matrix.size2()), false);

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense matrix given as std::vector< std::vector<> > from the host to the OpenCL device */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size()),
                      static_cast<unsigned int>(cpu_matrix[0].size()),
                      false);

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense matrix from the host to the OpenCL device without any index conversion
  *
  * The host data must already be in the internal (padded) layout of the device matrix,
  * i.e. cpu_matrix_end - cpu_matrix_begin must equal gpu_matrix.internal_size().
  */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(SCALARTYPE * cpu_matrix_begin,
                 SCALARTYPE * cpu_matrix_end,
                 matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
                                                                          sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
                                                                          cpu_matrix_begin);
  }
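
  // Usage sketch (illustrative only): fast_copy() skips the per-entry index
  // conversion of copy(), so the host buffer must use the padded layout:
  //
  //   viennacl::matrix<float> m(3, 3);
  //   std::vector<float> host(m.internal_size());   //padded size, not 3*3
  //   // ... fill host at viennacl::row_major::mem_index(i, j, m.internal_size1(), m.internal_size2()) ...
  //   viennacl::fast_copy(&host[0], &host[0] + host.size(), m);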


#ifdef VIENNACL_HAVE_EIGEN
  /** @brief Copies a dense Eigen::MatrixXf from the host to the OpenCL device */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXf & cpu_matrix,
            matrix<float, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()),
                      static_cast<unsigned int>(cpu_matrix.cols()),
                      false);

    std::vector<float> data(gpu_matrix.internal_size());
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }

  /** @brief Copies a dense Eigen::MatrixXd from the host to the OpenCL device */
  template <typename F, unsigned int ALIGNMENT>
  void copy(const Eigen::MatrixXd & cpu_matrix,
            matrix<double, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()),
                      static_cast<unsigned int>(cpu_matrix.cols()),
                      false);

    std::vector<double> data(gpu_matrix.internal_size());
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
    }

    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
  /** @brief Copies a dense mtl::dense2D matrix from the host to the OpenCL device */
  template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
  void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
            matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.num_rows()),
                      static_cast<unsigned int>(cpu_matrix.num_cols()),
                      false);

    std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
    for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
    {
      for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
        data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
    }

    gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
  }
#endif

  /////////////////////// transfer operations: gpu to cpu ///////////////////////

  /** @brief Copies a dense matrix from the OpenCL device to the host (any type providing operator()(i, j))
  *
  * @param gpu_matrix  The device matrix to read from
  * @param cpu_matrix  The host matrix to fill (must already provide matching size)
  */
  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      //convert from the padded device layout to (i, j) indexing:
      for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
        for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies a dense matrix from the OpenCL device to a std::vector< std::vector<> > on the host */
  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
            std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
  {
    if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0)
        && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
    {
      std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
      cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);

      for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
        for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
          cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
    }
  }

  /** @brief Copies the padded device buffer directly to the host without index conversion
  *
  * The destination must provide room for gpu_matrix.internal_size() entries.
  */
  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
  void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
                 SCALARTYPE * cpu_matrix_begin)
  {
    cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
                                     gpu_matrix.handle(),
                                     CL_TRUE, 0,
                                     sizeof(SCALARTYPE)*gpu_matrix.internal_size(),
                                     cpu_matrix_begin, 0, NULL, NULL);
    VIENNACL_ERR_CHECK(err);
  }
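
  // Usage sketch (illustrative only): a host/device round trip via copy():
  //
  //   std::vector< std::vector<float> > host(3, std::vector<float>(3, 1.0f));
  //   viennacl::matrix<float> dev;
  //   viennacl::copy(host, dev);   //host -> device (resizes dev)
  //   viennacl::copy(dev, host);   //device -> host (host must be large enough)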


  /** @brief Scales a vector outer product from the right: (x * y^T) * alpha */
  template<typename CPU_SCALAR, typename SCALARTYPE, unsigned int VECTOR_ALIGNMENT>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod>  operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                      const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                                      op_prod> & proxy,
                                                   CPU_SCALAR val)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

  /** @brief Scales a vector outer product from the left: alpha * (x * y^T) */
  template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
  viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                  const viennacl::vector<SCALARTYPE, VA2>,
                                                                  op_prod>,
                               const SCALARTYPE,
                               op_prod>  operator*(CPU_SCALAR val,
                                                   viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                                const viennacl::vector<SCALARTYPE, VA2>,
                                                                                op_prod> const & proxy)
  {
    return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
                                                                           const viennacl::vector<SCALARTYPE, VA2>,
                                                                           op_prod>,
                                        const SCALARTYPE,
                                        op_prod>(proxy, static_cast<SCALARTYPE>(val));
  }

} //namespace viennacl

#endif