• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/ViennaCL-1.1.2/viennacl/matrix.hpp

Go to the documentation of this file.
00001 /* =======================================================================
00002    Copyright (c) 2010, Institute for Microelectronics, TU Vienna.
00003    http://www.iue.tuwien.ac.at
00004                              -----------------
00005                      ViennaCL - The Vienna Computing Library
00006                              -----------------
00007                             
00008    authors:    Karl Rupp                          rupp@iue.tuwien.ac.at
00009                Florian Rudolf                     flo.rudy+viennacl@gmail.com
00010                Josef Weinbub                      weinbub@iue.tuwien.ac.at
00011 
00012    license:    MIT (X11), see file LICENSE in the ViennaCL base directory
00013 ======================================================================= */
00014 
00015 #ifndef _VIENNACL_MATRIX_HPP_
00016 #define _VIENNACL_MATRIX_HPP_
00017 
00022 #include "viennacl/forwards.h"
00023 #include "viennacl/ocl/backend.hpp"
00024 #include "viennacl/scalar.hpp"
00025 #include "viennacl/vector.hpp"
00026 #include "viennacl/linalg/matrix_operations.hpp"
00027 #include "viennacl/tools/tools.hpp"
00028 #include "viennacl/tools/matrix_size_deducer.hpp"
00029 #include "viennacl/tools/matrix_kernel_class_deducer.hpp"
00030 
00031 namespace viennacl
00032 {
00034     struct row_major
00035     {
00043       static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols)
00044       {
00045         return i * num_cols + j;
00046       }
00047       
00048       static unsigned int internal_size1(unsigned int rows, unsigned int alignment)
00049       {
00050         return viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment);;
00051       }
00052       
00053       static unsigned int internal_size2(unsigned int cols, unsigned int alignment)
00054       {
00055         return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment);
00056       }
00057     };
00058 
00059     struct column_major
00060     {
00068       static unsigned int mem_index(unsigned int i, unsigned int j, unsigned int num_rows, unsigned int num_cols)
00069       {
00070         return i + j * num_rows;
00071       }
00072       
00073       static unsigned int internal_size1(unsigned int rows, unsigned int alignment)
00074       {
00075         return viennacl::tools::roundUpToNextMultiple<unsigned int>(rows, alignment);
00076       }
00077       
00078       static unsigned int internal_size2(unsigned int cols, unsigned int alignment)
00079       {
00080         return viennacl::tools::roundUpToNextMultiple<unsigned int>(cols, alignment);
00081       }
00082     };
00083     
00084     
00085     template <typename LHS, typename RHS, typename OP>
00086     class matrix_expression
00087     {
00088       public:
00090         //*/
00091         //typedef typename viennacl::tools::VECTOR_EXTRACTOR<LHS, RHS>::ResultType    VectorType;
00092       
00093         matrix_expression(LHS & lhs, RHS & rhs) : _lhs(lhs), _rhs(rhs) {}
00094         
00097         LHS & lhs() const { return _lhs; }
00100         RHS & rhs() const { return _rhs; }
00101         
00103         unsigned int size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(_lhs, _rhs); }
00104         unsigned int size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(_lhs, _rhs); }
00105         
00106       private:
00108         LHS & _lhs;
00110         RHS & _rhs;
00111     };
00112     
00113     
    /** @brief Tag for matrix_iterator: traverse the matrix row by row. */
00115     struct row_iteration {};
00116     
    /** @brief Tag for matrix_iterator: traverse the matrix column by column. */
00118     struct col_iteration {};
00119 
00120     //STL-like iterator. TODO: STL-compliance...
00121     template <typename ROWCOL, typename MATRIXTYPE>
00122     class matrix_iterator
00123     {
00124         typedef matrix_iterator<ROWCOL, MATRIXTYPE>    self_type;
00125       public:
00126         typedef typename MATRIXTYPE::value_type       value_type;
00127         
00128         matrix_iterator(MATRIXTYPE & mat, unsigned int start_row, unsigned int start_col) : _mat(mat), _row(start_row), _col(start_col) {};
00129         
00130         value_type operator*(void) { return _mat(_row, _col); }
00131         self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(_mat, _row, _col); return *this; }
00132         self_type & operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
00133         
00134         bool operator==(self_type const & other) { return (_row == other._row) && (_col == other._col); }
00135         bool operator!=(self_type const & other) { return !(*this == other); }
00136         
00137         unsigned int index1() { return _row; }
00138         unsigned int index2() { return _col; }
00139         
00140         MATRIXTYPE & operator()(void) const { return _mat; }
00141       
00142       private:
00143         MATRIXTYPE & _mat;
00144         unsigned int _row;
00145         unsigned int _col;
00146     };
00147 
    /** @brief A dense matrix stored in an OpenCL buffer.
    *
    * SCALARTYPE is the entry type, F the storage layout tag (row_major or
    * column_major), ALIGNMENT the padding multiple for the internal sizes.
    * The actual buffer holds internal_size1() * internal_size2() entries
    * (padded), addressed via F::mem_index().
    */
00154     template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00155     class matrix
00156     {
00157       
00158     public:
00159       
00160       typedef matrix_iterator<row_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator1;
00161       typedef matrix_iterator<col_iteration, matrix<SCALARTYPE, F, ALIGNMENT> >   iterator2;
00162       typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
00163       
      /** @brief Default constructor: zero rows/columns, no buffer allocated; only registers the OpenCL kernels. */
00165       matrix() : _rows(0), _columns(0)
00166       {
00167         typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00168         KernelClass::init();
00169       };
00170       
      /** @brief Creates an uninitialized rows x columns matrix on the device (padded buffer). */
00176       explicit matrix(unsigned int rows, unsigned int columns) :
00177         _rows(rows), _columns(columns)
00178       {
00179         typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00180         KernelClass::init();
00181         _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
00182       }
00183 
      /** @brief Wraps an existing cl_mem buffer; the matrix does not take ownership (reference count is incremented). */
00184       explicit matrix(cl_mem mem, unsigned int rows, unsigned int columns) :
00185         _rows(rows), _columns(columns)
00186       {
00187         typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00188         KernelClass::init();
00189         _elements = mem;
00190         _elements.inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed.
00191       }
00192 
      /** @brief Creates the matrix from an expression proxy: allocates the buffer, then evaluates via the matching operator=. */
00193       template <typename LHS, typename RHS, typename OP>
00194       matrix(matrix_expression< LHS, RHS, OP> const & proxy) : _rows(proxy.size1()), _columns(proxy.size2())
00195       {
00196         typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00197         KernelClass::init();
00198         _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size());
00199         
00200         *this = proxy;
00201       }
00202 
00203 
00204       //copy constructor:
      // NOTE: internal_size() in the _elements initializer relies on _rows/_columns
      // being initialized first - correct here because they are declared before
      // _elements, but the member declaration order must not be changed.
00205       matrix(const matrix<SCALARTYPE, F, ALIGNMENT> & mat) :
00206         _rows(mat.size1()), _columns(mat.size2()),
00207         _elements(viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE)*internal_size()))
00208       {
00209         cl_int err;
00210         err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
00211         VIENNACL_ERR_CHECK(err);
00212       }
00213 
      /** @brief Deep copy assignment: resizes (discarding old entries) and copies the device buffer. */
00214       matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
00215       {
00216         resize(mat.size1(), mat.size2(), false);
00217         cl_int err;
00218         err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), mat.handle(), handle(), 0, 0, sizeof(SCALARTYPE)*internal_size(), 0, NULL, NULL);
00219         VIENNACL_ERR_CHECK(err);
00220         return *this;
00221       }
00222       
      /** @brief Assignment from trans(A): reads A back to the host (blocking),
      * transposes in host memory, then uploads a fresh buffer.
      * Self-assignment (this = trans(*this)) is rejected by assertion. */
00223       matrix<SCALARTYPE, F, ALIGNMENT> & operator=(const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00224                                                                             const matrix<SCALARTYPE, F, ALIGNMENT>,
00225                                                                             op_trans> & proxy)
00226       {
00227         assert(handle() != proxy.lhs().handle() && "Self-assignment of matrix transpose not implemented");
00228         assert(proxy.lhs().size1() == size2() && "Matrix dimensions do not match!");
00229         assert(proxy.lhs().size2() == size1() && "Matrix dimensions do not match!");
00230 
00231         resize(proxy.lhs().size2(), proxy.lhs().size1(), false);
00232         
00233         std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());
00234         
00235         cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00236                                          proxy.lhs().handle(), CL_TRUE, 0,
00237                                          sizeof(SCALARTYPE)*proxy.lhs().internal_size(),
00238                                          &(temp[0]), 0, NULL, NULL);
00239         VIENNACL_ERR_CHECK(err);
00240         viennacl::ocl::get_queue().finish();
00241 
00242         /*
00243         for (size_t i=0; i<proxy.lhs().size1(); ++i)
00244         {
00245           for (size_t j=0; j<proxy.lhs().size2(); ++j)
00246             std::cout << temp[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())] << ", ";
00247         }*/
00248         
      // transpose on the host: entry (i,j) of the source becomes (j,i) here
00249         std::vector<SCALARTYPE> temp_trans(internal_size());
00250 
00251         for (size_t i=0; i<proxy.lhs().size1(); ++i)
00252           for (size_t j=0; j<proxy.lhs().size2(); ++j)
00253             temp_trans[F::mem_index(j,i, internal_size1(), internal_size2())] 
00254              = temp[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];
00255 
00256         /*     
00257         for (size_t i=0; i<proxy.lhs().size1(); ++i)
00258         {
00259           for (size_t j=0; j<proxy.lhs().size2(); ++j)
00260             std::cout << temp_trans[F::mem_index(i,j, proxy.lhs().internal_size1(), proxy.lhs().internal_size2())] << ", ";
00261         }*/
00262         
00263         _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, 
00264                                                                    sizeof(SCALARTYPE)*internal_size(),
00265                                                                    &(temp_trans[0]));
00266           
00267         return *this;
00268       }
00269 
00270 
00271 
      /** @brief Resizes the matrix.
      *
      * @param rows     New number of rows (must be > 0)
      * @param columns  New number of columns (must be > 0)
      * @param preserve If true, existing entries are kept (round-trip through
      *                 host memory); if false, the new buffer is zero-filled
      *                 host data of the padded size.
      */
00279       void resize(unsigned int rows, unsigned int columns, bool preserve = true)
00280       {
00281         assert(rows > 0 && columns > 0);
00282         if (preserve)
00283         {
00284           //get old entries:
00285           std::vector< SCALARTYPE > old_entries(internal_size());
00286           cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), //src
00287                                            handle(), //dest
00288                                            CL_TRUE, //blocking
00289                                            0, //offset
00290                                            sizeof(SCALARTYPE)*internal_size(), //size
00291                                            &(old_entries[0]), //destination
00292                                            0, NULL, NULL);
00293           VIENNACL_ERR_CHECK(err);
00294           
00295           //set up entries of new matrix:
00296           std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
00297           for (unsigned int i=0; i<rows; ++i)
00298           {
00299             if (i >= _rows)
00300               continue;
00301               
00302             for (unsigned int j=0; j<columns; ++j)
00303             {
00304               if (j >= _columns)
00305                 continue;
00306               new_entries[F::mem_index(i, j, F::internal_size1(rows, ALIGNMENT), F::internal_size2(columns, ALIGNMENT))] 
00307                  = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
00308             }
00309           }
00310           
00311           //copy new entries to GPU:
00312           _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
00313           _rows = rows;
00314           _columns = columns;
00315         }
00316         else //discard old entries:
00317         {
00318           _rows = rows;
00319           _columns = columns;
00320           
00321           std::vector< SCALARTYPE > new_entries(F::internal_size1(rows, ALIGNMENT) * F::internal_size2(columns, ALIGNMENT));
00322           _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, new_entries);
00323         }
00324       }
00325       
00326       
00327       //read-write access to an element of the vector
      /** @brief Read-write access: returns a proxy addressing the element's position in the device buffer. */
00330       entry_proxy<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index)
00331       {
00332         return entry_proxy<SCALARTYPE>(F::mem_index(row_index, col_index, internal_size1(), internal_size2()), _elements);
00333       }
00334       
      /** @brief Read-only access: copies the single entry into a scalar<> via a device-to-device buffer copy. */
00337       scalar<SCALARTYPE> operator()(unsigned int row_index, unsigned int col_index) const
00338       {
00339         scalar<SCALARTYPE> tmp;
00340         cl_int err;
00341         err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(),
00342                                   _elements,
00343                                   tmp.handle(),
00344                                   sizeof(SCALARTYPE) * F::mem_index(row_index, col_index, internal_size1(), internal_size2()),
00345                                   0,
00346                                   sizeof(SCALARTYPE),
00347                                   0,
00348                                   NULL,
00349                                   NULL);
00350         //assert(err == CL_SUCCESS);
00351         VIENNACL_ERR_CHECK(err);
00352         return tmp;
00353       }
00354       
00355 
      /** @brief Lazy addition: returns an expression proxy; no computation here. */
00356       matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00357                          const matrix<SCALARTYPE, F, ALIGNMENT>,
00358                          op_add >
00359       operator + (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 
00360       {
00361         return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00362                                   const matrix<SCALARTYPE, F, ALIGNMENT>,
00363                                   op_add > (*this, other);
00364       }
00365 
00366 
      /** @brief In-place addition, delegated to viennacl::linalg. */
00367       matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 
00368       {
00369         viennacl::linalg::inplace_add(*this, other);
00370         return *this;
00371       }
00372 
      /** @brief Lazy subtraction: returns an expression proxy; no computation here. */
00373       matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00374                          const matrix<SCALARTYPE, F, ALIGNMENT>,
00375                          op_sub >
00376       operator - (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 
00377       {
00378         return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00379                                   const matrix<SCALARTYPE, F, ALIGNMENT>,
00380                                   op_sub > (*this, other);
00381       }
00382 
      /** @brief In-place subtraction, delegated to viennacl::linalg. */
00383       matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix< SCALARTYPE, F, ALIGNMENT> & other) 
00384       {
00385         viennacl::linalg::inplace_sub(*this, other);
00386         return *this;
00387       }
00388 
      /** @brief Rank-1 update: this += outer_prod(vec1, vec2). */
00389       template <unsigned int A1, unsigned int A2>
00390       matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const vector<SCALARTYPE, A1>,
00391                                                                                const vector<SCALARTYPE, A2>,
00392                                                                                op_prod > & proxy) 
00393       {
00394         viennacl::linalg::rank_1_update(*this, proxy.lhs(), proxy.rhs());
00395         return *this;
00396       }
00397 
      /** @brief Rank-1 update with factor -1: this -= outer_prod(vec1, vec2). */
00398       template <unsigned int A1, unsigned int A2>
00399       matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const vector<SCALARTYPE, A1>,
00400                                                                                const vector<SCALARTYPE, A2>,
00401                                                                                op_prod > & proxy) 
00402       {
00403         viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0), proxy.lhs(), proxy.rhs());
00404         return *this;
00405       }
00406 
      /** @brief Scaled rank-1 update: this += alpha * outer_prod(vec1, vec2). */
00407       template <unsigned int A1, unsigned int A2>
00408       matrix<SCALARTYPE, F, ALIGNMENT> & operator += (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
00409                                                                                                         const vector<SCALARTYPE, A2>,
00410                                                                                                         op_prod >,
00411                                                                                const SCALARTYPE,
00412                                                                                op_prod > & proxy) 
00413       {
00414         viennacl::linalg::scaled_rank_1_update(*this, proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
00415         return *this;
00416       }
00417 
      /** @brief Scaled rank-1 update: this -= alpha * outer_prod(vec1, vec2). */
00418       template <unsigned int A1, unsigned int A2>
00419       matrix<SCALARTYPE, F, ALIGNMENT> & operator -= (const matrix_expression< const matrix_expression< const vector<SCALARTYPE, A1>,
00420                                                                                                         const vector<SCALARTYPE, A2>,
00421                                                                                                         op_prod >,
00422                                                                                const SCALARTYPE,
00423                                                                                op_prod > & proxy) 
00424       {
00425         viennacl::linalg::scaled_rank_1_update(*this, static_cast<SCALARTYPE>(-1.0) * proxy.rhs(), proxy.lhs().lhs(), proxy.lhs().rhs());
00426         return *this;
00427       }
00428       
00429       
      /** @brief In-place scaling by a host scalar. */
00430       matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (SCALARTYPE val) 
00431       {
00432         viennacl::linalg::inplace_mult(*this, val);
00433         return *this;
00434       }
00435 
      /** @brief In-place scaling by a device scalar. */
00436       matrix<SCALARTYPE, F, ALIGNMENT> & operator *= (scalar<SCALARTYPE> const & val) 
00437       {
00438         viennacl::linalg::inplace_mult(*this, val);
00439         return *this;
00440       }
00441 
      /** @brief In-place division by a host scalar (implemented as multiplication by the reciprocal). */
00442       matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (SCALARTYPE val) 
00443       {
00444         viennacl::linalg::inplace_mult(*this, SCALARTYPE(1.0) / val);
00445         return *this;
00446       }
00447 
      /** @brief In-place division by a device scalar. */
00448       matrix<SCALARTYPE, F, ALIGNMENT> & operator /= (scalar<SCALARTYPE> const & val) 
00449       {
00450         viennacl::linalg::inplace_divide(*this, val);
00451         return *this;
00452       }
00453 
00454 
00455       //this = A * B and related (with trans())
00456       template <typename MatrixType1, typename MatrixType2>
00457       matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< MatrixType1,
00458                                                                               MatrixType2,
00459                                                                               op_prod > & proxy) 
00460       {
00461         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
00462         return *this;
00463       }
00464 
00465       //this = A + B
00466       matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00467                                                                                const matrix<SCALARTYPE, F, ALIGNMENT>,
00468                                                                                op_add > & proxy) 
00469       {
00470         viennacl::linalg::add(proxy.lhs(), proxy.rhs(), *this);
00471         return *this;
00472       }
00473 
00474       //this = A - B
00475       matrix<SCALARTYPE, F, ALIGNMENT> & operator = (const matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00476                                                                                const matrix<SCALARTYPE, F, ALIGNMENT>,
00477                                                                                op_sub > & proxy) 
00478       {
00479         viennacl::linalg::sub(proxy.lhs(), proxy.rhs(), *this);
00480         return *this;
00481       }
00482 
00483 
      /** @brief Number of rows (logical, without padding). */
00485       const unsigned int & size1() const { return _rows;}
      /** @brief Number of columns (logical, without padding). */
00487       const unsigned int & size2() const { return _columns; }
00488       
      /** @brief Sets all entries (including padding) to zero via the 'clear' OpenCL kernel. */
00490       void clear()
00491       {
00492         unsigned int internal_size = internal_size1() * internal_size2();
00493         
00494         typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00495         
00496         viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "clear");
00497         viennacl::ocl::enqueue(k(_elements, internal_size));
00498       }
00499       
00500       
00501       //const unsigned int row_stride() const { return roundUpToNextMultiple<unsigned int>(columns(), ALIGNMENT); }
      /** @brief Number of rows of the padded device buffer. */
00503       const unsigned int internal_size1() const { return F::internal_size1(size1(), ALIGNMENT); }
      /** @brief Number of columns of the padded device buffer. */
00505       const unsigned int internal_size2() const { return F::internal_size2(size2(), ALIGNMENT); }
      /** @brief Total number of entries of the padded device buffer. */
00507       const unsigned int internal_size() const { return internal_size1() * internal_size2(); }
00508       
      /** @brief Returns the OpenCL memory handle of the entry buffer. */
00510       const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }
00511       
      // The copy()/fast_copy() helpers below write _elements directly, hence the
      // friend declarations; the first branch works around template-friend
      // limitations of Visual Studio 2005.
00512       #if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
00513       template <typename CPU_MATRIX>
00514       friend void copy(const CPU_MATRIX & cpu_matrix,
00515                       matrix & gpu_matrix );
00516       
00517       template <typename SCALARTYPE2, typename A1, typename A2>
00518       friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
00519                       matrix & gpu_matrix );
00520       
00521       template <typename SCALARTYPE2>
00522       friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
00523                             SCALARTYPE2 * cpu_matrix_end,
00524                             matrix & gpu_matrix);
00525       
00526       #ifdef VIENNACL_HAVE_EIGEN
00527       friend void copy(const Eigen::MatrixXf & cpu_matrix,
00528                        matrix & gpu_matrix);
00529       
00530       friend void copy(const Eigen::MatrixXd & cpu_matrix,
00531                        matrix & gpu_matrix);
00532       #endif
00533       
00534       #ifdef VIENNACL_HAVE_MTL4
00535       template <typename SCALARTYPE2, typename T>
00536       friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
00537                        matrix & gpu_matrix);
00538       #endif
00539       #else
00540       template <typename CPU_MATRIX, typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
00541       friend void copy(const CPU_MATRIX & cpu_matrix,
00542                       matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );
00543                       
00544       template <typename SCALARTYPE2, typename A1, typename A2, typename F2, unsigned int ALIGNMENT2>
00545       friend void copy(const std::vector< std::vector<SCALARTYPE2, A1>, A2> & cpu_matrix,
00546                        matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix );
00547       
00548       template <typename SCALARTYPE2, typename F2, unsigned int ALIGNMENT2>
00549       friend void fast_copy(SCALARTYPE2 * cpu_matrix_begin,
00550                             SCALARTYPE2 * cpu_matrix_end,
00551                             matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
00552       
00553       #ifdef VIENNACL_HAVE_EIGEN
00554       template <typename F2, unsigned int ALIGNMENT2>
00555       friend void copy(const Eigen::MatrixXf & cpu_matrix,
00556                 matrix<float, F2, ALIGNMENT2> & gpu_matrix);
00557       
00558       template <typename F2, unsigned int ALIGNMENT2>
00559       friend void copy(const Eigen::MatrixXd & cpu_matrix,
00560                 matrix<double, F2, ALIGNMENT2> & gpu_matrix);
00561       #endif
00562       
00563       #ifdef VIENNACL_HAVE_MTL4
00564       template <typename SCALARTYPE2, typename T, typename F2, unsigned int ALIGNMENT2>
00565       friend void copy(const mtl::dense2D<SCALARTYPE2, T>& cpu_matrix,
00566                        matrix<SCALARTYPE2, F2, ALIGNMENT2> & gpu_matrix);
00567       #endif
00568       #endif                 
00569       
00570     private:
      // declaration order matters: _rows and _columns must precede _elements
      // (the copy constructor's initializer list depends on it)
00571       unsigned int _rows;
00572       unsigned int _columns;
00573       viennacl::ocl::handle<cl_mem> _elements;
00574     }; //matrix
00575 
00581     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00582     std::ostream & operator<<(std::ostream & s, const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
00583     {
00584       std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
00585       cl_int err;
00586       err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &tmp[0], 0, NULL, NULL);
00587       VIENNACL_ERR_CHECK(err);
00588       viennacl::ocl::get_queue().finish();
00589       
00590       s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
00591       
00592       s << "(";
00593       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00594       {
00595         s << "(";
00596         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j)
00597         {
00598           s << tmp[ i * gpu_matrix.internal_size2() + j ];
00599           if (j < gpu_matrix.size2() - 1)
00600             s << ",";
00601         }
00602         s << ")";
00603         if (i < gpu_matrix.size1() - 1)
00604           s << ",";
00605       }
00606       s << ")";
00607       return s;
00608     }
00609 
00615     template<typename LHS, typename RHS, typename OP>
00616     std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
00617     {
00618       typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType     ScalarType;
00619 
00620       matrix<ScalarType> temp = expr;
00621       s << temp;
00622       return s;
00623     }
00624     
00626     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00627     matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00628                        const matrix<SCALARTYPE, F, ALIGNMENT>,
00629                        op_trans> trans(const matrix<SCALARTYPE, F, ALIGNMENT> & mat)
00630     {
00631       return matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00632                                 const matrix<SCALARTYPE, F, ALIGNMENT>,
00633                                 op_trans>(mat, mat);
00634     }
00635     
00636     
00638 
00639     //
00640     //cpu to gpu, generic type:
00641     //
00647     template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
00648     void copy(const CPU_MATRIX & cpu_matrix,
00649               matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
00650     {
00651       gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()),
00652                         static_cast<unsigned int>(cpu_matrix.size2()), false);
00653 
00654       std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
00655       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00656       {
00657         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00658           data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
00659       }
00660       
00661       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00662     }
00663     
00664     //
00665     //cpu to gpu, STL type:
00666     //
00672     template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
00673     void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
00674               matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
00675     {
00676       gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size()),
00677                         static_cast<unsigned int>(cpu_matrix[0].size()),
00678                         false);
00679 
00680       std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
00681       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00682       {
00683         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00684           data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
00685       }
00686       
00687       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00688     }
00689     
00690     
00691     //
00692     //cpu to gpu, another STL type:
00693     //
00700     template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
00701     void fast_copy(SCALARTYPE * cpu_matrix_begin,
00702                    SCALARTYPE * cpu_matrix_end,
00703                    matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
00704     {
00705       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
00706                                                                             sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
00707                                                                             cpu_matrix_begin);
00708     }
00709     
00710    
00711     #ifdef VIENNACL_HAVE_EIGEN
00712 
00717     template <typename F, unsigned int ALIGNMENT>
00718     void copy(const Eigen::MatrixXf & cpu_matrix,
00719               matrix<float, F, ALIGNMENT> & gpu_matrix)
00720     {
00721       gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()),
00722                         static_cast<unsigned int>(cpu_matrix.cols()),
00723                         false);
00724 
00725       std::vector<float> data(gpu_matrix.internal_size());
00726       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00727       {
00728         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00729           data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
00730       }
00731       
00732       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00733     }
00734     
00740     template <typename F, unsigned int ALIGNMENT>
00741     void copy(const Eigen::MatrixXd & cpu_matrix,
00742               matrix<double, F, ALIGNMENT> & gpu_matrix)
00743     {
00744       gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.rows()),
00745                         static_cast<unsigned int>(cpu_matrix.cols()),
00746                         false);
00747 
00748       std::vector<double> data(gpu_matrix.internal_size());
00749       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00750       {
00751         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00752           data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
00753       }
00754       
00755       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00756     }
00757     #endif
00758     
00759     #ifdef VIENNACL_HAVE_MTL4
00760 
00765     template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
00766     void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
00767               matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
00768     {
00769       gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.num_rows()),
00770                         static_cast<unsigned int>(cpu_matrix.num_cols()),
00771                         false);
00772 
00773       std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
00774       for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00775       {
00776         for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00777           data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
00778       }
00779       
00780       gpu_matrix._elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00781     }
00782     #endif
00783     
00784     
00785     
00786     
00787     //
00788     //gpu to cpu, generic type
00789     //
00795     template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
00796     void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
00797               CPU_MATRIX & cpu_matrix )
00798     {
00799       if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
00800       {
00801         std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
00802         cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
00803         VIENNACL_ERR_CHECK(err);
00804         
00805         //now copy entries to cpu_matrix:
00806         for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00807           for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00808             cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
00809       }
00810     }
00811 
00812     //gpu to cpu, STL type
00818     template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
00819     void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
00820               std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
00821     {
00822       if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) 
00823          && (cpu_matrix.size() >= gpu_matrix.size1()) && (cpu_matrix[0].size() >= gpu_matrix.size2()))
00824       {
00825         std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
00826         cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]), 0, NULL, NULL);
00827         VIENNACL_ERR_CHECK(err);
00828         
00829         //now copy entries to cpu_matrix:
00830         for (unsigned int i = 0; i < gpu_matrix.size1(); ++i)
00831           for (unsigned int j = 0; j < gpu_matrix.size2(); ++j) 
00832             cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
00833       }
00834     }
00835 
00836     //gpu to cpu, STL type
00842     template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
00843     void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
00844                    SCALARTYPE * cpu_matrix_begin)
00845     {
00846       cl_int err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(),
00847                                        gpu_matrix.handle(), 
00848                                        CL_TRUE, 0,
00849                                        sizeof(SCALARTYPE)*gpu_matrix.internal_size(),
00850                                        cpu_matrix_begin, 0, NULL, NULL);
00851       VIENNACL_ERR_CHECK(err);
00852     }
00853 
00854 
00855 
00856 
00857 
00858 
00859 
00860 
00861 
00862     // outer_prod(v1, v2) * val;
00863     template<typename CPU_SCALAR, typename SCALARTYPE,unsigned int VECTOR_ALIGNMENT>
00864     viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00865                                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00866                                                                     op_prod>,
00867                                  const SCALARTYPE,
00868                                  op_prod>  operator*(const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00869                                                                                         const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00870                                                                                         op_prod> & proxy,
00871                                                      CPU_SCALAR val)
00872     {
00873       return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00874                                                                              const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>,
00875                                                                              op_prod>,
00876                                           const SCALARTYPE,
00877                                           op_prod>(proxy, static_cast<SCALARTYPE>(val));
00878     }
00879 
00880     // val * outer_prod(v1, v2);
00881     template <typename CPU_SCALAR, typename SCALARTYPE, unsigned int VA1, unsigned int VA2>
00882     viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
00883                                                                     const viennacl::vector<SCALARTYPE, VA2>,
00884                                                                     op_prod>,
00885                                  const SCALARTYPE,
00886                                  op_prod>  operator*(CPU_SCALAR val,
00887                                                      viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
00888                                                                                   const viennacl::vector<SCALARTYPE, VA2>,
00889                                                                                   op_prod> const & proxy)
00890     {
00891       return viennacl::matrix_expression< const viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
00892                                                                              const viennacl::vector<SCALARTYPE, VA2>,
00893                                                                              op_prod>,
00894                                           const SCALARTYPE,
00895                                           op_prod>(proxy, static_cast<SCALARTYPE>(val));
00896     }
00897     
00898    
00899 
00900 } //namespace viennacl
00901 
00902 #endif

Generated on Sat May 21 2011 20:36:50 for ViennaCL - The Vienna Computing Library by doxygen 1.7.1