• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/ViennaCL-1.1.2/viennacl/linalg/matrix_operations.hpp

Go to the documentation of this file.
00001 /* =======================================================================
00002    Copyright (c) 2010, Institute for Microelectronics, TU Vienna.
00003    http://www.iue.tuwien.ac.at
00004                              -----------------
00005                      ViennaCL - The Vienna Computing Library
00006                              -----------------
00007                             
00008    authors:    Karl Rupp                          rupp@iue.tuwien.ac.at
00009                Florian Rudolf                     flo.rudy+viennacl@gmail.com
00010                Josef Weinbub                      weinbub@iue.tuwien.ac.at
00011 
00012    license:    MIT (X11), see file LICENSE in the ViennaCL base directory
00013 ======================================================================= */
00014 
00015 #ifndef _VIENNACL_MATRIX_OPERATIONS_HPP_
00016 #define _VIENNACL_MATRIX_OPERATIONS_HPP_
00017 
00022 #include "viennacl/forwards.h"
00023 #include "viennacl/ocl/device.hpp"
00024 #include "viennacl/ocl/handle.hpp"
00025 #include "viennacl/ocl/kernel.hpp"
00026 #include "viennacl/scalar.hpp"
00027 #include "viennacl/vector.hpp"
00028 #include "viennacl/tools/tools.hpp"
00029 #include "viennacl/tools/matrix_kernel_class_deducer.hpp"
00030 #include "viennacl/tools/matrix_prod_kernel_class_deducer.hpp"
00031 #include "viennacl/linalg/kernels/vector_kernels.h"
00032 #include "viennacl/linalg/kernels/matrix_row_kernels.h"
00033 #include "viennacl/linalg/kernels/matrix_col_kernels.h"
00034 
00035 #include "viennacl/linalg/kernels/matrix_prod_col_col_col_kernels.h"
00036 #include "viennacl/linalg/kernels/matrix_prod_col_col_row_kernels.h"
00037 #include "viennacl/linalg/kernels/matrix_prod_col_row_col_kernels.h"
00038 #include "viennacl/linalg/kernels/matrix_prod_col_row_row_kernels.h"
00039 
00040 #include "viennacl/linalg/kernels/matrix_prod_row_col_col_kernels.h"
00041 #include "viennacl/linalg/kernels/matrix_prod_row_col_row_kernels.h"
00042 #include "viennacl/linalg/kernels/matrix_prod_row_row_col_kernels.h"
00043 #include "viennacl/linalg/kernels/matrix_prod_row_row_row_kernels.h"
00044 
00045 namespace viennacl
00046 {
00047   namespace linalg
00048   {
00049     
00058     template<class TYPE, typename F, unsigned int ALIGNMENT>
00059     void add(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat1, 
00060              const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2,
00061              viennacl::matrix<TYPE, F, ALIGNMENT> & result)
00062     {
00063       assert(result.size1() == mat1.size1());
00064       assert(result.size2() == mat1.size2());
00065       assert(result.size1() == mat2.size1());
00066       assert(result.size2() == mat2.size2());
00067 
00068       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00069       
00070       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "add");
00071       assert( (mat1.internal_size() == mat2.internal_size())
00072              && "Operands must have same dimension and memory layout in this version of ViennaCL!");
00073       unsigned int size = std::min(mat1.internal_size(), mat2.internal_size());
00074 
00075       viennacl::ocl::enqueue(k(mat1, mat2, result, size));        
00076     }
00077 
00086     template<class TYPE, typename F, unsigned int ALIGNMENT>
00087     void inplace_add(viennacl::matrix<TYPE, F, ALIGNMENT> & result, 
00088                      const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2)
00089     {
00090       assert(result.size1() == mat2.size1());
00091       assert(result.size2() == mat2.size2());
00092 
00093       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00094       
00095       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_add");
00096       assert( (result.internal_size() == mat2.internal_size())
00097              && "Operands must have same dimension and memory layout in this version of ViennaCL!");
00098       unsigned int size = std::min(result.internal_size(), mat2.internal_size());
00099 
00100       viennacl::ocl::enqueue(k(result, mat2, size));        
00101     }
00102 
00103 
00112     template<class TYPE, typename F, unsigned int ALIGNMENT>
00113     void sub(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat1, 
00114              const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2,
00115              viennacl::matrix<TYPE, F, ALIGNMENT> & result)
00116     {
00117       assert(result.size1() == mat1.size1());
00118       assert(result.size2() == mat1.size2());
00119       assert(result.size1() == mat2.size1());
00120       assert(result.size2() == mat2.size2());
00121 
00122       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00123       
00124       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "sub");
00125       assert( (mat1.internal_size() == mat2.internal_size())
00126              && "Operands must have same dimension and memory layout in this version of ViennaCL!");
00127       unsigned int size = std::min(mat1.internal_size(), mat2.internal_size());
00128 
00129       viennacl::ocl::enqueue(k(mat1, mat2, result, size));        
00130     }
00131 
00140     template<class TYPE, typename F, unsigned int ALIGNMENT>
00141     void inplace_sub(viennacl::matrix<TYPE, F, ALIGNMENT> & result, 
00142                      const viennacl::matrix<TYPE, F, ALIGNMENT> & mat2)
00143     {
00144       assert(result.size1() == mat2.size1());
00145       assert(result.size2() == mat2.size2());
00146 
00147       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00148       
00149       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_sub");
00150       assert( (result.internal_size() == mat2.internal_size())
00151              && "Operands must have same dimension and memory layout in this version of ViennaCL!");
00152       unsigned int size = std::min(result.internal_size(), mat2.internal_size());
00153 
00154       viennacl::ocl::enqueue(k(result, mat2, size));        
00155     }
00156 
00165     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00166     void inplace_mult(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 
00167                       SCALARTYPE val)
00168     {
00169       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00170       
00171       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "cpu_inplace_mult");
00172       unsigned int size = result.internal_size();
00173 
00174       viennacl::ocl::enqueue(k(result, val, size));
00175     }
00176 
00177 
00186     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00187     void inplace_mult(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 
00188                       viennacl::scalar<SCALARTYPE> const & val)
00189     {
00190       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00191       
00192       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_mult");
00193       unsigned int size = result.internal_size();
00194 
00195       viennacl::ocl::enqueue(k(result, val, size));
00196     }
00197 
00198 
00199 
00208     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00209     void inplace_divide(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & result, 
00210                         viennacl::scalar<SCALARTYPE> const & val)
00211     {
00212       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00213       
00214       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "inplace_divide");
00215       unsigned int size = result.internal_size();
00216 
00217       viennacl::ocl::enqueue(k(result, val, size));
00218     }
00219 
00220     // A * x
00228     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
00229     viennacl::vector_expression<const viennacl::matrix<SCALARTYPE, F, ALIGNMENT>,
00230                                 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 
00231                                 op_prod > prod_impl(const viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat, 
00232                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec)
00233     {
00234       return viennacl::vector_expression<const viennacl::matrix<SCALARTYPE, F, ALIGNMENT>,
00235                                          const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 
00236                                          op_prod >(mat, vec);
00237     }
00238     
00247     template<class TYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
00248     void prod_impl(const viennacl::matrix<TYPE, F, ALIGNMENT> & mat, 
00249                     const viennacl::vector<TYPE, VECTOR_ALIGNMENT> & vec, 
00250                           viennacl::vector<TYPE, VECTOR_ALIGNMENT> & result)
00251     {
00252       assert(mat.size2() == vec.size());
00253       // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
00254       assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!");
00255       result.resize(mat.size1());
00256 
00257       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<TYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00258       
00259       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "vec_mul");
00260       viennacl::ocl::enqueue(
00261                              k(mat, mat.size1(), mat.size2(), mat.internal_size1(), mat.internal_size2(), vec, result));    
00262     }
00263 
00264 
00265 
00266     // trans(A) * x
00274     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
00275     viennacl::vector_expression<const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00276                                                                    const matrix<SCALARTYPE, F, ALIGNMENT>,
00277                                                                    op_trans>,
00278                                 const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 
00279                                 op_prod > prod_impl(const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00280                                                                                        const matrix<SCALARTYPE, F, ALIGNMENT>,
00281                                                                                        op_trans> & proxy, 
00282                                                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec)
00283     {
00284       return viennacl::vector_expression<const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00285                                                                             const matrix<SCALARTYPE, F, ALIGNMENT>,
00286                                                                             op_trans>,
00287                                          const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT>, 
00288                                          op_prod >(proxy, vec);
00289     }
00290 
00293     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
00294     void prod_impl(const viennacl::matrix_expression< const matrix<SCALARTYPE, F, ALIGNMENT>,
00295                                                       const matrix<SCALARTYPE, F, ALIGNMENT>,
00296                                                       op_trans> & mat,
00297                     const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, 
00298                           viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & result)
00299     {
00300       trans_prod_impl(mat.lhs(), vec, result);
00301     }
00302     
00311     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT, unsigned int VECTOR_ALIGNMENT>
00312     void trans_prod_impl(const matrix<SCALARTYPE, F, ALIGNMENT> & mat,
00313                           const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, 
00314                                 viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & result)
00315     {
00316       assert(mat.size1() == vec.size());  //remember: mat is transposed!
00317       // Inplace matrix-vector products like x = prod(A, x) are currently illegal: Introduce a temporary like y = prod(A, x); x = y; instead
00318       assert(vec.handle() != result.handle() && "No direct inplace matrix-vector product possible. Introduce a temporary!");
00319       result.resize(mat.size2());
00320 
00321       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00322       
00323       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "trans_vec_mul");
00324       
00325       viennacl::ocl::enqueue(k(mat, mat.size1(), mat.size2(), mat.internal_size1(), mat.internal_size2(), vec, result));        
00326     }
00327 
00328 
00329 
00335     template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
00336     void prod_impl(const viennacl::matrix<TYPE, F1, ALIGNMENT> & A, 
00337                     const viennacl::matrix<TYPE, F2, ALIGNMENT> & B, 
00338                           viennacl::matrix<TYPE, F3, ALIGNMENT> & C, 
00339                           int block_size = 15) // [JW] added ability to set block size from outside ..
00340     {
00341       assert(A.size1() == C.size1());
00342       assert(A.size2() == B.size1());
00343       assert(B.size2() == C.size2());
00344       // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
00345       assert(C.handle() != A.handle() 
00346              && C.handle() != B.handle()
00347              && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
00348       
00349       typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>,
00350                                                                           viennacl::matrix<TYPE, F2, ALIGNMENT>,
00351                                                                           viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType    KernelClass;
00352       KernelClass::init();
00353       
00354       //std::cout << "KernelClass::program_name() : " << KernelClass::program_name() << std::endl;
00355       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AA");
00356       
00357       /*k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1() / 2, block_size / 2));
00358       k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2() / 2, block_size / 2));
00359       k.local_work_size(0, block_size / 2);
00360       k.local_work_size(1, block_size / 2);*/
00361       
00362       k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
00363       k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
00364       k.local_work_size(0, block_size);
00365       k.local_work_size(1, block_size);
00366       
00367       viennacl::ocl::enqueue(
00368                              k(A, A.size1(), A.size2(), A.internal_size1(), A.internal_size2(),
00369                                B, B.size1(), B.size2(), B.internal_size1(), B.internal_size2(),
00370                                C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(),
00371                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
00372                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) ));        
00373     }
00374 
00375 
00381     template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
00382     void prod_impl(const viennacl::matrix_expression< const matrix<TYPE, F1, ALIGNMENT>,
00383                                                       const matrix<TYPE, F1, ALIGNMENT>,
00384                                                       op_trans> & A, 
00385                     const viennacl::matrix<TYPE, F2, ALIGNMENT> & B, 
00386                           viennacl::matrix<TYPE, F3, ALIGNMENT> & C)
00387     {
00388       assert(A.size2() == C.size1());
00389       assert(A.size1() == B.size1());
00390       assert(B.size2() == C.size2());
00391       // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
00392       assert(C.handle() != A.lhs().handle() 
00393              && C.handle() != B.handle()
00394              && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
00395       
00396       int block_size = 15;
00397 
00398       typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>,
00399                                                                           viennacl::matrix<TYPE, F2, ALIGNMENT>,
00400                                                                           viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType    KernelClass;
00401       KernelClass::init();
00402       
00403       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TA");
00404       
00405       k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
00406       k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
00407       
00408       k.local_work_size(0, block_size);
00409       k.local_work_size(1, block_size);
00410       viennacl::ocl::enqueue(
00411                              k(A.lhs(), A.lhs().size1(), A.lhs().size2(), A.lhs().internal_size1(), A.lhs().internal_size2(),
00412                                B, B.size1(), B.size2(), B.internal_size1(), B.internal_size2(),
00413                                C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(),
00414                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
00415                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) ));        
00416     }
00417 
00418 
00424     template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
00425     void prod_impl(const viennacl::matrix<TYPE, F1, ALIGNMENT> & A, 
00426                    const viennacl::matrix_expression< const matrix<TYPE, F2, ALIGNMENT>,
00427                                                       const matrix<TYPE, F2, ALIGNMENT>,
00428                                                       op_trans> & B,
00429                    viennacl::matrix<TYPE, F3, ALIGNMENT> & C)
00430     {
00431       assert(A.size1() == C.size1());
00432       assert(A.size2() == B.size2());
00433       assert(B.size1() == C.size2());
00434       // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
00435       assert(C.handle() != A.handle() 
00436              && C.handle() != B.lhs().handle()
00437              && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
00438       
00439       int block_size = 15;
00440 
00441       typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>,
00442                                                                           viennacl::matrix<TYPE, F2, ALIGNMENT>,
00443                                                                           viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType    KernelClass;
00444       KernelClass::init();
00445       
00446       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_AT");
00447       
00448       k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
00449       k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
00450       
00451       k.local_work_size(0, block_size);
00452       k.local_work_size(1, block_size);
00453       viennacl::ocl::enqueue(
00454                              k(A, A.size1(), A.size2(), A.internal_size1(), A.internal_size2(),
00455                                B.lhs(), B.lhs().size1(), B.lhs().size2(), B.lhs().internal_size1(), B.lhs().internal_size2(),
00456                                C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(),
00457                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
00458                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) ));        
00459     }
00460 
00461 
00462 
00468     template<class TYPE, typename F1, typename F2, typename F3, unsigned int ALIGNMENT>
00469     void prod_impl(const viennacl::matrix_expression< const matrix<TYPE, F1, ALIGNMENT>,
00470                                                       const matrix<TYPE, F1, ALIGNMENT>,
00471                                                       op_trans> & A,
00472                    const viennacl::matrix_expression< const matrix<TYPE, F2, ALIGNMENT>,
00473                                                       const matrix<TYPE, F2, ALIGNMENT>,
00474                                                       op_trans> & B,
00475                    viennacl::matrix<TYPE, F3, ALIGNMENT> & C)
00476     {
00477       assert(A.size2() == C.size1());
00478       assert(A.size1() == B.size2());
00479       assert(B.size1() == C.size2());
00480       // Inplace matrix-vector products like B = prod(A, B) are currently illegal: Introduce a temporary like C = prod(A, B); B = C; instead
00481       assert(C.handle() != A.lhs().handle() 
00482              && C.handle() != B.lhs().handle()
00483              && "No direct inplace matrix-matrix product possible. Introduce a temporary!");
00484       
00485       int block_size = 15;
00486 
00487       typedef typename viennacl::tools::MATRIX_PROD_KERNEL_CLASS_DEDUCER< viennacl::matrix<TYPE, F1, ALIGNMENT>,
00488                                                                           viennacl::matrix<TYPE, F2, ALIGNMENT>,
00489                                                                           viennacl::matrix<TYPE, F3, ALIGNMENT> >::ResultType    KernelClass;
00490       KernelClass::init();
00491       
00492       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "prod_TT");
00493       
00494       k.global_work_size(0, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size1(), block_size));
00495       k.global_work_size(1, viennacl::tools::roundUpToNextMultiple<unsigned int>(C.size2(), block_size));
00496       
00497       k.local_work_size(0, block_size);
00498       k.local_work_size(1, block_size);
00499       viennacl::ocl::enqueue(
00500                              k(A.lhs(), A.lhs().size1(), A.lhs().size2(), A.lhs().internal_size1(), A.lhs().internal_size2(),
00501                                B.lhs(), B.lhs().size1(), B.lhs().size2(), B.lhs().internal_size1(), B.lhs().internal_size2(),
00502                                C, C.size1(), C.size2(), C.internal_size1(), C.internal_size2(),
00503                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size),
00504                                viennacl::ocl::local_mem(sizeof(TYPE) * block_size * block_size) ));        
00505     }
00506 
00507 
00508 
00509 
00510 
00511 
00512 
00513 
00514 
00515 
00516 
00522     template<class SCALARTYPE, unsigned int VA1, unsigned int VA2>
00523     viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
00524                                  const viennacl::vector<SCALARTYPE, VA2>,
00525                                  op_prod> outer_prod(const viennacl::vector<SCALARTYPE, VA1> & vec1, 
00526                                                      const viennacl::vector<SCALARTYPE, VA2> & vec2)
00527     {
00528       return viennacl::matrix_expression< const viennacl::vector<SCALARTYPE, VA1>,
00529                                           const viennacl::vector<SCALARTYPE, VA2>,
00530                                           op_prod>(vec1, vec2);
00531     }
00532     
00533     
00534 
00543     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00544     void rank_1_update(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat1, 
00545                        const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1, 
00546                        const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2)
00547     {
00548       assert(mat1.size1() == vec1.size());
00549       assert(mat1.size2() == vec2.size());
00550 
00551       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00552       
00553       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "rank1_update");
00554 
00555       viennacl::ocl::enqueue(k(mat1, mat1.size1(), mat1.size2(), mat1.internal_size1(), mat1.internal_size2(), vec1, vec2));        
00556     }
00557     
00558     
00568     template<class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00569     void scaled_rank_1_update(viennacl::matrix<SCALARTYPE, F, ALIGNMENT> & mat1,
00570                               SCALARTYPE val,
00571                               const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec1, 
00572                               const viennacl::vector<SCALARTYPE, ALIGNMENT> & vec2)
00573     {
00574       assert(mat1.size1() == vec1.size());
00575       assert(mat1.size2() == vec2.size());
00576 
00577       typedef typename viennacl::tools::MATRIX_KERNEL_CLASS_DEDUCER< matrix<SCALARTYPE, F, ALIGNMENT> >::ResultType    KernelClass;
00578       
00579       viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(KernelClass::program_name(), "scaled_rank1_update");
00580 
00581       viennacl::ocl::enqueue(k(mat1, mat1.size1(), mat1.size2(), mat1.internal_size1(), mat1.internal_size2(), 
00582                                                            val, vec1, vec2));        
00583     }
00584     
00585   } //namespace linalg
00586 
00587 
00588     //v = A * x
00593     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00594     template <typename F, unsigned int MAT_ALIGNMENT>
00595     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00596     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00597                                                                                           const viennacl::vector<SCALARTYPE, ALIGNMENT>,
00598                                                                                           viennacl::op_prod> & proxy) 
00599     {
00600       // check for the special case x = A * x
00601       if (proxy.rhs().handle() == this->handle())
00602       {
00603         viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size());
00604         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00605         *this = result;
00606         return *this;
00607       }
00608       else
00609       {
00610         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
00611         return *this;
00612       }
00613       return *this;
00614     }
00615 
00616     //v += A * x
00621     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00622     template <typename F, unsigned int MAT_ALIGNMENT>
00623     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00624     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00625                                                                                  const vector<SCALARTYPE, ALIGNMENT>,
00626                                                                                  op_prod> & proxy) 
00627     {
00628       vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
00629       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00630       *this += result;
00631       return *this;
00632     }
00633 
00638     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00639     template <typename F, unsigned int MAT_ALIGNMENT>
00640     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00641     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00642                                                                                  const vector<SCALARTYPE, ALIGNMENT>,
00643                                                                                  op_prod> & proxy) 
00644     {
00645       vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
00646       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00647       *this -= result;
00648       return *this;
00649     }
00650     
00651     
00652     //free functions:
00657     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00658     template <typename F, unsigned int MAT_ALIGNMENT>
00659     viennacl::vector<SCALARTYPE, ALIGNMENT> 
00660     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00661                                                                                 const vector<SCALARTYPE, ALIGNMENT>,
00662                                                                                 op_prod> & proxy) 
00663     {
00664       assert(proxy.lhs().size1() == size());
00665       vector<SCALARTYPE, ALIGNMENT> result(size());
00666       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00667       result += *this;
00668       return result;
00669     }
00670 
00675     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00676     template <typename F, unsigned int MAT_ALIGNMENT>
00677     viennacl::vector<SCALARTYPE, ALIGNMENT> 
00678     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00679                                                                                 const vector<SCALARTYPE, ALIGNMENT>,
00680                                                                                 op_prod> & proxy) 
00681     {
00682       assert(proxy.lhs().size1() == size());
00683       vector<SCALARTYPE, ALIGNMENT> result(size());
00684       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00685       result = *this - result;
00686       return result;
00687     }
00688 
00689 
00691 
00692 
00693     //v = trans(A) * x
00698     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00699     template <typename F, unsigned int MAT_ALIGNMENT>
00700     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00701     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator=(const viennacl::vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00702                                                                                                                    const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00703                                                                                                                    op_trans>,
00704                                                                                           const viennacl::vector<SCALARTYPE, ALIGNMENT>,
00705                                                                                           viennacl::op_prod> & proxy) 
00706     {
00707       // check for the special case x = trans(A) * x
00708       if (proxy.rhs().handle() == this->handle())
00709       {
00710         viennacl::vector<SCALARTYPE, ALIGNMENT> result(proxy.rhs().size());
00711         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00712         *this = result;
00713         return *this;
00714       }
00715       else
00716       {
00717         viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this);
00718         return *this;
00719       }
00720       return *this;
00721     }
00722 
00723     //v += A * x
00728     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00729     template <typename F, unsigned int MAT_ALIGNMENT>
00730     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00731     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00732                                                                                                           const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00733                                                                                                           op_trans>,
00734                                                                                  const vector<SCALARTYPE, ALIGNMENT>,
00735                                                                                  op_prod> & proxy) 
00736     {
00737       vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
00738       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00739       *this += result;
00740       return *this;
00741     }
00742 
00747     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00748     template <typename F, unsigned int MAT_ALIGNMENT>
00749     viennacl::vector<SCALARTYPE, ALIGNMENT> & 
00750     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-=(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00751                                                                                                           const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00752                                                                                                           op_trans>,
00753                                                                                  const vector<SCALARTYPE, ALIGNMENT>,
00754                                                                                  op_prod> & proxy) 
00755     {
00756       vector<SCALARTYPE, ALIGNMENT> result(proxy.lhs().size1());
00757       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00758       *this -= result;
00759       return *this;
00760     }
00761     
00762     
00763     //free functions:
00768     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00769     template <typename F, unsigned int MAT_ALIGNMENT>
00770     viennacl::vector<SCALARTYPE, ALIGNMENT> 
00771     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator+(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00772                                                                                                          const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00773                                                                                                          op_trans>,
00774                                                                                 const vector<SCALARTYPE, ALIGNMENT>,
00775                                                                                 op_prod> & proxy) 
00776     {
00777       assert(proxy.lhs().size1() == size());
00778       vector<SCALARTYPE, ALIGNMENT> result(size());
00779       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00780       result += *this;
00781       return result;
00782     }
00783 
00788     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00789     template <typename F, unsigned int MAT_ALIGNMENT>
00790     viennacl::vector<SCALARTYPE, ALIGNMENT> 
00791     viennacl::vector<SCALARTYPE, ALIGNMENT>::operator-(const vector_expression< const matrix_expression< const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00792                                                                                                          const matrix<SCALARTYPE, F, MAT_ALIGNMENT>,
00793                                                                                                          op_trans>,
00794                                                                                 const vector<SCALARTYPE, ALIGNMENT>,
00795                                                                                 op_prod> & proxy) 
00796     {
00797       assert(proxy.lhs().size1() == size());
00798       vector<SCALARTYPE, ALIGNMENT> result(size());
00799       viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
00800       result = *this - result;
00801       return result;
00802     }
00803 
00804 
00805 } //namespace viennacl
00806 
00807 
00808 #endif

Generated on Sat May 21 2011 20:36:50 for ViennaCL - The Vienna Computing Library by  doxygen 1.7.1