• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/ViennaCL-1.1.2/viennacl/coordinate_matrix.hpp

Go to the documentation of this file.
00001 /* =======================================================================
00002    Copyright (c) 2010, Institute for Microelectronics, TU Vienna.
00003    http://www.iue.tuwien.ac.at
00004                              -----------------
00005                      ViennaCL - The Vienna Computing Library
00006                              -----------------
00007                             
00008    authors:    Karl Rupp                          rupp@iue.tuwien.ac.at
00009                Florian Rudolf                     flo.rudy+viennacl@gmail.com
00010                Josef Weinbub                      weinbub@iue.tuwien.ac.at
00011 
00012    license:    MIT (X11), see file LICENSE in the ViennaCL base directory
00013 ======================================================================= */
00014 
00015 #ifndef _VIENNACL_COORDINATE_MATRIX_HPP_
00016 #define _VIENNACL_COORDINATE_MATRIX_HPP_
00017 
00022 #include <map>
00023 #include <vector>
00024 #include <list>
00025 
00026 #include "viennacl/forwards.h"
00027 #include "viennacl/ocl/backend.hpp"
00028 #include "viennacl/vector.hpp"
00029 
00030 #include "viennacl/linalg/coordinate_matrix_operations.hpp"
00031 
00032 namespace viennacl
00033 {
00034   
00035     
00036     //provide copy-operation:
00044     template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00045     void copy(const CPU_MATRIX & cpu_matrix,
00046                      coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
00047     {
00048       size_t group_num = 64;
00049       
00050       // Step 1: Determine nonzeros:
00051       if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
00052       {
00053         unsigned int num_entries = 0;
00054         for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
00055               row_it != cpu_matrix.end1();
00056               ++row_it)
00057         {
00058           for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
00059                 col_it != row_it.end();
00060                 ++col_it)
00061           {
00062             ++num_entries;
00063           }
00064         }
00065         
00066         // Step 2: Set up matrix data:
00067         std::cout << "Number of entries: " << num_entries << std::endl;
00068         gpu_matrix.nonzeros_ = num_entries;
00069         gpu_matrix.rows_ = cpu_matrix.size1();
00070         gpu_matrix.cols_ = cpu_matrix.size2();
00071 
00072         std::vector<cl_uint> coord_buffer(2*gpu_matrix.internal_nnz());
00073         std::vector<cl_uint> group_boundaries(group_num + 1);
00074         std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());
00075         
00076         unsigned int data_index = 0;
00077         unsigned int current_fraction = 0;
00078         
00079         for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
00080               row_it != cpu_matrix.end1();
00081               ++row_it)
00082         {
00083           for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
00084                 col_it != row_it.end();
00085                 ++col_it)
00086           {
00087             coord_buffer[2*data_index] = static_cast<unsigned int>(col_it.index1());
00088             coord_buffer[2*data_index + 1] = static_cast<unsigned int>(col_it.index2());
00089             elements[data_index] = *col_it;
00090             ++data_index;
00091           }
00092           
00093           if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)    //split data equally over 64 groups
00094             group_boundaries[++current_fraction] = data_index;
00095         }
00096         
00097         //write end of last group:
00098         group_boundaries[group_num] = data_index;
00099         //group_boundaries[1] = data_index; //for one compute unit
00100         
00101         /*std::cout << "Group boundaries: " << std::endl;
00102         for (size_t i=0; i<group_boundaries.size(); ++i)
00103           std::cout << group_boundaries[i] << std::endl;*/
00104         
00105         gpu_matrix.coord_buffer_     = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer);
00106         gpu_matrix.elements_         = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements);
00107         gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries);
00108       }
00109     }
00110 
00116     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00117     void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
00118                      coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
00119     {
00120       copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
00121     }
00122     
00123     //gpu to cpu:
00133     template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00134     void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
00135                      CPU_MATRIX & cpu_matrix )
00136     {
00137       if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
00138       {
00139         cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);
00140         
00141         //get raw data from memory:
00142         std::vector<unsigned int> coord_buffer(2*gpu_matrix.nnz());
00143         std::vector<SCALARTYPE> elements(gpu_matrix.nnz());
00144         
00145         //std::cout << "GPU nonzeros: " << gpu_matrix.nnz() << std::endl;
00146         
00147         cl_int err;
00148         err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(unsigned int)* 2 *gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL);
00149         VIENNACL_ERR_CHECK(err);
00150         err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
00151         VIENNACL_ERR_CHECK(err);
00152         viennacl::ocl::get_queue().finish();
00153         
00154         //fill the cpu_matrix:
00155         for (unsigned int index = 0; index < gpu_matrix.nnz(); ++index)
00156         {
00157           cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
00158         }
00159       }
00160     }
00161 
00167     template <typename SCALARTYPE, unsigned int ALIGNMENT>
00168     void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
00169               std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
00170     {
00171       tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
00172       copy(gpu_matrix, temp);
00173     }
00174 
00175 
00177 
00184     template<class SCALARTYPE, unsigned int ALIGNMENT /* see VCLForwards.h */ >
00185     class coordinate_matrix
00186     {
00187     public:
00188       typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
00189       
00191       coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); }
00192       
00199       coordinate_matrix(unsigned int rows, unsigned int cols, unsigned int nonzeros = 0) : 
00200         rows_(rows), cols_(cols), nonzeros_(nonzeros)
00201       {
00202         viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init();
00203         if (nonzeros > 0)
00204         {
00205           coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
00206           elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
00207           group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));
00208         }
00209       }
00210         
00212       void reserve(unsigned int new_nonzeros)
00213       {
00214         if (new_nonzeros > nonzeros_)
00215         {
00216           viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_;
00217           viennacl::ocl::handle<cl_mem> elements_old = elements_;
00218           coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
00219           elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
00220           
00221           cl_int err;
00222           err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * nonzeros_, 0, NULL, NULL);
00223           VIENNACL_ERR_CHECK(err);
00224           err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE)*nonzeros_, 0, NULL, NULL);
00225           VIENNACL_ERR_CHECK(err);
00226 
00227           //new memory must be padded with zeros:
00228           std::vector<long> temp(internal_nnz() - nonzeros_);
00229           err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, nonzeros_, sizeof(cl_uint) * 2 * temp.size(), 0, NULL, NULL);
00230           VIENNACL_ERR_CHECK(err);
00231           err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, nonzeros_, sizeof(SCALARTYPE)*temp.size(), 0, NULL, NULL);
00232           VIENNACL_ERR_CHECK(err);
00233         }
00234       }
00235 
00242       void resize(unsigned int new_size1, unsigned int new_size2, bool preserve = true)
00243       {
00244         assert (new_size1 > 0 && new_size2 > 0);
00245                 
00246         if (new_size1 < rows_ || new_size2 < cols_) //enlarge buffer
00247         {
00248           std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
00249           if (rows_ > 0)
00250             stl_sparse_matrix.resize(rows_);
00251           
00252           if (preserve && rows_ > 0)
00253             viennacl::copy(*this, stl_sparse_matrix);
00254             
00255           stl_sparse_matrix.resize(new_size1);
00256           
00257           std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
00258           if (new_size2 < cols_ && rows_ > 0)
00259           {
00260             for (size_t i=0; i<stl_sparse_matrix.size(); ++i)
00261             {
00262               std::list<unsigned int> to_delete;
00263               for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
00264                    it != stl_sparse_matrix[i].end();
00265                   ++it)
00266               {
00267                 if (it->first >= new_size2)
00268                   to_delete.push_back(it->first);
00269               }
00270               
00271               for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
00272                 stl_sparse_matrix[i].erase(*it);
00273             }
00274           std::cout << "Cropping done..." << std::endl;
00275           }
00276           
00277           rows_ = new_size1;
00278           cols_ = new_size2;
00279           viennacl::copy(stl_sparse_matrix, *this);
00280         }
00281           
00282         rows_ = new_size1;
00283         cols_ = new_size2;
00284       }
00285 
00286 
00288       unsigned int size1() const { return rows_; }
00290       unsigned int size2() const { return cols_; }
00292       unsigned int nnz() const { return nonzeros_; }
00294       unsigned int internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<unsigned int>(nonzeros_, ALIGNMENT);; }
00295       
00297       const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; }
00299       const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
00301       const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; }
00302       
00303       #if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
00304       template <typename CPU_MATRIX>
00305       friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
00306       #else
00307       template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
00308       friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
00309       #endif
00310 
00311     private:
00313       coordinate_matrix(coordinate_matrix const &);
00314       
00316       coordinate_matrix & operator=(coordinate_matrix const &);
00317       
00318       
00319       unsigned int rows_;
00320       unsigned int cols_;
00321       unsigned int nonzeros_;
00322       viennacl::ocl::handle<cl_mem> coord_buffer_;
00323       viennacl::ocl::handle<cl_mem> elements_;
00324       viennacl::ocl::handle<cl_mem> group_boundaries_;
00325     };
00326 
00327 
00328 }
00329 
00330 #endif

Generated on Sat May 21 2011 20:36:50 for ViennaCL - The Vienna Computing Library by  doxygen 1.7.1