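/** @file compressed_matrix.hpp
*   @brief Implementation of the sparse compressed_matrix class (CSR format) together with host/device copy routines
*/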
#ifndef _VIENNACL_COMPRESSED_MATRIX_HPP_
#define _VIENNACL_COMPRESSED_MATRIX_HPP_

#include <cassert>
#include <iostream>
#include <vector>
#include <list>
#include <map>
#include "viennacl/forwards.h"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/vector.hpp"

#include "viennacl/linalg/compressed_matrix_operations.hpp"

#include "viennacl/tools/tools.hpp"

namespace viennacl
{
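  /** @brief Copies a sparse host matrix to the OpenCL device. The host type must provide size1(), size2() and uBLAS-style const row/column iterators (e.g. boost::numeric::ublas::compressed_matrix).
  *
  * The source matrix is traversed twice: once to count the entries per row (each row is padded to the next multiple of ALIGNMENT), and once to fill the CSR arrays.
  *
  * @param cpu_matrix   The sparse host matrix to be copied
  * @param gpu_matrix   The compressed_matrix on the device to be filled
  */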
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
    {
      //first run: count the number of entries, with each row padded to the next multiple of ALIGNMENT:
      long num_entries = 0;
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        unsigned int entries_per_row = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          ++entries_per_row;
        }
        num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
      }

      if (num_entries == 0) //we copy an empty matrix
      {
        num_entries = 1;
      }

      //set up the CSR arrays on the host:
      std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(num_entries);
      std::vector<SCALARTYPE> elements(num_entries);

      unsigned int row_index = 0;
      unsigned int data_index = 0;

      //second run: fill the CSR arrays:
      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
      {
        row_buffer[row_index] = data_index;
        ++row_index;

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
        {
          col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
          elements[data_index] = *col_it;
          ++data_index;
        }
        data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
      }
      row_buffer[row_index] = data_index;

      gpu_matrix.set(&row_buffer[0], &col_buffer[0], &elements[0],
                     static_cast<unsigned int>(cpu_matrix.size1()),
                     static_cast<unsigned int>(cpu_matrix.size2()),
                     static_cast<unsigned int>(num_entries));
    }
  }

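  /** @brief Copies a sparse matrix given as an STL container of maps (one map of column/value pairs per row) to the OpenCL device.
  *
  * @param cpu_matrix   A sparse matrix of type std::vector< std::map<unsigned int, SCALARTYPE> > on the host
  * @param gpu_matrix   The compressed_matrix on the device to be filled
  */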
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
  {
    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
  }

#ifdef VIENNACL_HAVE_EIGEN
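  /** @brief Copies a sparse matrix from Eigen to the OpenCL device. The non-zeros are collected into an STL intermediate first, since Eigen's inner iteration order depends on the storage flags. */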
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(const Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(eigen_matrix.rows());

    for (int k=0; k < eigen_matrix.outerSize(); ++k)
      for (typename Eigen::SparseMatrix<SCALARTYPE, flags>::InnerIterator it(eigen_matrix, k); it; ++it)
        stl_matrix[it.row()][it.col()] = it.value();

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
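  /** @brief Copies a sparse matrix from MTL4 to the OpenCL device, traversing the non-zeros via MTL4's cursor/property-map interface and collecting them into an STL intermediate. */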
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const mtl::compressed2D<SCALARTYPE> & cpu_matrix,
            compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    typedef mtl::compressed2D<SCALARTYPE>  MatrixType;

    std::vector< std::map<unsigned int, SCALARTYPE> > stl_matrix(cpu_matrix.num_rows());

    using mtl::traits::range_generator;
    using mtl::traits::range::min;

    //the matrix may be row- or column-major; pick the cheaper of the two traversals:
    typedef typename min<range_generator<mtl::tag::row, MatrixType>,
                         range_generator<mtl::tag::col, MatrixType> >::type   range_type;
    range_type my_range;

    //type of the outer cursor (rows or columns):
    typedef typename range_type::type                                         c_type;
    //type of the inner cursor (non-zeros within a row or column):
    typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type;

    //property maps for accessing row index, column index, and value of a non-zero:
    typename mtl::traits::row<MatrixType>::type                               row(cpu_matrix);
    typename mtl::traits::col<MatrixType>::type                               col(cpu_matrix);
    typename mtl::traits::const_value<MatrixType>::type                       value(cpu_matrix);

    for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor)
      for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor)
        stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor);

    copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(stl_matrix), gpu_matrix);
  }
#endif


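  /** @brief Copies a sparse matrix from the OpenCL device to the host.
  *
  * The host matrix type must provide resize(rows, cols, preserve) and write access via operator()(row, col), as e.g. boost::numeric::ublas::compressed_matrix does.
  *
  * @param gpu_matrix   The compressed_matrix on the device
  * @param cpu_matrix   The sparse host matrix to be filled
  */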
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            CPU_MATRIX & cpu_matrix )
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);

      //get raw CSR data from the device:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(), CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(), CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //fill the host matrix, skipping the zero entries introduced by the ALIGNMENT padding:
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          if (col_buffer[data_index] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data at col_buffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
            return;
          }

          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
    }
  }

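  /** @brief Copies a sparse matrix from the OpenCL device to the host, using an STL container of maps (one map of column/value pairs per row).
  *
  * @param gpu_matrix   The compressed_matrix on the device
  * @param cpu_matrix   The sparse host matrix to be filled
  */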
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
  {
    tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
    copy(gpu_matrix, temp);
  }

#ifdef VIENNACL_HAVE_EIGEN
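  /** @brief Copies a sparse matrix from the OpenCL device to Eigen, using the Eigen 2 fill interface (startFill()/fill()/endFill()). */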
  template <typename SCALARTYPE, int flags, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            Eigen::SparseMatrix<SCALARTYPE, flags> & eigen_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(static_cast<unsigned int>(eigen_matrix.rows()) >= gpu_matrix.size1()
             && static_cast<unsigned int>(eigen_matrix.cols()) >= gpu_matrix.size2()
             && "Provided Eigen compressed matrix is too small!");

      //get raw CSR data from the device:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //fill the Eigen matrix, skipping the zero entries introduced by the ALIGNMENT padding:
      eigen_matrix.setZero();
      eigen_matrix.startFill();
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            eigen_matrix.fill(row-1, col_buffer[data_index]) = elements[data_index];
          ++data_index;
        }
      }
      eigen_matrix.endFill();
    }
  }
#endif

#ifdef VIENNACL_HAVE_MTL4
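  /** @brief Copies a sparse matrix from the OpenCL device to MTL4, writing the entries through an mtl::matrix::inserter. */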
  template <typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
            mtl::compressed2D<SCALARTYPE> & mtl4_matrix)
  {
    if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
    {
      assert(mtl4_matrix.num_rows() >= gpu_matrix.size1()
             && mtl4_matrix.num_cols() >= gpu_matrix.size2()
             && "Provided MTL4 compressed matrix is too small!");

      //get raw CSR data from the device:
      std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
      std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
      std::vector<SCALARTYPE> elements(gpu_matrix.nnz());

      cl_int err;
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle1(),
                                CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle2(),
                                CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(),
                                CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
      VIENNACL_ERR_CHECK(err);
      viennacl::ocl::get_queue().finish();

      //fill the MTL4 matrix through an inserter (entries are committed when the inserter goes out of scope),
      //skipping the zero entries introduced by the ALIGNMENT padding:
      mtl::matrix::inserter< mtl::compressed2D<SCALARTYPE> > ins(mtl4_matrix);
      unsigned int data_index = 0;
      for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
      {
        while (data_index < row_buffer[row])
        {
          assert(col_buffer[data_index] < gpu_matrix.size2() && "ViennaCL encountered invalid data at col_buffer");
          if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
            ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<SCALARTYPE> >::value_type(elements[data_index]);
          ++data_index;
        }
      }
    }
  }
#endif


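  /** @brief A sparse matrix on the OpenCL device in compressed sparse row (CSR) format: a row jumper array, a column index array, and the array of non-zero entries. The number of entries per row is padded to a multiple of ALIGNMENT.
  *
  * Usage (a minimal sketch; assumes a ViennaCL OpenCL context is already initialized and relies on the default ALIGNMENT from forwards.h):
  * \code
  * std::vector< std::map<unsigned int, float> > host_matrix(100);  //a 100x100 sparse matrix on the host
  * host_matrix[0][0] = 1.0f;                                       //fill entries as needed...
  *
  * viennacl::compressed_matrix<float> vcl_matrix(100, 100);
  * viennacl::copy(host_matrix, vcl_matrix);   //host -> device
  * viennacl::copy(vcl_matrix, host_matrix);   //device -> host
  * \endcode
  *
  * @tparam SCALARTYPE   The underlying floating point type (either float or double)
  * @tparam ALIGNMENT    The number of entries allocated per row is rounded up to a multiple of this value
  */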
  template<class SCALARTYPE, unsigned int ALIGNMENT>
  class compressed_matrix
  {
    public:
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

      /** @brief Default construction of an empty matrix */
      compressed_matrix() : _rows(0), _cols(0), _nonzeros(0) { viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init(); }

      /** @brief Construction with the supplied numbers of rows and columns. If the number of non-zeros is positive, device memory is allocated accordingly.
      *
      * @param rows       Number of rows
      * @param cols       Number of columns
      * @param nonzeros   Optional number of non-zeros for memory preallocation
      */
      explicit compressed_matrix(unsigned int rows, unsigned int cols, unsigned int nonzeros = 0) :
        _rows(rows), _cols(cols), _nonzeros(nonzeros)
      {
        viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, ALIGNMENT>::init();

        if (rows > 0)
          _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * (rows + 1)); //the CSR row jumper array has rows+1 entries
        if (nonzeros > 0)
        {
          _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * nonzeros);
          _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros);
        }
      }

      /** @brief Wraps existing OpenCL buffers holding CSR data. The reference counters of the supplied handles are increased so that the user-provided memory is not freed when the matrix is destroyed. */
      explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements, unsigned int rows, unsigned int cols, unsigned int nonzeros) :
        _rows(rows), _cols(cols), _nonzeros(nonzeros)
      {
        _row_buffer = mem_row_buffer;
        _row_buffer.inc();
        _col_buffer = mem_col_buffer;
        _col_buffer.inc();
        _elements = mem_elements;
        _elements.inc();
      }

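      /** @brief Populates the matrix from host CSR arrays: device memory is (re-)allocated and the supplied data is copied over.
      *
      * @param row_jumper   Array of length rows+1; entry i holds the start index of row i within col_buffer/elements
      * @param col_buffer   Array of column indices, one per entry
      * @param elements     Array of matrix entries
      * @param rows         Number of rows of the matrix
      * @param cols         Number of columns of the matrix
      * @param nonzeros     Number of entries in col_buffer/elements (including alignment padding)
      */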
      void set(unsigned int * row_jumper, unsigned int * col_buffer, SCALARTYPE * elements, unsigned int rows, unsigned int cols, unsigned int nonzeros)
      {
        assert(rows > 0);
        assert(cols > 0);
        assert(nonzeros > 0);

        _row_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * (rows + 1), row_jumper);
        _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * nonzeros, col_buffer);
        _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * nonzeros, elements);
        _nonzeros = nonzeros;
        _rows = rows;
        _cols = cols;
      }

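      /** @brief Allocates device memory for at least new_nonzeros entries, preserving the existing data. Does nothing if the matrix already holds enough memory. */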
      void reserve(unsigned int new_nonzeros)
      {
        if (new_nonzeros > _nonzeros)
        {
          viennacl::ocl::handle<cl_mem> _col_buffer_old = _col_buffer;
          viennacl::ocl::handle<cl_mem> _elements_old = _elements;
          _col_buffer = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(unsigned int) * new_nonzeros);
          _elements = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * new_nonzeros);

          if (_nonzeros > 0)  //copy the existing entries over to the new (larger) buffers
          {
            cl_int err;
            err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _col_buffer_old, _col_buffer, 0, 0, sizeof(unsigned int)*_nonzeros, 0, NULL, NULL);
            VIENNACL_ERR_CHECK(err);
            err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), _elements_old, _elements, 0, 0, sizeof(SCALARTYPE)*_nonzeros, 0, NULL, NULL);
            VIENNACL_ERR_CHECK(err);
          }

          _nonzeros = new_nonzeros;
        }
      }

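      /** @brief Resizes the matrix. The resize is carried out on the host via an STL intermediate and the data is copied back to the device, so this is an expensive operation.
      *
      * @param new_size1   New number of rows
      * @param new_size2   New number of columns
      * @param preserve    If true, existing entries are kept (entries outside the new bounds are dropped)
      */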
      void resize(unsigned int new_size1, unsigned int new_size2, bool preserve = true)
      {
        assert(new_size1 > 0 && new_size2 > 0);

        if (new_size1 != _rows || new_size2 != _cols)
        {
          std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
          if (_rows > 0)
            stl_sparse_matrix.resize(_rows);

          if (preserve && _rows > 0)
            viennacl::copy(*this, stl_sparse_matrix);

          stl_sparse_matrix.resize(new_size1);

          //discard entries with column index larger or equal than new_size2:
          if (new_size2 < _cols && _rows > 0)
          {
            for (size_t i=0; i<stl_sparse_matrix.size(); ++i)
            {
              std::list<unsigned int> to_delete;
              for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
                   it != stl_sparse_matrix[i].end();
                   ++it)
              {
                if (it->first >= new_size2)
                  to_delete.push_back(it->first);
              }

              for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
                stl_sparse_matrix[i].erase(*it);
            }
          }

          copy(stl_sparse_matrix, *this);

          _rows = new_size1;
          _cols = new_size2;
        }
      }

      /** @brief Returns the number of rows */
      const unsigned int & size1() const { return _rows; }
      /** @brief Returns the number of columns */
      const unsigned int & size2() const { return _cols; }
      /** @brief Returns the number of non-zero entries (including alignment padding) */
      const unsigned int & nnz() const { return _nonzeros; }

      /** @brief Returns the OpenCL handle to the row jumper array */
      const viennacl::ocl::handle<cl_mem> & handle1() const { return _row_buffer; }
      /** @brief Returns the OpenCL handle to the column index array */
      const viennacl::ocl::handle<cl_mem> & handle2() const { return _col_buffer; }
      /** @brief Returns the OpenCL handle to the array of non-zero entries */
      const viennacl::ocl::handle<cl_mem> & handle() const { return _elements; }

    private:
      /** @brief Copy construction is not allowed */
      compressed_matrix(compressed_matrix const &);

      /** @brief Assignment is not allowed */
      compressed_matrix & operator=(compressed_matrix const &);

      unsigned int _rows;
      unsigned int _cols;
      unsigned int _nonzeros;
      viennacl::ocl::handle<cl_mem> _row_buffer;
      viennacl::ocl::handle<cl_mem> _col_buffer;
      viennacl::ocl::handle<cl_mem> _elements;
  };

}

#endif