00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _VIENNACL_COORDINATE_MATRIX_HPP_
00016 #define _VIENNACL_COORDINATE_MATRIX_HPP_
00017
00022 #include <map>
00023 #include <vector>
00024 #include <list>
00025
00026 #include "viennacl/forwards.h"
00027 #include "viennacl/ocl/backend.hpp"
00028 #include "viennacl/vector.hpp"
00029
00030 #include "viennacl/linalg/coordinate_matrix_operations.hpp"
00031
00032 namespace viennacl
00033 {
00034
00035
00036
00044 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00045 void copy(const CPU_MATRIX & cpu_matrix,
00046 coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
00047 {
00048 size_t group_num = 64;
00049
00050
00051 if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
00052 {
00053 unsigned int num_entries = 0;
00054 for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
00055 row_it != cpu_matrix.end1();
00056 ++row_it)
00057 {
00058 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
00059 col_it != row_it.end();
00060 ++col_it)
00061 {
00062 ++num_entries;
00063 }
00064 }
00065
00066
00067 std::cout << "Number of entries: " << num_entries << std::endl;
00068 gpu_matrix.nonzeros_ = num_entries;
00069 gpu_matrix.rows_ = cpu_matrix.size1();
00070 gpu_matrix.cols_ = cpu_matrix.size2();
00071
00072 std::vector<cl_uint> coord_buffer(2*gpu_matrix.internal_nnz());
00073 std::vector<cl_uint> group_boundaries(group_num + 1);
00074 std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());
00075
00076 unsigned int data_index = 0;
00077 unsigned int current_fraction = 0;
00078
00079 for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
00080 row_it != cpu_matrix.end1();
00081 ++row_it)
00082 {
00083 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
00084 col_it != row_it.end();
00085 ++col_it)
00086 {
00087 coord_buffer[2*data_index] = static_cast<unsigned int>(col_it.index1());
00088 coord_buffer[2*data_index + 1] = static_cast<unsigned int>(col_it.index2());
00089 elements[data_index] = *col_it;
00090 ++data_index;
00091 }
00092
00093 if (data_index > (current_fraction + 1) / static_cast<double>(group_num) * num_entries)
00094 group_boundaries[++current_fraction] = data_index;
00095 }
00096
00097
00098 group_boundaries[group_num] = data_index;
00099
00100
00101
00102
00103
00104
00105 gpu_matrix.coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, coord_buffer);
00106 gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, elements);
00107 gpu_matrix.group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, group_boundaries);
00108 }
00109 }
00110
00116 template <typename SCALARTYPE, unsigned int ALIGNMENT>
00117 void copy(const std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix,
00118 coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
00119 {
00120 copy(tools::const_sparse_matrix_adapter<SCALARTYPE>(cpu_matrix), gpu_matrix);
00121 }
00122
00123
00133 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00134 void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
00135 CPU_MATRIX & cpu_matrix )
00136 {
00137 if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
00138 {
00139 cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2(), false);
00140
00141
00142 std::vector<unsigned int> coord_buffer(2*gpu_matrix.nnz());
00143 std::vector<SCALARTYPE> elements(gpu_matrix.nnz());
00144
00145
00146
00147 cl_int err;
00148 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle12(), CL_TRUE, 0, sizeof(unsigned int)* 2 *gpu_matrix.nnz(), &(coord_buffer[0]), 0, NULL, NULL);
00149 VIENNACL_ERR_CHECK(err);
00150 err = clEnqueueReadBuffer(viennacl::ocl::get_queue().handle(), gpu_matrix.handle(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
00151 VIENNACL_ERR_CHECK(err);
00152 viennacl::ocl::get_queue().finish();
00153
00154
00155 for (unsigned int index = 0; index < gpu_matrix.nnz(); ++index)
00156 {
00157 cpu_matrix(coord_buffer[2*index], coord_buffer[2*index+1]) = elements[index];
00158 }
00159 }
00160 }
00161
00167 template <typename SCALARTYPE, unsigned int ALIGNMENT>
00168 void copy(const coordinate_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
00169 std::vector< std::map<unsigned int, SCALARTYPE> > & cpu_matrix)
00170 {
00171 tools::sparse_matrix_adapter<SCALARTYPE> temp(cpu_matrix);
00172 copy(gpu_matrix, temp);
00173 }
00174
00175
00177
00184 template<class SCALARTYPE, unsigned int ALIGNMENT >
00185 class coordinate_matrix
00186 {
00187 public:
00188 typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type;
00189
00191 coordinate_matrix() : rows_(0), cols_(0), nonzeros_(0) { viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init(); }
00192
00199 coordinate_matrix(unsigned int rows, unsigned int cols, unsigned int nonzeros = 0) :
00200 rows_(rows), cols_(cols), nonzeros_(nonzeros)
00201 {
00202 viennacl::linalg::kernels::coordinate_matrix<SCALARTYPE, ALIGNMENT>::init();
00203 if (nonzeros > 0)
00204 {
00205 coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
00206 elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
00207 group_boundaries_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * (rows + 1));
00208 }
00209 }
00210
00212 void reserve(unsigned int new_nonzeros)
00213 {
00214 if (new_nonzeros > nonzeros_)
00215 {
00216 viennacl::ocl::handle<cl_mem> coord_buffer_old = coord_buffer_;
00217 viennacl::ocl::handle<cl_mem> elements_old = elements_;
00218 coord_buffer_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint) * 2 * internal_nnz());
00219 elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, sizeof(SCALARTYPE) * internal_nnz());
00220
00221 cl_int err;
00222 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, 0, sizeof(cl_uint) * 2 * nonzeros_, 0, NULL, NULL);
00223 VIENNACL_ERR_CHECK(err);
00224 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, 0, sizeof(SCALARTYPE)*nonzeros_, 0, NULL, NULL);
00225 VIENNACL_ERR_CHECK(err);
00226
00227
00228 std::vector<long> temp(internal_nnz() - nonzeros_);
00229 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), coord_buffer_old, coord_buffer_, 0, nonzeros_, sizeof(cl_uint) * 2 * temp.size(), 0, NULL, NULL);
00230 VIENNACL_ERR_CHECK(err);
00231 err = clEnqueueCopyBuffer(viennacl::ocl::get_queue().handle(), elements_old, elements_, 0, nonzeros_, sizeof(SCALARTYPE)*temp.size(), 0, NULL, NULL);
00232 VIENNACL_ERR_CHECK(err);
00233 }
00234 }
00235
00242 void resize(unsigned int new_size1, unsigned int new_size2, bool preserve = true)
00243 {
00244 assert (new_size1 > 0 && new_size2 > 0);
00245
00246 if (new_size1 < rows_ || new_size2 < cols_)
00247 {
00248 std::vector<std::map<unsigned int, SCALARTYPE> > stl_sparse_matrix;
00249 if (rows_ > 0)
00250 stl_sparse_matrix.resize(rows_);
00251
00252 if (preserve && rows_ > 0)
00253 viennacl::copy(*this, stl_sparse_matrix);
00254
00255 stl_sparse_matrix.resize(new_size1);
00256
00257 std::cout << "Cropping STL matrix of size " << stl_sparse_matrix.size() << std::endl;
00258 if (new_size2 < cols_ && rows_ > 0)
00259 {
00260 for (size_t i=0; i<stl_sparse_matrix.size(); ++i)
00261 {
00262 std::list<unsigned int> to_delete;
00263 for (typename std::map<unsigned int, SCALARTYPE>::iterator it = stl_sparse_matrix[i].begin();
00264 it != stl_sparse_matrix[i].end();
00265 ++it)
00266 {
00267 if (it->first >= new_size2)
00268 to_delete.push_back(it->first);
00269 }
00270
00271 for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it)
00272 stl_sparse_matrix[i].erase(*it);
00273 }
00274 std::cout << "Cropping done..." << std::endl;
00275 }
00276
00277 rows_ = new_size1;
00278 cols_ = new_size2;
00279 viennacl::copy(stl_sparse_matrix, *this);
00280 }
00281
00282 rows_ = new_size1;
00283 cols_ = new_size2;
00284 }
00285
00286
00288 unsigned int size1() const { return rows_; }
00290 unsigned int size2() const { return cols_; }
00292 unsigned int nnz() const { return nonzeros_; }
00294 unsigned int internal_nnz() const { return viennacl::tools::roundUpToNextMultiple<unsigned int>(nonzeros_, ALIGNMENT);; }
00295
00297 const viennacl::ocl::handle<cl_mem> & handle12() const { return coord_buffer_; }
00299 const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
00301 const viennacl::ocl::handle<cl_mem> & handle3() const { return group_boundaries_; }
00302
00303 #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment
00304 template <typename CPU_MATRIX>
00305 friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix & gpu_matrix );
00306 #else
00307 template <typename CPU_MATRIX, typename SCALARTYPE2, unsigned int ALIGNMENT2>
00308 friend void copy(const CPU_MATRIX & cpu_matrix, coordinate_matrix<SCALARTYPE2, ALIGNMENT2> & gpu_matrix );
00309 #endif
00310
00311 private:
00313 coordinate_matrix(coordinate_matrix const &);
00314
00316 coordinate_matrix & operator=(coordinate_matrix const &);
00317
00318
00319 unsigned int rows_;
00320 unsigned int cols_;
00321 unsigned int nonzeros_;
00322 viennacl::ocl::handle<cl_mem> coord_buffer_;
00323 viennacl::ocl::handle<cl_mem> elements_;
00324 viennacl::ocl::handle<cl_mem> group_boundaries_;
00325 };
00326
00327
00328 }
00329
00330 #endif