00001 #ifndef _VIENNACL_MATRIX_ROW_KERNELS_HPP_
00002 #define _VIENNACL_MATRIX_ROW_KERNELS_HPP_
00003 #include "viennacl/tools/tools.hpp"
00004 #include "viennacl/ocl/kernel.hpp"
00005 #include "viennacl/ocl/platform.hpp"
00006 #include "viennacl/ocl/utils.hpp"
00007 #include "viennacl/linalg/kernels/matrix_row_source.h"
00008
00009
00010 namespace viennacl
00011 {
00012 namespace linalg
00013 {
00014 namespace kernels
00015 {
// Primary template, declared but never defined here: only the explicit
// specializations below (float/double scalars, alignment 1 or 16) can be used.
// TYPE is the scalar type, alignment the alignment tag used to pick a specialization.
00016 template<class TYPE, unsigned int alignment>
00017 struct matrix_row;
00018
00019
00021 template <>
00022 struct matrix_row<float, 16>
00023 {
00024 static std::string program_name()
00025 {
00026 return "f_matrix_row_16";
00027 }
00028 static void init()
00029 {
00030 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00031 static std::map<cl_context, bool> init_done;
00032 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00033 if (!init_done[context_.handle()])
00034 {
00035 std::string source;
00036 source.append(matrix_row_align1_unit_lower_triangular_substitute_inplace);
00037 source.append(matrix_row_align1_inplace_sub);
00038 source.append(matrix_row_align1_lower_triangular_substitute_inplace);
00039 source.append(matrix_row_align1_trans_vec_mul);
00040 source.append(matrix_row_align1_rank1_update);
00041 source.append(matrix_row_align1_sub);
00042 source.append(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace);
00043 source.append(matrix_row_align1_lu_factorize);
00044 source.append(matrix_row_align1_add);
00045 source.append(matrix_row_align1_vec_mul);
00046 source.append(matrix_row_align1_trans_lower_triangular_substitute_inplace);
00047 source.append(matrix_row_align1_inplace_divide);
00048 source.append(matrix_row_align1_trans_upper_triangular_substitute_inplace);
00049 source.append(matrix_row_align1_unit_upper_triangular_substitute_inplace);
00050 source.append(matrix_row_align1_inplace_add);
00051 source.append(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace);
00052 source.append(matrix_row_align1_scaled_rank1_update);
00053 source.append(matrix_row_align1_clear);
00054 source.append(matrix_row_align1_cpu_inplace_mult);
00055 source.append(matrix_row_align1_inplace_mult);
00056 source.append(matrix_row_align1_upper_triangular_substitute_inplace);
00057 std::string prog_name = program_name();
00058 #ifdef VIENNACL_BUILD_INFO
00059 std::cout << "Creating program " << prog_name << std::endl;
00060 #endif
00061 context_.add_program(source, prog_name);
00062 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00063 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00064 prog_.add_kernel("inplace_sub");
00065 prog_.add_kernel("lower_triangular_substitute_inplace");
00066 prog_.add_kernel("trans_vec_mul");
00067 prog_.add_kernel("rank1_update");
00068 prog_.add_kernel("sub");
00069 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00070 prog_.add_kernel("lu_factorize");
00071 prog_.add_kernel("add");
00072 prog_.add_kernel("vec_mul");
00073 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00074 prog_.add_kernel("inplace_divide");
00075 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00076 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00077 prog_.add_kernel("inplace_add");
00078 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00079 prog_.add_kernel("scaled_rank1_update");
00080 prog_.add_kernel("clear");
00081 prog_.add_kernel("cpu_inplace_mult");
00082 prog_.add_kernel("inplace_mult");
00083 prog_.add_kernel("upper_triangular_substitute_inplace");
00084 init_done[context_.handle()] = true;
00085 }
00086 }
00087 };
00088
00089 template <>
00090 struct matrix_row<float, 1>
00091 {
00092 static std::string program_name()
00093 {
00094 return "f_matrix_row_1";
00095 }
00096 static void init()
00097 {
00098 viennacl::ocl::DOUBLE_PRECISION_CHECKER<float>::apply();
00099 static std::map<cl_context, bool> init_done;
00100 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00101 if (!init_done[context_.handle()])
00102 {
00103 std::string source;
00104 source.append(matrix_row_align1_unit_lower_triangular_substitute_inplace);
00105 source.append(matrix_row_align1_inplace_sub);
00106 source.append(matrix_row_align1_lower_triangular_substitute_inplace);
00107 source.append(matrix_row_align1_trans_vec_mul);
00108 source.append(matrix_row_align1_rank1_update);
00109 source.append(matrix_row_align1_sub);
00110 source.append(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace);
00111 source.append(matrix_row_align1_lu_factorize);
00112 source.append(matrix_row_align1_add);
00113 source.append(matrix_row_align1_vec_mul);
00114 source.append(matrix_row_align1_trans_lower_triangular_substitute_inplace);
00115 source.append(matrix_row_align1_inplace_divide);
00116 source.append(matrix_row_align1_trans_upper_triangular_substitute_inplace);
00117 source.append(matrix_row_align1_unit_upper_triangular_substitute_inplace);
00118 source.append(matrix_row_align1_inplace_add);
00119 source.append(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace);
00120 source.append(matrix_row_align1_scaled_rank1_update);
00121 source.append(matrix_row_align1_clear);
00122 source.append(matrix_row_align1_cpu_inplace_mult);
00123 source.append(matrix_row_align1_inplace_mult);
00124 source.append(matrix_row_align1_upper_triangular_substitute_inplace);
00125 std::string prog_name = program_name();
00126 #ifdef VIENNACL_BUILD_INFO
00127 std::cout << "Creating program " << prog_name << std::endl;
00128 #endif
00129 context_.add_program(source, prog_name);
00130 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00131 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00132 prog_.add_kernel("inplace_sub");
00133 prog_.add_kernel("lower_triangular_substitute_inplace");
00134 prog_.add_kernel("trans_vec_mul");
00135 prog_.add_kernel("rank1_update");
00136 prog_.add_kernel("sub");
00137 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00138 prog_.add_kernel("lu_factorize");
00139 prog_.add_kernel("add");
00140 prog_.add_kernel("vec_mul");
00141 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00142 prog_.add_kernel("inplace_divide");
00143 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00144 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00145 prog_.add_kernel("inplace_add");
00146 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00147 prog_.add_kernel("scaled_rank1_update");
00148 prog_.add_kernel("clear");
00149 prog_.add_kernel("cpu_inplace_mult");
00150 prog_.add_kernel("inplace_mult");
00151 prog_.add_kernel("upper_triangular_substitute_inplace");
00152 init_done[context_.handle()] = true;
00153 }
00154 }
00155 };
00156
00157
00158
00160 template <>
00161 struct matrix_row<double, 16>
00162 {
00163 static std::string program_name()
00164 {
00165 return "d_matrix_row_16";
00166 }
00167 static void init()
00168 {
00169 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00170 static std::map<cl_context, bool> init_done;
00171 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00172 if (!init_done[context_.handle()])
00173 {
00174 std::string source;
00175 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00176 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_lower_triangular_substitute_inplace, fp64_ext));
00177 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_sub, fp64_ext));
00178 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lower_triangular_substitute_inplace, fp64_ext));
00179 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_vec_mul, fp64_ext));
00180 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_rank1_update, fp64_ext));
00181 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_sub, fp64_ext));
00182 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace, fp64_ext));
00183 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lu_factorize, fp64_ext));
00184 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_add, fp64_ext));
00185 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_vec_mul, fp64_ext));
00186 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_lower_triangular_substitute_inplace, fp64_ext));
00187 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_divide, fp64_ext));
00188 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_upper_triangular_substitute_inplace, fp64_ext));
00189 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_upper_triangular_substitute_inplace, fp64_ext));
00190 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_add, fp64_ext));
00191 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace, fp64_ext));
00192 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_scaled_rank1_update, fp64_ext));
00193 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_clear, fp64_ext));
00194 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_cpu_inplace_mult, fp64_ext));
00195 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_mult, fp64_ext));
00196 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_upper_triangular_substitute_inplace, fp64_ext));
00197 std::string prog_name = program_name();
00198 #ifdef VIENNACL_BUILD_INFO
00199 std::cout << "Creating program " << prog_name << std::endl;
00200 #endif
00201 context_.add_program(source, prog_name);
00202 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00203 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00204 prog_.add_kernel("inplace_sub");
00205 prog_.add_kernel("lower_triangular_substitute_inplace");
00206 prog_.add_kernel("trans_vec_mul");
00207 prog_.add_kernel("rank1_update");
00208 prog_.add_kernel("sub");
00209 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00210 prog_.add_kernel("lu_factorize");
00211 prog_.add_kernel("add");
00212 prog_.add_kernel("vec_mul");
00213 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00214 prog_.add_kernel("inplace_divide");
00215 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00216 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00217 prog_.add_kernel("inplace_add");
00218 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00219 prog_.add_kernel("scaled_rank1_update");
00220 prog_.add_kernel("clear");
00221 prog_.add_kernel("cpu_inplace_mult");
00222 prog_.add_kernel("inplace_mult");
00223 prog_.add_kernel("upper_triangular_substitute_inplace");
00224 init_done[context_.handle()] = true;
00225 }
00226 }
00227 };
00228
00229 template <>
00230 struct matrix_row<double, 1>
00231 {
00232 static std::string program_name()
00233 {
00234 return "d_matrix_row_1";
00235 }
00236 static void init()
00237 {
00238 viennacl::ocl::DOUBLE_PRECISION_CHECKER<double>::apply();
00239 static std::map<cl_context, bool> init_done;
00240 viennacl::ocl::context & context_ = viennacl::ocl::current_context();
00241 if (!init_done[context_.handle()])
00242 {
00243 std::string source;
00244 std::string fp64_ext = viennacl::ocl::current_device().double_support_extension();
00245 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_lower_triangular_substitute_inplace, fp64_ext));
00246 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_sub, fp64_ext));
00247 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lower_triangular_substitute_inplace, fp64_ext));
00248 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_vec_mul, fp64_ext));
00249 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_rank1_update, fp64_ext));
00250 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_sub, fp64_ext));
00251 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_upper_triangular_substitute_inplace, fp64_ext));
00252 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_lu_factorize, fp64_ext));
00253 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_add, fp64_ext));
00254 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_vec_mul, fp64_ext));
00255 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_lower_triangular_substitute_inplace, fp64_ext));
00256 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_divide, fp64_ext));
00257 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_upper_triangular_substitute_inplace, fp64_ext));
00258 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_unit_upper_triangular_substitute_inplace, fp64_ext));
00259 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_add, fp64_ext));
00260 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_trans_unit_lower_triangular_substitute_inplace, fp64_ext));
00261 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_scaled_rank1_update, fp64_ext));
00262 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_clear, fp64_ext));
00263 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_cpu_inplace_mult, fp64_ext));
00264 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_inplace_mult, fp64_ext));
00265 source.append(viennacl::tools::make_double_kernel(matrix_row_align1_upper_triangular_substitute_inplace, fp64_ext));
00266 std::string prog_name = program_name();
00267 #ifdef VIENNACL_BUILD_INFO
00268 std::cout << "Creating program " << prog_name << std::endl;
00269 #endif
00270 context_.add_program(source, prog_name);
00271 viennacl::ocl::program & prog_ = context_.get_program(prog_name);
00272 prog_.add_kernel("unit_lower_triangular_substitute_inplace");
00273 prog_.add_kernel("inplace_sub");
00274 prog_.add_kernel("lower_triangular_substitute_inplace");
00275 prog_.add_kernel("trans_vec_mul");
00276 prog_.add_kernel("rank1_update");
00277 prog_.add_kernel("sub");
00278 prog_.add_kernel("trans_unit_upper_triangular_substitute_inplace");
00279 prog_.add_kernel("lu_factorize");
00280 prog_.add_kernel("add");
00281 prog_.add_kernel("vec_mul");
00282 prog_.add_kernel("trans_lower_triangular_substitute_inplace");
00283 prog_.add_kernel("inplace_divide");
00284 prog_.add_kernel("trans_upper_triangular_substitute_inplace");
00285 prog_.add_kernel("unit_upper_triangular_substitute_inplace");
00286 prog_.add_kernel("inplace_add");
00287 prog_.add_kernel("trans_unit_lower_triangular_substitute_inplace");
00288 prog_.add_kernel("scaled_rank1_update");
00289 prog_.add_kernel("clear");
00290 prog_.add_kernel("cpu_inplace_mult");
00291 prog_.add_kernel("inplace_mult");
00292 prog_.add_kernel("upper_triangular_substitute_inplace");
00293 init_done[context_.handle()] = true;
00294 }
00295 }
00296 };
00297
00298
00299 }
00300 }
00301 }
00302 #endif