• Main Page
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

/data/development/ViennaCL/ViennaCL-1.1.2/viennacl/linalg/kernels/matrix_prod_col_col_row_source.h

Go to the documentation of this file.
00001 #ifndef _VIENNACL_MATRIX_PROD_COL_COL_ROW_SOURCE_HPP_
00002 #define _VIENNACL_MATRIX_PROD_COL_COL_ROW_SOURCE_HPP_
00003 //Automatically generated file from aux-directory, do not edit manually!
00004 namespace viennacl
00005 {
00006  namespace linalg
00007  {
00008   namespace kernels
00009   {
// OpenCL C source for the kernel `prod_TT`, kept as one string constant built from
// adjacent string literals. According to the comments embedded in the kernel text it
// computes C = A^T * B^T with A and B stored col_major and C stored row_major.
//
// Kernel arguments: for each of A, B and C a __global float pointer followed by
// rows, cols, internal_rows and internal_cols; then two __local float scratch
// buffers, bufA and bufB. Only A_rows, A_cols, A_internal_rows, B_rows, B_cols,
// B_internal_rows and C_internal_cols are read by the kernel body.
//
// Each work-item accumulates one value Csub over block_num tiles and stores it to
// C[get_global_id(0) * C_internal_cols + get_global_id(1)].
// NOTE(review): block_size comes from get_local_size(0) only, so the tile indexing
// presumably expects a square work-group and bufA/bufB of at least
// block_size * block_size floats each -- confirm against the host-side launch code.
00010 const char * const matrix_prod_col_col_row_align1_prod_TT = 
00011 "// file automatically generated - do not edit!\n"
00012 "// matrix-matrix multiplication C = A^T * B^T\n"
00013 "// matrix layouts: C...row_major, A...col_major, B...col_major\n"
00014 "__kernel void prod_TT(\n"
00015 "          __global const float * A,\n"
00016 "          unsigned int A_rows,\n"
00017 "          unsigned int A_cols,\n"
00018 "          unsigned int A_internal_rows,\n"
00019 "          unsigned int A_internal_cols,\n"
00020 "          __global const float * B,  \n"
00021 "          unsigned int B_rows,\n"
00022 "          unsigned int B_cols,\n"
00023 "          unsigned int B_internal_rows,\n"
00024 "          unsigned int B_internal_cols,\n"
00025 "          __global float * C,\n"
00026 "          unsigned int C_rows,\n"
00027 "          unsigned int C_cols,\n"
00028 "          unsigned int C_internal_rows,\n"
00029 "          unsigned int C_internal_cols,\n"
00030 "          __local float * bufA,\n"
00031 "          __local float * bufB) \n"
00032 "{ \n"
00033 "  int block_size = get_local_size(0);\n"
00034 "  int row_block_id = get_group_id(0);\n"
00035 "  int col_block_id = get_group_id(1);\n"
00036 "  int row_thread_id = get_local_id(0);\n"
00037 "  int col_thread_id = get_local_id(1);\n"
// Start offsets of this work-group in A and B, and the amount each offset advances
// per loop iteration: A moves by block_size, B by block_size * B_internal_rows.
00038 "  int aBegin = row_block_id * block_size * A_internal_rows;\n"
00039 "  int aStep = block_size;\n"
00040 "  int bBegin = col_block_id * block_size;\n"
00041 "  int bStep = block_size * B_internal_rows;\n"
// Number of loop iterations: A_rows / block_size, rounded up when there is a remainder.
00042 "  int block_num = A_rows / block_size;\n"
00043 "  if (block_num * block_size != A_rows)\n"
00044 "    ++block_num;\n"
00045 "  float Csub = 0;\n"
// Per-work-item offsets added to aBegin / bBegin when loading.
00046 "  int aOffset = row_thread_id * A_internal_rows + col_thread_id;\n"
00047 "  int bOffset = row_thread_id * B_internal_rows + col_thread_id;\n"
00048 "  for (int block = 0;\n"
00049 "           block < block_num;\n"
00050 "           ++block)\n"
00051 "  {\n"
// Load one element of A into bufA, or 0 when the index is outside A_rows / A_cols.
00052 "    if (block * block_size + col_thread_id < A_rows && get_global_id(0) < A_cols)\n"
00053 "      bufA[row_thread_id * block_size + col_thread_id] = A[aBegin + aOffset];\n"
00054 "    else\n"
00055 "      bufA[row_thread_id * block_size + col_thread_id] = 0;\n"
// Load one element of B into bufB, or 0 when the index is outside B_cols / B_rows.
00056 "    if ( (block * block_size + row_thread_id < B_cols) && get_global_id(1) < B_rows )\n"
00057 "      bufB[row_thread_id * block_size + col_thread_id] = B[bBegin + bOffset]; \n"
00058 "    else\n"
00059 "      bufB[row_thread_id * block_size + col_thread_id] = 0;\n"
// Work-group barrier with a local-memory fence before bufA / bufB are read back.
00060 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00061 "    for (int k = 0; k < block_size; ++k)\n"
00062 "      Csub += bufA[row_thread_id * block_size + k] * bufB[k * block_size + col_thread_id];\n"
// Second barrier before the next iteration overwrites bufA / bufB.
00063 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00064 "    aBegin += aStep;\n"
00065 "    bBegin += bStep;\n"
00066 "  }\n"
// Store the result only when the global ids are inside A_cols x B_rows.
00067 "  if (get_global_id(0) < A_cols && get_global_id(1) < B_rows)\n"
00068 "    C[get_global_id(0) * C_internal_cols + get_global_id(1)] = Csub;\n"
00069 "}\n"
00070 ; //matrix_prod_col_col_row_align1_prod_TT
00071 
// OpenCL C source for the kernel `prod_TA`, kept as one string constant built from
// adjacent string literals. According to the comments embedded in the kernel text it
// computes C = A^T * B with A and B stored col_major and C stored row_major.
//
// Kernel arguments: for each of A, B and C a __global float pointer followed by
// rows, cols, internal_rows and internal_cols; then two __local float scratch
// buffers, bufA and bufB. Only A_rows, A_cols, A_internal_rows, B_rows, B_cols,
// B_internal_rows and C_internal_cols are read by the kernel body.
//
// Each work-item accumulates one value Csub over block_num tiles and stores it to
// C[get_global_id(0) * C_internal_cols + get_global_id(1)].
// NOTE(review): block_size comes from get_local_size(0) only, so the tile indexing
// presumably expects a square work-group and bufA/bufB of at least
// block_size * block_size floats each -- confirm against the host-side launch code.
00072 const char * const matrix_prod_col_col_row_align1_prod_TA = 
00073 "// file automatically generated - do not edit!\n"
00074 "// matrix-matrix multiplication C = A^T * B\n"
00075 "// matrix layouts: C...row_major, A...col_major, B...col_major\n"
00076 "__kernel void prod_TA(\n"
00077 "          __global const float * A,\n"
00078 "          unsigned int A_rows,\n"
00079 "          unsigned int A_cols,\n"
00080 "          unsigned int A_internal_rows,\n"
00081 "          unsigned int A_internal_cols,\n"
00082 "          __global const float * B,  \n"
00083 "          unsigned int B_rows,\n"
00084 "          unsigned int B_cols,\n"
00085 "          unsigned int B_internal_rows,\n"
00086 "          unsigned int B_internal_cols,\n"
00087 "          __global float * C,\n"
00088 "          unsigned int C_rows,\n"
00089 "          unsigned int C_cols,\n"
00090 "          unsigned int C_internal_rows,\n"
00091 "          unsigned int C_internal_cols,\n"
00092 "          __local float * bufA,\n"
00093 "          __local float * bufB) \n"
00094 "{ \n"
00095 "  int block_size = get_local_size(0);\n"
00096 "  int row_block_id = get_group_id(0);\n"
00097 "  int col_block_id = get_group_id(1);\n"
00098 "  int row_thread_id = get_local_id(0);\n"
00099 "  int col_thread_id = get_local_id(1);\n"
// Start offsets of this work-group in A and B; both offsets advance by block_size
// elements per loop iteration.
00100 "  int aBegin = row_block_id * block_size * A_internal_rows;\n"
00101 "  int aStep = block_size;\n"
00102 "  int bBegin = col_block_id * block_size * B_internal_rows;\n"
00103 "  int bStep = block_size;\n"
// Number of loop iterations: A_rows / block_size, rounded up when there is a remainder.
00104 "  int block_num = A_rows / block_size;\n"
00105 "  if (block_num * block_size != A_rows)\n"
00106 "    ++block_num;\n"
00107 "  float Csub = 0;\n"
// Per-work-item offsets added to aBegin / bBegin when loading.
00108 "  int aOffset = row_thread_id * A_internal_rows + col_thread_id;\n"
00109 "  int bOffset = row_thread_id + col_thread_id * B_internal_rows;\n"
00110 "  for (int block = 0;\n"
00111 "           block < block_num;\n"
00112 "           ++block)\n"
00113 "  {\n"
// Load one element of A into bufA, or 0 when the index is outside A_rows / A_cols.
00114 "    if (block * block_size + col_thread_id < A_rows && get_global_id(0) < A_cols)\n"
00115 "      bufA[row_thread_id * block_size + col_thread_id] = A[aBegin + aOffset];\n"
00116 "    else\n"
00117 "      bufA[row_thread_id * block_size + col_thread_id] = 0;\n"
// Load one element of B into bufB, or 0 when the index is outside B_rows / B_cols.
00118 "    if ( (block * block_size + row_thread_id < B_rows) && get_global_id(1) < B_cols )\n"
00119 "      bufB[row_thread_id * block_size + col_thread_id] = B[bBegin + bOffset]; \n"
00120 "    else\n"
00121 "      bufB[row_thread_id * block_size + col_thread_id] = 0;\n"
// Work-group barrier with a local-memory fence before bufA / bufB are read back.
00122 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00123 "    for (int k = 0; k < block_size; ++k)\n"
00124 "      Csub += bufA[row_thread_id * block_size + k] * bufB[k * block_size + col_thread_id];\n"
// Second barrier before the next iteration overwrites bufA / bufB.
00125 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00126 "    aBegin += aStep;\n"
00127 "    bBegin += bStep;\n"
00128 "  }\n"
// Store the result only when the global ids are inside A_cols x B_cols.
00129 "  if (get_global_id(0) < A_cols && get_global_id(1) < B_cols)\n"
00130 "    C[get_global_id(0) * C_internal_cols + get_global_id(1)] = Csub;\n"
00131 "}\n"
00132 ; //matrix_prod_col_col_row_align1_prod_TA
00133 
// OpenCL C source for the kernel `prod_AA`, kept as one string constant built from
// adjacent string literals. According to the comments embedded in the kernel text it
// computes C = A * B with A and B stored col_major and C stored row_major.
//
// Kernel arguments: for each of A, B and C a __global float pointer followed by
// rows, cols, internal_rows and internal_cols; then two __local float scratch
// buffers, bufA and bufB. Only A_rows, A_cols, A_internal_rows, B_rows, B_cols,
// B_internal_rows and C_internal_cols are read by the kernel body.
//
// Each work-item accumulates one value Csub over block_num tiles and stores it to
// C[get_global_id(0) * C_internal_cols + get_global_id(1)].
// NOTE(review): block_size comes from get_local_size(0) only, so the tile indexing
// presumably expects a square work-group and bufA/bufB of at least
// block_size * block_size floats each -- confirm against the host-side launch code.
00134 const char * const matrix_prod_col_col_row_align1_prod_AA = 
00135 "// file automatically generated - do not edit!\n"
00136 "// matrix-matrix multiplication C = A * B\n"
00137 "// matrix layouts: C...row_major, A...col_major, B...col_major\n"
00138 "__kernel void prod_AA(\n"
00139 "          __global const float * A,\n"
00140 "          unsigned int A_rows,\n"
00141 "          unsigned int A_cols,\n"
00142 "          unsigned int A_internal_rows,\n"
00143 "          unsigned int A_internal_cols,\n"
00144 "          __global const float * B,  \n"
00145 "          unsigned int B_rows,\n"
00146 "          unsigned int B_cols,\n"
00147 "          unsigned int B_internal_rows,\n"
00148 "          unsigned int B_internal_cols,\n"
00149 "          __global float * C,\n"
00150 "          unsigned int C_rows,\n"
00151 "          unsigned int C_cols,\n"
00152 "          unsigned int C_internal_rows,\n"
00153 "          unsigned int C_internal_cols,\n"
00154 "          __local float * bufA,\n"
00155 "          __local float * bufB) \n"
00156 "{ \n"
00157 "  int block_size = get_local_size(0);\n"
00158 "  int row_block_id = get_group_id(0);\n"
00159 "  int col_block_id = get_group_id(1);\n"
00160 "  int row_thread_id = get_local_id(0);\n"
00161 "  int col_thread_id = get_local_id(1);\n"
// Start offsets of this work-group in A and B, and the amount each offset advances
// per loop iteration: A moves by block_size * A_internal_rows, B by block_size.
00162 "  int aBegin = row_block_id * block_size;\n"
00163 "  int aStep = block_size * A_internal_rows;\n"
00164 "  int bBegin = col_block_id * block_size * B_internal_rows;\n"
00165 "  int bStep = block_size;\n"
// Number of loop iterations: A_cols / block_size, rounded up when there is a remainder.
00166 "  int block_num = A_cols / block_size;\n"
00167 "  if (block_num * block_size != A_cols)\n"
00168 "    ++block_num;\n"
00169 "  float Csub = 0;\n"
// Per-work-item offsets added to aBegin / bBegin when loading.
00170 "  int aOffset = row_thread_id + col_thread_id * A_internal_rows;\n"
00171 "  int bOffset = row_thread_id + col_thread_id * B_internal_rows;\n"
00172 "  for (int block = 0;\n"
00173 "           block < block_num;\n"
00174 "           ++block)\n"
00175 "  {\n"
// Load one element of A into bufA, or 0 when the index is outside A_cols / A_rows.
00176 "    if (block * block_size + col_thread_id < A_cols && get_global_id(0) < A_rows)\n"
00177 "      bufA[row_thread_id * block_size + col_thread_id] = A[aBegin + aOffset];\n"
00178 "    else\n"
00179 "      bufA[row_thread_id * block_size + col_thread_id] = 0;\n"
// Load one element of B into bufB, or 0 when the index is outside B_rows / B_cols.
00180 "    if ( (block * block_size + row_thread_id < B_rows) && get_global_id(1) < B_cols )\n"
00181 "      bufB[row_thread_id * block_size + col_thread_id] = B[bBegin + bOffset]; \n"
00182 "    else\n"
00183 "      bufB[row_thread_id * block_size + col_thread_id] = 0;\n"
// Work-group barrier with a local-memory fence before bufA / bufB are read back.
00184 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00185 "    for (int k = 0; k < block_size; ++k)\n"
00186 "      Csub += bufA[row_thread_id * block_size + k] * bufB[k * block_size + col_thread_id];\n"
// Second barrier before the next iteration overwrites bufA / bufB.
00187 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00188 "    aBegin += aStep;\n"
00189 "    bBegin += bStep;\n"
00190 "  }\n"
// Store the result only when the global ids are inside A_rows x B_cols.
00191 "  if (get_global_id(0) < A_rows && get_global_id(1) < B_cols)\n"
00192 "    C[get_global_id(0) * C_internal_cols + get_global_id(1)] = Csub;\n"
00193 "}\n"
00194 ; //matrix_prod_col_col_row_align1_prod_AA
00195 
// OpenCL C source for the kernel `prod_AT`, kept as one string constant built from
// adjacent string literals. According to the comments embedded in the kernel text it
// computes C = A * B^T with A and B stored col_major and C stored row_major.
//
// Kernel arguments: for each of A, B and C a __global float pointer followed by
// rows, cols, internal_rows and internal_cols; then two __local float scratch
// buffers, bufA and bufB. Only A_rows, A_cols, A_internal_rows, B_rows, B_cols,
// B_internal_rows and C_internal_cols are read by the kernel body.
//
// Each work-item accumulates one value Csub over block_num tiles and stores it to
// C[get_global_id(0) * C_internal_cols + get_global_id(1)].
// NOTE(review): block_size comes from get_local_size(0) only, so the tile indexing
// presumably expects a square work-group and bufA/bufB of at least
// block_size * block_size floats each -- confirm against the host-side launch code.
00196 const char * const matrix_prod_col_col_row_align1_prod_AT = 
00197 "// file automatically generated - do not edit!\n"
00198 "// matrix-matrix multiplication C = A * B^T\n"
00199 "// matrix layouts: C...row_major, A...col_major, B...col_major\n"
00200 "__kernel void prod_AT(\n"
00201 "          __global const float * A,\n"
00202 "          unsigned int A_rows,\n"
00203 "          unsigned int A_cols,\n"
00204 "          unsigned int A_internal_rows,\n"
00205 "          unsigned int A_internal_cols,\n"
00206 "          __global const float * B,  \n"
00207 "          unsigned int B_rows,\n"
00208 "          unsigned int B_cols,\n"
00209 "          unsigned int B_internal_rows,\n"
00210 "          unsigned int B_internal_cols,\n"
00211 "          __global float * C,\n"
00212 "          unsigned int C_rows,\n"
00213 "          unsigned int C_cols,\n"
00214 "          unsigned int C_internal_rows,\n"
00215 "          unsigned int C_internal_cols,\n"
00216 "          __local float * bufA,\n"
00217 "          __local float * bufB) \n"
00218 "{ \n"
00219 "  int block_size = get_local_size(0);\n"
00220 "  int row_block_id = get_group_id(0);\n"
00221 "  int col_block_id = get_group_id(1);\n"
00222 "  int row_thread_id = get_local_id(0);\n"
00223 "  int col_thread_id = get_local_id(1);\n"
// Start offsets of this work-group in A and B; per loop iteration A advances by
// block_size * A_internal_rows and B by block_size * B_internal_rows.
00224 "  int aBegin = row_block_id * block_size;\n"
00225 "  int aStep = block_size * A_internal_rows;\n"
00226 "  int bBegin = col_block_id * block_size;\n"
00227 "  int bStep = block_size * B_internal_rows;\n"
// Number of loop iterations: A_cols / block_size, rounded up when there is a remainder.
00228 "  int block_num = A_cols / block_size;\n"
00229 "  if (block_num * block_size != A_cols)\n"
00230 "    ++block_num;\n"
00231 "  float Csub = 0;\n"
// Per-work-item offsets added to aBegin / bBegin when loading.
00232 "  int aOffset = row_thread_id + col_thread_id * A_internal_rows;\n"
00233 "  int bOffset = row_thread_id * B_internal_rows + col_thread_id;\n"
00234 "  for (int block = 0;\n"
00235 "           block < block_num;\n"
00236 "           ++block)\n"
00237 "  {\n"
// Load one element of A into bufA, or 0 when the index is outside A_cols / A_rows.
00238 "    if (block * block_size + col_thread_id < A_cols && get_global_id(0) < A_rows)\n"
00239 "      bufA[row_thread_id * block_size + col_thread_id] = A[aBegin + aOffset];\n"
00240 "    else\n"
00241 "      bufA[row_thread_id * block_size + col_thread_id] = 0;\n"
// Load one element of B into bufB, or 0 when the index is outside B_cols / B_rows.
00242 "    if ( (block * block_size + row_thread_id < B_cols) && get_global_id(1) < B_rows )\n"
00243 "      bufB[row_thread_id * block_size + col_thread_id] = B[bBegin + bOffset]; \n"
00244 "    else\n"
00245 "      bufB[row_thread_id * block_size + col_thread_id] = 0;\n"
// Work-group barrier with a local-memory fence before bufA / bufB are read back.
00246 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00247 "    for (int k = 0; k < block_size; ++k)\n"
00248 "      Csub += bufA[row_thread_id * block_size + k] * bufB[k * block_size + col_thread_id];\n"
// Second barrier before the next iteration overwrites bufA / bufB.
00249 "    barrier(CLK_LOCAL_MEM_FENCE);\n"
00250 "    aBegin += aStep;\n"
00251 "    bBegin += bStep;\n"
00252 "  }\n"
// Store the result only when the global ids are inside A_rows x B_rows.
00253 "  if (get_global_id(0) < A_rows && get_global_id(1) < B_rows)\n"
00254 "    C[get_global_id(0) * C_internal_cols + get_global_id(1)] = Csub;\n"
00255 "}\n"
00256 ; //matrix_prod_col_col_row_align1_prod_AT
00257 
00258   }  //namespace kernels
00259  }  //namespace linalg
00260 }  //namespace viennacl
00261 #endif

Generated on Sat May 21 2011 20:36:50 for ViennaCL - The Vienna Computing Library by  doxygen 1.7.1