SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg, Gunnar Raetsch 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/lib/common.h> 00012 #include <shogun/io/SGIO.h> 00013 #include <shogun/lib/Trie.h> 00014 #include <shogun/mathematics/Math.h> 00015 00016 namespace shogun 00017 { 00018 template <> 00019 void CTrie<POIMTrie>::POIMs_extract_W_helper( 00020 const int32_t nodeIdx, const int32_t depth, const int32_t offset, 00021 const int32_t y0, float64_t* const* const W, const int32_t K ) 00022 { 00023 ASSERT(nodeIdx!=NO_CHILD); 00024 ASSERT(depth<K); 00025 float64_t* const W_kiy = & W[ depth ][ offset + y0 ]; 00026 POIMTrie* const node = &TreeMem[ nodeIdx ]; 00027 int32_t sym; 00028 00029 if( depth < degree-1 ) 00030 { 00031 const int32_t offset1 = offset * NUM_SYMS; 00032 for( sym = 0; sym < NUM_SYMS; ++sym ) 00033 { 00034 ASSERT(W_kiy[sym]==0); 00035 const int32_t childIdx = node->children[ sym ]; 00036 if( childIdx != NO_CHILD ) 00037 { 00038 W_kiy[ sym ] = TreeMem[ childIdx ].weight; 00039 00040 if (depth < K-1) 00041 { 00042 const int32_t y1 = ( y0 + sym ) * NUM_SYMS; 00043 POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K ); 00044 } 00045 } 00046 } 00047 } 00048 else 00049 { 00050 ASSERT(depth==degree-1); 00051 for( sym = 0; sym < NUM_SYMS; ++sym ) 00052 { 00053 ASSERT(W_kiy[sym]==0); 00054 W_kiy[ sym ] = node->child_weights[ sym ]; 00055 } 00056 } 00057 } 00058 00059 template <> 00060 void CTrie<POIMTrie>::POIMs_extract_W( 00061 float64_t* const* const W, const int32_t K) 00062 { 00063 ASSERT(degree>=1); 00064 ASSERT(K>=1); 00065 const int32_t N = length; 00066 int32_t i; 00067 for( i = 0; i < N; ++i ) { 00068 //SG_PRINT( "W_helper( %d )\n", i ); 00069 POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K ); 00070 } 00071 } 00072 00073 template <> 00074 void CTrie<POIMTrie>::POIMs_calc_SLR_helper1( 00075 const float64_t* const distrib, const int32_t i, const int32_t nodeIdx, 00076 int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym, 00077 float64_t* S, float64_t* L, float64_t* R ) 00078 { 00079 ASSERT(depth==degree-1); 00080 ASSERT(nodeIdx!=NO_CHILD); 00081 00082 const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ]; 00083 POIMTrie* const node = &TreeMem[ nodeIdx ]; 00084 int32_t symRight; 00085 int32_t symLeft; 00086 00087 // --- init 00088 node->S = 0; 00089 node->L = 0; 00090 node->R = 0; 00091 00092 if (i+depth<length) 00093 { 00094 const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ]; 00095 00096 // --- go thru direct children 00097 for( symRight = 0; symRight < NUM_SYMS; ++symRight ) { 00098 const float64_t w1 = node->child_weights[ symRight ]; 00099 const float64_t pRight = distribRight[ symRight ]; 00100 const float64_t incr1 = pRight * w1; 00101 node->S += incr1; 00102 node->R += incr1; 00103 } 00104 } 00105 00106 // --- collect precalced values from left neighbor tree 00107 for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft ) 00108 { 00109 if (left_tries_idx[symLeft] != NO_CHILD) 00110 { 00111 POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]]; 00112 00113 ASSERT(nodeLeft); 00114 const float64_t w2 = nodeLeft->child_weights[ lastSym ]; 00115 const float64_t pLeft = distribLeft[ symLeft ]; 00116 const float64_t incr2 = pLeft * w2; 00117 node->S += incr2; 00118 node->L += incr2; 00119 } 00120 } 00121 00122 // --- add w and return results 00123 const float64_t w0 = node->weight; 00124 node->S += w0; 00125 node->L += w0; 00126 node->R += w0; 00127 *S = node->S; 00128 *L = node->L; 00129 *R = node->R; 00130 } 00131 00132 00133 template <> 00134 void CTrie<POIMTrie>::POIMs_calc_SLR_helper2( 00135 const float64_t* const distrib, const int32_t i, const int32_t nodeIdx, 00136 int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L, 00137 float64_t* R ) 00138 { 00139 ASSERT(0<=depth && depth<=degree-2); 00140 ASSERT(nodeIdx!=NO_CHILD); 00141 00142 const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ]; 00143 POIMTrie* const node = &TreeMem[ nodeIdx ]; 00144 float64_t dummy; 00145 float64_t auxS; 00146 float64_t auxR; 00147 int32_t symRight; 00148 int32_t symLeft; 00149 00150 // --- init 00151 node->S = 0; 00152 node->L = 0; 00153 node->R = 0; 00154 00155 // --- recurse thru direct children 00156 for( symRight = 0; symRight < NUM_SYMS; ++symRight ) 00157 { 00158 const int32_t childIdx = node->children[ symRight ]; 00159 if( childIdx != NO_CHILD ) 00160 { 00161 if( depth < degree-2 ) 00162 { 00163 int32_t new_left_tries_idx[4]; 00164 00165 for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft ) 00166 { 00167 new_left_tries_idx[symLeft]=NO_CHILD; 00168 00169 if (left_tries_idx[symLeft] != NO_CHILD) 00170 { 00171 POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]]; 00172 ASSERT(nodeLeft); 00173 new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ]; 00174 } 00175 } 00176 POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR ); 00177 } 00178 else 00179 POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR ); 00180 00181 if (i+depth<length) 00182 { 00183 const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ]; 00184 const float64_t pRight = distribRight[ symRight ]; 00185 node->S += pRight * auxS; 00186 node->R += pRight * auxR; 00187 } 00188 } 00189 } 00190 00191 // --- collect precalced values from left neighbor tree 00192 for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft ) 00193 { 00194 if (left_tries_idx[symLeft] != NO_CHILD) 00195 { 00196 const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]]; 00197 ASSERT(nodeLeft); 00198 const float64_t pLeft = distribLeft[ symLeft ]; 00199 00200 node->S += pLeft * nodeLeft->S; 00201 node->L += pLeft * nodeLeft->L; 00202 00203 if (i+depth<length) 00204 { 00205 const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ]; 00206 // - second order correction for S 00207 auxS = 0; 00208 if( depth < degree-2 ) 00209 { 00210 for( symRight = 0; symRight < NUM_SYMS; ++symRight ) 00211 { 00212 const int32_t childIdxLeft = nodeLeft->children[ symRight ]; 00213 if( childIdxLeft != NO_CHILD ) 00214 { 00215 const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ]; 00216 auxS += distribRight[symRight] * childLeft->S; 00217 } 00218 } 00219 } 00220 else 00221 { 00222 for( symRight = 0; symRight < NUM_SYMS; ++symRight ) { 00223 auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ]; 00224 } 00225 } 00226 node->S -= pLeft* auxS; 00227 } 00228 } 00229 } 00230 00231 // --- add w and return results 00232 const float64_t w0 = node->weight; 00233 //SG_PRINT( " d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 ); 00234 node->S += w0; 00235 node->L += w0; 00236 node->R += w0; 00237 *S = node->S; 00238 *L = node->L; 00239 *R = node->R; 00240 } 00241 00242 00243 00244 template <> 00245 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib ) 00246 { 00247 if( degree == 1 ) { 00248 return; 00249 } 00250 00251 ASSERT(degree>=2); 00252 const int32_t N = length; 00253 float64_t dummy; 00254 int32_t symLeft; 00255 int32_t leftSubtrees[4]; 00256 for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft ) 00257 leftSubtrees[ symLeft ] = NO_CHILD; 00258 00259 for(int32_t i = 0; i < N; ++i ) 00260 { 00261 POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy ); 00262 00263 const POIMTrie* const node = &TreeMem[ trees[i] ]; 00264 ASSERT(trees[i]!=NO_CHILD); 00265 00266 for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft ) 00267 leftSubtrees[ symLeft ] = node->children[ symLeft ]; 00268 } 00269 } 00270 00271 template <> 00272 void CTrie<POIMTrie>::POIMs_get_SLR( 00273 const int32_t parentIdx, const int32_t sym, const int32_t depth, 00274 float64_t* S, float64_t* L, float64_t* R ) 00275 { 00276 ASSERT(parentIdx!=NO_CHILD); 00277 const POIMTrie* const parent = &TreeMem[ parentIdx ]; 00278 if( depth < degree ) { 00279 const int32_t nodeIdx = parent->children[ sym ]; 00280 const POIMTrie* const node = &TreeMem[ nodeIdx ]; 00281 *S = node->S; 00282 *L = node->L; 00283 *R = node->R; 00284 } else { 00285 ASSERT(depth==degree); 00286 const float64_t w = parent->child_weights[ sym ]; 00287 *S = w; 00288 *L = w; 00289 *R = w; 00290 } 00291 } 00292 00293 template <> 00294 void CTrie<POIMTrie>::POIMs_add_SLR_helper2( 00295 float64_t* const* const poims, const int32_t K, const int32_t k, 00296 const int32_t i, const int32_t y, const float64_t valW, 00297 const float64_t valS, const float64_t valL, const float64_t valR, 00298 const int32_t debug) 00299 { 00300 //SG_PRINT( "i=%d, d=%d, y=%d: w=%.3f \n", i, k, y, valW ); 00301 const int32_t nk = nofsKmers[ k ]; 00302 ASSERT(1<=k && k<=K); 00303 ASSERT(0<=y && y<nk); 00304 int32_t z; 00305 int32_t j; 00306 00307 // --- add superstring score; subtract "w", as it was counted twice 00308 if( debug==0 || debug==2 ) 00309 { 00310 poims[ k-1 ][ i*nk + y ] += valS - valW; 00311 } 00312 00313 // --- left partial overlaps 00314 if( debug==0 || debug==3 ) 00315 { 00316 int32_t r; 00317 for( r = 1; k+r <= K; ++r ) 00318 { 00319 const int32_t nr = nofsKmers[ r ]; 00320 const int32_t nz = nofsKmers[ k+r ]; 00321 float64_t* const poim = & poims[ k+r-1 ][ i*nz ]; 00322 z = y * nr; 00323 for( j = 0; j < nr; ++j ) 00324 { 00325 if( !( 0 <= z && z < nz ) ) { 00326 SG_PRINT( "k=%d, nk=%d, r=%d, nr=%d, nz=%d \n", k, nk, r, nr, nz ); 00327 SG_PRINT( " j=%d, y=%d, z=%d \n", j, y, z ); 00328 } 00329 ASSERT(0<=z && z<nz); 00330 poim[ z ] += valL - valW; 00331 ++z; 00332 } 00333 } 00334 } 00335 // --- right partial overlaps 00336 if( debug==0 || debug==4 ) 00337 { 00338 int32_t l; 00339 for( l = 1; k+l <= K && l <= i; ++l ) 00340 { 00341 const int32_t nl = nofsKmers[ l ]; 00342 const int32_t nz = nofsKmers[ k+l ]; 00343 float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ]; 00344 z = y; 00345 for( j = 0; j < nl; ++j ) { 00346 ASSERT(0<=z && z<nz); 00347 poim[ z ] += valR - valW; 00348 z += nk; 00349 } 00350 } 00351 } 00352 } 00353 00354 template <> 00355 void CTrie<POIMTrie>::POIMs_add_SLR_helper1( 00356 const int32_t nodeIdx, const int32_t depth, const int32_t i, 00357 const int32_t y0, float64_t* const* const poims, const int32_t K, 00358 const int32_t debug) 00359 { 00360 ASSERT(nodeIdx!=NO_CHILD); 00361 ASSERT(depth<K); 00362 POIMTrie* const node = &TreeMem[ nodeIdx ]; 00363 int32_t sym; 00364 if( depth < degree-1 ) 00365 { 00366 if( depth < K-1 ) 00367 { 00368 for( sym = 0; sym < NUM_SYMS; ++sym ) 00369 { 00370 const int32_t childIdx = node->children[ sym ]; 00371 if( childIdx != NO_CHILD ) 00372 { 00373 POIMTrie* const child = &TreeMem[ childIdx ]; 00374 const int32_t y = y0 + sym; 00375 const int32_t y1 = y * NUM_SYMS; 00376 const float64_t w = child->weight; 00377 POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug ); 00378 POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug ); 00379 } 00380 } 00381 } 00382 else 00383 { 00384 ASSERT(depth==K-1); 00385 for( sym = 0; sym < NUM_SYMS; ++sym ) 00386 { 00387 const int32_t childIdx = node->children[ sym ]; 00388 if( childIdx != NO_CHILD ) { 00389 POIMTrie* const child = &TreeMem[ childIdx ]; 00390 const int32_t y = y0 + sym; 00391 const float64_t w = child->weight; 00392 POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug ); 00393 } 00394 } 00395 } 00396 } 00397 else 00398 { 00399 ASSERT(depth==degree-1); 00400 for( sym = 0; sym < NUM_SYMS; ++sym ) 00401 { 00402 const float64_t w = node->child_weights[ sym ]; 00403 const int32_t y = y0 + sym; 00404 POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug ); 00405 } 00406 } 00407 } 00408 00409 00410 00411 template <> 00412 void CTrie<POIMTrie>::POIMs_add_SLR( 00413 float64_t* const* const poims, const int32_t K, const int32_t debug) 00414 { 00415 ASSERT(degree>=1); 00416 ASSERT(K>=1); 00417 { 00418 const int32_t m = ( degree > K ) ? degree : K; 00419 nofsKmers = SG_MALLOC(int32_t, m+1 ); 00420 int32_t n; 00421 int32_t k; 00422 n = 1; 00423 for( k = 0; k < m+1; ++k ) { 00424 nofsKmers[ k ] = n; 00425 n *= NUM_SYMS; 00426 } 00427 } 00428 const int32_t N = length; 00429 int32_t i; 00430 for( i = 0; i < N; ++i ) { 00431 POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug ); 00432 } 00433 SG_FREE(nofsKmers); 00434 } 00435 }