SHOGUN
v1.1.0
|
00001 #include <shogun/features/SimpleFeatures.h> 00002 #include <shogun/preprocessor/SimplePreprocessor.h> 00003 #include <shogun/io/SGIO.h> 00004 #include <shogun/base/Parameter.h> 00005 #include <shogun/mathematics/Math.h> 00006 00007 #include <string.h> 00008 00009 namespace shogun { 00010 00011 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(int32_t size) : CDotFeatures(size) 00012 { 00013 init(); 00014 } 00015 00016 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(const CSimpleFeatures & orig) : 00017 CDotFeatures(orig) 00018 { 00019 copy_feature_matrix(SGMatrix<ST>(orig.feature_matrix, 00020 orig.num_features, 00021 orig.num_vectors)); 00022 initialize_cache(); 00023 m_subset=orig.m_subset->duplicate(); 00024 } 00025 00026 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(SGMatrix<ST> matrix) : 00027 CDotFeatures() 00028 { 00029 init(); 00030 set_feature_matrix(matrix); 00031 } 00032 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(ST* src, int32_t num_feat, int32_t num_vec) : 00033 CDotFeatures() 00034 { 00035 init(); 00036 set_feature_matrix(src, num_feat, num_vec); 00037 } 00038 template<class ST> CSimpleFeatures<ST>::CSimpleFeatures(CFile* loader) : 00039 CDotFeatures(loader) 00040 { 00041 init(); 00042 load(loader); 00043 } 00044 template<class ST> CFeatures* CSimpleFeatures<ST>::duplicate() const 00045 { 00046 return new CSimpleFeatures<ST>(*this); 00047 } 00048 00049 template<class ST> CSimpleFeatures<ST>::~CSimpleFeatures() { free_features(); } 00050 00051 template<class ST> void CSimpleFeatures<ST>::free_features() 00052 { 00053 remove_subset(); 00054 free_feature_matrix(); 00055 SG_UNREF(feature_cache); 00056 } 00057 00058 template<class ST> void CSimpleFeatures<ST>::free_feature_matrix() 00059 { 00060 remove_subset(); 00061 SG_FREE(feature_matrix); 00062 feature_matrix = NULL; 00063 feature_matrix_num_features = num_features; 00064 feature_matrix_num_vectors = num_vectors; 00065 num_vectors = 0; 00066 num_features = 0; 00067 } 00068 00069 template<class ST> ST* CSimpleFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree) 00070 { 00071 /* index conversion for subset, only for array access */ 00072 int32_t real_num=subset_idx_conversion(num); 00073 00074 len = num_features; 00075 00076 if (feature_matrix) 00077 { 00078 dofree = false; 00079 return &feature_matrix[real_num * int64_t(num_features)]; 00080 } 00081 00082 ST* feat = NULL; 00083 dofree = false; 00084 00085 if (feature_cache) 00086 { 00087 feat = feature_cache->lock_entry(num); 00088 00089 if (feat) 00090 return feat; 00091 else 00092 feat = feature_cache->set_entry(real_num); 00093 } 00094 00095 if (!feat) 00096 dofree = true; 00097 feat = compute_feature_vector(num, len, feat); 00098 00099 if (get_num_preprocessors()) 00100 { 00101 int32_t tmp_len = len; 00102 ST* tmp_feat_before = feat; 00103 ST* tmp_feat_after = NULL; 00104 00105 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00106 { 00107 CSimplePreprocessor<ST>* p = 00108 (CSimplePreprocessor<ST>*) get_preprocessor(i); 00109 // temporary hack 00110 SGVector<ST> applied = p->apply_to_feature_vector( 00111 SGVector<ST>(tmp_feat_before, tmp_len)); 00112 tmp_feat_after = applied.vector; 00113 SG_UNREF(p); 00114 00115 if (i != 0) // delete feature vector, except for the the first one, i.e., feat 00116 SG_FREE(tmp_feat_before); 00117 tmp_feat_before = tmp_feat_after; 00118 } 00119 00120 memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len); 00121 SG_FREE(tmp_feat_after); 00122 00123 len = tmp_len; 00124 } 00125 return feat; 00126 } 00127 00128 template<class ST> void CSimpleFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num) 00129 { 00130 /* index conversion for subset, only for array access */ 00131 int32_t real_num=subset_idx_conversion(num); 00132 00133 if (num>=get_num_vectors()) 00134 { 00135 SG_ERROR("Index out of bounds (number of vectors %d, you " 00136 "requested %d)\n", get_num_vectors(), num); 00137 } 00138 00139 if (!feature_matrix) 00140 SG_ERROR("Requires a in-memory feature matrix\n"); 00141 00142 if (vector.vlen != num_features) 00143 SG_ERROR( 00144 "Vector not of length %d (has %d)\n", num_features, vector.vlen); 00145 00146 memcpy(&feature_matrix[real_num * int64_t(num_features)], vector.vector, 00147 int64_t(num_features) * sizeof(ST)); 00148 } 00149 00150 template<class ST> SGVector<ST> CSimpleFeatures<ST>::get_feature_vector(int32_t num) 00151 { 00152 /* index conversion for subset, only for array access */ 00153 int32_t real_num=subset_idx_conversion(num); 00154 00155 if (num >= get_num_vectors()) 00156 { 00157 SG_ERROR("Index out of bounds (number of vectors %d, you " 00158 "requested %d)\n", get_num_vectors(), real_num); 00159 } 00160 00161 SGVector<ST> vec; 00162 vec.vector = get_feature_vector(num, vec.vlen, vec.do_free); 00163 return vec; 00164 } 00165 00166 template<class ST> void CSimpleFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree) 00167 { 00168 if (feature_cache) 00169 feature_cache->unlock_entry(subset_idx_conversion(num)); 00170 00171 if (dofree) 00172 SG_FREE(feat_vec); 00173 } 00174 00175 template<class ST> void CSimpleFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num) 00176 { 00177 free_feature_vector(vec.vector, num, vec.do_free); 00178 } 00179 00180 template<class ST> void CSimpleFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len) 00181 { 00182 if (m_subset) 00183 SG_ERROR("A subset is set, cannot call vector_subset\n"); 00184 00185 ASSERT(feature_matrix); 00186 ASSERT(idx_len<=num_vectors); 00187 00188 int32_t num_vec = num_vectors; 00189 num_vectors = idx_len; 00190 00191 int32_t old_ii = -1; 00192 00193 for (int32_t i = 0; i < idx_len; i++) 00194 { 00195 int32_t ii = idx[i]; 00196 ASSERT(old_ii<ii); 00197 00198 if (ii < 0 || ii >= num_vec) 00199 SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec); 00200 00201 if (i == ii) 00202 continue; 00203 00204 memcpy(&feature_matrix[int64_t(num_features) * i], 00205 &feature_matrix[int64_t(num_features) * ii], 00206 num_features * sizeof(ST)); 00207 old_ii = ii; 00208 } 00209 } 00210 00211 template<class ST> void CSimpleFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len) 00212 { 00213 if (m_subset) 00214 SG_ERROR("A subset is set, cannot call feature_subset\n"); 00215 00216 ASSERT(feature_matrix); 00217 ASSERT(idx_len<=num_features); 00218 int32_t num_feat = num_features; 00219 num_features = idx_len; 00220 00221 for (int32_t i = 0; i < num_vectors; i++) 00222 { 00223 ST* src = &feature_matrix[int64_t(num_feat) * i]; 00224 ST* dst = &feature_matrix[int64_t(num_features) * i]; 00225 00226 int32_t old_jj = -1; 00227 for (int32_t j = 0; j < idx_len; j++) 00228 { 00229 int32_t jj = idx[j]; 00230 ASSERT(old_jj<jj); 00231 if (jj < 0 || jj >= num_feat) 00232 SG_ERROR( 00233 "Index out of range: should be 0<%d<%d\n", jj, num_feat); 00234 00235 dst[j] = src[jj]; 00236 old_jj = jj; 00237 } 00238 } 00239 } 00240 00241 template<class ST> void CSimpleFeatures<ST>::get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec) 00242 { 00243 ASSERT(feature_matrix); 00244 00245 int64_t num = int64_t(num_features) * get_num_vectors(); 00246 *num_feat = num_features; 00247 *num_vec = get_num_vectors(); 00248 *dst = SG_MALLOC(ST, num); 00249 00250 /* copying depends on whether a subset is used */ 00251 if (m_subset) 00252 { 00253 /* copy vector wise */ 00254 for (int32_t i = 0; i < *num_vec; ++i) 00255 { 00256 int32_t real_i = m_subset->subset_idx_conversion(i); 00257 memcpy(*dst, &feature_matrix[real_i * int64_t(num_features)], 00258 num_features * sizeof(ST)); 00259 } 00260 } 00261 else 00262 { 00263 /* copy complete matrix */ 00264 memcpy(*dst, feature_matrix, num * sizeof(ST)); 00265 } 00266 } 00267 00268 template<class ST> SGMatrix<ST> CSimpleFeatures<ST>::get_feature_matrix() 00269 { 00270 return SGMatrix<ST>(feature_matrix, num_features, num_vectors); 00271 } 00272 00273 template<class ST> SGMatrix<ST> CSimpleFeatures<ST>::steal_feature_matrix() 00274 { 00275 SGMatrix<ST> st_feature_matrix(feature_matrix, num_features, num_vectors); 00276 remove_subset(); 00277 SG_UNREF(feature_cache); 00278 clean_preprocessors(); 00279 00280 feature_matrix = NULL; 00281 feature_matrix_num_vectors = 0; 00282 feature_matrix_num_features = 0; 00283 num_features = 0; 00284 num_vectors = 0; 00285 return st_feature_matrix; 00286 } 00287 00288 template<class ST> void CSimpleFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix) 00289 { 00290 remove_subset(); 00291 free_feature_matrix(); 00292 feature_matrix = matrix.matrix; 00293 num_features = matrix.num_rows; 00294 num_vectors = matrix.num_cols; 00295 feature_matrix_num_vectors = num_vectors; 00296 feature_matrix_num_features = num_features; 00297 } 00298 00299 template<class ST> ST* CSimpleFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec) 00300 { 00301 num_feat = num_features; 00302 num_vec = num_vectors; 00303 return feature_matrix; 00304 } 00305 00306 template<class ST> CSimpleFeatures<ST>* CSimpleFeatures<ST>::get_transposed() 00307 { 00308 int32_t num_feat; 00309 int32_t num_vec; 00310 ST* fm = get_transposed(num_feat, num_vec); 00311 00312 return new CSimpleFeatures<ST>(fm, num_feat, num_vec); 00313 } 00314 00315 template<class ST> ST* CSimpleFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec) 00316 { 00317 num_feat = get_num_vectors(); 00318 num_vec = num_features; 00319 00320 int32_t old_num_vec=get_num_vectors(); 00321 00322 ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec); 00323 00324 for (int32_t i=0; i<old_num_vec; i++) 00325 { 00326 SGVector<ST> vec=get_feature_vector(i); 00327 00328 for (int32_t j=0; j<vec.vlen; j++) 00329 fm[j*int64_t(old_num_vec)+i]=vec.vector[j]; 00330 00331 free_feature_vector(vec, i); 00332 } 00333 00334 return fm; 00335 } 00336 00337 template<class ST> void CSimpleFeatures<ST>::set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec) 00338 { 00339 if (m_subset) 00340 SG_ERROR("A subset is set, cannot call set_feature_matrix\n"); 00341 00342 free_feature_matrix(); 00343 feature_matrix = fm; 00344 feature_matrix_num_features = num_feat; 00345 feature_matrix_num_vectors = num_vec; 00346 00347 num_features = num_feat; 00348 num_vectors = num_vec; 00349 initialize_cache(); 00350 } 00351 00352 template<class ST> void CSimpleFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src) 00353 { 00354 if (m_subset) 00355 SG_ERROR("A subset is set, cannot call copy_feature_matrix\n"); 00356 00357 free_feature_matrix(); 00358 int32_t num_feat = src.num_rows; 00359 int32_t num_vec = src.num_cols; 00360 feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec); 00361 feature_matrix_num_features = num_feat; 00362 feature_matrix_num_vectors = num_vec; 00363 00364 memcpy(feature_matrix, src.matrix, 00365 (sizeof(ST) * ((int64_t) num_feat) * num_vec)); 00366 00367 num_features = num_feat; 00368 num_vectors = num_vec; 00369 initialize_cache(); 00370 } 00371 00372 template<class ST> void CSimpleFeatures<ST>::obtain_from_dot(CDotFeatures* df) 00373 { 00374 remove_subset(); 00375 00376 int32_t num_feat = df->get_dim_feature_space(); 00377 int32_t num_vec = df->get_num_vectors(); 00378 00379 ASSERT(num_feat>0 && num_vec>0); 00380 00381 free_feature_matrix(); 00382 feature_matrix = SG_MALLOC(ST, ((int64_t) num_feat) * num_vec); 00383 feature_matrix_num_features = num_feat; 00384 feature_matrix_num_vectors = num_vec; 00385 00386 for (int32_t i = 0; i < num_vec; i++) 00387 { 00388 SGVector<float64_t> v = df->get_computed_dot_feature_vector(i); 00389 ASSERT(num_feat==v.vlen); 00390 00391 for (int32_t j = 0; j < num_feat; j++) 00392 feature_matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j]; 00393 00394 v.free_vector(); 00395 } 00396 num_features = num_feat; 00397 num_vectors = num_vec; 00398 } 00399 00400 template<class ST> bool CSimpleFeatures<ST>::apply_preprocessor(bool force_preprocessing) 00401 { 00402 if (m_subset) 00403 SG_ERROR("A subset is set, cannot call apply_preproc\n"); 00404 00405 SG_DEBUG( "force: %d\n", force_preprocessing); 00406 00407 if (feature_matrix && get_num_preprocessors()) 00408 { 00409 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00410 { 00411 if ((!is_preprocessed(i) || force_preprocessing)) 00412 { 00413 set_preprocessed(i); 00414 CSimplePreprocessor<ST>* p = 00415 (CSimplePreprocessor<ST>*) get_preprocessor(i); 00416 SG_INFO( "preprocessing using preproc %s\n", p->get_name()); 00417 00418 if (p->apply_to_feature_matrix(this).matrix == NULL) 00419 { 00420 SG_UNREF(p); 00421 return false; 00422 }SG_UNREF(p); 00423 00424 } 00425 } 00426 00427 return true; 00428 } 00429 else 00430 { 00431 if (!feature_matrix) 00432 SG_ERROR( "no feature matrix\n"); 00433 00434 if (!get_num_preprocessors()) 00435 SG_ERROR( "no preprocessors available\n"); 00436 00437 return false; 00438 } 00439 } 00440 00441 template<class ST> int32_t CSimpleFeatures<ST>::get_size() { return sizeof(ST); } 00442 00443 template<class ST> int32_t CSimpleFeatures<ST>::get_num_vectors() const 00444 { 00445 return m_subset ? m_subset->get_size() : num_vectors; 00446 } 00447 00448 template<class ST> int32_t CSimpleFeatures<ST>::get_num_features() { return num_features; } 00449 00450 template<class ST> void CSimpleFeatures<ST>::set_num_features(int32_t num) 00451 { 00452 num_features = num; 00453 initialize_cache(); 00454 } 00455 00456 template<class ST> void CSimpleFeatures<ST>::set_num_vectors(int32_t num) 00457 { 00458 if (m_subset) 00459 SG_ERROR("A subset is set, cannot call set_num_vectors\n"); 00460 00461 num_vectors = num; 00462 initialize_cache(); 00463 } 00464 00465 template<class ST> void CSimpleFeatures<ST>::initialize_cache() 00466 { 00467 if (m_subset) 00468 SG_ERROR("A subset is set, cannot call initialize_cache\n"); 00469 00470 if (num_features && num_vectors) 00471 { 00472 SG_UNREF(feature_cache); 00473 feature_cache = new CCache<ST>(get_cache_size(), num_features, 00474 num_vectors); 00475 SG_REF(feature_cache); 00476 } 00477 } 00478 00479 template<class ST> EFeatureClass CSimpleFeatures<ST>::get_feature_class() { return C_SIMPLE; } 00480 00481 template<class ST> bool CSimpleFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors) 00482 { 00483 if (m_subset) 00484 SG_ERROR("A subset is set, cannot call reshape\n"); 00485 00486 if (p_num_features * p_num_vectors 00487 == this->num_features * this->num_vectors) 00488 { 00489 num_features = p_num_features; 00490 num_vectors = p_num_vectors; 00491 return true; 00492 } else 00493 return false; 00494 } 00495 00496 template<class ST> int32_t CSimpleFeatures<ST>::get_dim_feature_space() const { return num_features; } 00497 00498 template<class ST> float64_t CSimpleFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df, 00499 int32_t vec_idx2) 00500 { 00501 ASSERT(df); 00502 ASSERT(df->get_feature_type() == get_feature_type()); 00503 ASSERT(df->get_feature_class() == get_feature_class()); 00504 CSimpleFeatures<ST>* sf = (CSimpleFeatures<ST>*) df; 00505 00506 int32_t len1, len2; 00507 bool free1, free2; 00508 00509 ST* vec1 = get_feature_vector(vec_idx1, len1, free1); 00510 ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2); 00511 00512 float64_t result = CMath::dot(vec1, vec2, len1); 00513 00514 free_feature_vector(vec1, vec_idx1, free1); 00515 sf->free_feature_vector(vec2, vec_idx2, free2); 00516 00517 return result; 00518 } 00519 00520 template<class ST> void CSimpleFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, 00521 float64_t* vec2, int32_t vec2_len, bool abs_val) 00522 { 00523 ASSERT(vec2_len == num_features); 00524 00525 int32_t vlen; 00526 bool vfree; 00527 ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00528 00529 ASSERT(vlen == num_features); 00530 00531 if (abs_val) 00532 { 00533 for (int32_t i = 0; i < num_features; i++) 00534 vec2[i] += alpha * CMath::abs(vec1[i]); 00535 } 00536 else 00537 { 00538 for (int32_t i = 0; i < num_features; i++) 00539 vec2[i] += alpha * vec1[i]; 00540 } 00541 00542 free_feature_vector(vec1, vec_idx1, vfree); 00543 } 00544 00545 template<class ST> int32_t CSimpleFeatures<ST>::get_nnz_features_for_vector(int32_t num) 00546 { 00547 /* H.Strathmann: TODO fix according to Soerens mail */ 00548 return num_features; 00549 } 00550 00551 template<class ST> bool CSimpleFeatures<ST>::Align_char_features(CStringFeatures<char>* cf, 00552 CStringFeatures<char>* Ref, float64_t gapCost) 00553 { 00554 return false; 00555 } 00556 00557 template<class ST> void* CSimpleFeatures<ST>::get_feature_iterator(int32_t vector_index) 00558 { 00559 if (vector_index>=get_num_vectors()) 00560 { 00561 SG_ERROR("Index out of bounds (number of vectors %d, you " 00562 "requested %d)\n", get_num_vectors(), vector_index); 00563 } 00564 00565 simple_feature_iterator* iterator = SG_MALLOC(simple_feature_iterator, 1); 00566 iterator->vec = get_feature_vector(vector_index, iterator->vlen, 00567 iterator->vfree); 00568 iterator->vidx = vector_index; 00569 iterator->index = 0; 00570 return iterator; 00571 } 00572 00573 template<class ST> bool CSimpleFeatures<ST>::get_next_feature(int32_t& index, float64_t& value, 00574 void* iterator) 00575 { 00576 simple_feature_iterator* it = (simple_feature_iterator*) iterator; 00577 if (!it || it->index >= it->vlen) 00578 return false; 00579 00580 index = it->index++; 00581 value = (float64_t) it->vec[index]; 00582 00583 return true; 00584 } 00585 00586 template<class ST> void CSimpleFeatures<ST>::free_feature_iterator(void* iterator) 00587 { 00588 if (!iterator) 00589 return; 00590 00591 simple_feature_iterator* it = (simple_feature_iterator*) iterator; 00592 free_feature_vector(it->vec, it->vidx, it->vfree); 00593 SG_FREE(it); 00594 } 00595 00596 template<class ST> CFeatures* CSimpleFeatures<ST>::copy_subset(SGVector<index_t> indices) 00597 { 00598 SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen); 00599 00600 for (index_t i=0; i<indices.vlen; ++i) 00601 { 00602 index_t real_idx=subset_idx_conversion(indices.vector[i]); 00603 memcpy(&feature_matrix_copy.matrix[i*num_features], 00604 &feature_matrix[real_idx*num_features], 00605 num_features*sizeof(ST)); 00606 } 00607 00608 return new CSimpleFeatures(feature_matrix_copy); 00609 } 00610 00611 template<class ST> ST* CSimpleFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len, 00612 ST* target) 00613 { 00614 SG_NOTIMPLEMENTED; 00615 len = 0; 00616 return NULL; 00617 } 00618 00619 template<class ST> void CSimpleFeatures<ST>::init() 00620 { 00621 num_vectors = 0; 00622 num_features = 0; 00623 00624 feature_matrix = NULL; 00625 feature_matrix_num_vectors = 0; 00626 feature_matrix_num_features = 0; 00627 00628 feature_cache = NULL; 00629 00630 set_generic<ST>(); 00631 /* not store number of vectors in subset */ 00632 m_parameters->add(&num_vectors, "num_vectors", 00633 "Number of vectors."); 00634 m_parameters->add(&num_features, "num_features", "Number of features."); 00635 m_parameters->add_matrix(&feature_matrix, &feature_matrix_num_features, 00636 &feature_matrix_num_vectors, "feature_matrix", 00637 "Matrix of feature vectors / 1 vector per column."); 00638 } 00639 00640 #define GET_FEATURE_TYPE(f_type, sg_type) \ 00641 template<> EFeatureType CSimpleFeatures<sg_type>::get_feature_type() \ 00642 { \ 00643 return f_type; \ 00644 } 00645 00646 GET_FEATURE_TYPE(F_BOOL, bool) 00647 GET_FEATURE_TYPE(F_CHAR, char) 00648 GET_FEATURE_TYPE(F_BYTE, uint8_t) 00649 GET_FEATURE_TYPE(F_BYTE, int8_t) 00650 GET_FEATURE_TYPE(F_SHORT, int16_t) 00651 GET_FEATURE_TYPE(F_WORD, uint16_t) 00652 GET_FEATURE_TYPE(F_INT, int32_t) 00653 GET_FEATURE_TYPE(F_UINT, uint32_t) 00654 GET_FEATURE_TYPE(F_LONG, int64_t) 00655 GET_FEATURE_TYPE(F_ULONG, uint64_t) 00656 GET_FEATURE_TYPE(F_SHORTREAL, float32_t) 00657 GET_FEATURE_TYPE(F_DREAL, float64_t) 00658 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t) 00659 #undef GET_FEATURE_TYPE 00660 00669 template<> bool CSimpleFeatures<float64_t>::Align_char_features( 00670 CStringFeatures<char>* cf, CStringFeatures<char>* Ref, 00671 float64_t gapCost) 00672 { 00673 ASSERT(cf); 00674 /*num_vectors=cf->get_num_vectors(); 00675 num_features=Ref->get_num_vectors(); 00676 00677 int64_t len=((int64_t) num_vectors)*num_features; 00678 free_feature_matrix(); 00679 feature_matrix=SG_MALLOC(float64_t, len); 00680 int32_t num_cf_feat=0; 00681 int32_t num_cf_vec=0; 00682 int32_t num_ref_feat=0; 00683 int32_t num_ref_vec=0; 00684 char* fm_cf=NULL; //cf->get_feature_matrix(num_cf_feat, num_cf_vec); 00685 char* fm_ref=NULL; //Ref->get_feature_matrix(num_ref_feat, num_ref_vec); 00686 00687 ASSERT(num_cf_vec==num_vectors); 00688 ASSERT(num_ref_vec==num_features); 00689 00690 SG_INFO( "computing aligments of %i vectors to %i reference vectors: ", num_cf_vec, num_ref_vec) ; 00691 for (int32_t i=0; i< num_ref_vec; i++) 00692 { 00693 SG_PROGRESS(i, num_ref_vec) ; 00694 for (int32_t j=0; j<num_cf_vec; j++) 00695 feature_matrix[i+j*num_features] = CMath::Align(&fm_cf[j*num_cf_feat], &fm_ref[i*num_ref_feat], num_cf_feat, num_ref_feat, gapCost); 00696 } ; 00697 00698 SG_INFO( "created %i x %i matrix (0x%p)\n", num_features, num_vectors, feature_matrix) ;*/ 00699 return true; 00700 } 00701 00702 template<> float64_t CSimpleFeatures<bool>::dense_dot(int32_t vec_idx1, 00703 const float64_t* vec2, int32_t vec2_len) 00704 { 00705 ASSERT(vec2_len == num_features); 00706 00707 int32_t vlen; 00708 bool vfree; 00709 bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00710 00711 ASSERT(vlen == num_features); 00712 float64_t result = 0; 00713 00714 for (int32_t i = 0; i < num_features; i++) 00715 result += vec1[i] ? vec2[i] : 0; 00716 00717 free_feature_vector(vec1, vec_idx1, vfree); 00718 00719 return result; 00720 } 00721 00722 template<> float64_t CSimpleFeatures<char>::dense_dot(int32_t vec_idx1, 00723 const float64_t* vec2, int32_t vec2_len) 00724 { 00725 ASSERT(vec2_len == num_features); 00726 00727 int32_t vlen; 00728 bool vfree; 00729 char* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00730 00731 ASSERT(vlen == num_features); 00732 float64_t result = 0; 00733 00734 for (int32_t i = 0; i < num_features; i++) 00735 result += vec1[i] * vec2[i]; 00736 00737 free_feature_vector(vec1, vec_idx1, vfree); 00738 00739 return result; 00740 } 00741 00742 template<> float64_t CSimpleFeatures<int8_t>::dense_dot(int32_t vec_idx1, 00743 const float64_t* vec2, int32_t vec2_len) 00744 { 00745 ASSERT(vec2_len == num_features); 00746 00747 int32_t vlen; 00748 bool vfree; 00749 int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00750 00751 ASSERT(vlen == num_features); 00752 float64_t result = 0; 00753 00754 for (int32_t i = 0; i < num_features; i++) 00755 result += vec1[i] * vec2[i]; 00756 00757 free_feature_vector(vec1, vec_idx1, vfree); 00758 00759 return result; 00760 } 00761 00762 template<> float64_t CSimpleFeatures<uint8_t>::dense_dot( 00763 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00764 { 00765 ASSERT(vec2_len == num_features); 00766 00767 int32_t vlen; 00768 bool vfree; 00769 uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00770 00771 ASSERT(vlen == num_features); 00772 float64_t result = 0; 00773 00774 for (int32_t i = 0; i < num_features; i++) 00775 result += vec1[i] * vec2[i]; 00776 00777 free_feature_vector(vec1, vec_idx1, vfree); 00778 00779 return result; 00780 } 00781 00782 template<> float64_t CSimpleFeatures<int16_t>::dense_dot( 00783 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00784 { 00785 ASSERT(vec2_len == num_features); 00786 00787 int32_t vlen; 00788 bool vfree; 00789 int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00790 00791 ASSERT(vlen == num_features); 00792 float64_t result = 0; 00793 00794 for (int32_t i = 0; i < num_features; i++) 00795 result += vec1[i] * vec2[i]; 00796 00797 free_feature_vector(vec1, vec_idx1, vfree); 00798 00799 return result; 00800 } 00801 00802 template<> float64_t CSimpleFeatures<uint16_t>::dense_dot( 00803 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00804 { 00805 ASSERT(vec2_len == num_features); 00806 00807 int32_t vlen; 00808 bool vfree; 00809 uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00810 00811 ASSERT(vlen == num_features); 00812 float64_t result = 0; 00813 00814 for (int32_t i = 0; i < num_features; i++) 00815 result += vec1[i] * vec2[i]; 00816 00817 free_feature_vector(vec1, vec_idx1, vfree); 00818 00819 return result; 00820 } 00821 00822 template<> float64_t CSimpleFeatures<int32_t>::dense_dot( 00823 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00824 { 00825 ASSERT(vec2_len == num_features); 00826 00827 int32_t vlen; 00828 bool vfree; 00829 int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00830 00831 ASSERT(vlen == num_features); 00832 float64_t result = 0; 00833 00834 for (int32_t i = 0; i < num_features; i++) 00835 result += vec1[i] * vec2[i]; 00836 00837 free_feature_vector(vec1, vec_idx1, vfree); 00838 00839 return result; 00840 } 00841 00842 template<> float64_t CSimpleFeatures<uint32_t>::dense_dot( 00843 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00844 { 00845 ASSERT(vec2_len == num_features); 00846 00847 int32_t vlen; 00848 bool vfree; 00849 uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00850 00851 ASSERT(vlen == num_features); 00852 float64_t result = 0; 00853 00854 for (int32_t i = 0; i < num_features; i++) 00855 result += vec1[i] * vec2[i]; 00856 00857 free_feature_vector(vec1, vec_idx1, vfree); 00858 00859 return result; 00860 } 00861 00862 template<> float64_t CSimpleFeatures<int64_t>::dense_dot( 00863 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00864 { 00865 ASSERT(vec2_len == num_features); 00866 00867 int32_t vlen; 00868 bool vfree; 00869 int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00870 00871 ASSERT(vlen == num_features); 00872 float64_t result = 0; 00873 00874 for (int32_t i = 0; i < num_features; i++) 00875 result += vec1[i] * vec2[i]; 00876 00877 free_feature_vector(vec1, vec_idx1, vfree); 00878 00879 return result; 00880 } 00881 00882 template<> float64_t CSimpleFeatures<uint64_t>::dense_dot( 00883 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00884 { 00885 ASSERT(vec2_len == num_features); 00886 00887 int32_t vlen; 00888 bool vfree; 00889 uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00890 00891 ASSERT(vlen == num_features); 00892 float64_t result = 0; 00893 00894 for (int32_t i = 0; i < num_features; i++) 00895 result += vec1[i] * vec2[i]; 00896 00897 free_feature_vector(vec1, vec_idx1, vfree); 00898 00899 return result; 00900 } 00901 00902 template<> float64_t CSimpleFeatures<float32_t>::dense_dot( 00903 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00904 { 00905 ASSERT(vec2_len == num_features); 00906 00907 int32_t vlen; 00908 bool vfree; 00909 float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00910 00911 ASSERT(vlen == num_features); 00912 float64_t result = 0; 00913 00914 for (int32_t i = 0; i < num_features; i++) 00915 result += vec1[i] * vec2[i]; 00916 00917 free_feature_vector(vec1, vec_idx1, vfree); 00918 00919 return result; 00920 } 00921 00922 template<> float64_t CSimpleFeatures<float64_t>::dense_dot( 00923 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00924 { 00925 ASSERT(vec2_len == num_features); 00926 00927 int32_t vlen; 00928 bool vfree; 00929 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00930 00931 ASSERT(vlen == num_features); 00932 float64_t result = CMath::dot(vec1, vec2, num_features); 00933 00934 free_feature_vector(vec1, vec_idx1, vfree); 00935 00936 return result; 00937 } 00938 00939 template<> float64_t CSimpleFeatures<floatmax_t>::dense_dot( 00940 int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00941 { 00942 ASSERT(vec2_len == num_features); 00943 00944 int32_t vlen; 00945 bool vfree; 00946 floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00947 00948 ASSERT(vlen == num_features); 00949 float64_t result = 0; 00950 00951 for (int32_t i = 0; i < num_features; i++) 00952 result += vec1[i] * vec2[i]; 00953 00954 free_feature_vector(vec1, vec_idx1, vfree); 00955 00956 return result; 00957 } 00958 00959 #define LOAD(f_load, sg_type) \ 00960 template<> void CSimpleFeatures<sg_type>::load(CFile* loader) \ 00961 { \ 00962 SG_SET_LOCALE_C; \ 00963 ASSERT(loader); \ 00964 sg_type* matrix; \ 00965 int32_t num_feat; \ 00966 int32_t num_vec; \ 00967 loader->f_load(matrix, num_feat, num_vec); \ 00968 set_feature_matrix(matrix, num_feat, num_vec); \ 00969 SG_RESET_LOCALE; \ 00970 } 00971 00972 LOAD(get_matrix, bool) 00973 LOAD(get_matrix, char) 00974 LOAD(get_int8_matrix, int8_t) 00975 LOAD(get_matrix, uint8_t) 00976 LOAD(get_matrix, int16_t) 00977 LOAD(get_matrix, uint16_t) 00978 LOAD(get_matrix, int32_t) 00979 LOAD(get_uint_matrix, uint32_t) 00980 LOAD(get_long_matrix, int64_t) 00981 LOAD(get_ulong_matrix, uint64_t) 00982 LOAD(get_matrix, float32_t) 00983 LOAD(get_matrix, float64_t) 00984 LOAD(get_longreal_matrix, floatmax_t) 00985 #undef LOAD 00986 00987 #define SAVE(f_write, sg_type) \ 00988 template<> void CSimpleFeatures<sg_type>::save(CFile* writer) \ 00989 { \ 00990 SG_SET_LOCALE_C; \ 00991 ASSERT(writer); \ 00992 writer->f_write(feature_matrix, num_features, num_vectors); \ 00993 SG_RESET_LOCALE; \ 00994 } 00995 00996 SAVE(set_matrix, bool) 00997 SAVE(set_matrix, char) 00998 SAVE(set_int8_matrix, int8_t) 00999 SAVE(set_matrix, uint8_t) 01000 SAVE(set_matrix, int16_t) 01001 SAVE(set_matrix, uint16_t) 01002 SAVE(set_matrix, int32_t) 01003 SAVE(set_uint_matrix, uint32_t) 01004 SAVE(set_long_matrix, int64_t) 01005 SAVE(set_ulong_matrix, uint64_t) 01006 SAVE(set_matrix, float32_t) 01007 SAVE(set_matrix, float64_t) 01008 SAVE(set_longreal_matrix, floatmax_t) 01009 #undef SAVE 01010 01011 template class CSimpleFeatures<bool>; 01012 template class CSimpleFeatures<char>; 01013 template class CSimpleFeatures<int8_t>; 01014 template class CSimpleFeatures<uint8_t>; 01015 template class CSimpleFeatures<int16_t>; 01016 template class CSimpleFeatures<uint16_t>; 01017 template class CSimpleFeatures<int32_t>; 01018 template class CSimpleFeatures<uint32_t>; 01019 template class CSimpleFeatures<int64_t>; 01020 template class CSimpleFeatures<uint64_t>; 01021 template class CSimpleFeatures<float32_t>; 01022 template class CSimpleFeatures<float64_t>; 01023 template class CSimpleFeatures<floatmax_t>; 01024 }