// SHOGUN v1.1.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009-2010 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #include <shogun/features/CombinedDotFeatures.h> 00013 #include <shogun/io/SGIO.h> 00014 #include <shogun/mathematics/Math.h> 00015 00016 using namespace shogun; 00017 00018 CCombinedDotFeatures::CCombinedDotFeatures() : CDotFeatures() 00019 { 00020 init(); 00021 00022 feature_list=new CList(true); 00023 update_dim_feature_space_and_num_vec(); 00024 } 00025 00026 CCombinedDotFeatures::CCombinedDotFeatures(const CCombinedDotFeatures & orig) 00027 : CDotFeatures(orig), num_vectors(orig.num_vectors), 00028 num_dimensions(orig.num_dimensions) 00029 { 00030 init(); 00031 00032 feature_list=new CList(true); 00033 } 00034 00035 CFeatures* CCombinedDotFeatures::duplicate() const 00036 { 00037 return new CCombinedDotFeatures(*this); 00038 } 00039 00040 CCombinedDotFeatures::~CCombinedDotFeatures() 00041 { 00042 delete feature_list; 00043 } 00044 00045 void CCombinedDotFeatures::list_feature_objs() 00046 { 00047 SG_INFO( "BEGIN COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions); 00048 this->list_feature_obj(); 00049 00050 CListElement* current = NULL ; 00051 CDotFeatures* f=get_first_feature_obj(current); 00052 00053 while (f) 00054 { 00055 f->list_feature_obj(); 00056 f=get_next_feature_obj(current); 00057 } 00058 00059 SG_INFO( "END COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions); 00060 this->list_feature_obj(); 00061 } 00062 00063 void CCombinedDotFeatures::update_dim_feature_space_and_num_vec() 00064 { 00065 CListElement* current = NULL ; 00066 
CDotFeatures* f=get_first_feature_obj(current); 00067 00068 int32_t dim=0; 00069 int32_t vec=-1; 00070 00071 while (f) 00072 { 00073 dim+= f->get_dim_feature_space(); 00074 if (vec==-1) 00075 vec=f->get_num_vectors(); 00076 else if (vec != f->get_num_vectors()) 00077 { 00078 f->list_feature_obj(); 00079 SG_ERROR("Number of vectors (%d) mismatches in above feature obj (%d)\n", vec, f->get_num_vectors()); 00080 } 00081 00082 SG_UNREF(f); 00083 00084 f=get_next_feature_obj(current); 00085 } 00086 00087 num_dimensions=dim; 00088 num_vectors=vec; 00089 SG_DEBUG("vecs=%d, dims=%d\n", num_vectors, num_dimensions); 00090 } 00091 00092 float64_t CCombinedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00093 { 00094 float64_t result=0; 00095 00096 ASSERT(df); 00097 ASSERT(df->get_feature_type() == get_feature_type()); 00098 ASSERT(df->get_feature_class() == get_feature_class()); 00099 CCombinedDotFeatures* cf = (CCombinedDotFeatures*) df; 00100 00101 CListElement* current1 = NULL; 00102 CDotFeatures* f1=get_first_feature_obj(current1); 00103 00104 CListElement* current2 = NULL; 00105 CDotFeatures* f2=cf->get_first_feature_obj(current2); 00106 00107 while (f1 && f2) 00108 { 00109 result += f1->dot(vec_idx1, f2,vec_idx2) * 00110 f1->get_combined_feature_weight() * 00111 f2->get_combined_feature_weight(); 00112 00113 SG_UNREF(f1); 00114 SG_UNREF(f2); 00115 f1=get_next_feature_obj(current1); 00116 f2=cf->get_next_feature_obj(current2); 00117 } 00118 00119 // check that both have same number of feature objects inside 00120 ASSERT(f1 == NULL && f2 == NULL); 00121 00122 return result; 00123 } 00124 00125 float64_t CCombinedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len) 00126 { 00127 float64_t result=0; 00128 00129 CListElement* current = NULL ; 00130 CDotFeatures* f=get_first_feature_obj(current); 00131 uint32_t offs=0; 00132 00133 while (f) 00134 { 00135 int32_t dim = f->get_dim_feature_space(); 00136 result += 
f->dense_dot(vec_idx1, vec2+offs, dim)*f->get_combined_feature_weight(); 00137 offs += dim; 00138 00139 SG_UNREF(f); 00140 f=get_next_feature_obj(current); 00141 } 00142 00143 return result; 00144 } 00145 00146 void CCombinedDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00147 { 00148 if (stop<=start) 00149 return; 00150 ASSERT(dim==num_dimensions); 00151 00152 CListElement* current = NULL; 00153 CDotFeatures* f=get_first_feature_obj(current); 00154 uint32_t offs=0; 00155 bool first=true; 00156 int32_t num=stop-start; 00157 float64_t* tmp=SG_MALLOC(float64_t, num); 00158 00159 while (f) 00160 { 00161 int32_t f_dim = f->get_dim_feature_space(); 00162 if (first) 00163 { 00164 f->dense_dot_range(output, start, stop, alphas, vec+offs, f_dim, b); 00165 first=false; 00166 } 00167 else 00168 { 00169 f->dense_dot_range(tmp, start, stop, alphas, vec+offs, f_dim, b); 00170 for (int32_t i=0; i<num; i++) 00171 output[i]+=tmp[i]; 00172 } 00173 offs += f_dim; 00174 00175 SG_UNREF(f); 00176 f=get_next_feature_obj(current); 00177 } 00178 SG_FREE(tmp); 00179 } 00180 00181 void CCombinedDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00182 { 00183 if (num<=0) 00184 return; 00185 ASSERT(dim==num_dimensions); 00186 00187 CListElement* current = NULL; 00188 CDotFeatures* f=get_first_feature_obj(current); 00189 uint32_t offs=0; 00190 bool first=true; 00191 float64_t* tmp=SG_MALLOC(float64_t, num); 00192 00193 while (f) 00194 { 00195 int32_t f_dim = f->get_dim_feature_space(); 00196 if (first) 00197 { 00198 f->dense_dot_range_subset(sub_index, num, output, alphas, vec+offs, f_dim, b); 00199 first=false; 00200 } 00201 else 00202 { 00203 f->dense_dot_range_subset(sub_index, num, tmp, alphas, vec+offs, f_dim, b); 00204 for (int32_t i=0; i<num; i++) 00205 output[i]+=tmp[i]; 00206 } 00207 offs += f_dim; 
00208 00209 SG_UNREF(f); 00210 f=get_next_feature_obj(current); 00211 } 00212 SG_FREE(tmp); 00213 } 00214 00215 void CCombinedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00216 { 00217 CListElement* current = NULL ; 00218 CDotFeatures* f=get_first_feature_obj(current); 00219 uint32_t offs=0; 00220 00221 while (f) 00222 { 00223 int32_t dim = f->get_dim_feature_space(); 00224 f->add_to_dense_vec(alpha*f->get_combined_feature_weight(), vec_idx1, vec2+offs, dim, abs_val); 00225 offs += dim; 00226 00227 SG_UNREF(f); 00228 f=get_next_feature_obj(current); 00229 } 00230 } 00231 00232 void* CCombinedDotFeatures::get_feature_iterator(int32_t vector_index) 00233 { 00234 combined_feature_iterator* it=SG_MALLOC(combined_feature_iterator, 1); 00235 00236 it->current=NULL; 00237 it->f=get_first_feature_obj(it->current); 00238 it->iterator=it->f->get_feature_iterator(vector_index); 00239 it->vector_index=vector_index; 00240 return it; 00241 } 00242 00243 bool CCombinedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator) 00244 { 00245 ASSERT(iterator); 00246 combined_feature_iterator* it = (combined_feature_iterator*) iterator; 00247 00248 while (it->f) 00249 { 00250 if (it->f->get_next_feature(index, value, it->iterator)) 00251 { 00252 value*=get_combined_feature_weight(); 00253 return true; 00254 } 00255 00256 it->f->free_feature_iterator(it->iterator); 00257 it->f=get_next_feature_obj(it->current); 00258 if (it->f) 00259 it->iterator=it->f->get_feature_iterator(it->vector_index); 00260 else 00261 it->iterator=NULL; 00262 } 00263 return false; 00264 } 00265 00266 void CCombinedDotFeatures::free_feature_iterator(void* iterator) 00267 { 00268 if (iterator) 00269 { 00270 combined_feature_iterator* it = (combined_feature_iterator*) iterator; 00271 if (it->iterator && it->f) 00272 it->f->free_feature_iterator(it->iterator); 00273 SG_FREE(it); 00274 } 00275 } 00276 00277 CDotFeatures* 
CCombinedDotFeatures::get_first_feature_obj() 00278 { 00279 return (CDotFeatures*) feature_list->get_first_element(); 00280 } 00281 00282 CDotFeatures* CCombinedDotFeatures::get_first_feature_obj(CListElement*& current) 00283 { 00284 return (CDotFeatures*) feature_list->get_first_element(current); 00285 } 00286 00287 CDotFeatures* CCombinedDotFeatures::get_next_feature_obj() 00288 { 00289 return (CDotFeatures*) feature_list->get_next_element(); 00290 } 00291 00292 CDotFeatures* CCombinedDotFeatures::get_next_feature_obj(CListElement*& current) 00293 { 00294 return (CDotFeatures*) feature_list->get_next_element(current); 00295 } 00296 00297 CDotFeatures* CCombinedDotFeatures::get_last_feature_obj() 00298 { 00299 return (CDotFeatures*) feature_list->get_last_element(); 00300 } 00301 00302 bool CCombinedDotFeatures::insert_feature_obj(CDotFeatures* obj) 00303 { 00304 ASSERT(obj); 00305 bool result=feature_list->insert_element(obj); 00306 update_dim_feature_space_and_num_vec(); 00307 return result; 00308 } 00309 00310 bool CCombinedDotFeatures::append_feature_obj(CDotFeatures* obj) 00311 { 00312 ASSERT(obj); 00313 bool result=feature_list->append_element(obj); 00314 update_dim_feature_space_and_num_vec(); 00315 return result; 00316 } 00317 00318 bool CCombinedDotFeatures::delete_feature_obj() 00319 { 00320 CDotFeatures* f=(CDotFeatures*) feature_list->delete_element(); 00321 if (f) 00322 { 00323 SG_UNREF(f); 00324 update_dim_feature_space_and_num_vec(); 00325 return true; 00326 } 00327 else 00328 return false; 00329 } 00330 00331 int32_t CCombinedDotFeatures::get_num_feature_obj() 00332 { 00333 return feature_list->get_num_elements(); 00334 } 00335 00336 int32_t CCombinedDotFeatures::get_nnz_features_for_vector(int32_t num) 00337 { 00338 CListElement* current = NULL ; 00339 CDotFeatures* f=get_first_feature_obj(current); 00340 int32_t result=0; 00341 00342 while (f) 00343 { 00344 result+=f->get_nnz_features_for_vector(num); 00345 00346 SG_UNREF(f); 00347 
f=get_next_feature_obj(current); 00348 } 00349 00350 return result; 00351 } 00352 00353 void CCombinedDotFeatures::get_subfeature_weights(float64_t** weights, int32_t* num_weights) 00354 { 00355 *num_weights = get_num_feature_obj(); 00356 ASSERT(*num_weights > 0); 00357 00358 *weights=SG_MALLOC(float64_t, *num_weights); 00359 float64_t* w = *weights; 00360 00361 CListElement* current = NULL; 00362 CDotFeatures* f = get_first_feature_obj(current); 00363 00364 while (f) 00365 { 00366 *w++=f->get_combined_feature_weight(); 00367 00368 SG_UNREF(f); 00369 f = get_next_feature_obj(current); 00370 } 00371 } 00372 00373 void CCombinedDotFeatures::set_subfeature_weights( 00374 float64_t* weights, int32_t num_weights) 00375 { 00376 int32_t i=0 ; 00377 CListElement* current = NULL ; 00378 CDotFeatures* f = get_first_feature_obj(current); 00379 00380 ASSERT(num_weights==get_num_feature_obj()); 00381 00382 while(f) 00383 { 00384 f->set_combined_feature_weight(weights[i]); 00385 00386 SG_UNREF(f); 00387 f = get_next_feature_obj(current); 00388 i++; 00389 } 00390 } 00391 00392 void CCombinedDotFeatures::init() 00393 { 00394 m_parameters->add(&num_dimensions, "num_dimensions", 00395 "Total number of dimensions."); 00396 m_parameters->add(&num_vectors, "num_vectors", 00397 "Total number of vectors."); 00398 m_parameters->add((CSGObject**) &feature_list, 00399 "feature_list", "Feature list."); 00400 } 00401