SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Subset support written (W) 2011 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/features/Features.h> 00014 #include <shogun/preprocessor/Preprocessor.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/base/Parameter.h> 00017 00018 #include <string.h> 00019 00020 using namespace shogun; 00021 00022 CFeatures::CFeatures(int32_t size) 00023 : CSGObject() 00024 { 00025 init(); 00026 cache_size = size; 00027 } 00028 00029 CFeatures::CFeatures(const CFeatures& orig) 00030 : CSGObject(orig) 00031 { 00032 init(); 00033 00034 preproc = orig.preproc; 00035 num_preproc = orig.num_preproc; 00036 00037 preprocessed=SG_MALLOC(bool, orig.num_preproc); 00038 memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc); 00039 } 00040 00041 CFeatures::CFeatures(CFile* loader) 00042 : CSGObject() 00043 { 00044 init(); 00045 00046 load(loader); 00047 SG_INFO("Feature object loaded (%p)\n",this) ; 00048 } 00049 00050 CFeatures::~CFeatures() 00051 { 00052 clean_preprocessors(); 00053 delete m_subset; 00054 } 00055 00056 void 00057 CFeatures::init() 00058 { 00059 m_parameters->add(&properties, "properties", 00060 "Feature properties."); 00061 m_parameters->add(&cache_size, "cache_size", 00062 "Size of cache in MB."); 00063 00064 m_parameters->add_vector((CSGObject***) &preproc, 00065 &num_preproc, "preproc", 00066 "List of preprocessors."); 00067 m_parameters->add_vector(&preprocessed, 00068 &num_preproc, "preprocessed", 00069 "Feature[i] is already preprocessed."); 00070 00071 m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object"); 00072 00073 m_subset=NULL; 00074 properties = FP_NONE; 00075 cache_size = 0; 00076 preproc = NULL; 00077 num_preproc = 0; 00078 preprocessed = NULL; 00079 } 00080 00082 int32_t CFeatures::add_preprocessor(CPreprocessor* p) 00083 { 00084 SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc); 00085 ASSERT(p); 00086 00087 bool* preprocd=SG_MALLOC(bool, num_preproc+1); 00088 CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1); 00089 for (int32_t i=0; i<num_preproc; i++) 00090 { 00091 pps[i]=preproc[i]; 00092 preprocd[i]=preprocessed[i]; 00093 } 00094 SG_FREE(preproc); 00095 SG_FREE(preprocessed); 00096 preproc=pps; 00097 preprocessed=preprocd; 00098 preproc[num_preproc]=p; 00099 preprocessed[num_preproc]=false; 00100 00101 num_preproc++; 00102 00103 for (int32_t i=0; i<num_preproc; i++) 00104 SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ; 00105 00106 SG_REF(p); 00107 00108 return num_preproc; 00109 } 00110 00112 CPreprocessor* CFeatures::get_preprocessor(int32_t num) 00113 { 00114 if (num<num_preproc) 00115 { 00116 SG_REF(preproc[num]); 00117 return preproc[num]; 00118 } 00119 else 00120 return NULL; 00121 } 00122 00124 int32_t CFeatures::get_num_preprocessed() 00125 { 00126 int32_t num=0; 00127 00128 for (int32_t i=0; i<num_preproc; i++) 00129 { 00130 if (preprocessed[i]) 00131 num++; 00132 } 00133 00134 return num; 00135 } 00136 00138 void CFeatures::clean_preprocessors() 00139 { 00140 while (del_preprocessor(0)); 00141 } 00142 00144 CPreprocessor* CFeatures::del_preprocessor(int32_t num) 00145 { 00146 CPreprocessor** pps=NULL; 00147 bool* preprocd=NULL; 00148 CPreprocessor* removed_preproc=NULL; 00149 00150 if (num_preproc>0 && num<num_preproc) 00151 { 00152 removed_preproc=preproc[num]; 00153 00154 if (num_preproc>1) 00155 { 00156 pps= SG_MALLOC(CPreprocessor*, num_preproc-1); 00157 preprocd= SG_MALLOC(bool, num_preproc-1); 00158 00159 if (pps && preprocd) 00160 { 00161 int32_t j=0; 00162 for (int32_t i=0; i<num_preproc; i++) 00163 { 00164 if (i!=num) 00165 { 00166 pps[j]=preproc[i]; 00167 preprocd[j]=preprocessed[i]; 00168 j++; 00169 } 00170 } 00171 } 00172 } 00173 00174 SG_FREE(preproc); 00175 preproc=pps; 00176 SG_FREE(preprocessed); 00177 preprocessed=preprocd; 00178 00179 num_preproc--; 00180 00181 for (int32_t i=0; i<num_preproc; i++) 00182 SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ; 00183 } 00184 00185 SG_UNREF(removed_preproc); 00186 return removed_preproc; 00187 } 00188 00189 void CFeatures::set_preprocessed(int32_t num) 00190 { 00191 preprocessed[num]=true; 00192 } 00193 00194 bool CFeatures::is_preprocessed(int32_t num) 00195 { 00196 return preprocessed[num]; 00197 } 00198 00199 int32_t CFeatures::get_num_preprocessors() const 00200 { 00201 return num_preproc; 00202 } 00203 00204 int32_t CFeatures::get_cache_size() 00205 { 00206 return cache_size; 00207 } 00208 00209 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors) 00210 { 00211 SG_NOTIMPLEMENTED; 00212 return false; 00213 } 00214 00215 void CFeatures::list_feature_obj() 00216 { 00217 SG_INFO( "%p - ", this); 00218 switch (get_feature_class()) 00219 { 00220 case C_UNKNOWN: 00221 SG_INFO( "C_UNKNOWN "); 00222 break; 00223 case C_SIMPLE: 00224 SG_INFO( "C_SIMPLE "); 00225 break; 00226 case C_SPARSE: 00227 SG_INFO( "C_SPARSE "); 00228 break; 00229 case C_STRING: 00230 SG_INFO( "C_STRING "); 00231 break; 00232 case C_COMBINED: 00233 SG_INFO( "C_COMBINED "); 00234 break; 00235 case C_COMBINED_DOT: 00236 SG_INFO( "C_COMBINED_DOT "); 00237 break; 00238 case C_WD: 00239 SG_INFO( "C_WD "); 00240 break; 00241 case C_SPEC: 00242 SG_INFO( "C_SPEC "); 00243 break; 00244 case C_WEIGHTEDSPEC: 00245 SG_INFO( "C_WEIGHTEDSPEC "); 00246 break; 00247 case C_STREAMING_SIMPLE: 00248 SG_INFO( "C_STREAMING_SIMPLE "); 00249 break; 00250 case C_STREAMING_SPARSE: 00251 SG_INFO( "C_STREAMING_SPARSE "); 00252 break; 00253 case C_STREAMING_STRING: 00254 SG_INFO( "C_STREAMING_STRING "); 00255 break; 00256 case C_STREAMING_VW: 00257 SG_INFO( "C_STREAMING_VW "); 00258 break; 00259 case C_ANY: 00260 SG_INFO( "C_ANY "); 00261 break; 00262 default: 00263 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS"); 00264 } 00265 00266 switch (get_feature_type()) 00267 { 00268 case F_UNKNOWN: 00269 SG_INFO( "F_UNKNOWN \n"); 00270 break; 00271 case F_CHAR: 00272 SG_INFO( "F_CHAR \n"); 00273 break; 00274 case F_BYTE: 00275 SG_INFO( "F_BYTE \n"); 00276 break; 00277 case F_SHORT: 00278 SG_INFO( "F_SHORT \n"); 00279 break; 00280 case F_WORD: 00281 SG_INFO( "F_WORD \n"); 00282 break; 00283 case F_INT: 00284 SG_INFO( "F_INT \n"); 00285 break; 00286 case F_UINT: 00287 SG_INFO( "F_UINT \n"); 00288 break; 00289 case F_LONG: 00290 SG_INFO( "F_LONG \n"); 00291 break; 00292 case F_ULONG: 00293 SG_INFO( "F_ULONG \n"); 00294 break; 00295 case F_SHORTREAL: 00296 SG_INFO( "F_SHORTEAL \n"); 00297 break; 00298 case F_DREAL: 00299 SG_INFO( "F_DREAL \n"); 00300 break; 00301 case F_LONGREAL: 00302 SG_INFO( "F_LONGREAL \n"); 00303 break; 00304 case F_ANY: 00305 SG_INFO( "F_ANY \n"); 00306 break; 00307 default: 00308 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n"); 00309 } 00310 } 00311 00312 00313 void CFeatures::load(CFile* loader) 00314 { 00315 SG_SET_LOCALE_C; 00316 SG_NOTIMPLEMENTED; 00317 SG_RESET_LOCALE; 00318 } 00319 00320 void CFeatures::save(CFile* writer) 00321 { 00322 SG_SET_LOCALE_C; 00323 SG_NOTIMPLEMENTED; 00324 SG_RESET_LOCALE; 00325 } 00326 00327 bool CFeatures::check_feature_compatibility(CFeatures* f) 00328 { 00329 bool result=false; 00330 00331 if (f) 00332 result= ( (this->get_feature_class() == f->get_feature_class()) && 00333 (this->get_feature_type() == f->get_feature_type())); 00334 return result; 00335 } 00336 00337 bool CFeatures::has_property(EFeatureProperty p) 00338 { 00339 return (properties & p) != 0; 00340 } 00341 00342 void CFeatures::set_property(EFeatureProperty p) 00343 { 00344 properties |= p; 00345 } 00346 00347 void CFeatures::unset_property(EFeatureProperty p) 00348 { 00349 properties &= (properties | p) ^ p; 00350 } 00351 00352 void CFeatures::set_subset(CSubset* subset) 00353 { 00354 SG_UNREF(m_subset); 00355 m_subset=subset; 00356 SG_REF(subset); 00357 subset_changed_post(); 00358 } 00359 00360 index_t CFeatures::subset_idx_conversion(index_t idx) const 00361 { 00362 return m_subset ? m_subset->subset_idx_conversion(idx) : idx; 00363 } 00364 00365 bool CFeatures::has_subset() const 00366 { 00367 return m_subset!=NULL; 00368 } 00369 00370 void CFeatures::remove_subset() 00371 { 00372 set_subset(NULL); 00373 } 00374 00375 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices) 00376 { 00377 SG_ERROR("copy_subset and therefore model storage of CMachine " 00378 "(required for cross-validation and model-selection is ", 00379 "not yet implemented for feature type %s\n", get_name()); 00380 return NULL; 00381 }