SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/ui/GUIPreprocessor.h> 00013 #include <shogun/ui/SGInterface.h> 00014 00015 #include <shogun/lib/config.h> 00016 #include <shogun/io/SGIO.h> 00017 #include <shogun/lib/config.h> 00018 #include <shogun/preprocessor/LogPlusOne.h> 00019 #include <shogun/preprocessor/NormOne.h> 00020 #include <shogun/preprocessor/PruneVarSubMean.h> 00021 #include <shogun/preprocessor/PCA.h> 00022 #include <shogun/preprocessor/DecompressString.h> 00023 #include <shogun/preprocessor/SortWordString.h> 00024 #include <shogun/preprocessor/SortUlongString.h> 00025 #include <shogun/features/RealFileFeatures.h> 00026 #include <shogun/features/TOPFeatures.h> 00027 #include <shogun/features/FKFeatures.h> 00028 #include <shogun/features/StringFeatures.h> 00029 #include <shogun/features/SimpleFeatures.h> 00030 #include <shogun/features/SparseFeatures.h> 00031 #include <shogun/features/CombinedFeatures.h> 00032 #include <shogun/features/Features.h> 00033 00034 #include <string.h> 00035 #include <stdio.h> 00036 00037 using namespace shogun; 00038 00039 CGUIPreprocessor::CGUIPreprocessor(CSGInterface* ui_) 00040 : CSGObject(), ui(ui_) 00041 { 00042 preprocs=new CList(true); 00043 } 00044 00045 CGUIPreprocessor::~CGUIPreprocessor() 00046 { 00047 SG_UNREF(preprocs); 00048 } 00049 00050 CPreprocessor* CGUIPreprocessor::create_prunevarsubmean(bool divide_by_std) 00051 { 00052 CPreprocessor* preproc=new CPruneVarSubMean(divide_by_std); 00053 00054 if (preproc) 00055 SG_INFO("PRUNEVARSUBMEAN created (%p), divide_by_std %d", preproc, divide_by_std); 00056 else 00057 SG_ERROR("Could not create preproc PRUNEVARSUBMEAN, divide_by_std %d", divide_by_std); 00058 00059 return preproc; 00060 } 00061 00062 CPreprocessor* CGUIPreprocessor::create_pca(bool do_whitening, float64_t threshold) 00063 { 00064 #ifdef HAVE_LAPACK 00065 CPreprocessor* preproc=new CPCA(do_whitening, THRESHOLD, threshold); 00066 00067 if (preproc) 00068 SG_INFO("PCA created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold); 00069 else 00070 SG_ERROR("Could not create preproc PCA, do_whitening %i threshold %e", do_whitening, threshold); 00071 00072 return preproc; 00073 #else //HAVE_LAPACK 00074 SG_ERROR("Could not create preproc PCA - lapack not available at compile time\n"); 00075 return NULL; 00076 #endif //HAVE_LAPACK 00077 } 00078 00079 CPreprocessor* CGUIPreprocessor::create_generic(EPreprocessorType type) 00080 { 00081 CPreprocessor* preproc=NULL; 00082 00083 switch (type) 00084 { 00085 case P_NORMONE: 00086 preproc=new CNormOne(); break; 00087 case P_LOGPLUSONE: 00088 preproc=new CLogPlusOne(); break; 00089 case P_SORTWORDSTRING: 00090 preproc=new CSortWordString(); break; 00091 case P_SORTULONGSTRING: 00092 preproc=new CSortUlongString(); break; 00093 case P_DECOMPRESSCHARSTRING: 00094 preproc=new CDecompressString<char>(LZO); break; 00095 default: 00096 SG_ERROR("Unknown Preprocessor type %d\n", type); 00097 } 00098 00099 if (preproc) 00100 SG_INFO("Preproc of type %d created (%p).\n", type, preproc); 00101 else 00102 SG_ERROR("Could not create preproc of type %d.\n", type); 00103 00104 return preproc; 00105 } 00106 00107 bool CGUIPreprocessor::add_preproc(CPreprocessor* preproc) 00108 { 00109 return preprocs->append_element_at_listend(preproc); 00110 } 00111 00112 bool CGUIPreprocessor::clean_preproc() 00113 { 00114 SG_UNREF(preprocs); 00115 preprocs=new CList(true); 00116 return (preprocs!=NULL); 00117 } 00118 00119 bool CGUIPreprocessor::del_preproc() 00120 { 00121 SG_INFO("Deleting preproc %i/(%i).\n", preprocs->get_num_elements()-1, preprocs->get_num_elements()); 00122 00123 CSGObject* preproc=preprocs->delete_element(); 00124 SG_UNREF(preproc); 00125 00126 return (preproc!=NULL); 00127 } 00128 00129 bool CGUIPreprocessor::attach_preproc(char* target, bool do_force) 00130 { 00131 bool result=false; 00132 00133 if (strncmp(target, "TRAIN", 5)==0) 00134 { 00135 CFeatures* f=ui->ui_features->get_train_features(); 00136 if (!f) 00137 SG_ERROR("No train features assigned!\n"); 00138 00139 if (f->get_feature_class()==C_COMBINED) 00140 f=((CCombinedFeatures*)f)->get_last_feature_obj(); 00141 00142 preprocess_features(f, NULL, do_force); 00143 ui->ui_features->invalidate_train(); 00144 result=true; 00145 } 00146 else if (strncmp(target, "TEST", 4)==0) 00147 { 00148 CFeatures* f_test=ui->ui_features->get_test_features(); 00149 if (!f_test) 00150 SG_ERROR("No test features assigned!\n"); 00151 00152 CFeatures* f_train=ui->ui_features->get_train_features(); 00153 if (!f_train) 00154 SG_ERROR("No train features assigned!\n"); 00155 00156 EFeatureClass fclass_train=f_train->get_feature_class(); 00157 EFeatureClass fclass_test=f_test->get_feature_class(); 00158 00159 if (fclass_train==fclass_test) 00160 { 00161 if (fclass_train==C_COMBINED) 00162 { 00163 if (((CCombinedFeatures*) f_train)->check_feature_obj_compatibility((CCombinedFeatures*) f_test)) 00164 { 00165 //preprocess the last test feature obj 00166 CFeatures* te_feat=((CCombinedFeatures*) f_test)->get_first_feature_obj(); 00167 CFeatures* tr_feat=((CCombinedFeatures*) f_train)->get_first_feature_obj(); 00168 00169 int32_t num_combined=((CCombinedFeatures*) f_test)->get_num_feature_obj(); 00170 ASSERT(((CCombinedFeatures*) f_train)->get_num_feature_obj()==num_combined); 00171 00172 if (!(num_combined && tr_feat && te_feat)) 00173 SG_ERROR("One of the combined features has no sub-features ?!\n"); 00174 00175 SG_INFO("BEGIN PREPROCESSING COMBINED FEATURES (%d sub-featureobjects).\n", num_combined); 00176 00177 int32_t n=0; 00178 while (n<num_combined && tr_feat && te_feat) 00179 { 00180 // and preprocess using that one 00181 SG_INFO("TRAIN "); 00182 tr_feat->list_feature_obj(); 00183 SG_INFO("TEST "); 00184 te_feat->list_feature_obj(); 00185 preprocess_features(tr_feat, te_feat, do_force); 00186 tr_feat=((CCombinedFeatures*) f_train)->get_next_feature_obj(); 00187 te_feat=((CCombinedFeatures*) f_test)->get_next_feature_obj(); 00188 n++; 00189 } 00190 ASSERT(n==num_combined); 00191 result=true; 00192 SG_INFO( "END PREPROCESSING COMBINED FEATURES\n"); 00193 } 00194 else 00195 SG_ERROR( "combined features not compatible\n"); 00196 } 00197 else 00198 { 00199 preprocess_features(f_train, f_test, do_force); 00200 ui->ui_features->invalidate_test(); 00201 result=true; 00202 } 00203 } 00204 else 00205 SG_ERROR("Features not compatible.\n"); 00206 } 00207 else 00208 SG_ERROR("Features not correctly assigned!\n"); 00209 00211 if (result) 00212 clean_preproc(); 00213 00214 return result; 00215 } 00216 00217 bool CGUIPreprocessor::preprocess_features(CFeatures* trainfeat, CFeatures* testfeat, bool force) 00218 { 00219 if (trainfeat) 00220 { 00221 if (testfeat) 00222 { 00223 // if we don't have a preproc for trainfeatures we 00224 // don't need a preproc for test features 00225 SG_DEBUG( "%d preprocessors attached to train features %d to test features\n", trainfeat->get_num_preprocessors(), testfeat->get_num_preprocessors()); 00226 00227 if (trainfeat->get_num_preprocessors() < testfeat->get_num_preprocessors()) 00228 { 00229 SG_ERROR( "more preprocessors attached to test features than to train features\n"); 00230 return false; 00231 } 00232 00233 if (trainfeat->get_num_preprocessors() && (trainfeat->get_num_preprocessors() > testfeat->get_num_preprocessors())) 00234 { 00235 for (int32_t i=0; i<trainfeat->get_num_preprocessors(); i++) 00236 { 00237 CPreprocessor* preproc = trainfeat->get_preprocessor(i); 00238 preproc->init(trainfeat); 00239 testfeat->add_preprocessor(preproc); 00240 SG_UNREF(preproc); 00241 } 00242 00243 preproc_all_features(testfeat, force); 00244 } 00245 } 00246 else 00247 { 00248 CPreprocessor* preproc = (CPreprocessor*) preprocs->get_first_element(); 00249 00250 if (preproc) 00251 { 00252 preproc->init(trainfeat); 00253 trainfeat->add_preprocessor(preproc); 00254 00255 preproc_all_features(trainfeat, force); 00256 SG_UNREF(preproc); 00257 } 00258 00259 while ( (preproc = (CPreprocessor*) preprocs->get_next_element()) !=NULL ) 00260 { 00261 preproc->init(trainfeat); 00262 trainfeat->add_preprocessor(preproc); 00263 SG_UNREF(preproc); 00264 00265 preproc_all_features(trainfeat, force); 00266 } 00267 } 00268 00269 return true; 00270 } 00271 else 00272 SG_ERROR( "no features for preprocessing available!\n"); 00273 00274 return false; 00275 } 00276 00277 bool CGUIPreprocessor::preproc_all_features(CFeatures* f, bool force) 00278 { 00279 switch (f->get_feature_class()) 00280 { 00281 case C_SIMPLE: 00282 switch (f->get_feature_type()) 00283 { 00284 case F_DREAL: 00285 return ((CSimpleFeatures<float64_t>*) f)->apply_preprocessor(force); 00286 case F_SHORT: 00287 return ((CSimpleFeatures<int16_t>*) f)->apply_preprocessor(force); 00288 case F_WORD: 00289 return ((CSimpleFeatures<uint16_t>*) f)->apply_preprocessor(force); 00290 case F_CHAR: 00291 return ((CSimpleFeatures<char>*) f)->apply_preprocessor(force); 00292 case F_BYTE: 00293 return ((CSimpleFeatures<uint8_t>*) f)->apply_preprocessor(force); 00294 default: 00295 SG_NOTIMPLEMENTED; 00296 } 00297 break; 00298 case C_STRING: 00299 switch (f->get_feature_type()) 00300 { 00301 case F_WORD: 00302 return ((CStringFeatures<uint16_t>*) f)->apply_preprocessor(force); 00303 case F_ULONG: 00304 return ((CStringFeatures<uint64_t>*) f)->apply_preprocessor(force); 00305 default: 00306 SG_NOTIMPLEMENTED; 00307 } 00308 break; 00309 case C_SPARSE: 00310 switch (f->get_feature_type()) 00311 { 00312 case F_DREAL: 00313 return ((CSparseFeatures<float64_t>*) f)->apply_preprocessor(force); 00314 default: 00315 SG_NOTIMPLEMENTED; 00316 }; 00317 break; 00318 case C_COMBINED: 00319 SG_ERROR( "Combined feature objects cannot be preprocessed. Only its sub-feature objects!\n"); 00320 break; 00321 default: 00322 SG_NOTIMPLEMENTED; 00323 } 00324 00325 return false; 00326 }