SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include <shogun/ui/GUIClassifier.h> 00012 #include <shogun/ui/SGInterface.h> 00013 00014 #include <shogun/lib/config.h> 00015 #include <shogun/io/SGIO.h> 00016 00017 #include <shogun/features/SparseFeatures.h> 00018 #include <shogun/features/RealFileFeatures.h> 00019 #include <shogun/features/Labels.h> 00020 00021 #include <shogun/kernel/AUCKernel.h> 00022 00023 #include <shogun/classifier/KNN.h> 00024 #include <shogun/clustering/KMeans.h> 00025 #include <shogun/clustering/Hierarchical.h> 00026 #include <shogun/classifier/PluginEstimate.h> 00027 00028 #include <shogun/classifier/LDA.h> 00029 #include <shogun/classifier/LPM.h> 00030 #include <shogun/classifier/LPBoost.h> 00031 #include <shogun/classifier/Perceptron.h> 00032 00033 #include <shogun/machine/LinearMachine.h> 00034 00035 00036 00037 #include <shogun/classifier/mkl/MKLClassification.h> 00038 #include <shogun/regression/svr/MKLRegression.h> 00039 #include <shogun/classifier/mkl/MKLOneClass.h> 00040 #include <shogun/classifier/mkl/MKLMultiClass.h> 00041 #include <shogun/classifier/svm/LibSVM.h> 00042 #include <shogun/classifier/svm/LaRank.h> 00043 #include <shogun/classifier/svm/GPBTSVM.h> 00044 #include <shogun/classifier/svm/LibSVMOneClass.h> 00045 #include <shogun/classifier/svm/LibSVMMultiClass.h> 00046 00047 #include <shogun/regression/svr/LibSVR.h> 00048 #include <shogun/regression/KRR.h> 00049 00050 #include <shogun/classifier/svm/LibLinear.h> 00051 #include <shogun/classifier/svm/MPDSVM.h> 00052 #include <shogun/classifier/svm/GNPPSVM.h> 00053 #include <shogun/classifier/svm/GMNPSVM.h> 00054 #include <shogun/classifier/svm/ScatterSVM.h> 00055 00056 #include <shogun/classifier/svm/SVMLin.h> 00057 #include <shogun/classifier/svm/SubGradientSVM.h> 00058 #include <shogun/classifier/SubGradientLPM.h> 00059 #include <shogun/classifier/svm/SVMOcas.h> 00060 #include <shogun/classifier/svm/SVMSGD.h> 00061 #include <shogun/classifier/svm/WDSVMOcas.h> 00062 00063 using namespace shogun; 00064 00065 CGUIClassifier::CGUIClassifier(CSGInterface* ui_) 00066 : CSGObject(), ui(ui_) 00067 { 00068 constraint_generator=NULL; 00069 classifier=NULL; 00070 max_train_time=0; 00071 00072 // Perceptron parameters 00073 perceptron_learnrate=0.1; 00074 perceptron_maxiter=1000; 00075 00076 // SVM parameters 00077 svm_qpsize=41; 00078 svm_bufsize=3000; 00079 svm_max_qpsize=1000; 00080 mkl_norm=1; 00081 ent_lambda=0; 00082 mkl_block_norm=4; 00083 svm_C1=1; 00084 svm_C2=1; 00085 C_mkl=0; 00086 mkl_use_interleaved=true; 00087 svm_weight_epsilon=1e-5; 00088 svm_epsilon=1e-5; 00089 svm_tube_epsilon=1e-2; 00090 svm_nu=0.5; 00091 svm_use_shrinking = true ; 00092 00093 svm_use_bias = true; 00094 svm_use_batch_computation = true ; 00095 svm_use_linadd = true ; 00096 svm_do_auc_maximization = false ; 00097 00098 // KRR parameters 00099 krr_tau=1; 00100 00101 solver_type=ST_AUTO; 00102 } 00103 00104 CGUIClassifier::~CGUIClassifier() 00105 { 00106 SG_UNREF(classifier); 00107 SG_UNREF(constraint_generator); 00108 } 00109 00110 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d) 00111 { 00112 if (strcmp(name,"LIBSVM_ONECLASS")==0) 00113 { 00114 SG_UNREF(classifier); 00115 classifier = new CLibSVMOneClass(); 00116 SG_INFO("created SVMlibsvm object for oneclass\n"); 00117 } 00118 else if (strcmp(name,"LIBSVM_MULTICLASS")==0) 00119 { 00120 SG_UNREF(classifier); 00121 classifier = new CLibSVMMultiClass(); 00122 SG_INFO("created SVMlibsvm object for multiclass\n"); 00123 } 00124 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0) 00125 { 00126 SG_UNREF(classifier); 00127 classifier= new CLibSVMMultiClass(LIBSVM_NU_SVC); 00128 SG_INFO("created SVMlibsvm object for multiclass\n") ; 00129 } 00130 00131 else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0) 00132 { 00133 SG_UNREF(classifier); 00134 classifier= new CScatterSVM(NO_BIAS_LIBSVM); 00135 SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ; 00136 } 00137 else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0) 00138 { 00139 SG_UNREF(classifier); 00140 classifier= new CScatterSVM(TEST_RULE1); 00141 SG_INFO("created ScatterSVM TESTRULE1 object\n") ; 00142 } 00143 else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0) 00144 { 00145 SG_UNREF(classifier); 00146 classifier= new CScatterSVM(TEST_RULE2); 00147 SG_INFO("created ScatterSVM TESTRULE2 object\n") ; 00148 } 00149 else if (strcmp(name,"LIBSVM_NU")==0) 00150 { 00151 SG_UNREF(classifier); 00152 classifier= new CLibSVM(LIBSVM_NU_SVC); 00153 SG_INFO("created SVMlibsvm object\n") ; 00154 } 00155 else if (strcmp(name,"LIBSVM")==0) 00156 { 00157 SG_UNREF(classifier); 00158 classifier= new CLibSVM(); 00159 SG_INFO("created SVMlibsvm object\n") ; 00160 } 00161 else if (strcmp(name,"LARANK")==0) 00162 { 00163 SG_UNREF(classifier); 00164 classifier= new CLaRank(); 00165 SG_INFO("created LaRank object\n") ; 00166 } 00167 00168 else if (strcmp(name,"GPBTSVM")==0) 00169 { 00170 SG_UNREF(classifier); 00171 classifier= new CGPBTSVM(); 00172 SG_INFO("created GPBT-SVM object\n") ; 00173 } 00174 else if (strcmp(name,"MPDSVM")==0) 00175 { 00176 SG_UNREF(classifier); 00177 classifier= new CMPDSVM(); 00178 SG_INFO("created MPD-SVM object\n") ; 00179 } 00180 else if (strcmp(name,"GNPPSVM")==0) 00181 { 00182 SG_UNREF(classifier); 00183 classifier= new CGNPPSVM(); 00184 SG_INFO("created GNPP-SVM object\n") ; 00185 } 00186 else if (strcmp(name,"GMNPSVM")==0) 00187 { 00188 SG_UNREF(classifier); 00189 classifier= new CGMNPSVM(); 00190 SG_INFO("created GMNP-SVM object\n") ; 00191 } 00192 else if (strcmp(name,"LIBSVR")==0) 00193 { 00194 SG_UNREF(classifier); 00195 classifier= new CLibSVR(); 00196 SG_INFO("created SVRlibsvm object\n") ; 00197 } 00198 #ifdef HAVE_LAPACK 00199 else if (strcmp(name, "KRR")==0) 00200 { 00201 SG_UNREF(classifier); 00202 classifier=new CKRR(krr_tau, ui->ui_kernel->get_kernel(), 00203 ui->ui_labels->get_train_labels()); 00204 SG_INFO("created KRR object %p\n", classifier); 00205 } 00206 #endif //HAVE_LAPACK 00207 else if (strcmp(name,"PERCEPTRON")==0) 00208 { 00209 SG_UNREF(classifier); 00210 classifier= new CPerceptron(); 00211 SG_INFO("created Perceptron object\n") ; 00212 } 00213 #ifdef HAVE_LAPACK 00214 else if (strncmp(name,"LIBLINEAR",9)==0) 00215 { 00216 LIBLINEAR_SOLVER_TYPE st=L2R_LR; 00217 00218 if (strcmp(name,"LIBLINEAR_L2R_LR")==0) 00219 { 00220 st=L2R_LR; 00221 SG_INFO("created LibLinear l2 regularized logistic regression object\n") ; 00222 } 00223 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0) 00224 { 00225 st=L2R_L2LOSS_SVC_DUAL; 00226 SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ; 00227 } 00228 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0) 00229 { 00230 st=L2R_L2LOSS_SVC; 00231 SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ; 00232 } 00233 else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0) 00234 { 00235 st=L1R_L2LOSS_SVC; 00236 SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ; 00237 } 00238 else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0) 00239 { 00240 st=L2R_L1LOSS_SVC_DUAL; 00241 SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ; 00242 } 00243 else 00244 SG_ERROR("unknown liblinear type\n"); 00245 00246 SG_UNREF(classifier); 00247 classifier= new CLibLinear(st); 00248 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00249 ((CLibLinear*) classifier)->set_epsilon(svm_epsilon); 00250 ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias); 00251 } 00252 else if (strcmp(name,"LDA")==0) 00253 { 00254 SG_UNREF(classifier); 00255 classifier= new CLDA(); 00256 SG_INFO("created LDA object\n") ; 00257 } 00258 #endif //HAVE_LAPACK 00259 #ifdef USE_CPLEX 00260 else if (strcmp(name,"LPM")==0) 00261 { 00262 SG_UNREF(classifier); 00263 classifier= new CLPM(); 00264 ((CLPM*) classifier)->set_C(svm_C1, svm_C2); 00265 ((CLPM*) classifier)->set_epsilon(svm_epsilon); 00266 ((CLPM*) classifier)->set_bias_enabled(svm_use_bias); 00267 ((CLPM*) classifier)->set_max_train_time(max_train_time); 00268 SG_INFO("created LPM object\n") ; 00269 } 00270 else if (strcmp(name,"LPBOOST")==0) 00271 { 00272 SG_UNREF(classifier); 00273 classifier= new CLPBoost(); 00274 ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2); 00275 ((CLPBoost*) classifier)->set_epsilon(svm_epsilon); 00276 ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias); 00277 ((CLPBoost*) classifier)->set_max_train_time(max_train_time); 00278 SG_INFO("created LPBoost object\n") ; 00279 } 00280 else if (strcmp(name,"SUBGRADIENTLPM")==0) 00281 { 00282 SG_UNREF(classifier); 00283 classifier= new CSubGradientLPM(); 00284 00285 ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias); 00286 ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize); 00287 ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize); 00288 ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2); 00289 ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon); 00290 ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time); 00291 SG_INFO("created Subgradient LPM object\n") ; 00292 } 00293 #endif //USE_CPLEX 00294 else if (strncmp(name,"KNN", strlen("KNN"))==0) 00295 { 00296 SG_UNREF(classifier); 00297 classifier= new CKNN(); 00298 SG_INFO("created KNN object\n") ; 00299 } 00300 else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0) 00301 { 00302 SG_UNREF(classifier); 00303 classifier= new CKMeans(); 00304 SG_INFO("created KMeans object\n") ; 00305 } 00306 else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0) 00307 { 00308 SG_UNREF(classifier); 00309 classifier= new CHierarchical(); 00310 SG_INFO("created Hierarchical clustering object\n") ; 00311 } 00312 else if (strcmp(name,"SVMLIN")==0) 00313 { 00314 SG_UNREF(classifier); 00315 classifier= new CSVMLin(); 00316 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00317 ((CSVMLin*) classifier)->set_epsilon(svm_epsilon); 00318 ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias); 00319 SG_INFO("created SVMLin object\n") ; 00320 } 00321 else if (strcmp(name,"SUBGRADIENTSVM")==0) 00322 { 00323 SG_UNREF(classifier); 00324 classifier= new CSubGradientSVM(); 00325 00326 ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias); 00327 ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize); 00328 ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize); 00329 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2); 00330 ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon); 00331 ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time); 00332 SG_INFO("created Subgradient SVM object\n") ; 00333 } 00334 else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0) 00335 { 00336 SG_UNREF(classifier); 00337 classifier= new CWDSVMOcas(SVM_OCAS); 00338 00339 ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00340 ((CWDSVMOcas*) classifier)->set_degree(d, from_d); 00341 ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00342 ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00343 ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00344 SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ; 00345 } 00346 else if (strcmp(name,"SVMOCAS")==0) 00347 { 00348 SG_UNREF(classifier); 00349 classifier= new CSVMOcas(SVM_OCAS); 00350 00351 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00352 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00353 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00354 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00355 SG_INFO("created SVM Ocas(OCAS) object\n") ; 00356 } 00357 else if (strcmp(name,"SVMSGD")==0) 00358 { 00359 SG_UNREF(classifier); 00360 classifier= new CSVMSGD(svm_C1); 00361 ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias); 00362 SG_INFO("created SVM SGD object\n") ; 00363 } 00364 else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0)) 00365 { 00366 SG_UNREF(classifier); 00367 classifier= new CSVMOcas(SVM_BMRM); 00368 00369 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00370 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00371 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00372 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00373 SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ; 00374 } 00375 else if (strcmp(name,"MKL_CLASSIFICATION")==0) 00376 { 00377 SG_UNREF(classifier); 00378 classifier= new CMKLClassification(); 00379 } 00380 //else if (strcmp(name,"MKL_MULTICLASS")==0) 00381 //{ 00382 // SG_UNREF(classifier); 00383 // classifier= new CMKLClassification(); 00384 //} 00385 else if (strcmp(name,"MKL_ONECLASS")==0) 00386 { 00387 SG_UNREF(classifier); 00388 classifier= new CMKLOneClass(); 00389 } 00390 else if (strcmp(name,"MKL_MULTICLASS")==0) 00391 { 00392 SG_UNREF(classifier); 00393 classifier= new CMKLMultiClass(); 00394 } 00395 else if (strcmp(name,"MKL_REGRESSION")==0) 00396 { 00397 SG_UNREF(classifier); 00398 classifier= new CMKLRegression(); 00399 } 00400 else 00401 { 00402 SG_ERROR("Unknown classifier %s.\n", name); 00403 return false; 00404 } 00405 SG_REF(classifier); 00406 00407 return (classifier!=NULL); 00408 } 00409 00410 bool CGUIClassifier::train_mkl_multiclass() 00411 { 00412 CMKLMultiClass* mkl= (CMKLMultiClass*) classifier; 00413 if (!mkl) 00414 SG_ERROR("No MKL available.\n"); 00415 00416 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00417 if (!trainlabels) 00418 SG_ERROR("No trainlabels available.\n"); 00419 00420 CKernel* kernel=ui->ui_kernel->get_kernel(); 00421 if (!kernel) 00422 SG_ERROR("No kernel available.\n"); 00423 00424 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00425 00426 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00427 SG_ERROR("Kernel not initialized / no train features available.\n"); 00428 00429 int32_t num_vec=kernel->get_num_vec_lhs(); 00430 if (trainlabels->get_num_labels() != num_vec) 00431 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00432 00433 SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00434 00435 mkl->set_mkl_epsilon(svm_weight_epsilon); 00436 mkl->set_mkl_norm(mkl_norm); 00437 //mkl->set_max_num_mkliters(-1); 00438 mkl->set_solver_type(solver_type); 00439 mkl->set_bias_enabled(svm_use_bias); 00440 mkl->set_epsilon(svm_epsilon); 00441 mkl->set_max_train_time(max_train_time); 00442 mkl->set_tube_epsilon(svm_tube_epsilon); 00443 mkl->set_nu(svm_nu); 00444 mkl->set_C(svm_C1, svm_C2); 00445 mkl->set_qpsize(svm_qpsize); 00446 mkl->set_shrinking_enabled(svm_use_shrinking); 00447 mkl->set_linadd_enabled(svm_use_linadd); 00448 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00449 00450 ((CKernelMachine*) mkl)->set_labels(trainlabels); 00451 ((CKernelMachine*) mkl)->set_kernel(kernel); 00452 00453 return mkl->train(); 00454 } 00455 00456 bool CGUIClassifier::train_mkl() 00457 { 00458 CMKL* mkl= (CMKL*) classifier; 00459 if (!mkl) 00460 SG_ERROR("No SVM available.\n"); 00461 00462 bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS); 00463 CLabels* trainlabels=NULL; 00464 if(!oneclass) 00465 trainlabels=ui->ui_labels->get_train_labels(); 00466 else 00467 SG_INFO("Training one class mkl.\n"); 00468 if (!trainlabels && !oneclass) 00469 SG_ERROR("No trainlabels available.\n"); 00470 00471 CKernel* kernel=ui->ui_kernel->get_kernel(); 00472 if (!kernel) 00473 SG_ERROR("No kernel available.\n"); 00474 00475 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00476 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00477 SG_ERROR("Kernel not initialized.\n"); 00478 00479 int32_t num_vec=kernel->get_num_vec_lhs(); 00480 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00481 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00482 00483 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00484 00485 if (constraint_generator) 00486 mkl->set_constraint_generator(constraint_generator); 00487 mkl->set_solver_type(solver_type); 00488 mkl->set_bias_enabled(svm_use_bias); 00489 mkl->set_epsilon(svm_epsilon); 00490 mkl->set_max_train_time(max_train_time); 00491 mkl->set_tube_epsilon(svm_tube_epsilon); 00492 mkl->set_nu(svm_nu); 00493 mkl->set_C(svm_C1, svm_C2); 00494 mkl->set_qpsize(svm_qpsize); 00495 mkl->set_shrinking_enabled(svm_use_shrinking); 00496 mkl->set_linadd_enabled(svm_use_linadd); 00497 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00498 mkl->set_mkl_epsilon(svm_weight_epsilon); 00499 mkl->set_mkl_norm(mkl_norm); 00500 mkl->set_elasticnet_lambda(ent_lambda); 00501 mkl->set_mkl_block_norm(mkl_block_norm); 00502 mkl->set_C_mkl(C_mkl); 00503 mkl->set_interleaved_optimization_enabled(mkl_use_interleaved); 00504 00505 if (svm_do_auc_maximization) 00506 { 00507 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00508 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels); 00509 ((CKernelMachine*) mkl)->set_labels(auc_labels); 00510 ((CKernelMachine*) mkl)->set_kernel(auc_kernel); 00511 SG_UNREF(auc_labels); 00512 } 00513 else 00514 { 00515 if(!oneclass) 00516 ((CKernelMachine*) mkl)->set_labels(trainlabels); 00517 ((CKernelMachine*) mkl)->set_kernel(kernel); 00518 } 00519 00520 bool result=mkl->train(); 00521 00522 return result; 00523 } 00524 00525 bool CGUIClassifier::train_svm() 00526 { 00527 CSVM* svm= (CSVM*) classifier; 00528 if (!svm) 00529 SG_ERROR("No SVM available.\n"); 00530 00531 bool oneclass=(svm->get_classifier_type()==CT_LIBSVMONECLASS); 00532 CLabels* trainlabels=NULL; 00533 if(!oneclass) 00534 trainlabels=ui->ui_labels->get_train_labels(); 00535 else 00536 SG_INFO("Training one class svm.\n"); 00537 if (!trainlabels && !oneclass) 00538 SG_ERROR("No trainlabels available.\n"); 00539 00540 CKernel* kernel=ui->ui_kernel->get_kernel(); 00541 if (!kernel) 00542 SG_ERROR("No kernel available.\n"); 00543 00544 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00545 00546 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00547 SG_ERROR("Kernel not initialized / no train features available.\n"); 00548 00549 int32_t num_vec=kernel->get_num_vec_lhs(); 00550 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00551 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00552 00553 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00554 00555 svm->set_solver_type(solver_type); 00556 svm->set_bias_enabled(svm_use_bias); 00557 svm->set_epsilon(svm_epsilon); 00558 svm->set_max_train_time(max_train_time); 00559 svm->set_tube_epsilon(svm_tube_epsilon); 00560 svm->set_nu(svm_nu); 00561 svm->set_C(svm_C1, svm_C2); 00562 svm->set_qpsize(svm_qpsize); 00563 svm->set_shrinking_enabled(svm_use_shrinking); 00564 svm->set_linadd_enabled(svm_use_linadd); 00565 svm->set_batch_computation_enabled(svm_use_batch_computation); 00566 00567 if(svm->get_classifier_type()==CT_MKLMULTICLASS) 00568 { 00569 ((CMKLMultiClass *)svm)->set_mkl_epsilon(svm_weight_epsilon ); 00570 } 00571 00572 if (svm_do_auc_maximization) 00573 { 00574 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00575 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels); 00576 ((CKernelMachine*) svm)->set_labels(auc_labels); 00577 ((CKernelMachine*) svm)->set_kernel(auc_kernel); 00578 SG_UNREF(auc_labels); 00579 } 00580 else 00581 { 00582 if(!oneclass) 00583 ((CKernelMachine*) svm)->set_labels(trainlabels); 00584 ((CKernelMachine*) svm)->set_kernel(kernel); 00585 } 00586 00587 bool result=svm->train(); 00588 00589 return result; 00590 } 00591 00592 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter) 00593 { 00594 bool result=false; 00595 CDistance* distance=ui->ui_distance->get_distance(); 00596 00597 if (!distance) 00598 SG_ERROR("No distance available\n"); 00599 00600 if (!ui->ui_distance->init_distance("TRAIN")) 00601 SG_ERROR("Initializing distance with train features failed.\n"); 00602 00603 ((CDistanceMachine*) classifier)->set_distance(distance); 00604 00605 EClassifierType type=classifier->get_classifier_type(); 00606 switch (type) 00607 { 00608 case CT_KMEANS: 00609 { 00610 ((CKMeans*) classifier)->set_k(k); 00611 ((CKMeans*) classifier)->set_max_iter(max_iter); 00612 result=((CKMeans*) classifier)->train(); 00613 break; 00614 } 00615 case CT_HIERARCHICAL: 00616 { 00617 ((CHierarchical*) classifier)->set_merges(k); 00618 result=((CHierarchical*) classifier)->train(); 00619 break; 00620 } 00621 default: 00622 SG_ERROR("Unknown clustering type %d\n", type); 00623 } 00624 00625 return result; 00626 } 00627 00628 bool CGUIClassifier::train_knn(int32_t k) 00629 { 00630 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00631 CDistance* distance=ui->ui_distance->get_distance(); 00632 00633 bool result=false; 00634 00635 if (trainlabels) 00636 { 00637 if (distance) 00638 { 00639 if (!ui->ui_distance->init_distance("TRAIN")) 00640 SG_ERROR("Initializing distance with train features failed.\n"); 00641 ((CKNN*) classifier)->set_labels(trainlabels); 00642 ((CKNN*) classifier)->set_distance(distance); 00643 ((CKNN*) classifier)->set_k(k); 00644 result=((CKNN*) classifier)->train(); 00645 } 00646 else 00647 SG_ERROR("No distance available.\n"); 00648 } 00649 else 00650 SG_ERROR("No labels available\n"); 00651 00652 return result; 00653 } 00654 00655 bool CGUIClassifier::train_krr() 00656 { 00657 #ifdef HAVE_LAPACK 00658 CKRR* krr= (CKRR*) classifier; 00659 if (!krr) 00660 SG_ERROR("No SVM available.\n"); 00661 00662 CLabels* trainlabels=NULL; 00663 trainlabels=ui->ui_labels->get_train_labels(); 00664 if (!trainlabels) 00665 SG_ERROR("No trainlabels available.\n"); 00666 00667 CKernel* kernel=ui->ui_kernel->get_kernel(); 00668 if (!kernel) 00669 SG_ERROR("No kernel available.\n"); 00670 00671 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00672 00673 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00674 SG_ERROR("Kernel not initialized / no train features available.\n"); 00675 00676 int32_t num_vec=kernel->get_num_vec_lhs(); 00677 if (trainlabels->get_num_labels() != num_vec) 00678 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00679 00680 00681 // Set training labels and kernel 00682 krr->set_labels(trainlabels); 00683 krr->set_kernel(kernel); 00684 00685 bool result=krr->train(); 00686 return result; 00687 #else 00688 return false; 00689 #endif 00690 } 00691 00692 bool CGUIClassifier::train_linear(float64_t gamma) 00693 { 00694 ASSERT(classifier); 00695 EClassifierType ctype = classifier->get_classifier_type(); 00696 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00697 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00698 bool result=false; 00699 00700 if (!trainfeatures) 00701 SG_ERROR("No trainfeatures available.\n"); 00702 00703 if (!trainfeatures->has_property(FP_DOT)) 00704 SG_ERROR("Trainfeatures not based on DotFeatures.\n"); 00705 00706 if (!trainlabels) 00707 SG_ERROR("No labels available\n"); 00708 00709 if (ctype==CT_PERCEPTRON) 00710 { 00711 ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate); 00712 ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter); 00713 } 00714 00715 #ifdef HAVE_LAPACK 00716 if (ctype==CT_LDA) 00717 { 00718 if (trainfeatures->get_feature_type()!=F_DREAL || 00719 trainfeatures->get_feature_class()!=C_SIMPLE) 00720 SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n"); 00721 ((CLDA*) classifier)->set_gamma(gamma); 00722 } 00723 #endif 00724 00725 if (ctype==CT_SVMOCAS) 00726 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00727 #ifdef HAVE_LAPACK 00728 else if (ctype==CT_LIBLINEAR) 00729 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00730 #endif 00731 else if (ctype==CT_SVMLIN) 00732 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00733 else if (ctype==CT_SVMSGD) 00734 ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2); 00735 else if (ctype==CT_SUBGRADIENTSVM) 00736 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2); 00737 00738 else if (ctype==CT_LPM || ctype==CT_LPBOOST) 00739 { 00740 if (trainfeatures->get_feature_class()!=C_SPARSE || 00741 trainfeatures->get_feature_type()!=F_DREAL) 00742 SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n"); 00743 } 00744 00745 ((CLinearMachine*) classifier)->set_labels(trainlabels); 00746 ((CLinearMachine*) classifier)->set_features((CSimpleFeatures<float64_t>*) trainfeatures); 00747 result=((CLinearMachine*) classifier)->train(); 00748 00749 return result; 00750 } 00751 00752 bool CGUIClassifier::train_wdocas() 00753 { 00754 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00755 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00756 00757 bool result=false; 00758 00759 if (!trainfeatures) 00760 SG_ERROR("No trainfeatures available.\n"); 00761 00762 if (trainfeatures->get_feature_class()!=C_STRING || 00763 trainfeatures->get_feature_type()!=F_BYTE ) 00764 SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n"); 00765 00766 if (!trainlabels) 00767 SG_ERROR("No labels available.\n"); 00768 00769 ((CWDSVMOcas*) classifier)->set_labels(trainlabels); 00770 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures); 00771 result=((CWDSVMOcas*) classifier)->train(); 00772 00773 return result; 00774 } 00775 00776 bool CGUIClassifier::load(char* filename, char* type) 00777 { 00778 bool result=false; 00779 00780 if (new_classifier(type)) 00781 { 00782 FILE* model_file=fopen(filename, "r"); 00783 00784 if (model_file) 00785 { 00786 if (classifier && classifier->load(model_file)) 00787 { 00788 SG_DEBUG("file successfully read.\n"); 00789 result=true; 00790 } 00791 else 00792 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename); 00793 00794 fclose(model_file); 00795 } 00796 else 00797 SG_ERROR("Opening file %s failed.\n", filename); 00798 00799 return result; 00800 } 00801 else 00802 SG_ERROR("Type %s of SVM/Classifier unknown.\n", type); 00803 00804 return false; 00805 } 00806 00807 bool CGUIClassifier::save(char* param) 00808 { 00809 bool result=false; 00810 param=SGIO::skip_spaces(param); 00811 00812 if (classifier) 00813 { 00814 FILE* file=fopen(param, "w"); 00815 00816 if ((!file) || (!classifier->save(file))) 00817 printf("writing to file %s failed!\n", param); 00818 else 00819 { 00820 printf("successfully written classifier into \"%s\" !\n", param); 00821 result=true; 00822 } 00823 00824 if (file) 00825 fclose(file); 00826 } 00827 else 00828 SG_ERROR("create classifier first\n"); 00829 00830 return result; 00831 } 00832 00833 bool CGUIClassifier::set_perceptron_parameters( 00834 float64_t learnrate, int32_t maxiter) 00835 { 00836 if (learnrate<=0) 00837 perceptron_learnrate=0.01; 00838 else 00839 perceptron_learnrate=learnrate; 00840 00841 if (maxiter<=0) 00842 perceptron_maxiter=1000; 00843 else 00844 perceptron_maxiter=maxiter; 00845 SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter); 00846 00847 return true; 00848 } 00849 00850 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon) 00851 { 00852 if (epsilon<0) 00853 svm_epsilon=1e-4; 00854 else 00855 svm_epsilon=epsilon; 00856 SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon); 00857 00858 return true; 00859 } 00860 00861 bool CGUIClassifier::set_max_train_time(float64_t max) 00862 { 00863 if (max>0) 00864 { 00865 max_train_time=max; 00866 SG_INFO("Set to max_train_time=%f.\n", max_train_time); 00867 } 00868 else 00869 SG_INFO("Disabling max_train_time.\n"); 00870 00871 return true; 00872 } 00873 00874 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon) 00875 { 00876 if (!classifier) 00877 SG_ERROR("No regression method allocated\n"); 00878 00879 if (classifier->get_classifier_type() != CT_LIBSVR && 00880 classifier->get_classifier_type() != CT_SVRLIGHT && 00881 classifier->get_classifier_type() != CT_MKLREGRESSION ) 00882 { 00883 SG_ERROR("Underlying method not capable of SV-regression\n"); 00884 } 00885 00886 if (tube_epsilon<0) 00887 svm_tube_epsilon=1e-2; 00888 svm_tube_epsilon=tube_epsilon; 00889 00890 ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon); 00891 SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon); 00892 00893 return true; 00894 } 00895 00896 bool CGUIClassifier::set_svm_nu(float64_t nu) 00897 { 00898 if (nu<0 || nu>1) 00899 nu=0.5; 00900 00901 svm_nu=nu; 00902 SG_INFO("Set to nu=%f.\n", svm_nu); 00903 00904 return true; 00905 } 00906 00907 bool CGUIClassifier::set_svm_mkl_parameters( 00908 float64_t weight_epsilon, float64_t C, float64_t norm) 00909 { 00910 if (weight_epsilon<0) 00911 weight_epsilon=1e-4; 00912 if (C<0) 00913 C=0; 00914 if (norm<0) 00915 SG_ERROR("MKL norm >= 0\n"); 00916 00917 svm_weight_epsilon=weight_epsilon; 00918 C_mkl=C; 00919 mkl_norm=norm; 00920 00921 SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon); 00922 SG_INFO("Set to C_mkl=%f.\n", C_mkl); 00923 SG_INFO("Set to mkl_norm=%f.\n", mkl_norm); 00924 00925 return true; 00926 } 00927 00928 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda) 00929 { 00930 if (lambda<0 || lambda>1) 00931 SG_ERROR("0 <= ent_lambda <= 1\n"); 00932 00933 ent_lambda = lambda; 00934 return true; 00935 } 00936 00937 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm) 00938 { 00939 if (mkl_bnorm<1) 00940 SG_ERROR("1 <= mkl_block_norm <= inf\n"); 00941 00942 mkl_block_norm=mkl_bnorm; 00943 return true; 00944 } 00945 00946 00947 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2) 00948 { 00949 if (C1<0) 00950 svm_C1=1.0; 00951 else 00952 svm_C1=C1; 00953 00954 if (C2<0) 00955 svm_C2=svm_C1; 00956 else 00957 svm_C2=C2; 00958 00959 SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2); 00960 00961 return true; 00962 } 00963 00964 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize) 00965 { 00966 if (qpsize<2) 00967 svm_qpsize=41; 00968 else 00969 svm_qpsize=qpsize; 00970 SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize); 00971 00972 return true; 00973 } 00974 00975 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize) 00976 { 00977 if (max_qpsize<50) 00978 svm_max_qpsize=50; 00979 else 00980 svm_max_qpsize=max_qpsize; 00981 SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize); 00982 00983 return true; 00984 } 00985 00986 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize) 00987 { 00988 if (svm_bufsize<0) 00989 svm_bufsize=3000; 00990 else 00991 svm_bufsize=bufsize; 00992 SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize); 00993 00994 return true ; 00995 } 00996 00997 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled) 00998 { 00999 svm_use_shrinking=enabled; 01000 if (svm_use_shrinking) 01001 SG_INFO("Enabling shrinking optimization.\n"); 01002 else 01003 SG_INFO("Disabling shrinking optimization.\n"); 01004 01005 return true; 01006 } 01007 01008 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled) 01009 { 01010 svm_use_batch_computation=enabled; 01011 if (svm_use_batch_computation) 01012 SG_INFO("Enabling batch computation.\n"); 01013 else 01014 SG_INFO("Disabling batch computation.\n"); 01015 01016 return true; 01017 } 01018 01019 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled) 01020 { 01021 svm_use_linadd=enabled; 01022 if (svm_use_linadd) 01023 SG_INFO("Enabling LINADD optimization.\n"); 01024 else 01025 SG_INFO("Disabling LINADD optimization.\n"); 01026 01027 return true; 01028 } 01029 01030 bool CGUIClassifier::set_svm_bias_enabled(bool enabled) 01031 { 01032 svm_use_bias=enabled; 01033 if (svm_use_bias) 01034 SG_INFO("Enabling svm bias.\n"); 01035 else 01036 SG_INFO("Disabling svm bias.\n"); 01037 01038 return true; 01039 } 01040 01041 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled) 01042 { 01043 mkl_use_interleaved=enabled; 01044 if (mkl_use_interleaved) 01045 SG_INFO("Enabling mkl interleaved optimization.\n"); 01046 else 01047 SG_INFO("Disabling mkl interleaved optimization.\n"); 01048 01049 return true; 01050 } 01051 01052 bool CGUIClassifier::set_do_auc_maximization(bool do_auc) 01053 { 01054 svm_do_auc_maximization=do_auc; 01055 01056 if (svm_do_auc_maximization) 01057 SG_INFO("Enabling AUC maximization.\n"); 01058 else 01059 SG_INFO("Disabling AUC maximization.\n"); 01060 01061 return true; 01062 } 01063 01064 01065 CLabels* CGUIClassifier::classify() 01066 { 01067 ASSERT(classifier); 01068 01069 switch (classifier->get_classifier_type()) 01070 { 01071 case CT_LIGHT: 01072 case CT_LIGHTONECLASS: 01073 case CT_LIBSVM: 01074 case CT_SCATTERSVM: 01075 case CT_MPD: 01076 case CT_GPBT: 01077 case CT_CPLEXSVM: 01078 case CT_GMNPSVM: 01079 case CT_GNPPSVM: 01080 case CT_LIBSVR: 01081 case CT_LIBSVMMULTICLASS: 01082 case CT_LIBSVMONECLASS: 01083 case CT_SVRLIGHT: 01084 case CT_MKLCLASSIFICATION: 01085 case CT_MKLMULTICLASS: 01086 case CT_MKLREGRESSION: 01087 case CT_MKLONECLASS: 01088 case CT_KRR: 01089 return classify_kernelmachine(); 01090 case CT_KNN: 01091 return classify_distancemachine(); 01092 case CT_PERCEPTRON: 01093 case CT_LDA: 01094 return classify_linear(); 01095 case CT_SVMLIN: 01096 case CT_SVMPERF: 01097 case CT_SUBGRADIENTSVM: 01098 case CT_SVMOCAS: 01099 case CT_SVMSGD: 01100 case CT_LPM: 01101 case CT_LPBOOST: 01102 case CT_SUBGRADIENTLPM: 01103 case CT_LIBLINEAR: 01104 return classify_linear(); 01105 case CT_WDSVMOCAS: 01106 return classify_byte_linear(); 01107 default: 01108 SG_ERROR("unknown classifier type\n"); 01109 break; 01110 }; 01111 01112 return false; 01113 } 01114 01115 CLabels* CGUIClassifier::classify_kernelmachine() 01116 { 01117 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01118 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01119 01120 if (!classifier) 01121 SG_ERROR("No kernelmachine available.\n"); 01122 01123 bool success=true; 01124 01125 if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM) 01126 { 01127 if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED 01128 && ( !trainfeatures || !testfeatures )) 01129 { 01130 SG_DEBUG("skipping initialisation of combined kernel " 01131 "as train/test features are unavailable\n"); 01132 } 01133 else 01134 { 01135 if (!trainfeatures) 01136 SG_ERROR("No training features available.\n"); 01137 if (!testfeatures) 01138 SG_ERROR("No test features available.\n"); 01139 01140 success=ui->ui_kernel->init_kernel("TEST"); 01141 } 01142 } 01143 01144 if (!success || !ui->ui_kernel->is_initialized()) 01145 SG_ERROR("Kernel not initialized.\n"); 01146 01147 CKernelMachine* km=(CKernelMachine*) classifier; 01148 km->set_kernel(ui->ui_kernel->get_kernel()); 01149 km->set_batch_computation_enabled(svm_use_batch_computation); 01150 01151 SG_INFO("Starting kernel machine testing.\n"); 01152 return classifier->apply(); 01153 } 01154 01155 bool CGUIClassifier::get_trained_classifier( 01156 float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias, 01157 int32_t& brows, int32_t& bcols, 01158 int32_t idx) // which SVM for MultiClass 01159 { 01160 ASSERT(classifier); 01161 01162 switch (classifier->get_classifier_type()) 01163 { 01164 case CT_SCATTERSVM: 01165 case CT_GNPPSVM: 01166 case CT_LIBSVMMULTICLASS: 01167 case CT_LIGHT: 01168 case CT_LIGHTONECLASS: 01169 case CT_LIBSVM: 01170 case CT_MPD: 01171 case CT_GPBT: 01172 case CT_CPLEXSVM: 01173 case CT_GMNPSVM: 01174 case CT_LIBSVR: 01175 case CT_LIBSVMONECLASS: 01176 case CT_SVRLIGHT: 01177 case CT_MKLCLASSIFICATION: 01178 case CT_MKLREGRESSION: 01179 case CT_MKLONECLASS: 01180 case CT_MKLMULTICLASS: 01181 case CT_KRR: 01182 return get_svm(weights, rows, cols, bias, brows, bcols, idx); 01183 break; 01184 case CT_PERCEPTRON: 01185 case CT_LDA: 01186 case CT_LPM: 01187 case CT_LPBOOST: 01188 case CT_SUBGRADIENTLPM: 01189 case CT_SVMOCAS: 01190 case CT_SVMSGD: 01191 case CT_SVMLIN: 01192 case CT_SVMPERF: 01193 case CT_SUBGRADIENTSVM: 01194 case CT_LIBLINEAR: 01195 return get_linear(weights, rows, cols, bias, brows, bcols); 01196 break; 01197 case CT_KMEANS: 01198 case CT_HIERARCHICAL: 01199 return get_clustering(weights, rows, cols, bias, brows, bcols); 01200 break; 01201 case CT_KNN: 01202 SG_ERROR("not implemented"); 01203 break; 01204 default: 01205 SG_ERROR("unknown classifier type\n"); 01206 break; 01207 }; 01208 return false; 01209 } 01210 01211 01212 int32_t CGUIClassifier::get_num_svms() 01213 { 01214 ASSERT(classifier); 01215 return ((CMultiClassSVM*) classifier)->get_num_svms(); 01216 } 01217 01218 bool CGUIClassifier::get_svm( 01219 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01220 int32_t& brows, int32_t& bcols, int32_t idx) 01221 { 01222 CSVM* svm=(CSVM*) classifier; 01223 01224 if (idx>-1) // should be MultiClassSVM 01225 svm=((CMultiClassSVM*) svm)->get_svm(idx); 01226 01227 if (svm) 01228 { 01229 brows=1; 01230 bcols=1; 01231 bias=SG_MALLOC(float64_t, 1); 01232 *bias=svm->get_bias(); 01233 01234 rows=svm->get_num_support_vectors(); 01235 cols=2; 01236 weights=SG_MALLOC(float64_t, rows*cols); 01237 01238 for (int32_t i=0; i<rows; i++) 01239 { 01240 weights[i]=svm->get_alpha(i); 01241 weights[i+rows]=svm->get_support_vector(i); 01242 } 01243 01244 return true; 01245 } 01246 01247 return false; 01248 } 01249 01250 bool CGUIClassifier::get_clustering( 01251 float64_t* ¢ers, int32_t& rows, int32_t& cols, float64_t*& radi, 01252 int32_t& brows, int32_t& bcols) 01253 { 01254 if (!classifier) 01255 return false; 01256 01257 switch (classifier->get_classifier_type()) 01258 { 01259 case CT_KMEANS: 01260 { 01261 CKMeans* clustering=(CKMeans*) classifier; 01262 01263 bcols=1; 01264 SGVector<float64_t> r=clustering->get_radiuses(); 01265 brows=r.vlen; 01266 radi=SG_MALLOC(float64_t, brows); 01267 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01268 01269 cols=1; 01270 SGMatrix<float64_t> c=clustering->get_cluster_centers(); 01271 rows=c.num_rows; 01272 cols=c.num_cols; 01273 centers=SG_MALLOC(float64_t, rows*cols); 01274 memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols); 01275 break; 01276 } 01277 01278 case CT_HIERARCHICAL: 01279 { 01280 CHierarchical* clustering=(CHierarchical*) classifier; 01281 01282 // radi == merge_distances, centers == pairs 01283 bcols=1; 01284 SGVector<float64_t> r=clustering->get_merge_distances(); 01285 brows=r.vlen; 01286 radi=SG_MALLOC(float64_t, brows); 01287 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01288 01289 SGMatrix<int32_t> p=clustering->get_cluster_pairs(); 01290 rows=p.num_rows; 01291 cols=p.num_cols; 01292 centers=SG_MALLOC(float64_t, rows*cols); 01293 for (int32_t i=0; i<rows*cols; i++) 01294 centers[i]=(float64_t) p.matrix[i]; 01295 01296 break; 01297 } 01298 01299 default: 01300 SG_ERROR("internal error - unknown clustering type\n"); 01301 } 01302 01303 return true; 01304 } 01305 01306 bool CGUIClassifier::get_linear( 01307 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01308 int32_t& brows, int32_t& bcols) 01309 { 01310 CLinearMachine* linear=(CLinearMachine*) classifier; 01311 01312 if (!linear) 01313 return false; 01314 01315 bias=SG_MALLOC(float64_t, 1); 01316 *bias=linear->get_bias(); 01317 brows=1; 01318 bcols=1; 01319 01320 cols=1; 01321 float64_t* w=NULL; 01322 linear->get_w(w, rows); 01323 01324 weights= SG_MALLOC(float64_t, rows); 01325 memcpy(weights, w, sizeof(float64_t)*rows); 01326 01327 return true; 01328 } 01329 01330 CLabels* CGUIClassifier::classify_distancemachine() 01331 { 01332 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01333 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01334 01335 if (!classifier) 01336 { 01337 SG_ERROR("no kernelmachine available\n") ; 01338 return NULL; 01339 } 01340 if (!trainfeatures) 01341 { 01342 SG_ERROR("no training features available\n") ; 01343 return NULL; 01344 } 01345 01346 if (!testfeatures) 01347 { 01348 SG_ERROR("no test features available\n") ; 01349 return NULL; 01350 } 01351 01352 bool success=ui->ui_distance->init_distance("TEST"); 01353 01354 if (!success || !ui->ui_distance->is_initialized()) 01355 { 01356 SG_ERROR("distance not initialized\n") ; 01357 return NULL; 01358 } 01359 01360 ((CDistanceMachine*) classifier)->set_distance( 01361 ui->ui_distance->get_distance()); 01362 SG_INFO("starting distance machine testing\n") ; 01363 return classifier->apply(); 01364 } 01365 01366 01367 CLabels* CGUIClassifier::classify_linear() 01368 { 01369 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01370 01371 if (!classifier) 01372 { 01373 SG_ERROR("no classifier available\n") ; 01374 return NULL; 01375 } 01376 if (!testfeatures) 01377 { 01378 SG_ERROR("no test features available\n") ; 01379 return NULL; 01380 } 01381 if (!(testfeatures->has_property(FP_DOT))) 01382 { 01383 SG_ERROR("testfeatures not based on DotFeatures\n") ; 01384 return false ; 01385 } 01386 01387 ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures); 01388 SG_INFO("starting linear classifier testing\n") ; 01389 return classifier->apply(); 01390 } 01391 01392 CLabels* CGUIClassifier::classify_byte_linear() 01393 { 01394 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01395 01396 if (!classifier) 01397 { 01398 SG_ERROR("no svm available\n") ; 01399 return NULL; 01400 } 01401 if (!testfeatures) 01402 { 01403 SG_ERROR("no test features available\n") ; 01404 return NULL; 01405 } 01406 if (testfeatures->get_feature_class() != C_STRING || 01407 testfeatures->get_feature_type() != F_BYTE ) 01408 { 01409 SG_ERROR("testfeatures not of class STRING type BYTE\n") ; 01410 return false ; 01411 } 01412 01413 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures); 01414 SG_INFO("starting linear classifier testing\n") ; 01415 return classifier->apply(); 01416 } 01417 01418 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result) 01419 { 01420 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01421 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01422 01423 if (!classifier) 01424 { 01425 SG_ERROR("no svm available\n") ; 01426 return false; 01427 } 01428 01429 if (!ui->ui_kernel->is_initialized()) 01430 { 01431 SG_ERROR("kernel not initialized\n") ; 01432 return false; 01433 } 01434 01435 if (!ui->ui_kernel->get_kernel() || 01436 !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM) 01437 { 01438 if (!trainfeatures) 01439 { 01440 SG_ERROR("no training features available\n") ; 01441 return false; 01442 } 01443 01444 if (!testfeatures) 01445 { 01446 SG_ERROR("no test features available\n") ; 01447 return false; 01448 } 01449 } 01450 01451 ((CKernelMachine*) classifier)->set_kernel( 01452 ui->ui_kernel->get_kernel()); 01453 01454 result=classifier->apply(idx); 01455 return true ; 01456 } 01457 01458 01459 bool CGUIClassifier::set_krr_tau(float64_t tau) 01460 { 01461 #ifdef HAVE_LAPACK 01462 krr_tau=tau; 01463 ((CKRR*) classifier)->set_tau(krr_tau); 01464 SG_INFO("Set to krr_tau=%f.\n", krr_tau); 01465 01466 return true; 01467 #else 01468 return false; 01469 #endif 01470 } 01471 01472 bool CGUIClassifier::set_solver(char* solver) 01473 { 01474 ESolverType s=ST_AUTO; 01475 01476 if (strncmp(solver,"NEWTON", 6)==0) 01477 { 01478 SG_INFO("Using NEWTON solver.\n"); 01479 s=ST_NEWTON; 01480 } 01481 else if (strncmp(solver,"DIRECT", 6)==0) 01482 { 01483 SG_INFO("Using DIRECT solver\n"); 01484 s=ST_DIRECT; 01485 } 01486 else if (strncmp(solver,"BLOCK_NORM", 9)==0) 01487 { 01488 SG_INFO("Using BLOCK_NORM solver\n"); 01489 s=ST_BLOCK_NORM; 01490 } 01491 else if (strncmp(solver,"ELASTICNET", 10)==0) 01492 { 01493 SG_INFO("Using ELASTICNET solver\n"); 01494 s=ST_ELASTICNET; 01495 } 01496 else if (strncmp(solver,"AUTO", 4)==0) 01497 { 01498 SG_INFO("Automagically determining solver.\n"); 01499 s=ST_AUTO; 01500 } 01501 #ifdef USE_CPLEX 01502 else if (strncmp(solver, "CPLEX", 5)==0) 01503 { 01504 SG_INFO("USING CPLEX METHOD selected\n"); 01505 s=ST_CPLEX; 01506 } 01507 #endif 01508 #ifdef USE_GLPK 01509 else if (strncmp(solver,"GLPK", 4)==0) 01510 { 01511 SG_INFO("Using GLPK solver\n"); 01512 s=ST_GLPK; 01513 } 01514 #endif 01515 else 01516 SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver); 01517 01518 01519 solver_type=s; 01520 return true; 01521 } 01522 01523 bool CGUIClassifier::set_constraint_generator(char* name) 01524 { 01525 if (strcmp(name,"LIBSVM_ONECLASS")==0) 01526 { 01527 SG_UNREF(constraint_generator); 01528 constraint_generator = new CLibSVMOneClass(); 01529 SG_INFO("created SVMlibsvm object for oneclass\n"); 01530 } 01531 else if (strcmp(name,"LIBSVM_MULTICLASS")==0) 01532 { 01533 SG_UNREF(constraint_generator); 01534 constraint_generator = new CLibSVMMultiClass(); 01535 SG_INFO("created SVMlibsvm object for multiclass\n"); 01536 } 01537 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0) 01538 { 01539 SG_UNREF(constraint_generator); 01540 constraint_generator= new CLibSVMMultiClass(LIBSVM_NU_SVC); 01541 SG_INFO("created SVMlibsvm object for multiclass\n") ; 01542 } 01543 else if (strcmp(name,"SCATTERSVM_RULE1")==0) 01544 { 01545 SG_UNREF(constraint_generator); 01546 constraint_generator= new CScatterSVM(TEST_RULE1); 01547 SG_INFO("created ScatterSVM RULE1 object\n") ; 01548 } 01549 else if (strcmp(name,"SCATTERSVM_RULE2")==0) 01550 { 01551 SG_UNREF(constraint_generator); 01552 constraint_generator= new CScatterSVM(TEST_RULE2); 01553 SG_INFO("created ScatterSVM RULE2 object\n") ; 01554 } 01555 else if (strcmp(name,"LIBSVM_NU")==0) 01556 { 01557 SG_UNREF(constraint_generator); 01558 constraint_generator= new CLibSVM(LIBSVM_NU_SVC); 01559 SG_INFO("created SVMlibsvm object\n") ; 01560 } 01561 else if (strcmp(name,"LIBSVM")==0) 01562 { 01563 SG_UNREF(constraint_generator); 01564 constraint_generator= new CLibSVM(); 01565 SG_INFO("created SVMlibsvm object\n") ; 01566 } 01567 else if (strcmp(name,"LARANK")==0) 01568 { 01569 SG_UNREF(constraint_generator); 01570 constraint_generator= new CLaRank(); 01571 SG_INFO("created LaRank object\n") ; 01572 } 01573 01574 else if (strcmp(name,"GPBTSVM")==0) 01575 { 01576 SG_UNREF(constraint_generator); 01577 constraint_generator= new CGPBTSVM(); 01578 SG_INFO("created GPBT-SVM object\n") ; 01579 } 01580 else if (strcmp(name,"MPDSVM")==0) 01581 { 01582 SG_UNREF(constraint_generator); 01583 constraint_generator= new CMPDSVM(); 01584 SG_INFO("created MPD-SVM object\n") ; 01585 } 01586 else if (strcmp(name,"GNPPSVM")==0) 01587 { 01588 SG_UNREF(constraint_generator); 01589 constraint_generator= new CGNPPSVM(); 01590 SG_INFO("created GNPP-SVM object\n") ; 01591 } 01592 else if (strcmp(name,"GMNPSVM")==0) 01593 { 01594 SG_UNREF(constraint_generator); 01595 constraint_generator= new CGMNPSVM(); 01596 SG_INFO("created GMNP-SVM object\n") ; 01597 } 01598 else if (strcmp(name,"LIBSVR")==0) 01599 { 01600 SG_UNREF(constraint_generator); 01601 constraint_generator= new CLibSVR(); 01602 SG_INFO("created SVRlibsvm object\n") ; 01603 } 01604 else 01605 { 01606 SG_ERROR("Unknown SV-classifier %s.\n", name); 01607 return false; 01608 } 01609 SG_REF(constraint_generator); 01610 01611 return (constraint_generator!=NULL); 01612 }