SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
GUIClassifier.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 #include <shogun/ui/GUIClassifier.h>
00012 #include <shogun/ui/SGInterface.h>
00013 
00014 #include <shogun/lib/config.h>
00015 #include <shogun/io/SGIO.h>
00016 
00017 #include <shogun/features/SparseFeatures.h>
00018 #include <shogun/features/RealFileFeatures.h>
00019 #include <shogun/features/Labels.h>
00020 
00021 #include <shogun/kernel/AUCKernel.h>
00022 
00023 #include <shogun/classifier/KNN.h>
00024 #include <shogun/clustering/KMeans.h>
00025 #include <shogun/clustering/Hierarchical.h>
00026 #include <shogun/classifier/PluginEstimate.h>
00027 
00028 #include <shogun/classifier/LDA.h>
00029 #include <shogun/classifier/LPM.h>
00030 #include <shogun/classifier/LPBoost.h>
00031 #include <shogun/classifier/Perceptron.h>
00032 
00033 #include <shogun/machine/LinearMachine.h>
00034 
00035 
00036 
00037 #include <shogun/classifier/mkl/MKLClassification.h>
00038 #include <shogun/regression/svr/MKLRegression.h>
00039 #include <shogun/classifier/mkl/MKLOneClass.h>
00040 #include <shogun/classifier/mkl/MKLMultiClass.h>
00041 #include <shogun/classifier/svm/LibSVM.h>
00042 #include <shogun/classifier/svm/LaRank.h>
00043 #include <shogun/classifier/svm/GPBTSVM.h>
00044 #include <shogun/classifier/svm/LibSVMOneClass.h>
00045 #include <shogun/classifier/svm/LibSVMMultiClass.h>
00046 
00047 #include <shogun/regression/svr/LibSVR.h>
00048 #include <shogun/regression/KRR.h>
00049 
00050 #include <shogun/classifier/svm/LibLinear.h>
00051 #include <shogun/classifier/svm/MPDSVM.h>
00052 #include <shogun/classifier/svm/GNPPSVM.h>
00053 #include <shogun/classifier/svm/GMNPSVM.h>
00054 #include <shogun/classifier/svm/ScatterSVM.h>
00055 
00056 #include <shogun/classifier/svm/SVMLin.h>
00057 #include <shogun/classifier/svm/SubGradientSVM.h>
00058 #include <shogun/classifier/SubGradientLPM.h>
00059 #include <shogun/classifier/svm/SVMOcas.h>
00060 #include <shogun/classifier/svm/SVMSGD.h>
00061 #include <shogun/classifier/svm/WDSVMOcas.h>
00062 
00063 using namespace shogun;
00064 
00065 CGUIClassifier::CGUIClassifier(CSGInterface* ui_)
00066 : CSGObject(), ui(ui_)
00067 {
00068     constraint_generator=NULL;
00069     classifier=NULL;
00070     max_train_time=0;
00071 
00072     // Perceptron parameters
00073     perceptron_learnrate=0.1;
00074     perceptron_maxiter=1000;
00075 
00076     // SVM parameters
00077     svm_qpsize=41;
00078     svm_bufsize=3000;
00079     svm_max_qpsize=1000;
00080     mkl_norm=1;
00081     ent_lambda=0;
00082     mkl_block_norm=4;
00083     svm_C1=1;
00084     svm_C2=1;
00085     C_mkl=0;
00086     mkl_use_interleaved=true;
00087     svm_weight_epsilon=1e-5;
00088     svm_epsilon=1e-5;
00089     svm_tube_epsilon=1e-2;
00090     svm_nu=0.5;
00091     svm_use_shrinking = true ;
00092 
00093     svm_use_bias = true;
00094     svm_use_batch_computation = true ;
00095     svm_use_linadd = true ;
00096     svm_do_auc_maximization = false ;
00097 
00098     // KRR parameters
00099     krr_tau=1;
00100 
00101     solver_type=ST_AUTO;
00102 }
00103 
00104 CGUIClassifier::~CGUIClassifier()
00105 {
00106     SG_UNREF(classifier);
00107     SG_UNREF(constraint_generator);
00108 }
00109 
00110 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d)
00111 {
00112     if (strcmp(name,"LIBSVM_ONECLASS")==0)
00113     {
00114         SG_UNREF(classifier);
00115         classifier = new CLibSVMOneClass();
00116         SG_INFO("created SVMlibsvm object for oneclass\n");
00117     }
00118     else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
00119     {
00120         SG_UNREF(classifier);
00121         classifier = new CLibSVMMultiClass();
00122         SG_INFO("created SVMlibsvm object for multiclass\n");
00123     }
00124     else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
00125     {
00126         SG_UNREF(classifier);
00127         classifier= new CLibSVMMultiClass(LIBSVM_NU_SVC);
00128         SG_INFO("created SVMlibsvm object for multiclass\n") ;
00129     }
00130 
00131     else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0)
00132     {
00133         SG_UNREF(classifier);
00134         classifier= new CScatterSVM(NO_BIAS_LIBSVM);
00135         SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ;
00136     }
00137     else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0)
00138     {
00139         SG_UNREF(classifier);
00140         classifier= new CScatterSVM(TEST_RULE1);
00141         SG_INFO("created ScatterSVM TESTRULE1 object\n") ;
00142     }
00143     else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0)
00144     {
00145         SG_UNREF(classifier);
00146         classifier= new CScatterSVM(TEST_RULE2);
00147         SG_INFO("created ScatterSVM TESTRULE2 object\n") ;
00148     }
00149     else if (strcmp(name,"LIBSVM_NU")==0)
00150     {
00151         SG_UNREF(classifier);
00152         classifier= new CLibSVM(LIBSVM_NU_SVC);
00153         SG_INFO("created SVMlibsvm object\n") ;
00154     }
00155     else if (strcmp(name,"LIBSVM")==0)
00156     {
00157         SG_UNREF(classifier);
00158         classifier= new CLibSVM();
00159         SG_INFO("created SVMlibsvm object\n") ;
00160     }
00161     else if (strcmp(name,"LARANK")==0)
00162     {
00163         SG_UNREF(classifier);
00164         classifier= new CLaRank();
00165         SG_INFO("created LaRank object\n") ;
00166     }
00167 
00168     else if (strcmp(name,"GPBTSVM")==0)
00169     {
00170         SG_UNREF(classifier);
00171         classifier= new CGPBTSVM();
00172         SG_INFO("created GPBT-SVM object\n") ;
00173     }
00174     else if (strcmp(name,"MPDSVM")==0)
00175     {
00176         SG_UNREF(classifier);
00177         classifier= new CMPDSVM();
00178         SG_INFO("created MPD-SVM object\n") ;
00179     }
00180     else if (strcmp(name,"GNPPSVM")==0)
00181     {
00182         SG_UNREF(classifier);
00183         classifier= new CGNPPSVM();
00184         SG_INFO("created GNPP-SVM object\n") ;
00185     }
00186     else if (strcmp(name,"GMNPSVM")==0)
00187     {
00188         SG_UNREF(classifier);
00189         classifier= new CGMNPSVM();
00190         SG_INFO("created GMNP-SVM object\n") ;
00191     }
00192     else if (strcmp(name,"LIBSVR")==0)
00193     {
00194         SG_UNREF(classifier);
00195         classifier= new CLibSVR();
00196         SG_INFO("created SVRlibsvm object\n") ;
00197     }
00198 #ifdef HAVE_LAPACK
00199     else if (strcmp(name, "KRR")==0)
00200     {
00201         SG_UNREF(classifier);
00202         classifier=new CKRR(krr_tau, ui->ui_kernel->get_kernel(),
00203             ui->ui_labels->get_train_labels());
00204         SG_INFO("created KRR object %p\n", classifier);
00205     }
00206 #endif //HAVE_LAPACK
00207     else if (strcmp(name,"PERCEPTRON")==0)
00208     {
00209         SG_UNREF(classifier);
00210         classifier= new CPerceptron();
00211         SG_INFO("created Perceptron object\n") ;
00212     }
00213 #ifdef HAVE_LAPACK
00214     else if (strncmp(name,"LIBLINEAR",9)==0)
00215     {
00216         LIBLINEAR_SOLVER_TYPE st=L2R_LR;
00217         
00218         if (strcmp(name,"LIBLINEAR_L2R_LR")==0)
00219         {
00220             st=L2R_LR;
00221             SG_INFO("created LibLinear l2 regularized logistic regression object\n") ;
00222         }
00223         else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0)
00224         {
00225             st=L2R_L2LOSS_SVC_DUAL;
00226             SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ;
00227         }
00228         else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0)
00229         {
00230             st=L2R_L2LOSS_SVC;
00231             SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ;
00232         }
00233         else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0)
00234         {
00235             st=L1R_L2LOSS_SVC;
00236             SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ;
00237         }
00238         else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0)
00239         {
00240             st=L2R_L1LOSS_SVC_DUAL;
00241             SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ;
00242         }
00243         else
00244             SG_ERROR("unknown liblinear type\n");
00245 
00246         SG_UNREF(classifier);
00247         classifier= new CLibLinear(st);
00248         ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00249         ((CLibLinear*) classifier)->set_epsilon(svm_epsilon);
00250         ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias);
00251     }
00252     else if (strcmp(name,"LDA")==0)
00253     {
00254         SG_UNREF(classifier);
00255         classifier= new CLDA();
00256         SG_INFO("created LDA object\n") ;
00257     }
00258 #endif //HAVE_LAPACK
00259 #ifdef USE_CPLEX
00260     else if (strcmp(name,"LPM")==0)
00261     {
00262         SG_UNREF(classifier);
00263         classifier= new CLPM();
00264         ((CLPM*) classifier)->set_C(svm_C1, svm_C2);
00265         ((CLPM*) classifier)->set_epsilon(svm_epsilon);
00266         ((CLPM*) classifier)->set_bias_enabled(svm_use_bias);
00267         ((CLPM*) classifier)->set_max_train_time(max_train_time);
00268         SG_INFO("created LPM object\n") ;
00269     }
00270     else if (strcmp(name,"LPBOOST")==0)
00271     {
00272         SG_UNREF(classifier);
00273         classifier= new CLPBoost();
00274         ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2);
00275         ((CLPBoost*) classifier)->set_epsilon(svm_epsilon);
00276         ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias);
00277         ((CLPBoost*) classifier)->set_max_train_time(max_train_time);
00278         SG_INFO("created LPBoost object\n") ;
00279     }
00280     else if (strcmp(name,"SUBGRADIENTLPM")==0)
00281     {
00282         SG_UNREF(classifier);
00283         classifier= new CSubGradientLPM();
00284 
00285         ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias);
00286         ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize);
00287         ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize);
00288         ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2);
00289         ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon);
00290         ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time);
00291         SG_INFO("created Subgradient LPM object\n") ;
00292     }
00293 #endif //USE_CPLEX
00294     else if (strncmp(name,"KNN", strlen("KNN"))==0)
00295     {
00296         SG_UNREF(classifier);
00297         classifier= new CKNN();
00298         SG_INFO("created KNN object\n") ;
00299     }
00300     else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0)
00301     {
00302         SG_UNREF(classifier);
00303         classifier= new CKMeans();
00304         SG_INFO("created KMeans object\n") ;
00305     }
00306     else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0)
00307     {
00308         SG_UNREF(classifier);
00309         classifier= new CHierarchical();
00310         SG_INFO("created Hierarchical clustering object\n") ;
00311     }
00312     else if (strcmp(name,"SVMLIN")==0)
00313     {
00314         SG_UNREF(classifier);
00315         classifier= new CSVMLin();
00316         ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00317         ((CSVMLin*) classifier)->set_epsilon(svm_epsilon);
00318         ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias);
00319         SG_INFO("created SVMLin object\n") ;
00320     }
00321     else if (strcmp(name,"SUBGRADIENTSVM")==0)
00322     {
00323         SG_UNREF(classifier);
00324         classifier= new CSubGradientSVM();
00325 
00326         ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias);
00327         ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize);
00328         ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize);
00329         ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00330         ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon);
00331         ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time);
00332         SG_INFO("created Subgradient SVM object\n") ;
00333     }
00334     else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0)
00335     {
00336         SG_UNREF(classifier);
00337         classifier= new CWDSVMOcas(SVM_OCAS);
00338 
00339         ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00340         ((CWDSVMOcas*) classifier)->set_degree(d, from_d);
00341         ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00342         ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00343         ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00344         SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ;
00345     }
00346     else if (strcmp(name,"SVMOCAS")==0)
00347     {
00348         SG_UNREF(classifier);
00349         classifier= new CSVMOcas(SVM_OCAS);
00350 
00351         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00352         ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00353         ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00354         ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00355         SG_INFO("created SVM Ocas(OCAS) object\n") ;
00356     }
00357     else if (strcmp(name,"SVMSGD")==0)
00358     {
00359         SG_UNREF(classifier);
00360         classifier= new CSVMSGD(svm_C1);
00361         ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias);
00362         SG_INFO("created SVM SGD object\n") ;
00363     }
00364     else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0))
00365     {
00366         SG_UNREF(classifier);
00367         classifier= new CSVMOcas(SVM_BMRM);
00368 
00369         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00370         ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00371         ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00372         ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00373         SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ;
00374     }
00375     else if (strcmp(name,"MKL_CLASSIFICATION")==0)
00376     {
00377         SG_UNREF(classifier);
00378         classifier= new CMKLClassification();
00379     }
00380     //else if (strcmp(name,"MKL_MULTICLASS")==0)
00381     //{
00382     //  SG_UNREF(classifier);
00383     //  classifier= new CMKLClassification();
00384     //}
00385     else if (strcmp(name,"MKL_ONECLASS")==0)
00386     {
00387         SG_UNREF(classifier);
00388         classifier= new CMKLOneClass();
00389     }
00390     else if (strcmp(name,"MKL_MULTICLASS")==0)
00391     {
00392         SG_UNREF(classifier);
00393         classifier= new CMKLMultiClass();
00394     }
00395     else if (strcmp(name,"MKL_REGRESSION")==0)
00396     {
00397         SG_UNREF(classifier);
00398         classifier= new CMKLRegression();
00399     }
00400     else
00401     {
00402         SG_ERROR("Unknown classifier %s.\n", name);
00403         return false;
00404     }
00405     SG_REF(classifier);
00406 
00407     return (classifier!=NULL);
00408 }
00409 
00410 bool CGUIClassifier::train_mkl_multiclass()
00411 {
00412     CMKLMultiClass* mkl= (CMKLMultiClass*) classifier;
00413     if (!mkl)
00414         SG_ERROR("No MKL available.\n");
00415 
00416     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00417     if (!trainlabels)
00418         SG_ERROR("No trainlabels available.\n");
00419 
00420     CKernel* kernel=ui->ui_kernel->get_kernel();
00421     if (!kernel)
00422         SG_ERROR("No kernel available.\n");
00423 
00424     bool success=ui->ui_kernel->init_kernel("TRAIN");
00425 
00426     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00427         SG_ERROR("Kernel not initialized / no train features available.\n");
00428 
00429     int32_t num_vec=kernel->get_num_vec_lhs();
00430     if (trainlabels->get_num_labels() != num_vec)
00431         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00432 
00433     SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00434 
00435     mkl->set_mkl_epsilon(svm_weight_epsilon);
00436     mkl->set_mkl_norm(mkl_norm); 
00437     //mkl->set_max_num_mkliters(-1);
00438     mkl->set_solver_type(solver_type);
00439     mkl->set_bias_enabled(svm_use_bias);
00440     mkl->set_epsilon(svm_epsilon);
00441     mkl->set_max_train_time(max_train_time);
00442     mkl->set_tube_epsilon(svm_tube_epsilon);
00443     mkl->set_nu(svm_nu);
00444     mkl->set_C(svm_C1, svm_C2);
00445     mkl->set_qpsize(svm_qpsize);
00446     mkl->set_shrinking_enabled(svm_use_shrinking);
00447     mkl->set_linadd_enabled(svm_use_linadd);
00448     mkl->set_batch_computation_enabled(svm_use_batch_computation);
00449 
00450     ((CKernelMachine*) mkl)->set_labels(trainlabels);
00451     ((CKernelMachine*) mkl)->set_kernel(kernel);
00452 
00453     return mkl->train();
00454 }
00455 
00456 bool CGUIClassifier::train_mkl()
00457 {
00458     CMKL* mkl= (CMKL*) classifier;
00459     if (!mkl)
00460         SG_ERROR("No SVM available.\n");
00461 
00462     bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS);
00463     CLabels* trainlabels=NULL;
00464     if(!oneclass)
00465         trainlabels=ui->ui_labels->get_train_labels();
00466     else
00467         SG_INFO("Training one class mkl.\n");
00468     if (!trainlabels && !oneclass)
00469         SG_ERROR("No trainlabels available.\n");
00470 
00471     CKernel* kernel=ui->ui_kernel->get_kernel();
00472     if (!kernel)
00473         SG_ERROR("No kernel available.\n");
00474 
00475     bool success=ui->ui_kernel->init_kernel("TRAIN");
00476     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00477         SG_ERROR("Kernel not initialized.\n");
00478 
00479     int32_t num_vec=kernel->get_num_vec_lhs();
00480     if (!oneclass && trainlabels->get_num_labels() != num_vec)
00481         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00482 
00483     SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00484 
00485     if (constraint_generator)
00486         mkl->set_constraint_generator(constraint_generator);
00487     mkl->set_solver_type(solver_type);
00488     mkl->set_bias_enabled(svm_use_bias);
00489     mkl->set_epsilon(svm_epsilon);
00490     mkl->set_max_train_time(max_train_time);
00491     mkl->set_tube_epsilon(svm_tube_epsilon);
00492     mkl->set_nu(svm_nu);
00493     mkl->set_C(svm_C1, svm_C2);
00494     mkl->set_qpsize(svm_qpsize);
00495     mkl->set_shrinking_enabled(svm_use_shrinking);
00496     mkl->set_linadd_enabled(svm_use_linadd);
00497     mkl->set_batch_computation_enabled(svm_use_batch_computation);
00498     mkl->set_mkl_epsilon(svm_weight_epsilon);
00499     mkl->set_mkl_norm(mkl_norm); 
00500     mkl->set_elasticnet_lambda(ent_lambda);
00501     mkl->set_mkl_block_norm(mkl_block_norm);
00502     mkl->set_C_mkl(C_mkl);
00503     mkl->set_interleaved_optimization_enabled(mkl_use_interleaved);
00504 
00505     if (svm_do_auc_maximization)
00506     {
00507         CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00508         CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00509         ((CKernelMachine*) mkl)->set_labels(auc_labels);
00510         ((CKernelMachine*) mkl)->set_kernel(auc_kernel);
00511         SG_UNREF(auc_labels);
00512     }
00513     else
00514     {
00515         if(!oneclass)
00516             ((CKernelMachine*) mkl)->set_labels(trainlabels);
00517         ((CKernelMachine*) mkl)->set_kernel(kernel);
00518     }
00519 
00520     bool result=mkl->train();
00521 
00522     return result;
00523 }
00524 
00525 bool CGUIClassifier::train_svm()
00526 {
00527     CSVM* svm= (CSVM*) classifier;
00528     if (!svm)
00529         SG_ERROR("No SVM available.\n");
00530 
00531     bool oneclass=(svm->get_classifier_type()==CT_LIBSVMONECLASS);
00532     CLabels* trainlabels=NULL;
00533     if(!oneclass)
00534         trainlabels=ui->ui_labels->get_train_labels();
00535     else
00536         SG_INFO("Training one class svm.\n");
00537     if (!trainlabels && !oneclass)
00538         SG_ERROR("No trainlabels available.\n");
00539 
00540     CKernel* kernel=ui->ui_kernel->get_kernel();
00541     if (!kernel)
00542         SG_ERROR("No kernel available.\n");
00543 
00544     bool success=ui->ui_kernel->init_kernel("TRAIN");
00545 
00546     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00547         SG_ERROR("Kernel not initialized / no train features available.\n");
00548 
00549     int32_t num_vec=kernel->get_num_vec_lhs();
00550     if (!oneclass && trainlabels->get_num_labels() != num_vec)
00551         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00552 
00553     SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00554 
00555     svm->set_solver_type(solver_type);
00556     svm->set_bias_enabled(svm_use_bias);
00557     svm->set_epsilon(svm_epsilon);
00558     svm->set_max_train_time(max_train_time);
00559     svm->set_tube_epsilon(svm_tube_epsilon);
00560     svm->set_nu(svm_nu);
00561     svm->set_C(svm_C1, svm_C2);
00562     svm->set_qpsize(svm_qpsize);
00563     svm->set_shrinking_enabled(svm_use_shrinking);
00564     svm->set_linadd_enabled(svm_use_linadd);
00565     svm->set_batch_computation_enabled(svm_use_batch_computation);
00566 
00567     if(svm->get_classifier_type()==CT_MKLMULTICLASS)
00568     {
00569         ((CMKLMultiClass *)svm)->set_mkl_epsilon(svm_weight_epsilon ); 
00570     }
00571 
00572     if (svm_do_auc_maximization)
00573     {
00574         CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00575         CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00576         ((CKernelMachine*) svm)->set_labels(auc_labels);
00577         ((CKernelMachine*) svm)->set_kernel(auc_kernel);
00578         SG_UNREF(auc_labels);
00579     }
00580     else
00581     {
00582         if(!oneclass)
00583             ((CKernelMachine*) svm)->set_labels(trainlabels);
00584         ((CKernelMachine*) svm)->set_kernel(kernel);
00585     }
00586 
00587     bool result=svm->train();
00588 
00589     return result;
00590 }
00591 
00592 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter)
00593 {
00594     bool result=false;
00595     CDistance* distance=ui->ui_distance->get_distance();
00596 
00597     if (!distance)
00598         SG_ERROR("No distance available\n");
00599 
00600     if (!ui->ui_distance->init_distance("TRAIN"))
00601         SG_ERROR("Initializing distance with train features failed.\n");
00602 
00603     ((CDistanceMachine*) classifier)->set_distance(distance);
00604 
00605     EClassifierType type=classifier->get_classifier_type();
00606     switch (type)
00607     {
00608         case CT_KMEANS:
00609         {
00610             ((CKMeans*) classifier)->set_k(k);
00611             ((CKMeans*) classifier)->set_max_iter(max_iter);
00612             result=((CKMeans*) classifier)->train();
00613             break;
00614         }
00615         case CT_HIERARCHICAL:
00616         {
00617             ((CHierarchical*) classifier)->set_merges(k);
00618             result=((CHierarchical*) classifier)->train();
00619             break;
00620         }
00621         default:
00622             SG_ERROR("Unknown clustering type %d\n", type);
00623     }
00624 
00625     return result;
00626 }
00627 
00628 bool CGUIClassifier::train_knn(int32_t k)
00629 {
00630     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00631     CDistance* distance=ui->ui_distance->get_distance();
00632 
00633     bool result=false;
00634 
00635     if (trainlabels)
00636     {
00637         if (distance)
00638         {
00639             if (!ui->ui_distance->init_distance("TRAIN"))
00640                 SG_ERROR("Initializing distance with train features failed.\n");
00641             ((CKNN*) classifier)->set_labels(trainlabels);
00642             ((CKNN*) classifier)->set_distance(distance);
00643             ((CKNN*) classifier)->set_k(k);
00644             result=((CKNN*) classifier)->train();
00645         }
00646         else
00647             SG_ERROR("No distance available.\n");
00648     }
00649     else
00650         SG_ERROR("No labels available\n");
00651 
00652     return result;
00653 }
00654 
00655 bool CGUIClassifier::train_krr()
00656 {
00657 #ifdef HAVE_LAPACK
00658     CKRR* krr= (CKRR*) classifier;
00659     if (!krr)
00660         SG_ERROR("No SVM available.\n");
00661 
00662     CLabels* trainlabels=NULL;
00663     trainlabels=ui->ui_labels->get_train_labels();
00664     if (!trainlabels)
00665         SG_ERROR("No trainlabels available.\n");
00666 
00667     CKernel* kernel=ui->ui_kernel->get_kernel();
00668     if (!kernel)
00669         SG_ERROR("No kernel available.\n");
00670 
00671     bool success=ui->ui_kernel->init_kernel("TRAIN");
00672 
00673     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00674         SG_ERROR("Kernel not initialized / no train features available.\n");
00675 
00676     int32_t num_vec=kernel->get_num_vec_lhs();
00677     if (trainlabels->get_num_labels() != num_vec)
00678         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00679 
00680 
00681     // Set training labels and kernel
00682     krr->set_labels(trainlabels);
00683     krr->set_kernel(kernel);
00684 
00685     bool result=krr->train();
00686     return result;
00687 #else
00688     return false;
00689 #endif
00690 }
00691 
00692 bool CGUIClassifier::train_linear(float64_t gamma)
00693 {
00694     ASSERT(classifier);
00695     EClassifierType ctype = classifier->get_classifier_type();
00696     CFeatures* trainfeatures=ui->ui_features->get_train_features();
00697     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00698     bool result=false;
00699 
00700     if (!trainfeatures)
00701         SG_ERROR("No trainfeatures available.\n");
00702 
00703     if (!trainfeatures->has_property(FP_DOT))
00704         SG_ERROR("Trainfeatures not based on DotFeatures.\n");
00705 
00706     if (!trainlabels)
00707         SG_ERROR("No labels available\n");
00708 
00709     if (ctype==CT_PERCEPTRON)
00710     {
00711         ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate);
00712         ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter);
00713     }
00714 
00715 #ifdef HAVE_LAPACK
00716     if (ctype==CT_LDA)
00717     {
00718         if (trainfeatures->get_feature_type()!=F_DREAL ||
00719                 trainfeatures->get_feature_class()!=C_SIMPLE)
00720         SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n");
00721         ((CLDA*) classifier)->set_gamma(gamma);
00722     }
00723 #endif
00724 
00725     if (ctype==CT_SVMOCAS)
00726         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00727 #ifdef HAVE_LAPACK
00728     else if (ctype==CT_LIBLINEAR)
00729         ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00730 #endif
00731     else if (ctype==CT_SVMLIN)
00732         ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00733     else if (ctype==CT_SVMSGD)
00734         ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2);
00735     else if (ctype==CT_SUBGRADIENTSVM)
00736         ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00737 
00738     else if (ctype==CT_LPM || ctype==CT_LPBOOST)
00739     {
00740         if (trainfeatures->get_feature_class()!=C_SPARSE ||
00741                 trainfeatures->get_feature_type()!=F_DREAL)
00742             SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n");
00743     }
00744 
00745     ((CLinearMachine*) classifier)->set_labels(trainlabels);
00746     ((CLinearMachine*) classifier)->set_features((CSimpleFeatures<float64_t>*) trainfeatures);
00747     result=((CLinearMachine*) classifier)->train();
00748 
00749     return result;
00750 }
00751 
00752 bool CGUIClassifier::train_wdocas()
00753 {
00754     CFeatures* trainfeatures=ui->ui_features->get_train_features();
00755     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00756 
00757     bool result=false;
00758 
00759     if (!trainfeatures)
00760         SG_ERROR("No trainfeatures available.\n");
00761 
00762     if (trainfeatures->get_feature_class()!=C_STRING ||
00763             trainfeatures->get_feature_type()!=F_BYTE )
00764         SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n");
00765 
00766     if (!trainlabels)
00767         SG_ERROR("No labels available.\n");
00768 
00769     ((CWDSVMOcas*) classifier)->set_labels(trainlabels);
00770     ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures);
00771     result=((CWDSVMOcas*) classifier)->train();
00772 
00773     return result;
00774 }
00775 
00776 bool CGUIClassifier::load(char* filename, char* type)
00777 {
00778     bool result=false;
00779 
00780     if (new_classifier(type))
00781     {
00782         FILE* model_file=fopen(filename, "r");
00783 
00784         if (model_file)
00785         {
00786             if (classifier && classifier->load(model_file))
00787             {
00788                 SG_DEBUG("file successfully read.\n");
00789                 result=true;
00790             }
00791             else
00792                 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename);
00793 
00794             fclose(model_file);
00795         }
00796         else
00797             SG_ERROR("Opening file %s failed.\n", filename);
00798 
00799         return result;
00800     }
00801     else
00802         SG_ERROR("Type %s of SVM/Classifier unknown.\n", type);
00803 
00804     return false;
00805 }
00806 
00807 bool CGUIClassifier::save(char* param)
00808 {
00809     bool result=false;
00810     param=SGIO::skip_spaces(param);
00811 
00812     if (classifier)
00813     {
00814         FILE* file=fopen(param, "w");
00815 
00816         if ((!file) ||  (!classifier->save(file)))
00817             printf("writing to file %s failed!\n", param);
00818         else
00819         {
00820             printf("successfully written classifier into \"%s\" !\n", param);
00821             result=true;
00822         }
00823 
00824         if (file)
00825             fclose(file);
00826     }
00827     else
00828         SG_ERROR("create classifier first\n");
00829 
00830     return result;
00831 }
00832 
00833 bool CGUIClassifier::set_perceptron_parameters(
00834     float64_t learnrate, int32_t maxiter)
00835 {
00836     if (learnrate<=0)
00837         perceptron_learnrate=0.01;
00838     else
00839         perceptron_learnrate=learnrate;
00840 
00841     if (maxiter<=0)
00842         perceptron_maxiter=1000;
00843     else
00844         perceptron_maxiter=maxiter;
00845     SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter);
00846 
00847     return true;
00848 }
00849 
00850 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon)
00851 {
00852     if (epsilon<0)
00853         svm_epsilon=1e-4;
00854     else
00855         svm_epsilon=epsilon;
00856     SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon);
00857 
00858     return true;
00859 }
00860 
00861 bool CGUIClassifier::set_max_train_time(float64_t max)
00862 {
00863     if (max>0)
00864     {
00865         max_train_time=max;
00866         SG_INFO("Set to max_train_time=%f.\n", max_train_time);
00867     }
00868     else
00869         SG_INFO("Disabling max_train_time.\n");
00870 
00871     return true;
00872 }
00873 
00874 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon)
00875 {
00876     if (!classifier)
00877         SG_ERROR("No regression method allocated\n");
00878 
00879     if (classifier->get_classifier_type() != CT_LIBSVR &&
00880             classifier->get_classifier_type() != CT_SVRLIGHT &&
00881             classifier->get_classifier_type() != CT_MKLREGRESSION )
00882     {
00883         SG_ERROR("Underlying method not capable of SV-regression\n");
00884     }
00885 
00886     if (tube_epsilon<0)
00887         svm_tube_epsilon=1e-2;
00888     svm_tube_epsilon=tube_epsilon;
00889 
00890     ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon);
00891     SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon);
00892 
00893     return true;
00894 }
00895 
00896 bool CGUIClassifier::set_svm_nu(float64_t nu)
00897 {
00898     if (nu<0 || nu>1)
00899         nu=0.5;
00900 
00901     svm_nu=nu;
00902     SG_INFO("Set to nu=%f.\n", svm_nu);
00903 
00904     return true;
00905 }
00906 
00907 bool CGUIClassifier::set_svm_mkl_parameters(
00908     float64_t weight_epsilon, float64_t C, float64_t norm)
00909 {
00910     if (weight_epsilon<0)
00911         weight_epsilon=1e-4;
00912     if (C<0)
00913         C=0;
00914     if (norm<0)
00915         SG_ERROR("MKL norm >= 0\n");
00916 
00917     svm_weight_epsilon=weight_epsilon;
00918     C_mkl=C;
00919     mkl_norm=norm;
00920 
00921     SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon);
00922     SG_INFO("Set to C_mkl=%f.\n", C_mkl);
00923     SG_INFO("Set to mkl_norm=%f.\n", mkl_norm);
00924 
00925     return true;
00926 }
00927 
00928 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda)
00929 {
00930   if (lambda<0 || lambda>1)
00931     SG_ERROR("0 <= ent_lambda <= 1\n");
00932 
00933   ent_lambda = lambda;
00934   return true;
00935 }
00936 
00937 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm)
00938 {
00939   if (mkl_bnorm<1)
00940     SG_ERROR("1 <= mkl_block_norm <= inf\n");
00941 
00942   mkl_block_norm=mkl_bnorm;
00943   return true;
00944 }
00945 
00946 
00947 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2)
00948 {
00949     if (C1<0)
00950         svm_C1=1.0;
00951     else
00952         svm_C1=C1;
00953 
00954     if (C2<0)
00955         svm_C2=svm_C1;
00956     else
00957         svm_C2=C2;
00958 
00959     SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2);
00960 
00961     return true;
00962 }
00963 
00964 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize)
00965 {
00966     if (qpsize<2)
00967         svm_qpsize=41;
00968     else
00969         svm_qpsize=qpsize;
00970     SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize);
00971 
00972     return true;
00973 }
00974 
00975 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize)
00976 {
00977     if (max_qpsize<50)
00978         svm_max_qpsize=50;
00979     else
00980         svm_max_qpsize=max_qpsize;
00981     SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize);
00982 
00983     return true;
00984 }
00985 
00986 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize)
00987 {
00988     if (svm_bufsize<0)
00989         svm_bufsize=3000;
00990     else
00991         svm_bufsize=bufsize;
00992     SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize);
00993 
00994     return true ;
00995 }
00996 
00997 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled)
00998 {
00999     svm_use_shrinking=enabled;
01000     if (svm_use_shrinking)
01001         SG_INFO("Enabling shrinking optimization.\n");
01002     else
01003         SG_INFO("Disabling shrinking optimization.\n");
01004 
01005     return true;
01006 }
01007 
01008 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled)
01009 {
01010     svm_use_batch_computation=enabled;
01011     if (svm_use_batch_computation)
01012         SG_INFO("Enabling batch computation.\n");
01013     else
01014         SG_INFO("Disabling batch computation.\n");
01015 
01016     return true;
01017 }
01018 
01019 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled)
01020 {
01021     svm_use_linadd=enabled;
01022     if (svm_use_linadd)
01023         SG_INFO("Enabling LINADD optimization.\n");
01024     else
01025         SG_INFO("Disabling LINADD optimization.\n");
01026 
01027     return true;
01028 }
01029 
01030 bool CGUIClassifier::set_svm_bias_enabled(bool enabled)
01031 {
01032     svm_use_bias=enabled;
01033     if (svm_use_bias)
01034         SG_INFO("Enabling svm bias.\n");
01035     else
01036         SG_INFO("Disabling svm bias.\n");
01037 
01038     return true;
01039 }
01040 
01041 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled)
01042 {
01043     mkl_use_interleaved=enabled;
01044     if (mkl_use_interleaved)
01045         SG_INFO("Enabling mkl interleaved optimization.\n");
01046     else
01047         SG_INFO("Disabling mkl interleaved optimization.\n");
01048 
01049     return true;
01050 }
01051 
01052 bool CGUIClassifier::set_do_auc_maximization(bool do_auc)
01053 {
01054     svm_do_auc_maximization=do_auc;
01055 
01056     if (svm_do_auc_maximization)
01057         SG_INFO("Enabling AUC maximization.\n");
01058     else
01059         SG_INFO("Disabling AUC maximization.\n");
01060 
01061     return true;
01062 }
01063 
01064 
01065 CLabels* CGUIClassifier::classify()
01066 {
01067     ASSERT(classifier);
01068 
01069     switch (classifier->get_classifier_type())
01070     {
01071         case CT_LIGHT:
01072         case CT_LIGHTONECLASS:
01073         case CT_LIBSVM:
01074         case CT_SCATTERSVM:
01075         case CT_MPD:
01076         case CT_GPBT:
01077         case CT_CPLEXSVM:
01078         case CT_GMNPSVM:
01079         case CT_GNPPSVM:
01080         case CT_LIBSVR:
01081         case CT_LIBSVMMULTICLASS:
01082         case CT_LIBSVMONECLASS:
01083         case CT_SVRLIGHT:
01084         case CT_MKLCLASSIFICATION:
01085         case CT_MKLMULTICLASS:
01086         case CT_MKLREGRESSION:
01087         case CT_MKLONECLASS:
01088         case CT_KRR:
01089             return classify_kernelmachine();
01090         case CT_KNN:
01091             return classify_distancemachine();
01092         case CT_PERCEPTRON:
01093         case CT_LDA:
01094             return classify_linear();
01095         case CT_SVMLIN:
01096         case CT_SVMPERF:
01097         case CT_SUBGRADIENTSVM:
01098         case CT_SVMOCAS:
01099         case CT_SVMSGD:
01100         case CT_LPM:
01101         case CT_LPBOOST:
01102         case CT_SUBGRADIENTLPM:
01103         case CT_LIBLINEAR:
01104             return classify_linear();
01105         case CT_WDSVMOCAS:
01106             return classify_byte_linear();
01107         default:
01108             SG_ERROR("unknown classifier type\n");
01109             break;
01110     };
01111 
01112     return false;
01113 }
01114 
01115 CLabels* CGUIClassifier::classify_kernelmachine()
01116 {
01117     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01118     CFeatures* testfeatures=ui->ui_features->get_test_features();
01119 
01120     if (!classifier)
01121         SG_ERROR("No kernelmachine available.\n");
01122 
01123     bool success=true;
01124 
01125     if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
01126     {
01127         if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED
01128                 && ( !trainfeatures || !testfeatures ))
01129         {
01130             SG_DEBUG("skipping initialisation of combined kernel "
01131                     "as train/test features are unavailable\n");
01132         }
01133         else
01134         {
01135             if (!trainfeatures)
01136                 SG_ERROR("No training features available.\n");
01137             if (!testfeatures)
01138                 SG_ERROR("No test features available.\n");
01139 
01140             success=ui->ui_kernel->init_kernel("TEST");
01141         }
01142     }
01143 
01144     if (!success || !ui->ui_kernel->is_initialized())
01145         SG_ERROR("Kernel not initialized.\n");
01146 
01147     CKernelMachine* km=(CKernelMachine*) classifier;
01148     km->set_kernel(ui->ui_kernel->get_kernel());
01149     km->set_batch_computation_enabled(svm_use_batch_computation);
01150 
01151     SG_INFO("Starting kernel machine testing.\n");
01152     return classifier->apply();
01153 }
01154 
01155 bool CGUIClassifier::get_trained_classifier(
01156     float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias,
01157     int32_t& brows, int32_t& bcols,
01158     int32_t idx) // which SVM for MultiClass
01159 {
01160     ASSERT(classifier);
01161 
01162     switch (classifier->get_classifier_type())
01163     {
01164         case CT_SCATTERSVM:
01165         case CT_GNPPSVM:
01166         case CT_LIBSVMMULTICLASS:
01167         case CT_LIGHT:
01168         case CT_LIGHTONECLASS:
01169         case CT_LIBSVM:
01170         case CT_MPD:
01171         case CT_GPBT:
01172         case CT_CPLEXSVM:
01173         case CT_GMNPSVM:
01174         case CT_LIBSVR:
01175         case CT_LIBSVMONECLASS:
01176         case CT_SVRLIGHT:
01177         case CT_MKLCLASSIFICATION:
01178         case CT_MKLREGRESSION:
01179         case CT_MKLONECLASS:
01180         case CT_MKLMULTICLASS:
01181         case CT_KRR:
01182             return get_svm(weights, rows, cols, bias, brows, bcols, idx);
01183             break;
01184         case CT_PERCEPTRON:
01185         case CT_LDA:
01186         case CT_LPM:
01187         case CT_LPBOOST:
01188         case CT_SUBGRADIENTLPM:
01189         case CT_SVMOCAS:
01190         case CT_SVMSGD:
01191         case CT_SVMLIN:
01192         case CT_SVMPERF:
01193         case CT_SUBGRADIENTSVM:
01194         case CT_LIBLINEAR:
01195             return get_linear(weights, rows, cols, bias, brows, bcols);
01196             break;
01197         case CT_KMEANS:
01198         case CT_HIERARCHICAL:
01199             return get_clustering(weights, rows, cols, bias, brows, bcols);
01200             break;
01201         case CT_KNN:
01202             SG_ERROR("not implemented");
01203             break;
01204         default:
01205             SG_ERROR("unknown classifier type\n");
01206             break;
01207     };
01208     return false;
01209 }
01210 
01211 
01212 int32_t CGUIClassifier::get_num_svms()
01213 {
01214     ASSERT(classifier);
01215     return ((CMultiClassSVM*) classifier)->get_num_svms();
01216 }
01217 
01218 bool CGUIClassifier::get_svm(
01219     float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01220     int32_t& brows, int32_t& bcols, int32_t idx)
01221 {
01222     CSVM* svm=(CSVM*) classifier;
01223 
01224     if (idx>-1) // should be MultiClassSVM
01225         svm=((CMultiClassSVM*) svm)->get_svm(idx);
01226 
01227     if (svm)
01228     {
01229         brows=1;
01230         bcols=1;
01231         bias=SG_MALLOC(float64_t, 1);
01232         *bias=svm->get_bias();
01233 
01234         rows=svm->get_num_support_vectors();
01235         cols=2;
01236         weights=SG_MALLOC(float64_t, rows*cols);
01237 
01238         for (int32_t i=0; i<rows; i++)
01239         {
01240             weights[i]=svm->get_alpha(i);
01241             weights[i+rows]=svm->get_support_vector(i);
01242         }
01243 
01244         return true;
01245     }
01246 
01247     return false;
01248 }
01249 
01250 bool CGUIClassifier::get_clustering(
01251     float64_t* &centers, int32_t& rows, int32_t& cols, float64_t*& radi,
01252     int32_t& brows, int32_t& bcols)
01253 {
01254     if (!classifier)
01255         return false;
01256 
01257     switch (classifier->get_classifier_type())
01258     {
01259         case CT_KMEANS:
01260         {
01261             CKMeans* clustering=(CKMeans*) classifier;
01262 
01263             bcols=1;
01264             SGVector<float64_t> r=clustering->get_radiuses();
01265             brows=r.vlen;
01266             radi=SG_MALLOC(float64_t, brows);
01267             memcpy(radi, r.vector, sizeof(float64_t)*brows);
01268 
01269             cols=1;
01270             SGMatrix<float64_t> c=clustering->get_cluster_centers();
01271             rows=c.num_rows;
01272             cols=c.num_cols;
01273             centers=SG_MALLOC(float64_t, rows*cols);
01274             memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols);
01275             break;
01276         }
01277 
01278         case CT_HIERARCHICAL:
01279         {
01280             CHierarchical* clustering=(CHierarchical*) classifier;
01281 
01282             // radi == merge_distances, centers == pairs
01283             bcols=1;
01284             SGVector<float64_t> r=clustering->get_merge_distances();
01285             brows=r.vlen;
01286             radi=SG_MALLOC(float64_t, brows);
01287             memcpy(radi, r.vector, sizeof(float64_t)*brows);
01288 
01289             SGMatrix<int32_t> p=clustering->get_cluster_pairs();
01290             rows=p.num_rows;
01291             cols=p.num_cols;
01292             centers=SG_MALLOC(float64_t, rows*cols);
01293             for (int32_t i=0; i<rows*cols; i++)
01294                 centers[i]=(float64_t) p.matrix[i];
01295 
01296             break;
01297         }
01298 
01299         default:
01300             SG_ERROR("internal error - unknown clustering type\n");
01301     }
01302 
01303     return true;
01304 }
01305 
01306 bool CGUIClassifier::get_linear(
01307     float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01308     int32_t& brows, int32_t& bcols)
01309 {
01310     CLinearMachine* linear=(CLinearMachine*) classifier;
01311 
01312     if (!linear)
01313         return false;
01314 
01315     bias=SG_MALLOC(float64_t, 1);
01316     *bias=linear->get_bias();
01317     brows=1;
01318     bcols=1;
01319 
01320     cols=1;
01321     float64_t* w=NULL;
01322     linear->get_w(w, rows);
01323 
01324     weights= SG_MALLOC(float64_t, rows);
01325     memcpy(weights, w, sizeof(float64_t)*rows);
01326 
01327     return true;
01328 }
01329 
01330 CLabels* CGUIClassifier::classify_distancemachine()
01331 {
01332     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01333     CFeatures* testfeatures=ui->ui_features->get_test_features();
01334 
01335     if (!classifier)
01336     {
01337         SG_ERROR("no kernelmachine available\n") ;
01338         return NULL;
01339     }
01340     if (!trainfeatures)
01341     {
01342         SG_ERROR("no training features available\n") ;
01343         return NULL;
01344     }
01345 
01346     if (!testfeatures)
01347     {
01348         SG_ERROR("no test features available\n") ;
01349         return NULL;
01350     }
01351 
01352     bool success=ui->ui_distance->init_distance("TEST");
01353 
01354     if (!success || !ui->ui_distance->is_initialized())
01355     {
01356         SG_ERROR("distance not initialized\n") ;
01357         return NULL;
01358     }
01359       
01360     ((CDistanceMachine*) classifier)->set_distance(
01361         ui->ui_distance->get_distance());
01362     SG_INFO("starting distance machine testing\n") ;
01363     return classifier->apply();
01364 }
01365 
01366 
01367 CLabels* CGUIClassifier::classify_linear()
01368 {
01369     CFeatures* testfeatures=ui->ui_features->get_test_features();
01370 
01371     if (!classifier)
01372     {
01373         SG_ERROR("no classifier available\n") ;
01374         return NULL;
01375     }
01376     if (!testfeatures)
01377     {
01378         SG_ERROR("no test features available\n") ;
01379         return NULL;
01380     }
01381     if (!(testfeatures->has_property(FP_DOT)))
01382     {
01383         SG_ERROR("testfeatures not based on DotFeatures\n") ;
01384         return false ;
01385     }
01386 
01387     ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures);
01388     SG_INFO("starting linear classifier testing\n") ;
01389     return classifier->apply();
01390 }
01391 
01392 CLabels* CGUIClassifier::classify_byte_linear()
01393 {
01394     CFeatures* testfeatures=ui->ui_features->get_test_features();
01395 
01396     if (!classifier)
01397     {
01398         SG_ERROR("no svm available\n") ;
01399         return NULL;
01400     }
01401     if (!testfeatures)
01402     {
01403         SG_ERROR("no test features available\n") ;
01404         return NULL;
01405     }
01406     if (testfeatures->get_feature_class() != C_STRING ||
01407             testfeatures->get_feature_type() != F_BYTE )
01408     {
01409         SG_ERROR("testfeatures not of class STRING type BYTE\n") ;
01410         return false ;
01411     }
01412 
01413     ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures);
01414     SG_INFO("starting linear classifier testing\n") ;
01415     return classifier->apply();
01416 }
01417 
01418 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result)
01419 {
01420     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01421     CFeatures* testfeatures=ui->ui_features->get_test_features();
01422 
01423     if (!classifier)
01424     {
01425         SG_ERROR("no svm available\n") ;
01426         return false;
01427     }
01428 
01429     if (!ui->ui_kernel->is_initialized())
01430     {
01431         SG_ERROR("kernel not initialized\n") ;
01432         return false;
01433     }
01434 
01435     if (!ui->ui_kernel->get_kernel() ||
01436             !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
01437     {
01438         if (!trainfeatures)
01439         {
01440             SG_ERROR("no training features available\n") ;
01441             return false;
01442         }
01443 
01444         if (!testfeatures)
01445         {
01446             SG_ERROR("no test features available\n") ;
01447             return false;
01448         }
01449     }
01450 
01451     ((CKernelMachine*) classifier)->set_kernel(
01452         ui->ui_kernel->get_kernel());
01453 
01454     result=classifier->apply(idx);
01455     return true ;
01456 }
01457 
01458 
01459 bool CGUIClassifier::set_krr_tau(float64_t tau)
01460 {
01461 #ifdef HAVE_LAPACK
01462     krr_tau=tau;
01463     ((CKRR*) classifier)->set_tau(krr_tau);
01464     SG_INFO("Set to krr_tau=%f.\n", krr_tau);
01465 
01466     return true;
01467 #else
01468     return false;
01469 #endif
01470 }
01471 
01472 bool CGUIClassifier::set_solver(char* solver)
01473 {
01474     ESolverType s=ST_AUTO;
01475 
01476     if (strncmp(solver,"NEWTON", 6)==0)
01477     {
01478         SG_INFO("Using NEWTON solver.\n");
01479         s=ST_NEWTON;
01480     }
01481     else if (strncmp(solver,"DIRECT", 6)==0)
01482     {
01483         SG_INFO("Using DIRECT solver\n");
01484         s=ST_DIRECT;
01485     }
01486     else if (strncmp(solver,"BLOCK_NORM", 9)==0)
01487     {
01488         SG_INFO("Using BLOCK_NORM solver\n");
01489         s=ST_BLOCK_NORM;
01490     }
01491     else if (strncmp(solver,"ELASTICNET", 10)==0)
01492     {
01493         SG_INFO("Using ELASTICNET solver\n");
01494         s=ST_ELASTICNET;
01495     }
01496     else if (strncmp(solver,"AUTO", 4)==0)
01497     {
01498         SG_INFO("Automagically determining solver.\n");
01499         s=ST_AUTO;
01500     }
01501 #ifdef USE_CPLEX
01502     else if (strncmp(solver, "CPLEX", 5)==0)
01503     {
01504         SG_INFO("USING CPLEX METHOD selected\n");
01505         s=ST_CPLEX;
01506     }
01507 #endif
01508 #ifdef USE_GLPK
01509     else if (strncmp(solver,"GLPK", 4)==0)
01510     {
01511         SG_INFO("Using GLPK solver\n");
01512         s=ST_GLPK;
01513     }
01514 #endif
01515     else
01516         SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver);
01517 
01518 
01519     solver_type=s;
01520     return true;
01521 }
01522 
01523 bool CGUIClassifier::set_constraint_generator(char* name)
01524 {
01525     if (strcmp(name,"LIBSVM_ONECLASS")==0)
01526     {
01527         SG_UNREF(constraint_generator);
01528         constraint_generator = new CLibSVMOneClass();
01529         SG_INFO("created SVMlibsvm object for oneclass\n");
01530     }
01531     else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
01532     {
01533         SG_UNREF(constraint_generator);
01534         constraint_generator = new CLibSVMMultiClass();
01535         SG_INFO("created SVMlibsvm object for multiclass\n");
01536     }
01537     else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
01538     {
01539         SG_UNREF(constraint_generator);
01540         constraint_generator= new CLibSVMMultiClass(LIBSVM_NU_SVC);
01541         SG_INFO("created SVMlibsvm object for multiclass\n") ;
01542     }
01543     else if (strcmp(name,"SCATTERSVM_RULE1")==0)
01544     {
01545         SG_UNREF(constraint_generator);
01546         constraint_generator= new CScatterSVM(TEST_RULE1);
01547         SG_INFO("created ScatterSVM RULE1 object\n") ;
01548     }
01549     else if (strcmp(name,"SCATTERSVM_RULE2")==0)
01550     {
01551         SG_UNREF(constraint_generator);
01552         constraint_generator= new CScatterSVM(TEST_RULE2);
01553         SG_INFO("created ScatterSVM RULE2 object\n") ;
01554     }
01555     else if (strcmp(name,"LIBSVM_NU")==0)
01556     {
01557         SG_UNREF(constraint_generator);
01558         constraint_generator= new CLibSVM(LIBSVM_NU_SVC);
01559         SG_INFO("created SVMlibsvm object\n") ;
01560     }
01561     else if (strcmp(name,"LIBSVM")==0)
01562     {
01563         SG_UNREF(constraint_generator);
01564         constraint_generator= new CLibSVM();
01565         SG_INFO("created SVMlibsvm object\n") ;
01566     }
01567     else if (strcmp(name,"LARANK")==0)
01568     {
01569         SG_UNREF(constraint_generator);
01570         constraint_generator= new CLaRank();
01571         SG_INFO("created LaRank object\n") ;
01572     }
01573 
01574     else if (strcmp(name,"GPBTSVM")==0)
01575     {
01576         SG_UNREF(constraint_generator);
01577         constraint_generator= new CGPBTSVM();
01578         SG_INFO("created GPBT-SVM object\n") ;
01579     }
01580     else if (strcmp(name,"MPDSVM")==0)
01581     {
01582         SG_UNREF(constraint_generator);
01583         constraint_generator= new CMPDSVM();
01584         SG_INFO("created MPD-SVM object\n") ;
01585     }
01586     else if (strcmp(name,"GNPPSVM")==0)
01587     {
01588         SG_UNREF(constraint_generator);
01589         constraint_generator= new CGNPPSVM();
01590         SG_INFO("created GNPP-SVM object\n") ;
01591     }
01592     else if (strcmp(name,"GMNPSVM")==0)
01593     {
01594         SG_UNREF(constraint_generator);
01595         constraint_generator= new CGMNPSVM();
01596         SG_INFO("created GMNP-SVM object\n") ;
01597     }
01598     else if (strcmp(name,"LIBSVR")==0)
01599     {
01600         SG_UNREF(constraint_generator);
01601         constraint_generator= new CLibSVR();
01602         SG_INFO("created SVRlibsvm object\n") ;
01603     }
01604     else
01605     {
01606         SG_ERROR("Unknown SV-classifier %s.\n", name);
01607         return false;
01608     }
01609     SG_REF(constraint_generator);
01610 
01611     return (constraint_generator!=NULL);
01612 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation