SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 2009 Soeren Sonnnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/kernel/AUCKernel.h> 00015 #include <shogun/features/SimpleFeatures.h> 00016 #include <shogun/io/SGIO.h> 00017 00018 using namespace shogun; 00019 00020 void 00021 CAUCKernel::init() 00022 { 00023 m_parameters->add((CSGObject**) &subkernel, "subkernel", 00024 "The subkernel."); 00025 } 00026 00027 CAUCKernel::CAUCKernel() 00028 : CDotKernel(0), subkernel(NULL) 00029 { 00030 init(); 00031 } 00032 00033 CAUCKernel::CAUCKernel(int32_t size, CKernel* s) 00034 : CDotKernel(size), subkernel(s) 00035 { 00036 init(); 00037 SG_REF(subkernel); 00038 } 00039 00040 CAUCKernel::~CAUCKernel() 00041 { 00042 SG_UNREF(subkernel); 00043 cleanup(); 00044 } 00045 00046 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels) 00047 { 00048 SG_INFO( "setting up AUC maximization\n") ; 00049 ASSERT(labels); 00050 ASSERT(labels->is_two_class_labeling()); 00051 00052 // get the original labels 00053 ASSERT(labels); 00054 SGVector<int32_t> int_labels=labels->get_int_labels(); 00055 ASSERT(subkernel->get_num_vec_rhs()==int_labels.vlen); 00056 00057 // count positive and negative 00058 int32_t num_pos=0; 00059 int32_t num_neg=0; 00060 00061 for (int32_t i=0; i<int_labels.vlen; i++) 00062 { 00063 if (int_labels.vector[i]==1) 00064 num_pos++; 00065 else 00066 num_neg++; 00067 } 00068 00069 // create AUC features and labels (alternate labels) 00070 int32_t num_auc = num_pos*num_neg; 00071 SG_INFO("num_pos: %i num_neg: %i num_auc: %i\n", num_pos, num_neg, num_auc); 00072 00073 uint16_t* features_auc = SG_MALLOC(uint16_t, num_auc*2); 00074 int32_t* labels_auc = SG_MALLOC(int32_t, num_auc); 00075 int32_t n=0 ; 00076 00077 for (int32_t i=0; i<int_labels.vlen; i++) 00078 { 00079 if (int_labels.vector[i]!=1) 00080 continue; 00081 00082 for (int32_t j=0; j<int_labels.vlen; j++) 00083 { 00084 if (int_labels.vector[j]!=-1) 00085 continue; 00086 00087 // create about as many positively as negatively labeled examples 00088 if (n%2==0) 00089 { 00090 features_auc[n*2]=i; 00091 features_auc[n*2+1]=j; 00092 labels_auc[n]=1; 00093 } 00094 else 00095 { 00096 features_auc[n*2]=j; 00097 features_auc[n*2+1]=i; 00098 labels_auc[n]=-1; 00099 } 00100 00101 n++; 00102 ASSERT(n<=num_auc); 00103 } 00104 } 00105 00106 // create label object and attach it to svm 00107 CLabels* lab_auc = new CLabels(num_auc); 00108 lab_auc->set_int_labels(SGVector<int32_t>(labels_auc, num_auc)); 00109 SG_REF(lab_auc); 00110 00111 // create feature object 00112 CSimpleFeatures<uint16_t>* f = new CSimpleFeatures<uint16_t>(0); 00113 f->set_feature_matrix(features_auc, 2, num_auc); 00114 00115 // create AUC kernel and attach the features 00116 init(f,f); 00117 00118 int_labels.free_vector(); 00119 SG_FREE(labels_auc); 00120 00121 return lab_auc; 00122 } 00123 00124 00125 bool CAUCKernel::init(CFeatures* l, CFeatures* r) 00126 { 00127 CDotKernel::init(l, r); 00128 init_normalizer(); 00129 return true; 00130 } 00131 00132 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b) 00133 { 00134 int32_t alen, blen; 00135 bool afree, bfree; 00136 00137 uint16_t* avec=((CSimpleFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree); 00138 uint16_t* bvec=((CSimpleFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree); 00139 00140 ASSERT(alen==2); 00141 ASSERT(blen==2); 00142 00143 ASSERT(subkernel && subkernel->has_features()); 00144 00145 float64_t k11,k12,k21,k22; 00146 int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1]; 00147 00148 k11 = subkernel->kernel(idx_a1,idx_b1); 00149 k12 = subkernel->kernel(idx_a1,idx_b2); 00150 k21 = subkernel->kernel(idx_a2,idx_b1); 00151 k22 = subkernel->kernel(idx_a2,idx_b2); 00152 00153 float64_t result = k11+k22-k21-k12; 00154 00155 ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree); 00156 ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree); 00157 00158 return result; 00159 }