SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _KERNEL_H___ 00013 #define _KERNEL_H___ 00014 00015 #include <shogun/lib/common.h> 00016 #include <shogun/lib/Signal.h> 00017 #include <shogun/io/File.h> 00018 #include <shogun/mathematics/Math.h> 00019 #include <shogun/features/FeatureTypes.h> 00020 #include <shogun/base/SGObject.h> 00021 #include <shogun/features/Features.h> 00022 #include <shogun/kernel/KernelNormalizer.h> 00023 00024 #include <vector> 00025 00026 namespace shogun 00027 { 00028 class CFile; 00029 class CFeatures; 00030 class CKernelNormalizer; 00031 00032 #ifdef USE_SHORTREAL_KERNELCACHE 00033 00034 typedef float32_t KERNELCACHE_ELEM; 00035 #else 00036 00037 typedef float64_t KERNELCACHE_ELEM; 00038 #endif 00039 00041 typedef int64_t KERNELCACHE_IDX; 00042 00043 00045 enum EOptimizationType 00046 { 00047 FASTBUTMEMHUNGRY, 00048 SLOWBUTMEMEFFICIENT 00049 }; 00050 00052 enum EKernelType 00053 { 00054 K_UNKNOWN = 0, 00055 K_LINEAR = 10, 00056 K_POLY = 20, 00057 K_GAUSSIAN = 30, 00058 K_GAUSSIANSHIFT = 32, 00059 K_GAUSSIANMATCH = 33, 00060 K_HISTOGRAM = 40, 00061 K_SALZBERG = 41, 00062 K_LOCALITYIMPROVED = 50, 00063 K_SIMPLELOCALITYIMPROVED = 60, 00064 K_FIXEDDEGREE = 70, 00065 K_WEIGHTEDDEGREE = 80, 00066 K_WEIGHTEDDEGREEPOS = 81, 00067 K_WEIGHTEDDEGREERBF = 82, 00068 K_WEIGHTEDCOMMWORDSTRING = 90, 00069 K_POLYMATCH = 100, 00070 K_ALIGNMENT = 110, 00071 K_COMMWORDSTRING = 120, 00072 K_COMMULONGSTRING = 121, 00073 K_SPECTRUMRBF = 122, 00074 K_SPECTRUMMISMATCHRBF = 123, 00075 K_COMBINED = 140, 00076 K_AUC = 150, 00077 K_CUSTOM = 160, 00078 K_SIGMOID = 170, 00079 K_CHI2 = 180, 00080 K_DIAG = 190, 00081 K_CONST = 200, 00082 K_DISTANCE = 220, 00083 K_LOCALALIGNMENT = 230, 00084 K_PYRAMIDCHI2 = 240, 00085 K_OLIGO = 250, 00086 K_MATCHWORD = 260, 00087 K_TPPK = 270, 00088 K_REGULATORYMODULES = 280, 00089 K_SPARSESPATIALSAMPLE = 290, 00090 K_HISTOGRAMINTERSECTION = 300, 00091 K_WAVELET = 310, 00092 K_WAVE = 320, 00093 K_CAUCHY = 330, 00094 K_TSTUDENT = 340, 00095 K_RATIONAL_QUADRATIC = 350, 00096 K_MULTIQUADRIC = 360, 00097 K_EXPONENTIAL = 370, 00098 K_SPHERICAL = 380, 00099 K_SPLINE = 390, 00100 K_ANOVA = 400, 00101 K_POWER = 410, 00102 K_LOG = 420, 00103 K_CIRCULAR = 430, 00104 K_INVERSEMULTIQUADRIC = 440, 00105 K_DISTANTSEGMENTS = 450, 00106 K_BESSEL = 460, 00107 }; 00108 00110 enum EKernelProperty 00111 { 00112 KP_NONE = 0, 00113 KP_LINADD = 1, // Kernels that can be optimized via doing normal updates w + dw 00114 KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i 00115 KP_BATCHEVALUATION = 4 // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples 00116 }; 00117 00118 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00119 00120 template <class T> struct K_THREAD_PARAM 00121 { 00123 CKernel* kernel; 00125 int32_t start; 00127 int32_t end; 00129 int32_t total_start; 00131 int32_t total_end; 00133 int32_t m; 00135 int32_t n; 00137 T* result; 00139 bool symmetric; 00141 bool verbose; 00142 }; 00143 #endif 00144 00145 class CSVM; 00146 00172 class CKernel : public CSGObject 00173 { 00174 friend class CVarianceKernelNormalizer; 00175 friend class CSqrtDiagKernelNormalizer; 00176 friend class CAvgDiagKernelNormalizer; 00177 friend class CRidgeKernelNormalizer; 00178 friend class CFirstElementKernelNormalizer; 00179 friend class CMultitaskKernelNormalizer; 00180 friend class CMultitaskKernelMklNormalizer; 00181 friend class CMultitaskKernelMaskNormalizer; 00182 friend class CMultitaskKernelMaskPairNormalizer; 00183 friend class CTanimotoKernelNormalizer; 00184 friend class CDiceKernelNormalizer; 00185 friend class CZeroMeanCenterKernelNormalizer; 00186 00187 public: 00188 00192 CKernel(); 00193 00194 00199 CKernel(int32_t size); 00200 00207 CKernel(CFeatures* l, CFeatures* r, int32_t size); 00208 00209 virtual ~CKernel(); 00210 00218 inline float64_t kernel(int32_t idx_a, int32_t idx_b) 00219 { 00220 if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs) 00221 { 00222 SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n", 00223 idx_a,num_lhs, idx_b,num_rhs); 00224 } 00225 00226 return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b); 00227 } 00228 00233 SGMatrix<float64_t> get_kernel_matrix() 00234 { 00235 return get_kernel_matrix<float64_t>(); 00236 } 00237 00243 virtual SGVector<float64_t> get_kernel_col(int32_t j) 00244 { 00245 00246 SGVector<float64_t> col = SGVector<float64_t>(num_rhs); 00247 00248 for (int32_t i=0; i!=num_rhs; i++) 00249 col[i] = kernel(i,j); 00250 00251 return col; 00252 } 00253 00254 00260 virtual SGVector<float64_t> get_kernel_row(int32_t i) 00261 { 00262 SGVector<float64_t> row = SGVector<float64_t>(num_lhs); 00263 00264 for (int32_t j=0; j!=num_lhs; j++) 00265 row[j] = kernel(i,j); 00266 00267 return row; 00268 } 00269 00274 template <class T> 00275 SGMatrix<T> get_kernel_matrix() 00276 { 00277 T* result = NULL; 00278 00279 if (!has_features()) 00280 SG_ERROR( "no features assigned to kernel\n"); 00281 00282 int32_t m=get_num_vec_lhs(); 00283 int32_t n=get_num_vec_rhs(); 00284 00285 int64_t total_num = int64_t(m)*n; 00286 00287 // if lhs == rhs and sizes match assume k(i,j)=k(j,i) 00288 bool symmetric= (lhs && lhs==rhs && m==n); 00289 00290 SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n); 00291 00292 result=SG_MALLOC(T, total_num); 00293 00294 int32_t num_threads=parallel->get_num_threads(); 00295 if (num_threads < 2) 00296 { 00297 K_THREAD_PARAM<T> params; 00298 params.kernel=this; 00299 params.result=result; 00300 params.start=0; 00301 params.end=m; 00302 params.total_start=0; 00303 params.total_end=total_num; 00304 params.n=n; 00305 params.m=m; 00306 params.symmetric=symmetric; 00307 params.verbose=true; 00308 get_kernel_matrix_helper<T>((void*) ¶ms); 00309 } 00310 else 00311 { 00312 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00313 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads); 00314 int64_t step= total_num/num_threads; 00315 00316 int32_t t; 00317 00318 num_threads--; 00319 for (t=0; t<num_threads; t++) 00320 { 00321 params[t].kernel = this; 00322 params[t].result = result; 00323 params[t].start = compute_row_start(t*step, n, symmetric); 00324 params[t].end = compute_row_start((t+1)*step, n, symmetric); 00325 params[t].total_start=t*step; 00326 params[t].total_end=(t+1)*step; 00327 params[t].n=n; 00328 params[t].m=m; 00329 params[t].symmetric=symmetric; 00330 params[t].verbose=false; 00331 00332 int code=pthread_create(&threads[t], NULL, 00333 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]); 00334 00335 if (code != 0) 00336 { 00337 SG_WARNING("Thread creation failed (thread %d of %d) " 00338 "with error:'%s'\n",t, num_threads, strerror(code)); 00339 num_threads=t; 00340 break; 00341 } 00342 } 00343 00344 params[t].kernel = this; 00345 params[t].result = result; 00346 params[t].start = compute_row_start(t*step, n, symmetric); 00347 params[t].end = m; 00348 params[t].total_start=t*step; 00349 params[t].total_end=total_num; 00350 params[t].n=n; 00351 params[t].m=m; 00352 params[t].symmetric=symmetric; 00353 params[t].verbose=true; 00354 get_kernel_matrix_helper<T>(¶ms[t]); 00355 00356 for (t=0; t<num_threads; t++) 00357 { 00358 if (pthread_join(threads[t], NULL) != 0) 00359 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads); 00360 } 00361 00362 SG_FREE(params); 00363 SG_FREE(threads); 00364 } 00365 00366 SG_DONE(); 00367 00368 return SGMatrix<T>(result,m,n,true); 00369 } 00370 00371 00382 virtual bool init(CFeatures* lhs, CFeatures* rhs); 00383 00388 virtual bool set_normalizer(CKernelNormalizer* normalizer); 00389 00394 virtual CKernelNormalizer* get_normalizer(); 00395 00399 virtual bool init_normalizer(); 00400 00407 virtual void cleanup(); 00408 00413 void load(CFile* loader); 00414 00419 void save(CFile* writer); 00420 00425 inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; } 00426 00431 inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; } 00432 00437 virtual inline int32_t get_num_vec_lhs() 00438 { 00439 return num_lhs; 00440 } 00441 00446 virtual inline int32_t get_num_vec_rhs() 00447 { 00448 return num_rhs; 00449 } 00450 00455 virtual inline bool has_features() 00456 { 00457 return lhs && rhs; 00458 } 00459 00464 inline bool get_lhs_equals_rhs() 00465 { 00466 return lhs_equals_rhs; 00467 } 00468 00470 virtual void remove_lhs_and_rhs(); 00471 00473 virtual void remove_lhs(); 00474 00476 virtual void remove_rhs(); 00477 00485 virtual EKernelType get_kernel_type()=0 ; 00486 00493 virtual EFeatureType get_feature_type()=0; 00494 00501 virtual EFeatureClass get_feature_class()=0; 00502 00507 inline void set_cache_size(int32_t size) 00508 { 00509 cache_size = size; 00510 00511 } 00512 00517 inline int32_t get_cache_size() { return cache_size; } 00518 00519 00520 00522 void list_kernel(); 00523 00529 inline bool has_property(EKernelProperty p) { return (properties & p) != 0; } 00530 00534 virtual void clear_normal(); 00535 00541 virtual void add_to_normal(int32_t vector_idx, float64_t weight); 00542 00547 inline EOptimizationType get_optimization_type() { return opt_type; } 00548 00553 virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;} 00554 00559 inline bool get_is_initialized() { return optimization_initialized; } 00560 00568 virtual bool init_optimization( 00569 int32_t count, int32_t *IDX, float64_t *weights); 00570 00575 virtual bool delete_optimization(); 00576 00582 bool init_optimization_svm(CSVM * svm) ; 00583 00589 virtual float64_t compute_optimized(int32_t vector_idx); 00590 00599 virtual void compute_batch( 00600 int32_t num_vec, int32_t* vec_idx, float64_t* target, 00601 int32_t num_suppvec, int32_t* IDX, float64_t* alphas, 00602 float64_t factor=1.0); 00603 00608 inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; } 00609 00614 inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; } 00615 00620 virtual int32_t get_num_subkernels(); 00621 00627 virtual void compute_by_subkernel( 00628 int32_t vector_idx, float64_t * subkernel_contrib); 00629 00635 virtual const float64_t* get_subkernel_weights(int32_t& num_weights); 00636 00641 virtual void set_subkernel_weights(SGVector<float64_t> weights); 00642 00643 protected: 00648 inline void set_property(EKernelProperty p) 00649 { 00650 properties |= p; 00651 } 00652 00657 inline void unset_property(EKernelProperty p) 00658 { 00659 properties &= (properties | p) ^ p; 00660 } 00661 00666 inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; } 00667 00678 virtual float64_t compute(int32_t x, int32_t y)=0; 00679 00686 int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric) 00687 { 00688 int32_t i_start; 00689 00690 if (symmetric) 00691 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs)); 00692 else 00693 i_start=(int32_t) (offs/int64_t(n)); 00694 00695 return i_start; 00696 } 00697 00702 template <class T> 00703 static void* get_kernel_matrix_helper(void* p) 00704 { 00705 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p; 00706 int32_t i_start=params->start; 00707 int32_t i_end=params->end; 00708 CKernel* k=params->kernel; 00709 T* result=params->result; 00710 bool symmetric=params->symmetric; 00711 int32_t n=params->n; 00712 int32_t m=params->m; 00713 bool verbose=params->verbose; 00714 int64_t total_start=params->total_start; 00715 int64_t total_end=params->total_end; 00716 int64_t total=total_start; 00717 00718 for (int32_t i=i_start; i<i_end; i++) 00719 { 00720 int32_t j_start=0; 00721 00722 if (symmetric) 00723 j_start=i; 00724 00725 for (int32_t j=j_start; j<n; j++) 00726 { 00727 float64_t v=k->kernel(i,j); 00728 result[i+j*m]=v; 00729 00730 if (symmetric && i!=j) 00731 result[j+i*m]=v; 00732 00733 if (verbose) 00734 { 00735 total++; 00736 00737 if (symmetric && i!=j) 00738 total++; 00739 00740 if (total%100 == 0) 00741 k->SG_PROGRESS(total, total_start, total_end); 00742 00743 if (CSignal::cancel_computations()) 00744 break; 00745 } 00746 } 00747 00748 } 00749 00750 return NULL; 00751 } 00752 00761 virtual void load_serializable_post() throw (ShogunException); 00762 00771 virtual void save_serializable_pre() throw (ShogunException); 00772 00781 virtual void save_serializable_post() throw (ShogunException); 00786 virtual void register_params(); 00787 00788 private: 00791 void init(); 00792 00793 00794 00796 00797 protected: 00799 int32_t cache_size; 00800 00801 00802 00805 KERNELCACHE_ELEM* kernel_matrix; 00806 00808 CFeatures* lhs; 00810 CFeatures* rhs; 00811 00813 bool lhs_equals_rhs; 00814 00816 int32_t num_lhs; 00818 int32_t num_rhs; 00819 00821 float64_t combined_kernel_weight; 00822 00824 bool optimization_initialized; 00828 EOptimizationType opt_type; 00829 00831 uint64_t properties; 00832 00835 CKernelNormalizer* normalizer; 00836 }; 00837 00838 } 00839 #endif /* _KERNEL_H__ */