SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/KernelLocallyLinearEmbedding.h> 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/mathematics/lapack.h> 00014 #include <shogun/lib/FibonacciHeap.h> 00015 #include <shogun/lib/common.h> 00016 #include <shogun/base/DynArray.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/io/SGIO.h> 00019 #include <shogun/lib/Signal.h> 00020 00021 #ifdef HAVE_PTHREAD 00022 #include <pthread.h> 00023 #endif 00024 00025 using namespace shogun; 00026 00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00028 struct LK_RECONSTRUCTION_THREAD_PARAM 00029 { 00031 int32_t idx_start; 00033 int32_t idx_stop; 00035 int32_t idx_step; 00037 int32_t m_k; 00039 int32_t N; 00041 const int32_t* neighborhood_matrix; 00043 float64_t* local_gram_matrix; 00045 const float64_t* kernel_matrix; 00047 float64_t* id_vector; 00049 float64_t reconstruction_shift; 00051 float64_t* W_matrix; 00052 }; 00053 00054 struct K_NEIGHBORHOOD_THREAD_PARAM 00055 { 00057 int32_t idx_start; 00059 int32_t idx_step; 00061 int32_t idx_stop; 00063 int32_t N; 00065 int32_t m_k; 00067 CFibonacciHeap* heap; 00069 const float64_t* kernel_matrix; 00071 int32_t* neighborhood_matrix; 00072 }; 00073 #endif /* DOXYGEN_SHOULD_SKIP_THIS */ 00074 00075 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() : 00076 CLocallyLinearEmbedding() 00077 { 00078 } 00079 00080 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel) : 00081 CLocallyLinearEmbedding() 00082 { 00083 set_kernel(kernel); 00084 } 00085 00086 const char* CKernelLocallyLinearEmbedding::get_name() const 00087 { 00088 return "KernelLocallyLinearEmbedding"; 00089 }; 00090 00091 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding() 00092 { 00093 } 00094 00095 CFeatures* CKernelLocallyLinearEmbedding::apply(CFeatures* features) 00096 { 00097 ASSERT(features); 00098 SG_REF(features); 00099 00100 // get dimensionality and number of vectors of data 00101 int32_t N = features->get_num_vectors(); 00102 if (m_k>=N) 00103 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n", 00104 m_k, N); 00105 00106 // compute kernel matrix 00107 ASSERT(m_kernel); 00108 m_kernel->init(features,features); 00109 CSimpleFeatures<float64_t>* embedding = embed_kernel(m_kernel); 00110 m_kernel->cleanup(); 00111 SG_UNREF(features); 00112 return (CFeatures*)embedding; 00113 } 00114 00115 CSimpleFeatures<float64_t>* CKernelLocallyLinearEmbedding::embed_kernel(CKernel* kernel) 00116 { 00117 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix(); 00118 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix,m_k); 00119 00120 SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix); 00121 neighborhood_matrix.destroy_matrix(); 00122 00123 SGMatrix<float64_t> nullspace = construct_embedding(M_matrix,m_target_dim); 00124 M_matrix.destroy_matrix(); 00125 00126 return new CSimpleFeatures<float64_t>(nullspace); 00127 } 00128 00129 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix, 00130 SGMatrix<int32_t> neighborhood_matrix) 00131 { 00132 int32_t N = kernel_matrix.num_cols; 00133 // loop variables 00134 int32_t t; 00135 #ifdef HAVE_PTHREAD 00136 int32_t num_threads = parallel->get_num_threads(); 00137 ASSERT(num_threads>0); 00138 // allocate threads 00139 pthread_t* threads = SG_MALLOC(pthread_t, num_threads); 00140 LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads); 00141 pthread_attr_t attr; 00142 pthread_attr_init(&attr); 00143 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00144 #else 00145 int32_t num_threads = 1; 00146 #endif 00147 float64_t* W_matrix = SG_CALLOC(float64_t, N*N); 00148 // init matrices and norm factor to be used 00149 float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads); 00150 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads); 00151 00152 #ifdef HAVE_PTHREAD 00153 for (t=0; t<num_threads; t++) 00154 { 00155 parameters[t].idx_start = t; 00156 parameters[t].idx_step = num_threads; 00157 parameters[t].idx_stop = N; 00158 parameters[t].m_k = m_k; 00159 parameters[t].N = N; 00160 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix; 00161 parameters[t].kernel_matrix = kernel_matrix.matrix; 00162 parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t; 00163 parameters[t].id_vector = id_vector+m_k*t; 00164 parameters[t].W_matrix = W_matrix; 00165 parameters[t].reconstruction_shift = m_reconstruction_shift; 00166 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]); 00167 } 00168 for (t=0; t<num_threads; t++) 00169 pthread_join(threads[t], NULL); 00170 pthread_attr_destroy(&attr); 00171 SG_FREE(parameters); 00172 SG_FREE(threads); 00173 #else 00174 LK_RECONSTRUCTION_THREAD_PARAM single_thread_param; 00175 single_thread_param.idx_start = 0; 00176 single_thread_param.idx_step = 1; 00177 single_thread_param.idx_stop = N; 00178 single_thread_param.m_k = m_k; 00179 single_thread_param.N = N; 00180 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix; 00181 single_thread_param.local_gram_matrix = local_gram_matrix; 00182 single_thread_param.kernel_matrix = kernel_matrix.matrix; 00183 single_thread_param.id_vector = id_vector; 00184 single_thread_param.W_matrix = W_matrix; 00185 run_linearreconstruction_thread((void*)single_thread_param); 00186 #endif 00187 00188 // clean 00189 SG_FREE(id_vector); 00190 SG_FREE(local_gram_matrix); 00191 00192 return SGMatrix<float64_t>(W_matrix,N,N); 00193 } 00194 00195 void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p) 00196 { 00197 LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p; 00198 int32_t idx_start = parameters->idx_start; 00199 int32_t idx_step = parameters->idx_step; 00200 int32_t idx_stop = parameters->idx_stop; 00201 int32_t m_k = parameters->m_k; 00202 int32_t N = parameters->N; 00203 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix; 00204 float64_t* local_gram_matrix = parameters->local_gram_matrix; 00205 const float64_t* kernel_matrix = parameters->kernel_matrix; 00206 float64_t* id_vector = parameters->id_vector; 00207 float64_t* W_matrix = parameters->W_matrix; 00208 float64_t reconstruction_shift = parameters->reconstruction_shift; 00209 00210 int32_t i,j,k; 00211 float64_t norming,trace; 00212 00213 for (i=idx_start; i<idx_stop; i+=idx_step) 00214 { 00215 for (j=0; j<m_k; j++) 00216 { 00217 for (k=0; k<m_k; k++) 00218 local_gram_matrix[j*m_k+k] = 00219 kernel_matrix[i*N+i] - 00220 kernel_matrix[i*N+neighborhood_matrix[j*N+i]] - 00221 kernel_matrix[i*N+neighborhood_matrix[k*N+i]] + 00222 kernel_matrix[neighborhood_matrix[j*N+i]*N+neighborhood_matrix[k*N+i]]; 00223 } 00224 00225 for (j=0; j<m_k; j++) 00226 id_vector[j] = 1.0; 00227 00228 // compute tr(C) 00229 trace = 0.0; 00230 for (j=0; j<m_k; j++) 00231 trace += local_gram_matrix[j*m_k+j]; 00232 00233 // regularize gram matrix 00234 for (j=0; j<m_k; j++) 00235 local_gram_matrix[j*m_k+j] += reconstruction_shift*trace; 00236 00237 clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k); 00238 00239 // normalize weights 00240 norming=0.0; 00241 for (j=0; j<m_k; j++) 00242 norming += id_vector[j]; 00243 00244 cblas_dscal(m_k,1.0/norming,id_vector,1); 00245 00246 memset(local_gram_matrix,0,sizeof(float64_t)*m_k*m_k); 00247 cblas_dger(CblasColMajor,m_k,m_k,1.0,id_vector,1,id_vector,1,local_gram_matrix,m_k); 00248 00249 // put weights into W matrix 00250 W_matrix[N*i+i] += 1.0; 00251 for (j=0; j<m_k; j++) 00252 { 00253 W_matrix[N*i+neighborhood_matrix[j*N+i]] -= id_vector[j]; 00254 W_matrix[N*neighborhood_matrix[j*N+i]+i] -= id_vector[j]; 00255 } 00256 for (j=0; j<m_k; j++) 00257 { 00258 for (k=0; k<m_k; k++) 00259 W_matrix[N*neighborhood_matrix[j*N+i]+neighborhood_matrix[k*N+i]]+=local_gram_matrix[j*m_k+k]; 00260 } 00261 } 00262 return NULL; 00263 } 00264 00265 SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix, int32_t k) 00266 { 00267 int32_t t; 00268 int32_t N = kernel_matrix.num_cols; 00269 // init matrix and heap to be used 00270 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*k); 00271 #ifdef HAVE_PTHREAD 00272 int32_t num_threads = parallel->get_num_threads(); 00273 ASSERT(num_threads>0); 00274 K_NEIGHBORHOOD_THREAD_PARAM* parameters = SG_MALLOC(K_NEIGHBORHOOD_THREAD_PARAM, num_threads); 00275 pthread_t* threads = SG_MALLOC(pthread_t, num_threads); 00276 pthread_attr_t attr; 00277 pthread_attr_init(&attr); 00278 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00279 #else 00280 int32_t num_threads = 1; 00281 #endif 00282 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads); 00283 for (t=0; t<num_threads; t++) 00284 heaps[t] = new CFibonacciHeap(N); 00285 00286 #ifdef HAVE_PTHREAD 00287 for (t=0; t<num_threads; t++) 00288 { 00289 parameters[t].idx_start = t; 00290 parameters[t].idx_step = num_threads; 00291 parameters[t].idx_stop = N; 00292 parameters[t].m_k = k; 00293 parameters[t].N = N; 00294 parameters[t].heap = heaps[t]; 00295 parameters[t].neighborhood_matrix = neighborhood_matrix; 00296 parameters[t].kernel_matrix = kernel_matrix.matrix; 00297 pthread_create(&threads[t], &attr, run_neighborhood_thread, (void*)¶meters[t]); 00298 } 00299 for (t=0; t<num_threads; t++) 00300 pthread_join(threads[t], NULL); 00301 pthread_attr_destroy(&attr); 00302 SG_FREE(threads); 00303 SG_FREE(parameters); 00304 #else 00305 K_NEIGHBORHOOD_THREAD_PARAM single_thread_param; 00306 single_thread_param.idx_start = 0; 00307 single_thread_param.idx_step = 1; 00308 single_thread_param.idx_stop = N; 00309 single_thread_param.m_k = k; 00310 single_thread_param.N = N; 00311 single_thread_param.heap = heaps[0] 00312 single_thread_param.neighborhood_matrix = neighborhood_matrix; 00313 single_thread_param.kernel_matrix = kernel_matrix.matrix; 00314 run_neighborhood_thread((void*)&single_thread_param); 00315 #endif 00316 00317 for (t=0; t<num_threads; t++) 00318 delete heaps[t]; 00319 SG_FREE(heaps); 00320 00321 return SGMatrix<int32_t>(neighborhood_matrix,k,N); 00322 } 00323 00324 void* CKernelLocallyLinearEmbedding::run_neighborhood_thread(void* p) 00325 { 00326 K_NEIGHBORHOOD_THREAD_PARAM* parameters = (K_NEIGHBORHOOD_THREAD_PARAM*)p; 00327 int32_t idx_start = parameters->idx_start; 00328 int32_t idx_step = parameters->idx_step; 00329 int32_t idx_stop = parameters->idx_stop; 00330 int32_t N = parameters->N; 00331 int32_t m_k = parameters->m_k; 00332 CFibonacciHeap* heap = parameters->heap; 00333 const float64_t* kernel_matrix = parameters->kernel_matrix; 00334 int32_t* neighborhood_matrix = parameters->neighborhood_matrix; 00335 00336 int32_t i,j; 00337 float64_t tmp; 00338 for (i=idx_start; i<idx_stop; i+=idx_step) 00339 { 00340 for (j=0; j<N; j++) 00341 { 00342 heap->insert(j,kernel_matrix[i*N+i]-2*kernel_matrix[i*N+j]+kernel_matrix[j*N+j]); 00343 } 00344 00345 heap->extract_min(tmp); 00346 00347 for (j=0; j<m_k; j++) 00348 neighborhood_matrix[j*N+i] = heap->extract_min(tmp); 00349 00350 heap->clear(); 00351 } 00352 00353 return NULL; 00354 } 00355 #endif /* HAVE_LAPACK */