SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/LaplacianEigenmaps.h> 00012 #include <shogun/converter/EmbeddingConverter.h> 00013 #ifdef HAVE_LAPACK 00014 #include <shogun/mathematics/arpack.h> 00015 #include <shogun/mathematics/lapack.h> 00016 #include <shogun/lib/FibonacciHeap.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/io/SGIO.h> 00019 #include <shogun/distance/Distance.h> 00020 #include <shogun/lib/Signal.h> 00021 00022 using namespace shogun; 00023 00024 CLaplacianEigenmaps::CLaplacianEigenmaps() : 00025 CEmbeddingConverter() 00026 { 00027 m_k = 3; 00028 m_tau = 1.0; 00029 00030 init(); 00031 } 00032 00033 void CLaplacianEigenmaps::init() 00034 { 00035 m_parameters->add(&m_k, "k", "number of neighbors"); 00036 m_parameters->add(&m_tau, "tau", "heat distribution coefficient"); 00037 } 00038 00039 CLaplacianEigenmaps::~CLaplacianEigenmaps() 00040 { 00041 } 00042 00043 void CLaplacianEigenmaps::set_k(int32_t k) 00044 { 00045 ASSERT(k>0); 00046 m_k = k; 00047 } 00048 00049 int32_t CLaplacianEigenmaps::get_k() const 00050 { 00051 return m_k; 00052 } 00053 00054 void CLaplacianEigenmaps::set_tau(float64_t tau) 00055 { 00056 m_tau = tau; 00057 } 00058 00059 float64_t CLaplacianEigenmaps::get_tau() const 00060 { 00061 return m_tau; 00062 } 00063 00064 const char* CLaplacianEigenmaps::get_name() const 00065 { 00066 return "LaplacianEigenmaps"; 00067 }; 00068 00069 CFeatures* CLaplacianEigenmaps::apply(CFeatures* features) 00070 { 00071 // shorthand for simplefeatures 00072 SG_REF(features); 00073 00074 // get dimensionality and number of vectors of data 00075 int32_t N = features->get_num_vectors(); 00076 ASSERT(m_k<N); 00077 ASSERT(m_target_dim<N); 00078 00079 // compute distance matrix 00080 ASSERT(m_distance); 00081 m_distance->init(features,features); 00082 CSimpleFeatures<float64_t>* embedding = embed_distance(m_distance,features); 00083 m_distance->remove_lhs_and_rhs(); 00084 SG_UNREF(features); 00085 return (CFeatures*)embedding; 00086 } 00087 00088 CSimpleFeatures<float64_t>* CLaplacianEigenmaps::embed_distance(CDistance* distance, CFeatures* features) 00089 { 00090 int32_t i,j; 00091 SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix(); 00092 ASSERT(W_sgmatrix.num_rows==W_sgmatrix.num_cols); 00093 int32_t N = W_sgmatrix.num_rows; 00094 // shorthand 00095 float64_t* W_matrix = W_sgmatrix.matrix; 00096 00097 // init heap to use 00098 CFibonacciHeap* heap = new CFibonacciHeap(N); 00099 float64_t tmp; 00100 // for each object 00101 for (i=0; i<N; i++) 00102 { 00103 // fill heap 00104 for (j=0; j<N; j++) 00105 heap->insert(j,W_matrix[i*N+j]); 00106 00107 // rearrange heap with extracting ith object itself 00108 heap->extract_min(tmp); 00109 00110 // extract nearest neighbors, takes ~O(k*log n), and change sign for them 00111 for (j=0; j<m_k; j++) 00112 W_matrix[i*N+heap->extract_min(tmp)] *= -1.0; 00113 00114 // remove all 'positive' distances and change 'negative' ones to positive 00115 for (j=0; j<N; j++) 00116 { 00117 if (W_matrix[i*N+j]>0.0) 00118 W_matrix[i*N+j] = 0.0; 00119 else 00120 W_matrix[i*N+j] *= -1.0; 00121 } 00122 00123 // clear heap to reuse 00124 heap->clear(); 00125 } 00126 delete heap; 00127 // make distance matrix symmetric with mutual kNN relation 00128 for (i=0; i<N; i++) 00129 { 00130 // check only upper triangle 00131 for (j=i; j<N; j++) 00132 { 00133 // make kNN relation symmetric 00134 if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0) 00135 { 00136 W_matrix[j*N+i] = W_matrix[i*N+j]; 00137 } 00138 if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0) 00139 { 00140 W_matrix[i*N+j] = W_matrix[j*N+i]; 00141 } 00142 00143 if (W_matrix[i*N+j] != 0.0) 00144 { 00145 // compute heat, exp(-d^2/tau) 00146 tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau); 00147 W_matrix[i*N+j] = tmp; 00148 W_matrix[j*N+i] = tmp; 00149 } 00150 } 00151 } 00152 00153 // compute D 00154 CSimpleFeatures<float64_t>* embedding = construct_embedding(features,W_sgmatrix); 00155 W_sgmatrix.destroy_matrix(); 00156 00157 return embedding; 00158 } 00159 00160 CSimpleFeatures<float64_t>* CLaplacianEigenmaps::construct_embedding(CFeatures* features, 00161 SGMatrix<float64_t> W_matrix) 00162 { 00163 int32_t i,j; 00164 int32_t N = W_matrix.num_cols; 00165 00166 float64_t* D_diag_vector = SG_CALLOC(float64_t, N); 00167 for (i=0; i<N; i++) 00168 { 00169 for (j=0; j<N; j++) 00170 D_diag_vector[i] += W_matrix[i*N+j]; 00171 } 00172 00173 // W = -W 00174 for (i=0; i<N*N; i++) 00175 if (W_matrix[i]>0.0) 00176 W_matrix[i] *= -1.0; 00177 // W = W + D 00178 for (i=0; i<N; i++) 00179 W_matrix[i*N+i] += D_diag_vector[i]; 00180 00181 #ifdef HAVE_ARPACK 00182 // using ARPACK DS{E,A}UPD 00183 int eigenproblem_status = 0; 00184 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1); 00185 arpack_dsxupd(W_matrix.matrix,D_diag_vector,true,N,m_target_dim+1,"LA",true,3,false,-1e-9,0.0, 00186 eigenvalues_vector,W_matrix.matrix,eigenproblem_status); 00187 00188 if (eigenproblem_status!=0) 00189 SG_ERROR("DSXUPD failed with code %d\n",eigenproblem_status); 00190 00191 SG_FREE(eigenvalues_vector); 00192 #else /* HAVE_ARPACK */ 00193 // using LAPACK DSYGVX 00194 // requires 2x memory because of dense rhs matrix usage 00195 int eigenproblem_status = 0; 00196 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N); 00197 float64_t* rhs = SG_CALLOC(float64_t,N*N); 00198 // fill rhs with diag (for safety reasons zeros will be replaced with 1e-3) 00199 for (i=0; i<N; i++) 00200 rhs[i*N+i] = D_diag_vector[i]; 00201 00202 wrap_dsygvx(1,'V','U',N,W_matrix.matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix.matrix,&eigenproblem_status); 00203 00204 if (eigenproblem_status) 00205 SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status); 00206 00207 SG_FREE(rhs); 00208 SG_FREE(eigenvalues_vector); 00209 00210 #endif /* HAVE_ARPACK */ 00211 SG_FREE(D_diag_vector); 00212 00213 SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N); 00214 // fill features according to used solver 00215 for (i=0; i<m_target_dim; i++) 00216 { 00217 for (j=0; j<N; j++) 00218 { 00219 #ifdef HAVE_ARPACK 00220 new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1]; 00221 #else 00222 new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j]; 00223 #endif 00224 } 00225 } 00226 return new CSimpleFeatures<float64_t>(new_features); 00227 } 00228 00229 #endif /* HAVE_LAPACK */