/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVM_linear.h>
#include <shogun/classifier/svm/Tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
    m_linear_term.destroy_vector();
}

bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            liblinear_solver_type == L1R_LR)
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    SG_FREE(w);
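    /* Note: when a bias is used, w gets one extra slot; the first num_feat
     * entries hold the weights proper and w[w_dim] holds the bias term,
     * which is read back via set_bias() once training is done. */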
    if (use_bias)
        w=SG_MALLOC(float64_t, num_feat+1);
    else
        w=SG_MALLOC(float64_t, num_feat);
    w_dim=num_feat;

    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*w_dim);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(int, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos=0;
    int neg=0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg=prob.l-pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function* fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            // assumes features are already transposed (checked above)
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            // assumes features are already transposed (checked above)
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            SG_NOTIMPLEMENTED;
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    return true;
}
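/* Illustrative usage sketch (guarded out, not part of the library build):
 * training a CLibLinear machine on dense features. It assumes the usual
 * Shogun 1.x API: CSimpleFeatures/CLabels constructed from
 * SGMatrix/SGVector, and the set_liblinear_solver_type()/set_epsilon()
 * setters declared in LibLinear.h. */
#if 0
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/classifier/svm/LibLinear.h>

void liblinear_usage_example(SGMatrix<float64_t> fm, SGVector<float64_t> lab)
{
    // features: one column per example; labels: +1/-1 per example
    CSimpleFeatures<float64_t>* feats=new CSimpleFeatures<float64_t>(fm);
    CLabels* labs=new CLabels(lab);

    CLibLinear* svm=new CLibLinear(1.0, feats, labs); // C1=C2=1.0, bias on
    svm->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
    svm->set_epsilon(1e-3);
    svm->train();

    SG_UNREF(svm); // machine holds references to feats/labs
}
#endif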
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_l1l2_svc(
    const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for (i=0; i<w_size; i++)
        w[i] = 0;

    for (i=0; i<l; i++)
    {
        alpha[i] = 0;
        if (prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;

        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);
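                /* the bias acts as an implicit constant-1 feature, so the
                 * same dual step d is added to its slot w[n] */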
                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if (gap <= eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for (i=0; i<w_size; i++)
        v += w[i]*w[i];
    for (i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if (alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2);
    SG_INFO("nSV = %d\n", nSV);

    // buffers come from SG_MALLOC, so release them with SG_FREE
    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        b[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for (j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;
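        /* random permutation of the active coordinates (Fisher-Yates),
         * so each outer iteration sweeps them in a fresh order */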
        for (j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if (b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w[j])
                d = -Gp/H;
            else if (Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if (fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if (appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if (num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if (b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if (b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
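                /* backtracking test in the spirit of an Armijo rule: accept
                 * the step once the actual objective change is no more than
                 * the sigma-discounted predicted decrease */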
                cond = cond + loss_new - loss_old;
                if (cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for (int i=0; i<l; i++)
                    b[i] = 1;

                for (int i=0; i<n; i++)
                {
                    if (w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
    {
        if (w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for (j=0; j<l; j++)
        if (b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers come from SG_MALLOC, so release them with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for (j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if (prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
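    /* exp_wTx[j] caches exp(w^T x_j); since w starts at zero, every entry
     * starts at exp(0)=1 and is updated multiplicatively whenever a
     * coordinate of w changes */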
    for (j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if (y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for (j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if (w[j] == 0)
            {
                if (Gp < 0)
                    violation = -Gp;
                else if (Gn > 0)
                    violation = Gn;
                else if (Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if (w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if (Gp <= H*w[j])
                d = -Gp/H;
            else if (Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if (fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for (num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if (x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if (CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
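                        /* the rescaling below uses exp((w+d*e_j)^T x_i) =
                         * exp(w^T x_i)*exp(d*x_ij): only entries with a
                         * nonzero j-th feature need updating */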
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if (cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if (num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for (int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for (int i=0; i<w_size; i++)
                {
                    if (w[i]==0)
                        continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for (int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if (iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if (Gmax_new <= eps*Gmax_init)
        {
            if (active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for (j=0; j<w_size; j++)
        if (w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for (j=0; j<l; j++)
        if (y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers come from SG_MALLOC, so release them with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");
    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    m_linear_term.destroy_vector();

    m_linear_term=SGVector<float64_t>(labels->get_num_labels());
    CMath::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}

#endif //HAVE_LAPACK
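/* Illustrative sketch (guarded out, not part of the library build): using a
 * custom linear term. init_linear_term() fills the term with -1, which
 * recovers the standard dual objective -e^T alpha; a user-supplied vector
 * reweights that term per training example. set_linear_term() is assumed to
 * be the corresponding setter declared in LibLinear.h. */
#if 0
void linear_term_example(CLibLinear* svm, int32_t num_labels)
{
    SGVector<float64_t> lin(num_labels);
    CMath::fill_vector(lin.vector, lin.vlen, -1.0); // default: -e
    lin.vector[0]=-2.0; // put extra weight on the first example
    svm->set_linear_term(lin);
    svm->train();
}
#endif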