// Copyright (C) 2010 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_OPTIMIZATIoN_OCA_H__ #define DLIB_OPTIMIZATIoN_OCA_H__ #include "optimization_oca_abstract.h" #include "../matrix.h" #include "optimization_solve_qp_using_smo.h" #include <list> // ---------------------------------------------------------------------------------------- namespace dlib { template <typename matrix_type> class oca_problem { public: typedef typename matrix_type::type scalar_type; virtual ~oca_problem() {} virtual bool risk_has_lower_bound ( scalar_type& ) const { return false; } virtual bool optimization_status ( scalar_type , scalar_type , unsigned long, unsigned long ) const = 0; virtual scalar_type get_c ( ) const = 0; virtual long get_num_dimensions ( ) const = 0; virtual void get_risk ( matrix_type& current_solution, scalar_type& risk_value, matrix_type& risk_subgradient ) const = 0; }; // ---------------------------------------------------------------------------------------- class oca { public: oca () { sub_eps = 1e-2; sub_max_iter = 200000; inactive_thresh = 10; } void set_subproblem_epsilon ( double eps_ ) { sub_eps = eps_; } double get_subproblem_epsilon ( ) const { return sub_eps; } void set_subproblem_max_iterations ( unsigned long sub_max_iter_ ) { // make sure requires clause is not broken DLIB_ASSERT(sub_max_iter_ > 0, "\t void oca::set_subproblem_max_iterations" << "\n\t max iterations must be greater than 0" << "\n\t sub_max_iter_: " << sub_max_iter_ << "\n\t this: " << this ); sub_max_iter = sub_max_iter_; } unsigned long get_subproblem_max_iterations ( ) const { return sub_max_iter; } void set_inactive_plane_threshold ( unsigned long inactive_thresh_ ) { // make sure requires clause is not broken DLIB_ASSERT(inactive_thresh_ > 0, "\t void oca::set_inactive_plane_threshold" << "\n\t inactive threshold must be greater than 0" << "\n\t inactive_thresh_: " << inactive_thresh_ << "\n\t this: " << this ); inactive_thresh = inactive_thresh_; } unsigned long get_inactive_plane_threshold ( ) const { return inactive_thresh; } template < typename matrix_type > typename matrix_type::type operator() ( const oca_problem<matrix_type>& problem, matrix_type& w ) const { // make sure requires clause is not broken DLIB_ASSERT(problem.get_c() > 0 && problem.get_num_dimensions() > 0, "\t void oca::operator()" << "\n\t The oca_problem is invalid" << "\n\t problem.get_c(): " << problem.get_c() << "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions() << "\n\t this: " << this ); typedef typename matrix_type::type scalar_type; typedef typename matrix_type::layout_type layout_type; typedef typename matrix_type::mem_manager_type mem_manager_type; typedef matrix_type vect_type; const scalar_type C = problem.get_c(); std::list<vect_type> planes; std::vector<scalar_type> bs, miss_count; vect_type temp, alpha, w_cur; w.set_size(problem.get_num_dimensions(), 1); w = 0; w_cur = w; // The best objective value seen so far. Note also // that w always contains the best solution seen so far. scalar_type best_obj = std::numeric_limits<scalar_type>::max(); // This will hold the cutting plane objective value. This value is // a lower bound on the true optimal objective value. scalar_type cp_obj = 0; matrix<scalar_type,0,0,mem_manager_type, layout_type> K, Ktmp; scalar_type R_lower_bound; if (problem.risk_has_lower_bound(R_lower_bound)) { // The flat lower bounding plane is always good to have if we know // what it is. bs.push_back(R_lower_bound); planes.push_back(zeros_matrix<scalar_type>(w.size(),1)); miss_count.push_back(0); K.set_size(1,1); K(0,0) = 0; } unsigned long counter = 0; while (true) { ++counter; // add the next cutting plane scalar_type cur_risk; planes.resize(planes.size()+1); problem.get_risk(w_cur, cur_risk, planes.back()); bs.push_back(cur_risk - dot(w_cur,planes.back())); miss_count.push_back(0); // Check the objective value at w_cur and see if it is better than // the best seen so far. const scalar_type cur_obj = 0.5*trans(w_cur)*w_cur + C*cur_risk; if (cur_obj < best_obj) { best_obj = cur_obj; // move w_cur into w w.swap(w_cur); } // compute kernel matrix for all the planes K.swap(Ktmp); K.set_size(planes.size(), planes.size()); // copy over the old K matrix set_subm(K, 0,0, Ktmp.nr(), Ktmp.nc()) = Ktmp; // now add the new row and column to K long rr = 0; for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r) { K(rr, Ktmp.nc()) = dot(*r, planes.back()); K(Ktmp.nc(), rr) = K(rr,Ktmp.nc()); ++rr; } alpha = uniform_matrix<scalar_type>(planes.size(),1, C/planes.size()); // solve the cutting plane subproblem for the next w_cur. We solve it to an // accuracy that is related to how big the error gap is scalar_type eps = std::min<scalar_type>(sub_eps, 0.1*(best_obj-cp_obj)) ; // just a sanity check if (eps < 1e-16) eps = 1e-16; solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter); // construct the w_cur that minimized the subproblem. w_cur = 0; rr = 0; for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i) { if (alpha(rr) != 0) { w_cur -= alpha(rr)*(*i); miss_count[rr] = 0; } else { miss_count[rr] += 1; } ++rr; } // Compute the lower bound on the true objective given to us by the cutting // plane subproblem. cp_obj = -0.5*trans(w_cur)*w_cur + trans(alpha)*vector_to_matrix(bs); // report current status if (problem.optimization_status(best_obj, best_obj - cp_obj, planes.size(), counter)) break; // If it has been a while since a cutting plane was an active constraint then // we should throw it away. while (max(vector_to_matrix(miss_count)) >= inactive_thresh) { const long idx = index_of_max(vector_to_matrix(miss_count)); typename std::list<vect_type>::iterator i0 = planes.begin(); advance(i0, idx); planes.erase(i0); bs.erase(bs.begin()+idx); miss_count.erase(miss_count.begin()+idx); K = removerc(K, idx, idx); } } return best_obj; } private: double sub_eps; unsigned long sub_max_iter; unsigned long inactive_thresh; }; } // ---------------------------------------------------------------------------------------- #endif // DLIB_OPTIMIZATIoN_OCA_H__