00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef _VIENNACL_GMRES_HPP_
00016 #define _VIENNACL_GMRES_HPP_
00017
00022 #include <vector>
00023 #include <cmath>
00024 #include "viennacl/forwards.h"
00025 #include "viennacl/tools/tools.hpp"
00026 #include "viennacl/linalg/norm_2.hpp"
00027 #include "viennacl/linalg/prod.hpp"
00028 #include "viennacl/linalg/inner_prod.hpp"
00029
00030 namespace viennacl
00031 {
00032 namespace linalg
00033 {
00034
00037 class gmres_tag
00038 {
00039 public:
00046 gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
00047 : _tol(tol), _iterations(max_iterations), _krylov_dim(krylov_dim), iters_taken_(0) {};
00048
00050 double tolerance() const { return _tol; }
00052 unsigned int max_iterations() const { return _iterations; }
00054 unsigned int krylov_dim() const { return _krylov_dim; }
00056 unsigned int max_restarts() const
00057 {
00058 unsigned int ret = _iterations / _krylov_dim;
00059 if (ret > 0 && (ret * _krylov_dim == _iterations) )
00060 return ret - 1;
00061 return ret;
00062 }
00063
00065 unsigned int iters() const { return iters_taken_; }
00067 void iters(unsigned int i) const { iters_taken_ = i; }
00068
00070 double error() const { return last_error_; }
00072 void error(double e) const { last_error_ = e; }
00073
00074 private:
00075 double _tol;
00076 unsigned int _iterations;
00077 unsigned int _krylov_dim;
00078
00079
00080 mutable unsigned int iters_taken_;
00081 mutable double last_error_;
00082 };
00083
00084 namespace
00085 {
00086
00087 template <typename SRC_VECTOR, typename DEST_VECTOR>
00088 void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, unsigned int len)
00089 {
00090 for (unsigned int i=0; i<len; ++i)
00091 dest[i] = src[i];
00092 }
00093
00094 template <typename ScalarType, typename DEST_VECTOR>
00095 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, unsigned int len)
00096 {
00097 viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00098 }
00099
00100 template <typename ScalarType>
00101 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, viennacl::vector<ScalarType> & dest, unsigned int len)
00102 {
00103 viennacl::copy(src.begin(), src.begin() + len, dest.begin());
00104 }
00105
00106 }
00107
00118 template <typename MatrixType, typename VectorType, typename PreconditionerType>
00119 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
00120 {
00121 typedef typename viennacl::tools::result_of::value_type<VectorType>::type ScalarType;
00122 typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER<ScalarType>::ResultType CPU_ScalarType;
00123 unsigned int problem_size = viennacl::tools::traits::size(rhs);
00124 VectorType result(problem_size);
00125 viennacl::tools::traits::clear(result);
00126 unsigned int krylov_dim = tag.krylov_dim();
00127 if (problem_size < tag.krylov_dim())
00128 krylov_dim = problem_size;
00129
00130 VectorType res(problem_size);
00131 VectorType v_k_tilde(problem_size);
00132 VectorType v_k_tilde_temp(problem_size);
00133
00134 std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
00135 std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
00136 std::vector<VectorType> U(krylov_dim);
00137
00138 const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1);
00139 const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1);
00140 const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2);
00141
00142 CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
00143
00144 unsigned int k;
00145 for (k = 0; k < krylov_dim; ++k)
00146 {
00147 R[k].resize(tag.krylov_dim());
00148 viennacl::tools::traits::resize(U[k], problem_size);
00149 }
00150
00151
00152 tag.iters(0);
00153
00154 for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
00155 {
00156 res = rhs;
00157 res -= viennacl::linalg::prod(matrix, result);
00158 precond.apply(res);
00159
00160 CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res);
00161 CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
00162
00163 if (rho_0 / norm_rhs < tag.tolerance())
00164 {
00165
00166 tag.error(rho_0 / norm_rhs);
00167 return result;
00168 }
00169
00170 res /= rho_0;
00171 for (k=0; k<krylov_dim; ++k)
00172 {
00173 viennacl::tools::traits::clear(R[k]);
00174 viennacl::tools::traits::clear(U[k]);
00175 R[k].resize(krylov_dim);
00176 viennacl::tools::traits::resize(U[k], problem_size);
00177 }
00178
00179 for (k = 0; k < krylov_dim; ++k)
00180 {
00181 tag.iters( tag.iters() + 1 );
00182
00183
00184 if (k == 0)
00185 {
00186 v_k_tilde = viennacl::linalg::prod(matrix, res);
00187 precond.apply(v_k_tilde);
00188 }
00189 else
00190 {
00191 viennacl::tools::traits::clear(v_k_tilde);
00192 v_k_tilde[k-1] = gpu_scalar_1;
00193
00194 for (int i = k-1; i > -1; --i)
00195 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00196
00197 v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
00198 precond.apply(v_k_tilde_temp);
00199 v_k_tilde = v_k_tilde_temp;
00200
00201
00202 for (unsigned int i = 0; i < k; ++i)
00203 v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
00204 }
00205
00206 viennacl::tools::traits::clear(U[k]);
00207 viennacl::tools::traits::resize(U[k], problem_size);
00208
00209 gmres_copy_helper(v_k_tilde, U[k], k);
00210
00211 U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) );
00212
00213
00214 gmres_copy_helper(U[k], R[k], k+1);
00215
00216 U[k] -= v_k_tilde;
00217 U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] );
00218
00219 res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2);
00220
00221 projection_rhs[k] = res[k];
00222 rho *= std::sin( std::acos(projection_rhs[k] / rho) );
00223
00224 if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())
00225 {
00226
00227 tag.error( std::fabs(rho*rho_0 / norm_rhs) );
00228 break;
00229 }
00230
00231 }
00232
00233
00234 for (int i=k-1; i>-1; --i)
00235 {
00236 for (unsigned int j=i+1; j<k; ++j)
00237
00238 projection_rhs[i] -= R[j][i] * projection_rhs[j];
00239
00240 projection_rhs[i] /= R[i][i];
00241 }
00242
00243 res *= projection_rhs[0];
00244
00245 if (k > 0)
00246 {
00247 for (unsigned int i = 0; i < k-1; ++i)
00248 {
00249 res[i] += projection_rhs[i+1];
00250 }
00251 }
00252
00253 for (int i = k-1; i > -1; --i)
00254 res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2);
00255
00256 res *= rho_0;
00257 result += res;
00258
00259 if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() )
00260 {
00261
00262 tag.error(std::fabs(rho*rho_0 / norm_rhs));
00263 return result;
00264 }
00265
00266 res = rhs;
00267 res -= viennacl::linalg::prod(matrix, result);
00268
00269
00270
00271
00272 tag.error(std::fabs(rho*rho_0));
00273 }
00274
00275 return result;
00276 }
00277
00280 template <typename MatrixType, typename VectorType>
00281 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag)
00282 {
00283 return solve(matrix, rhs, tag, no_precond());
00284 }
00285
00286
00287 }
00288 }
00289
00290 #endif