Actual source code: nepdefl.c
slepc-3.10.2 2019-02-11
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h> /*I "slepcnep.h" I*/
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
20: if (X) *X = extop->X;
21: if (H) {
22: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
23: }
24: return(0);
25: }
27: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
28: {
30: PetscScalar *array1,*array2;
31: PetscInt nloc;
34: if (extop->szd) {
35: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
36: if (v) {
37: VecGetArray(v,&array1);
38: VecGetArray(vex,&array2);
39: if (back) {
40: PetscMemcpy(array1,array2,nloc*sizeof(PetscScalar));
41: } else {
42: PetscMemcpy(array2,array1,nloc*sizeof(PetscScalar));
43: }
44: VecRestoreArray(v,&array1);
45: VecRestoreArray(vex,&array2);
46: }
47: if (a) {
48: VecGetArray(vex,&array2);
49: if (back) {
50: PetscMemcpy(a,array2+nloc,extop->szd*sizeof(PetscScalar));
51: } else {
52: PetscMemcpy(array2+nloc,a,extop->szd*sizeof(PetscScalar));
53: }
54: VecRestoreArray(vex,&array2);
55: }
56: } else {
57: if (back) {VecCopy(vex,v);}
58: else {VecCopy(v,vex);}
59: }
60: return(0);
61: }
63: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
64: {
66: PetscInt nloc;
67: Vec u;
68: VecType type;
71: if (extop->szd) {
72: BVGetColumn(extop->nep->V,0,&u);
73: VecGetType(u,&type);
74: BVRestoreColumn(extop->nep->V,0,&u);
75: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
76: VecSetType(*v,type);
77: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
78: nloc += extop->szd;
79: VecSetSizes(*v,nloc,PETSC_DECIDE);
80: } else {
81: BVCreateVec(extop->nep->V,v);
82: }
83: return(0);
84: }
86: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
87: {
88: PetscErrorCode ierr;
89: PetscInt nloc;
90: BVType type;
91: BVOrthogType otype;
92: BVOrthogRefineType oref;
93: PetscReal oeta;
94: BVOrthogBlockType oblock;
95: NEP nep=extop->nep;
98: if (extop->szd) {
99: BVGetSizes(nep->V,&nloc,NULL,NULL);
100: BVCreate(PetscObjectComm((PetscObject)nep),V);
101: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
102: BVGetType(nep->V,&type);
103: BVSetType(*V,type);
104: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
105: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
106: PetscObjectStateIncrease((PetscObject)*V);
107: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
108: } else {
109: BVDuplicateResize(nep->V,sz,V);
110: }
111: return(0);
112: }
114: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
115: {
117: PetscInt n,next,i;
118: PetscRandom rand;
119: PetscScalar *array;
120: PetscMPIInt nn;
123: BVGetRandomContext(extop->nep->V,&rand);
124: VecSetRandom(v,rand);
125: if (extop->szd) {
126: BVGetSizes(extop->nep->V,&n,NULL,NULL);
127: VecGetLocalSize(v,&next);
128: VecGetArray(v,&array);
129: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
130: PetscMPIIntCast(extop->n,&nn);
131: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PETSC_COMM_WORLD);
132: VecRestoreArray(v,&array);
133: }
134: return(0);
135: }
137: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
138: {
140: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
141: PetscScalar sone=1.0,zero=0.0;
142: PetscBLASInt ldh_,ldhj_,n_;
145: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
146: PetscMemzero(Hj,i*sizeof(PetscScalar));
147: PetscBLASIntCast(ldhj+1,&ldh_);
148: PetscBLASIntCast(ldhj,&ldhj_);
149: PetscBLASIntCast(n,&n_);
150: if (idx<1) {
151: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
152: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
153: } else {
154: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
155: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
156: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
157: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
158: }
159: if (idx<0) {
160: idx = -idx;
161: for (k=1;k<idx;k++) {
162: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
163: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
164: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
165: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
166: }
167: }
168: return(0);
169: }
171: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
172: {
174: Vec uu;
175: PetscInt ld,i;
176: PetscReal norm;
179: BVGetColumn(extop->X,extop->n,&uu);
180: ld = extop->szd+1;
181: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+extop->n*ld,u,PETSC_TRUE);
182: BVRestoreColumn(extop->X,extop->n,&uu);
183: BVNormColumn(extop->X,extop->n,NORM_2,&norm);
184: BVScaleColumn(extop->X,extop->n,1.0/norm);
185: for (i=0;i<extop->n;i++) extop->H[extop->n*ld+i] /= norm;
186: extop->H[extop->n*(ld+1)] = lambda;
187: extop->n++;
188: BVSetActiveColumns(extop->X,0,extop->n);
189: if (extop->n <= extop->szd) {
190: /* update XpX */
191: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
192: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
193: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
194: /* determine minimality index */
195: extop->midx = PetscMin(extop->max_midx,extop->n);
196: /* polynominal basis coeficients */
197: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
198: /* evaluate the polynomial basis in H */
199: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
200: }
201: return(0);
202: }
204: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
205: {
207: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
208: Mat A,B;
209: PetscScalar *array;
212: if (idx<0) {ini = 0; fin = extop->nep->nt;}
213: else {ini = idx; fin = idx+1;}
214: if (y) sz = hfjp?n+2:n+1;
215: else sz = hfjp?3*n:2*n;
216: ldh = extop->szd+1;
217: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
218: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
219: MatDenseGetArray(A,&array);
220: for (j=0;j<n;j++)
221: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
222: MatDenseRestoreArray(A,&array);
223: if (y) {
224: MatDenseGetArray(A,&array);
225: array[extop->n*(sz+1)] = lambda;
226: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
227: for (i=0;i<n;i++) array[n*sz+i] = y[i];
228: MatDenseRestoreArray(A,&array);
229: for (j=ini;j<fin;j++) {
230: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
231: MatDenseGetArray(B,&array);
232: for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
233: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
234: MatDenseRestoreArray(B,&array);
235: }
236: } else {
237: off = idx<0?ld*n:0;
238: MatDenseGetArray(A,&array);
239: for (k=0;k<n;k++) {
240: array[(n+k)*sz+k] = 1.0;
241: array[(n+k)*sz+n+k] = lambda;
242: }
243: if (hfjp) for (k=0;k<n;k++) {
244: array[(2*n+k)*sz+n+k] = 1.0;
245: array[(2*n+k)*sz+2*n+k] = lambda;
246: }
247: MatDenseRestoreArray(A,&array);
248: for (j=ini;j<fin;j++) {
249: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
250: MatDenseGetArray(B,&array);
251: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
252: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
253: MatDenseRestoreArray(B,&array);
254: }
255: }
256: MatDestroy(&A);
257: MatDestroy(&B);
258: return(0);
259: }
261: static PetscErrorCode NEPDeflationMatShell_MatMult(Mat M,Vec x,Vec y)
262: {
263: NEP_DEF_MATSHELL *matctx;
264: PetscErrorCode ierr;
265: NEP_EXT_OP extop;
266: Vec x1,y1;
267: PetscScalar *yy,sone=1.0,zero=0.0;
268: const PetscScalar *xx;
269: PetscInt nloc,i;
270: PetscBLASInt n_,one=1,szd_;
273: MatShellGetContext(M,(void**)&matctx);
274: extop = matctx->extop;
275: if (extop->szd) {
276: x1 = matctx->w[0]; y1 = matctx->w[1];
277: VecGetArrayRead(x,&xx);
278: VecPlaceArray(x1,xx);
279: VecGetArray(y,&yy);
280: VecPlaceArray(y1,yy);
281: MatMult(matctx->T,x1,y1);
282: if (extop->n) {
283: VecGetLocalSize(x1,&nloc);
284: /* copy for avoiding warning of constant array xx */
285: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i];
286: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
287: BVDotVec(extop->X,x1,matctx->work);
288: PetscBLASIntCast(extop->n,&n_);
289: PetscBLASIntCast(extop->szd,&szd_);
290: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
291: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
292: }
293: VecResetArray(x1);
294: VecRestoreArrayRead(x,&xx);
295: VecResetArray(y1);
296: VecRestoreArray(y,&yy);
297: } else {
298: MatMult(matctx->T,x,y);
299: }
300: return(0);
301: }
303: static PetscErrorCode NEPDeflationMatShell_CreateVecs(Mat M,Vec *right,Vec *left)
304: {
305: PetscErrorCode ierr;
306: NEP_DEF_MATSHELL *matctx;
309: MatShellGetContext(M,(void**)&matctx);
310: if (right) {
311: VecDuplicate(matctx->w[0],right);
312: }
313: if (left) {
314: VecDuplicate(matctx->w[0],left);
315: }
316: return(0);
317: }
319: static PetscErrorCode NEPDeflationMatShell_Destroy(Mat M)
320: {
321: PetscErrorCode ierr;
322: NEP_DEF_MATSHELL *matctx;
325: MatShellGetContext(M,(void**)&matctx);
326: if (matctx->extop->szd) {
327: BVDestroy(&matctx->U);
328: PetscFree2(matctx->hfj,matctx->work);
329: PetscFree2(matctx->A,matctx->B);
330: VecDestroy(&matctx->w[0]);
331: VecDestroy(&matctx->w[1]);
332: }
333: MatDestroy(&matctx->T);
334: PetscFree(matctx);
335: return(0);
336: }
338: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
339: {
340: PetscScalar p;
341: PetscInt i;
344: if (!jacobian) {
345: val[0] = 1.0;
346: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
347: } else {
348: val[0] = 0.0;
349: p = 1.0;
350: for (i=1;i<extop->n;i++) {
351: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
352: p *= (lambda-extop->bc[i-1]);
353: }
354: }
355: return(0);
356: }
358: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
359: {
360: PetscErrorCode ierr;
361: NEP_DEF_MATSHELL *matctx,*matctxC;
362: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
363: Mat F;
364: Mat Mshell,Mcomp;
365: PetscBool ini=PETSC_FALSE;
366: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
367: PetscBLASInt n_,info,szd_;
370: if (!M) {
371: Mshell = jacobian?extop->MJ:extop->MF;
372: } else Mshell = *M;
373: Mcomp = jacobian?extop->MF:extop->MJ;
374: if (!Mshell) {
375: ini = PETSC_TRUE;
376: PetscNew(&matctx);
377: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
378: nloc += szd; mloc += szd;
379: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
380: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)())NEPDeflationMatShell_MatMult);
381: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)())NEPDeflationMatShell_CreateVecs);
382: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)())NEPDeflationMatShell_Destroy);
383: matctx->nep = extop->nep;
384: matctx->extop = extop;
385: if (!M) {
386: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
387: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
388: PetscObjectReference((PetscObject)matctx->T);
389: } else {
390: matctx->jacob = jacobian;
391: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
392: *M = Mshell;
393: }
394: if (szd) {
395: BVCreateVec(extop->nep->V,matctx->w);
396: VecDuplicate(matctx->w[0],matctx->w+1);
397: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
398: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
399: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
400: }
401: } else {
402: MatShellGetContext(Mshell,(void**)&matctx);
403: }
404: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
405: if (ini || matctx->theta != lambda) {
406: if (jacobian) {
407: NEPComputeJacobian(extop->nep,lambda,matctx->T);
408: } else {
409: NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
410: }
411: }
412: if (n) {
413: matctx->hfjset = PETSC_FALSE;
414: if (!extop->simpU) {
415: /* likely hfjp has been already computed */
416: if (Mcomp) {
417: MatShellGetContext(Mcomp,(void**)&matctxC);
418: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
419: PetscMemcpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt*sizeof(PetscScalar));
420: matctx->hfjset = PETSC_TRUE;
421: }
422: }
423: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
424: if (!matctx->hfjset) {
425: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
426: matctx->hfjset = PETSC_TRUE;
427: }
428: BVSetActiveColumns(matctx->U,0,n);
429: hf = jacobian?hfjp:hfj;
430: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
431: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
432: BVMultInPlace(matctx->U,F,0,n);
433: BVSetActiveColumns(extop->W,0,extop->n);
434: for (j=1;j<extop->nep->nt;j++) {
435: BVMatMult(extop->X,extop->nep->A[j],extop->W);
436: MatDensePlaceArray(F,hf+j*n*n);
437: BVMult(matctx->U,1.0,1.0,extop->W,F);
438: MatDenseResetArray(F);
439: }
440: MatDestroy(&F);
441: } else {
442: hfj = matctx->hfj;
443: BVSetActiveColumns(matctx->U,0,n);
444: BVMatMult(extop->X,matctx->T,matctx->U);
445: for (j=0;j<n;j++) {
446: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
447: hfj[j*(n+1)] += lambda;
448: }
449: PetscBLASIntCast(n,&n_);
450: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
451: SlepcCheckLapackInfo("trtri",info);
452: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
453: BVMultInPlace(matctx->U,F,0,n);
454: if (jacobian) {
455: NEPDeflationComputeFunction(extop,lambda,NULL);
456: MatShellGetContext(extop->MF,(void**)&matctxC);
457: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
458: }
459: MatDestroy(&F);
460: }
461: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
462: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
463: A = matctx->A;
464: PetscMemzero(A,szd*szd*sizeof(PetscScalar));
465: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
466: for (j=0;j<n;j++)
467: for (i=0;i<n;i++)
468: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
469: PetscBLASIntCast(n,&n_);
470: PetscBLASIntCast(szd,&szd_);
471: B = matctx->B;
472: PetscMemzero(B,szd*szd*sizeof(PetscScalar));
473: for (i=1;i<extop->midx;i++) {
474: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
475: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
476: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
477: pts = hHprev; hHprev = hH; hH = pts;
478: }
479: PetscFree3(basisv,hH,hHprev);
480: }
481: matctx->theta = lambda;
482: matctx->n = extop->n;
483: }
484: return(0);
485: }
487: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
488: {
492: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
493: if (F) *F = extop->MF;
494: return(0);
495: }
497: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
498: {
502: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
503: if (J) *J = extop->MJ;
504: return(0);
505: }
507: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
508: {
509: PetscErrorCode ierr;
510: NEP_DEF_MATSHELL *matctx;
511: NEP_DEF_FUN_SOLVE solve;
512: PetscInt i,j,n=extop->n;
513: Vec u,tu;
514: Mat F;
515: PetscScalar snone=-1.0,sone=1.0;
516: PetscBLASInt n_,szd_,ldh_,*p,info;
517: Mat Mshell;
520: solve = extop->solve;
521: if (lambda!=solve->theta || n!=solve->n) {
522: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
523: Mshell = (solve->sincf)?extop->MF:solve->T;
524: MatShellGetContext(Mshell,(void**)&matctx);
525: KSPSetOperators(solve->ksp,matctx->T,matctx->T);
526: if (n) {
527: PetscBLASIntCast(n,&n_);
528: PetscBLASIntCast(extop->szd,&szd_);
529: PetscBLASIntCast(extop->szd+1,&ldh_);
530: if (!extop->simpU) {
531: BVSetActiveColumns(solve->T_1U,0,n);
532: for (i=0;i<n;i++) {
533: BVGetColumn(matctx->U,i,&u);
534: BVGetColumn(solve->T_1U,i,&tu);
535: KSPSolve(solve->ksp,u,tu);
536: BVRestoreColumn(solve->T_1U,i,&tu);
537: BVRestoreColumn(matctx->U,i,&u);
538: }
539: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
540: BVDot(solve->T_1U,extop->X,F);
541: MatDestroy(&F);
542: } else {
543: for (j=0;j<n;j++)
544: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
545: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
546: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
547: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
548: }
549: PetscMemcpy(solve->M,matctx->B,extop->szd*extop->szd*sizeof(PetscScalar));
550: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
551: PetscMalloc1(n,&p);
552: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
553: SlepcCheckLapackInfo("getrf",info);
554: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
555: SlepcCheckLapackInfo("getri",info);
556: PetscFree(p);
557: }
558: solve->theta = lambda;
559: solve->n = n;
560: }
561: return(0);
562: }
564: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
565: {
566: PetscErrorCode ierr;
567: Vec b1,x1;
568: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
569: NEP_DEF_MATSHELL *matctx;
570: NEP_DEF_FUN_SOLVE solve=extop->solve;
571: PetscBLASInt one=1,szd_,n_,ldh_;
572: PetscInt nloc,i;
575: if (extop->szd) {
576: x1 = solve->w[0]; b1 = solve->w[1];
577: VecGetArray(x,&xx);
578: VecPlaceArray(x1,xx);
579: VecGetArray(b,&bb);
580: VecPlaceArray(b1,bb);
581: } else {
582: b1 = b; x1 = x;
583: }
584: KSPSolve(extop->solve->ksp,b1,x1);
585: if (extop->n) {
586: PetscBLASIntCast(extop->szd,&szd_);
587: PetscBLASIntCast(extop->n,&n_);
588: PetscBLASIntCast(extop->szd+1,&ldh_);
589: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
590: b2 = bb+nloc; x2 = xx+nloc;
591: w = solve->work; w2 = solve->work+extop->n;
592: MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
593: PetscMemcpy(w2,b2,extop->n*sizeof(PetscScalar));
594: BVDotVec(extop->X,x1,w);
595: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
596: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
597: if (extop->simpU) {
598: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
599: for (i=0;i<extop->n;i++) w[i] = x2[i];
600: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
601: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
602: BVMultVec(extop->X,-1.0,1.0,x1,w);
603: } else {
604: BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
605: }
606: }
607: if (extop->szd) {
608: VecResetArray(x1);
609: VecRestoreArray(x,&xx);
610: VecResetArray(b1);
611: VecRestoreArray(b,&bb);
612: }
613: return(0);
614: }
616: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
617: {
618: PetscErrorCode ierr;
619: PetscInt j;
620: NEP_DEF_FUN_SOLVE solve;
623: if (!extop) return(0);
624: PetscFree(extop->H);
625: BVDestroy(&extop->X);
626: if (extop->szd) {
627: PetscFree3(extop->Hj,extop->XpX,extop->bc);
628: BVDestroy(&extop->W);
629: }
630: MatDestroy(&extop->MF);
631: MatDestroy(&extop->MJ);
632: if (extop->solve) {
633: solve = extop->solve;
634: if (extop->szd) {
635: if (!extop->simpU) {BVDestroy(&solve->T_1U);}
636: PetscFree2(solve->M,solve->work);
637: VecDestroy(&solve->w[0]);
638: VecDestroy(&solve->w[1]);
639: }
640: if (!solve->sincf) {
641: MatDestroy(&solve->T);
642: }
643: PetscFree(extop->solve);
644: }
645: if (extop->proj) {
646: if (extop->szd) {
647: for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
648: MatDestroy(&extop->proj->XpV1);
649: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
650: VecDestroy(&extop->proj->w);
651: BVDestroy(&extop->proj->V1);
652: }
653: PetscFree(extop->proj);
654: }
655: PetscFree(extop);
656: return(0);
657: }
659: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
660: {
661: PetscErrorCode ierr;
662: NEP_EXT_OP op;
663: NEP_DEF_FUN_SOLVE solve;
664: PetscInt szd;
667: NEPDeflationReset(*extop);
668: PetscNew(&op);
669: *extop = op;
670: op->nep = nep;
671: op->n = 0;
672: op->szd = szd = sz-1;
673: op->max_midx = PetscMin(MAX_MINIDX,szd);
674: op->X = X;
675: if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
676: else { PetscObjectReference((PetscObject)X); }
677: PetscCalloc1(sz*sz,&(op)->H);
678: if (op->szd) {
679: op->simpU = PETSC_FALSE;
680: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
681: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
682: } else {
683: op->simpU = PETSC_TRUE;
684: }
685: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
686: BVDuplicateResize(op->X,op->szd,&op->W);
687: }
688: if (ksp) {
689: PetscNew(&solve);
690: op->solve = solve;
691: solve->ksp = ksp;
692: solve->sincf = sincfun;
693: solve->n = -1;
694: if (op->szd) {
695: if (!op->simpU) {
696: BVDuplicateResize(nep->V,szd,&solve->T_1U);
697: }
698: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
699: BVCreateVec(nep->V,&solve->w[0]);
700: VecDuplicate(solve->w[0],&solve->w[1]);
701: }
702: }
703: return(0);
704: }
706: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
707: {
708: PetscScalar *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
709: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv;
710: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
711: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
712: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
713: NEP_EXT_OP extop=proj->extop;
714: NEP nep=extop->nep;
715: PetscErrorCode ierr;
718: DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
719: DSGetLeadingDimension(ds,&ldds);
720: DSGetArray(ds,mat,&T);
721: PetscMemzero(T,ldds*nv*sizeof(PetscScalar));
722: PetscBLASIntCast(ldds*nv,&dim2);
723: /* mat = V1^*T(lambda)V1 */
724: for (i=0;i<nep->nt;i++) {
725: if (deriv) {
726: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
727: } else {
728: FNEvaluateFunction(nep->f[i],lambda,&alpha);
729: }
730: DSGetArray(ds,DSMatExtra[i],&E);
731: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
732: DSRestoreArray(ds,DSMatExtra[i],&E);
733: }
734: if (extop->n) {
735: n = extop->n;
736: szd = extop->szd;
737: PetscMemzero(proj->work,proj->lwork*sizeof(PetscScalar));
738: PetscBLASIntCast(nv,&nv_);
739: PetscBLASIntCast(n,&n_);
740: PetscBLASIntCast(ldds,&ldds_);
741: PetscBLASIntCast(szd,&szd_);
742: PetscBLASIntCast(proj->dim,&dim_);
743: PetscBLASIntCast(extop->szd+1,&ldh_);
744: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
745: nwork += 2*proj->dim*proj->dim;
747: /* mat = mat + V1^*U(lambda)V2 */
748: for (i=0;i<nep->nt;i++) {
749: MatDenseGetArray(proj->V1pApX[i],&E);
750: if (extop->simpU) {
751: if (deriv) {
752: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
753: } else {
754: FNEvaluateFunction(nep->f[i],lambda,&alpha);
755: }
756: ww = w1; w = w2;
757: PetscMemcpy(ww,proj->V2,szd*nv*sizeof(PetscScalar));
758: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
759: alpha = -alpha;
760: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
761: if (deriv) {
762: PetscBLASIntCast(szd*nv,&szdk);
763: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
764: PetscMemcpy(w,proj->V2,szd*nv*sizeof(PetscScalar));
765: alpha2 = -alpha2;
766: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
767: alpha2 = 1.0;
768: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
769: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
770: }
771: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
772: } else {
773: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
774: w = deriv?w2:w1; ww = deriv?w1:w2;
775: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
776: }
777: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
778: MatDenseRestoreArray(proj->V1pApX[i],&E);
779: }
781: /* mat = mat + V2^*A(lambda)V1 */
782: basisv = proj->work+nwork; nwork += szd;
783: hH = proj->work+nwork; nwork += szd*szd;
784: hHprev = proj->work+nwork; nwork += szd*szd;
785: AB = proj->work+nwork; nwork += szd*szd;
786: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
787: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
788: for (j=0;j<n;j++)
789: for (i=0;i<n;i++)
790: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
791: MatDenseGetArray(proj->XpV1,&E);
792: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
793: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
794: MatDenseRestoreArray(proj->XpV1,&E);
796: /* mat = mat + V2^*B(lambda)V2 */
797: PetscMemzero(AB,szd*szd*sizeof(PetscScalar));
798: for (i=1;i<extop->midx;i++) {
799: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
800: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
801: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
802: pts = hHprev; hHprev = hH; hH = pts;
803: }
804: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
805: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
806: }
807: DSRestoreArray(ds,mat,&T);
808: return(0);
809: }
811: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
812: {
813: PetscErrorCode ierr;
814: PetscInt k,j,n=extop->n,dim;
815: Vec v,ve;
816: BV V1;
817: Mat G;
818: NEP nep=extop->nep;
819: NEP_DEF_PROJECT proj;
822: NEPCheckSplit(extop->nep,1);
823: proj = extop->proj;
824: if (!proj) {
825: /* Initialize the projection data structure */
826: PetscNew(&proj);
827: extop->proj = proj;
828: proj->extop = extop;
829: BVGetSizes(Vext,NULL,NULL,&dim);
830: proj->dim = dim;
831: if (extop->szd) {
832: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
833: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
834: for (j=0;j<nep->nt;j++) {
835: MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
836: }
837: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
838: BVCreateVec(extop->X,&proj->w);
839: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
840: }
841: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
842: }
844: /* Split Vext in V1 and V2 */
845: if (extop->szd) {
846: for (j=j0;j<j1;j++) {
847: BVGetColumn(Vext,j,&ve);
848: BVGetColumn(proj->V1,j,&v);
849: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
850: BVRestoreColumn(proj->V1,j,&v);
851: BVRestoreColumn(Vext,j,&ve);
852: }
853: V1 = proj->V1;
854: } else V1 = Vext;
856: /* Compute matrices V1^* A_i V1 */
857: BVSetActiveColumns(V1,j0,j1);
858: for (k=0;k<nep->nt;k++) {
859: DSGetMat(ds,DSMatExtra[k],&G);
860: BVMatProject(V1,nep->A[k],V1,G);
861: DSRestoreMat(ds,DSMatExtra[k],&G);
862: }
864: if (extop->n) {
865: if (extop->szd) {
866: /* Compute matrices V1^* A_i X and V1^* X */
867: BVSetActiveColumns(extop->W,0,n);
868: for (k=0;k<nep->nt;k++) {
869: BVMatMult(extop->X,nep->A[k],extop->W);
870: BVDot(extop->W,V1,proj->V1pApX[k]);
871: }
872: BVDot(V1,extop->X,proj->XpV1);
873: }
874: }
875: return(0);
876: }