Actual source code: flow.c
2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
3: originally written by W. K. Anderson of NASA Langley, \n\
4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";
6: #include <petscsnes.h>
7: #include <petsctime.h>
8: #include <petscao.h>
9: #include "user.h"
10: #if defined(_OPENMP)
11: #include "omp.h"
12: #if !defined(HAVE_REDUNDANT_WORK)
13: #include "metis.h"
14: #endif
15: #endif
17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
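/* Both macros allocate at least one entry (PetscMax(size,1)) via PetscMalloc1(), so a
   zero-sized request still returns a valid pointer that can later be PetscFree()d. */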
20: typedef struct {
21: Vec qnew,qold,func;
22: double fnorm_ini,dt_ini,cfl_ini;
23: double ptime;
24: double cfl_max,max_time;
25: double fnorm,dt,cfl;
26: double fnorm_ratio;
27: int ires,iramp,itstep;
28: int max_steps,print_freq;
29: int LocalTimeStepping;
30: } TstepCtx;
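/* TstepCtx holds the pseudo-transient continuation state: cfl_ini/cfl_max bound the CFL
   schedule computed in ComputeTimeStep(), fnorm_ini records the initial residual norm,
   and Update() iterates until fnorm_ini/fnorm exceeds fnorm_ratio or max_steps is hit. */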
32: typedef struct { /*============================*/
33: GRID *grid; /* Pointer to Grid info */
34: TstepCtx *tsCtx; /* Pointer to Time Stepping Context */
35: PetscBool PreLoading;
36: } AppCtx; /*============================*/
38: extern int FormJacobian(SNES,Vec,Mat,Mat,void*),
39: FormFunction(SNES,Vec,Vec,void*),
40: FormInitialGuess(SNES,GRID*),
41: Update(SNES,void*),
42: ComputeTimeStep(SNES,int,void*),
43: GetLocalOrdering(GRID*),
44: SetPetscDS(GRID *,TstepCtx*);
45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
48: #endif
49: /* Global Variables */
51: /*============================*/
52: CINFO *c_info; /* Pointer to COMMON INFO */
53: CRUNGE *c_runge; /* Pointer to COMMON RUNGE */
54: CGMCOM *c_gmcom; /* Pointer to COMMON GMCOM */
55: /*============================*/
56: int rank,size,rstart;
57: REAL memSize = 0.0,grad_time = 0.0;
58: #if defined(_OPENMP)
59: int max_threads = 2,tot_threads,my_thread_id;
60: #endif
62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
63: int event0,event1;
64: Scalar time_counters;
65: long long counter0,counter1;
66: #endif
67: int ntran[max_nbtran]; /* transition stuff put here to make global */
68: REAL dxtran[max_nbtran];
70: /* ======================== MAIN ROUTINE =================================== */
71: /* */
72: /* Finite volume flux split solver for general polygons */
73: /* */
74: /*===========================================================================*/
76: int main(int argc,char **args)
77: {
78: AppCtx user;
79: GRID f_pntr;
80: TstepCtx tsCtx;
81: SNES snes; /* nonlinear solver context */
82: Mat Jpc; /* Jacobian and Preconditioner matrices */
83: PetscScalar *qnode;
84: int ierr;
85: PetscBool flg,write_pvtu,pvtu_base64;
86: MPI_Comm comm;
87: PetscInt maxfails = 10000;
88: char pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";
90: ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
91: PetscInitializeFortran();
92: PetscOptionsInsertFile(PETSC_COMM_WORLD,NULL,"petsc.opt",PETSC_FALSE);
94: comm = PETSC_COMM_WORLD;
95: f77FORLINK(); /* Link FORTRAN and C COMMONS */
97: MPI_Comm_rank(comm,&rank);
98: MPI_Comm_size(comm,&size);
100: flg = PETSC_FALSE;
101: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102: if (flg) {PetscMemorySetGetMaximumUsage();}
104: /*======================================================================*/
105: /* Initialize stuff related to time stepping */
106: /*======================================================================*/
107: tsCtx.fnorm_ini = 0.0; tsCtx.cfl_ini = 50.0; tsCtx.cfl_max = 1.0e+05;
108: tsCtx.max_steps = 50; tsCtx.max_time = 1.0e+12; tsCtx.iramp = -50;
109: tsCtx.dt = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110: tsCtx.LocalTimeStepping = 1;
111: PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
112: PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
113: PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
114: PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115: tsCtx.print_freq = tsCtx.max_steps;
116: PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117: PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118: pvtu_base64 = PETSC_FALSE;
119: PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);
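/* A typical parallel invocation might look like the following (executable and file
   names are illustrative; the defaults are uns3d.msh and part_vec.part.<nproc>):
     mpiexec -n 4 ./flow -mesh uns3d.msh -partition part_vec.part.4 \
             -max_st 100 -cfl_ini 50 -cfl_max 1e5 -ts_rtol 1e10 -print -pvtu sol */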
121: c_info->alpha = 3.0;
122: c_info->beta = 15.0;
123: c_info->ivisc = 0;
125: c_gmcom->ilu0 = 1;
126: c_gmcom->nsrch = 10;
128: c_runge->nitfo = 0;
130: PetscMemzero(&f_pntr,sizeof(f_pntr));
131: f_pntr.jvisc = c_info->ivisc;
132: f_pntr.ileast = 4;
133: PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
134: PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);
136: /*======================================================================*/
138: /*Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140: PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141: omp_set_num_threads(max_threads);
142: PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif
145: /* Get the grid information into local ordering */
146: GetLocalOrdering(&f_pntr);
148: /* Allocate Memory for Some Other Grid Arrays */
149: set_up_grid(&f_pntr);
151: /* If using least squares for the gradients,calculate the r's */
152: if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);
154: user.grid = &f_pntr;
155: user.tsCtx = &tsCtx;
157: /* SAWs Stuff */
159: /*
160: Preload the executable to get accurate timings. This runs the following chunk of
161: code twice, first to get the executable pages into memory and the second time for
162: accurate timings.
163: */
164: PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165: user.PreLoading = PetscPreLoading;
167: /* Create nonlinear solver */
168: SetPetscDS(&f_pntr,&tsCtx);
169: SNESCreate(comm,&snes);
170: SNESSetType(snes,"newtonls");
173: /* Set various routines and options */
174: SNESSetFunction(snes,user.grid->res,FormFunction,&user);
175: flg = PETSC_FALSE;
176: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
177: if (flg) {
178: /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
179: MatCreateSNESMF(snes,&Jpc);
180: SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
181: } else {
182: /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
183: SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
184: }
186: SNESSetMaxLinearSolveFailures(snes,maxfails);
187: SNESSetFromOptions(snes);
189: /* Initialize the flowfield */
190: FormInitialGuess(snes,user.grid);
192: /* Solve nonlinear system */
193: Update(snes,&user);
195: /* Write restart file */
196: VecGetArray(user.grid->qnode,&qnode);
197: /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/
199: /* Write Tecplot solution file */
200: #if 0
201: if (!rank)
202: f77TECFLO(&user.grid->nnodes,
203: &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
204: &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
205: &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
206: c_info->title,
207: user.grid->x, user.grid->y, user.grid->z,
208: qnode,
209: user.grid->nnpts, user.grid->nntet, user.grid->nvpts,
210: user.grid->nvtet, user.grid->nfpts, user.grid->nftet,
211: user.grid->f2ntn, user.grid->f2ntv, user.grid->f2ntf,
212: user.grid->isnode, user.grid->ivnode, user.grid->ifnode,
213: &rank);
214: #endif
215: if (write_pvtu) {WritePVTU(&user,pvtu_fname,pvtu_base64);}
217: /* Write residual,lift,drag,and moment history file */
218: /*
219: if (!rank) f77PLLAN(&user.grid->nnodes,&rank);
220: */
222: VecRestoreArray(user.grid->qnode,&qnode);
223: flg = PETSC_FALSE;
224: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
225: if (flg) {
226: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
227: }
229: VecDestroy(&user.grid->qnode);
230: VecDestroy(&user.grid->qnodeLoc);
231: VecDestroy(&user.tsCtx->qold);
232: VecDestroy(&user.tsCtx->func);
233: VecDestroy(&user.grid->res);
234: VecDestroy(&user.grid->grad);
235: VecDestroy(&user.grid->gradLoc);
236: MatDestroy(&user.grid->A);
237: flg = PETSC_FALSE;
238: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
239: if (flg) { MatDestroy(&Jpc);}
240: SNESDestroy(&snes);
241: VecScatterDestroy(&user.grid->scatter);
242: VecScatterDestroy(&user.grid->gradScatter);
243: flg = PETSC_FALSE;
244: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
245: if (flg) {
246: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
247: }
248: PetscPreLoadEnd();
250: /* allocated in set_up_grid() */
251: PetscFree(user.grid->isface);
252: PetscFree(user.grid->ivface);
253: PetscFree(user.grid->ifface);
254: PetscFree(user.grid->us);
255: PetscFree(user.grid->vs);
256: PetscFree(user.grid->as);
258: /* Allocated in GetLocalOrdering() */
259: PetscFree(user.grid->eptr);
260: PetscFree(user.grid->ia);
261: PetscFree(user.grid->ja);
262: PetscFree(user.grid->loc2glo);
263: PetscFree(user.grid->loc2pet);
264: PetscFree(user.grid->xyzn);
265: #if defined(_OPENMP)
266: # if defined(HAVE_REDUNDANT_WORK)
267: PetscFree(user.grid->resd);
268: # else
269: PetscFree(user.grid->part_thr);
270: PetscFree(user.grid->nedge_thr);
271: PetscFree(user.grid->edge_thr);
272: PetscFree(user.grid->xyzn_thr);
273: # endif
274: #endif
275: PetscFree(user.grid->xyz);
276: PetscFree(user.grid->area);
278: PetscFree(user.grid->nntet);
279: PetscFree(user.grid->nnpts);
280: PetscFree(user.grid->f2ntn);
281: PetscFree(user.grid->isnode);
282: PetscFree(user.grid->sxn);
283: PetscFree(user.grid->syn);
284: PetscFree(user.grid->szn);
285: PetscFree(user.grid->sa);
286: PetscFree(user.grid->sface_bit);
288: PetscFree(user.grid->nvtet);
289: PetscFree(user.grid->nvpts);
290: PetscFree(user.grid->f2ntv);
291: PetscFree(user.grid->ivnode);
292: PetscFree(user.grid->vxn);
293: PetscFree(user.grid->vyn);
294: PetscFree(user.grid->vzn);
295: PetscFree(user.grid->va);
296: PetscFree(user.grid->vface_bit);
298: PetscFree(user.grid->nftet);
299: PetscFree(user.grid->nfpts);
300: PetscFree(user.grid->f2ntf);
301: PetscFree(user.grid->ifnode);
302: PetscFree(user.grid->fxn);
303: PetscFree(user.grid->fyn);
304: PetscFree(user.grid->fzn);
305: PetscFree(user.grid->fa);
306: PetscFree(user.grid->cdt);
307: PetscFree(user.grid->phi);
308: PetscFree(user.grid->rxy);
310: PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);
312: PetscFinalize();
313: return ierr;
314: }
316: /*---------------------------------------------------------------------*/
317: /* --------------------- Form initial approximation ----------------- */
318: int FormInitialGuess(SNES snes,GRID *grid)
319: /*---------------------------------------------------------------------*/
320: {
321: int ierr;
322: PetscScalar *qnode;
325: VecGetArray(grid->qnode,&qnode);
326: f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
327: VecRestoreArray(grid->qnode,&qnode);
328: return(0);
329: }
331: /*---------------------------------------------------------------------*/
332: /* --------------------- Evaluate Function F(x) --------------------- */
333: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
334: /*---------------------------------------------------------------------*/
335: {
336: AppCtx *user = (AppCtx*) dummy;
337: GRID *grid = user->grid;
338: TstepCtx *tsCtx = user->tsCtx;
339: PetscScalar *qnode,*res,*qold;
340: PetscScalar *grad;
341: PetscScalar temp;
342: VecScatter scatter = grid->scatter;
343: VecScatter gradScatter = grid->gradScatter;
344: Vec localX = grid->qnodeLoc;
345: Vec localGrad = grid->gradLoc;
346: int i,j,in,ierr;
347: int nbface,ires;
348: PetscScalar time_ini,time_fin;
351: /* Get X into the local work vector */
352: VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
353: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
354: /* VecCopy(x,localX); */
355: /* access the local work f,grad,and input */
356: VecGetArray(f,&res);
357: VecGetArray(grid->grad,&grad);
358: VecGetArray(localX,&qnode);
359: ires = tsCtx->ires;
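/* Nodal gradients of the four unknowns are computed by the least-squares routine
   f77LSTGS and then scattered so ghost-node gradients are available to f77GETRES. */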
361: PetscTime(&time_ini);
362: f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
363: &rank,&grid->nvertices);
364: PetscTime(&time_fin);
365: grad_time += time_fin - time_ini;
366: VecRestoreArray(grid->grad,&grad);
368: VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
369: VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
370: /*VecCopy(grid->grad,localGrad);*/
372: VecGetArray(localGrad,&grad);
373: nbface = grid->nsface + grid->nvface + grid->nfface;
374: f77GETRES(&grid->nnodesLoc,&grid->ncell, &grid->nedgeLoc, &grid->nsface,
375: &grid->nvface,&grid->nfface, &nbface,
376: &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
377: grid->isface, grid->ivface, grid->ifface, &grid->ileast,
378: grid->isnode, grid->ivnode, grid->ifnode,
379: &grid->nnfacetLoc,grid->f2ntn, &grid->nnbound,
380: &grid->nvfacetLoc,grid->f2ntv, &grid->nvbound,
381: &grid->nffacetLoc,grid->f2ntf, &grid->nfbound,
382: grid->eptr,
383: grid->sxn, grid->syn, grid->szn,
384: grid->vxn, grid->vyn, grid->vzn,
385: grid->fxn, grid->fyn, grid->fzn,
386: grid->xyzn,
387: qnode, grid->cdt,
388: grid->xyz, grid->area,
389: grad, res,
390: grid->turbre,
391: grid->slen, grid->c2n,
392: grid->c2e,
393: grid->us, grid->vs, grid->as,
394: grid->phi,
395: grid->amut, &ires,
396: #if defined(_OPENMP)
397: &max_threads,
398: #if defined(HAVE_EDGE_COLORING)
399: &grid->ncolor, grid->ncount,
400: #elif defined(HAVE_REDUNDANT_WORK)
401: grid->resd,
402: #else
403: &grid->nedgeAllThr,
404: grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
405: #endif
406: #endif
407: &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);
409: /* Add the contribution due to time stepping */
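/* Pseudo-time term: res_i += (area_i/(CFL*cdt_i))*(q_i - q_i^old) for each of the 4
   unknowns per node; with INTERLACING the 4 unknowns of a node are stored contiguously,
   otherwise the unknowns are stored field by field. */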
410: if (ires == 1) {
411: VecGetArray(tsCtx->qold,&qold);
412: #if defined(INTERLACING)
413: for (i = 0; i < grid->nnodesLoc; i++) {
414: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
415: for (j = 0; j < 4; j++) {
416: in = 4*i + j;
417: res[in] += temp*(qnode[in] - qold[in]);
418: }
419: }
420: #else
421: for (j = 0; j < 4; j++) {
422: for (i = 0; i < grid->nnodesLoc; i++) {
423: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
424: in = grid->nnodesLoc*j + i;
425: res[in] += temp*(qnode[in] - qold[in]);
426: }
427: }
428: #endif
429: VecRestoreArray(tsCtx->qold,&qold);
430: }
431: VecRestoreArray(localX,&qnode);
432: VecRestoreArray(f,&res);
433: VecRestoreArray(localGrad,&grad);
434: return(0);
435: }
437: /*---------------------------------------------------------------------*/
438: /* -------------------- Evaluate Jacobian F'(x) -------------------- */
440: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
441: /*---------------------------------------------------------------------*/
442: {
443: AppCtx *user = (AppCtx*) dummy;
444: GRID *grid = user->grid;
445: TstepCtx *tsCtx = user->tsCtx;
446: Vec localX = grid->qnodeLoc;
447: PetscScalar *qnode;
448: int ierr;
451: /* VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
452: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
453: /* VecCopy(x,localX); */
454: MatSetUnfactored(pc_mat);
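/* pc_mat may have been factored in place by the preconditioner; mark it unfactored
   again before its entries are recomputed by f77FILLA below. */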
456: VecGetArray(localX,&qnode);
457: f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
458: &grid->nsface,
459: grid->isface,grid->fxn,grid->fyn,grid->fzn,
460: grid->sxn,grid->syn,grid->szn,
461: &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
462: grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
463: grid->area,grid->xyzn,&tsCtx->cfl,
464: &rank,&grid->nvertices);
465: VecRestoreArray(localX,&qnode);
466: MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
467: MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
468: #if defined(MATRIX_VIEW)
469: if ((tsCtx->itstep != 0) &&(tsCtx->itstep % tsCtx->print_freq) == 0) {
470: PetscViewer viewer;
471: char mat_file[PETSC_MAX_PATH_LEN];
472: sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
473: PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
474: MatView(pc_mat,viewer);
475: PetscViewerDestroy(&viewer);
476: }
477: #endif
478: return(0);
479: }
481: /*---------------------------------------------------------------------*/
482: int Update(SNES snes,void *ctx)
483: /*---------------------------------------------------------------------*/
484: {
485: AppCtx *user = (AppCtx*) ctx;
486: GRID *grid = user->grid;
487: TstepCtx *tsCtx = user->tsCtx;
488: VecScatter scatter = grid->scatter;
489: Vec localX = grid->qnodeLoc;
490: PetscScalar *qnode,*res;
491: PetscScalar clift,cdrag,cmom;
492: int ierr,its;
493: PetscScalar fratio;
494: PetscScalar time1,time2,cpuloc,cpuglo;
495: int max_steps;
496: PetscBool print_flag = PETSC_FALSE;
497: FILE *fptr = 0;
498: int nfailsCum = 0,nfails = 0;
499: /*Scalar cpu_ini,cpu_fin,cpu_time;*/
500: /*int event0 = 14,event1 = 25,gen_start,gen_read;
501: PetscScalar time_start_counters,time_read_counters;
502: long long counter0,counter1;*/
505: PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
506: if (print_flag) {
507: PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
508: PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
509: }
510: if (user->PreLoading) max_steps = 1;
511: else max_steps = tsCtx->max_steps;
512: fratio = 1.0;
513: /*tsCtx->ptime = 0.0;*/
514: VecCopy(grid->qnode,tsCtx->qold);
515: PetscTime(&time1);
516: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
517: /* if (!user->PreLoading) {
518: PetscBool flg = PETSC_FALSE;
519: PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
520: PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
521: PetscTime(&time_start_counters);
522: if ((gen_start = start_counters(event0,event1)) < 0)
523: SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n");
524: }*/
525: #endif
526: /*cpu_ini = PetscGetCPUTime();*/
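/* Pseudo-time stepping loop: march until max_steps is reached or the residual has
   dropped by the factor fnorm_ratio (fratio = fnorm_ini/fnorm). */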
527: for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
528: (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
529: ComputeTimeStep(snes,tsCtx->itstep,user);
530: /*tsCtx->ptime += tsCtx->dt;*/
532: SNESSolve(snes,NULL,grid->qnode);
533: SNESGetIterationNumber(snes,&its);
535: SNESGetNonlinearStepFailures(snes,&nfails);
536: nfailsCum += nfails; nfails = 0;
537: if (nfailsCum >= 2) SETERRQ(PETSC_COMM_SELF,1,"Unable to find a Newton Step");
538: if (print_flag) {
539: PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
540: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
541: }
542: VecCopy(grid->qnode,tsCtx->qold);
544: c_info->ntt = tsCtx->itstep+1;
545: PetscTime(&time2);
546: cpuloc = time2-time1;
547: cpuglo = 0.0;
548: MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
549: c_info->tot = cpuglo; /* Total CPU time used up to this time step */
551: VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
552: VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
553: /* VecCopy(grid->qnode,localX); */
555: VecGetArray(grid->res,&res);
556: VecGetArray(localX,&qnode);
558: f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
559: grid->isnode, grid->ivnode,
560: &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
561: &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
562: grid->eptr, qnode,
563: grid->xyz,
564: grid->sface_bit,grid->vface_bit,
565: &clift,&cdrag,&cmom,&rank,&grid->nvertices);
566: if (print_flag) {
567: PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
568: tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
569: PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
570: cpuglo,tsCtx->itstep);
571: PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
572: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
573: }
574: VecRestoreArray(localX,&qnode);
575: VecRestoreArray(grid->res,&res);
576: fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
577: MPI_Barrier(PETSC_COMM_WORLD);
579: } /* End of time step loop */
581: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
582: if (!user->PreLoading) {
583: int eve0,eve1;
584: FILE *cfp0,*cfp1;
585: char str[256];
586: /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
587: SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n");
588: PetscTime(&time_read_counters);
589: if (gen_read != gen_start) {
590: SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n");
591: }*/
592: /*sprintf(str,"counters%d_and_%d",event0,event1);
593: cfp0 = fopen(str,"a");*/
594: /*print_counters(event0,counter0,event1,counter1);*/
595: /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
596: time_counters);
597: fclose(cfp0);*/
598: }
599: #endif
600: PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
601: cpuglo,tsCtx->itstep);
602: PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
603: PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);
605: if (!rank && print_flag) fclose(fptr);
606: if (user->PreLoading) {
607: tsCtx->fnorm_ini = 0.0;
608: PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
609: }
610: return(0);
611: }
613: /*---------------------------------------------------------------------*/
614: int ComputeTimeStep(SNES snes,int iter,void *ctx)
615: /*---------------------------------------------------------------------*/
616: {
617: AppCtx *user = (AppCtx*) ctx;
618: TstepCtx *tsCtx = user->tsCtx;
619: Vec func = tsCtx->func;
620: PetscScalar inc = 1.1;
621: PetscScalar newcfl;
622: int ierr;
623: /*int iramp = tsCtx->iramp;*/
626: tsCtx->ires = 0;
627: FormFunction(snes,tsCtx->qold,func,user);
628: tsCtx->ires = 1;
629: VecNorm(func,NORM_2,&tsCtx->fnorm);
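/* CFL schedule below: start from cfl_ini on the first step, then grow the CFL in
   inverse proportion to the residual drop (switched-evolution-relaxation style),
   capped at cfl_max. */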
630: /* first time through so compute initial function norm */
631: if (tsCtx->fnorm_ini == 0.0) {
632: tsCtx->fnorm_ini = tsCtx->fnorm;
633: tsCtx->cfl = tsCtx->cfl_ini;
634: } else {
635: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
636: tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
637: }
639: /* if (iramp < 0) {
640: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
641: } else {
642: if (tsCtx->dt < 0 && iramp > 0)
643: if (iter > iramp) newcfl = tsCtx->cfl_max;
644: else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
645: (double) iter/(double) iramp;
646: }
647: tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
648: /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
649: /*VecDestroy(&func);*/
650: return(0);
651: }
653: /*---------------------------------------------------------------------*/
654: int GetLocalOrdering(GRID *grid)
655: /*---------------------------------------------------------------------*/
656: {
657: int ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
658: int nnodes,nedge,nnz,jstart,jend;
659: int nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
660: int nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
661: int nnfacet,nvfacet,nffacet;
662: int nnfacetLoc,nvfacetLoc,nffacetLoc;
663: int nsnode,nvnode,nfnode;
664: int nsnodeLoc,nvnodeLoc,nfnodeLoc;
665: int nnbound,nvbound,nfbound;
666: int bs = 4;
667: int fdes = 0;
668: off_t currentPos = 0,newPos = 0;
669: int grid_param = 13;
670: int cross_edges = 0;
671: int *edge_bit,*pordering;
672: int *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
673: int *tmp,*tmp1,*tmp2;
674: PetscScalar time_ini,time_fin;
675: PetscScalar *ftmp,*ftmp1;
676: char mesh_file[PETSC_MAX_PATH_LEN] = "";
677: AO ao;
678: FILE *fptr,*fptr1;
679: PetscBool flg;
680: MPI_Comm comm = PETSC_COMM_WORLD;
683: /* Read the integer grid parameters */
684: ICALLOC(grid_param,&tmp);
685: if (!rank) {
686: PetscBool exists;
687: PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
688: PetscTestFile(mesh_file,'r',&exists);
689: if (!exists) { /* try uns3d.msh as the file name */
690: PetscStrcpy(mesh_file,"uns3d.msh");
691: }
692: PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
693: }
694: PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
695: grid->ncell = tmp[0];
696: grid->nnodes = tmp[1];
697: grid->nedge = tmp[2];
698: grid->nnbound = tmp[3];
699: grid->nvbound = tmp[4];
700: grid->nfbound = tmp[5];
701: grid->nnfacet = tmp[6];
702: grid->nvfacet = tmp[7];
703: grid->nffacet = tmp[8];
704: grid->nsnode = tmp[9];
705: grid->nvnode = tmp[10];
706: grid->nfnode = tmp[11];
707: grid->ntte = tmp[12];
708: grid->nsface = 0;
709: grid->nvface = 0;
710: grid->nfface = 0;
711: PetscFree(tmp);
712: PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
713: grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);
715: nnodes = grid->nnodes;
716: nedge = grid->nedge;
717: nnfacet = grid->nnfacet;
718: nvfacet = grid->nvfacet;
719: nffacet = grid->nffacet;
720: nnbound = grid->nnbound;
721: nvbound = grid->nvbound;
722: nfbound = grid->nfbound;
723: nsnode = grid->nsnode;
724: nvnode = grid->nvnode;
725: nfnode = grid->nfnode;
727: /* Read the partitioning vector generated by MeTiS */
728: ICALLOC(nnodes,&l2a);
729: ICALLOC(nnodes,&v2p);
730: ICALLOC(nnodes,&a2l);
731: nnodesLoc = 0;
733: for (i = 0; i < nnodes; i++) a2l[i] = -1;
734: PetscTime(&time_ini);
736: if (!rank) {
737: if (size == 1) {
738: PetscMemzero(v2p,nnodes*sizeof(int));
739: } else {
740: char spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
741: PetscBool exists;
743: PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
744: PetscTestFile(spart_file,'r',&exists);
745: if (!exists) { /* try appending the number of processors */
746: sprintf(part_file,"part_vec.part.%d",size);
747: PetscStrcpy(spart_file,part_file);
748: }
749: fptr = fopen(spart_file,"r");
750: if (!fptr) SETERRQ1(PETSC_COMM_SELF,1,"Cannot open file %s\n",spart_file);
751: for (inode = 0; inode < nnodes; inode++) {
752: fscanf(fptr,"%d\n",&node1);
753: v2p[inode] = node1;
754: }
755: fclose(fptr);
756: }
757: }
758: MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
759: for (inode = 0; inode < nnodes; inode++) {
760: if (v2p[inode] == rank) {
761: l2a[nnodesLoc] = inode;
762: a2l[inode] = nnodesLoc;
763: nnodesLoc++;
764: }
765: }
767: PetscTime(&time_fin);
768: time_fin -= time_ini;
769: PetscPrintf(comm,"Partition Vector read successfully\n");
770: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
772: MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
773: rstart -= nnodesLoc;
774: ICALLOC(nnodesLoc,&pordering);
775: for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
776: AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
777: PetscFree(pordering);
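/* The AO records the correspondence between application (global file) node numbers in
   l2a and the contiguous PETSc numbering rstart..rstart+nnodesLoc-1; it is used below
   to translate loc2pet and the ja array into PETSc ordering. */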
779: /* Now count the local number of edges - including edges with
780: ghost nodes but edges between ghost nodes are NOT counted */
781: nedgeLoc = 0;
782: nvertices = nnodesLoc;
783: /* Choose an estimated number of local edges. The choice
784: nedgeLocEst = 1000000 looks reasonable as it will read
785: the edge and edge normal arrays in 8 MB chunks */
786: /*nedgeLocEst = nedge/size;*/
787: nedgeLocEst = PetscMin(nedge,1000000);
788: remEdges = nedge;
789: ICALLOC(2*nedgeLocEst,&tmp);
790: PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
791: PetscTime(&time_ini);
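/* The edge list is stored in the file as two consecutive arrays of length nedge (all
   first endpoints, then all second endpoints); each pass reads matching chunks of both
   arrays, hence the seek forward by nedge-readEdges and back by nedge between reads. */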
792: while (remEdges > 0) {
793: readEdges = PetscMin(remEdges,nedgeLocEst);
794: /*time_ini = PetscTime();*/
795: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
796: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
797: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
798: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
799: /*time_fin += PetscTime()-time_ini;*/
800: for (j = 0; j < readEdges; j++) {
801: node1 = tmp[j]-1;
802: node2 = tmp[j+readEdges]-1;
803: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
804: nedgeLoc++;
805: if (a2l[node1] == -1) {
806: l2a[nvertices] = node1;
807: a2l[node1] = nvertices;
808: nvertices++;
809: }
810: if (a2l[node2] == -1) {
811: l2a[nvertices] = node2;
812: a2l[node2] = nvertices;
813: nvertices++;
814: }
815: }
816: }
817: remEdges = remEdges - readEdges;
818: MPI_Barrier(comm);
819: }
820: PetscTime(&time_fin);
821: time_fin -= time_ini;
822: PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
823: PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
824: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
826: /* Now store the local edges */
827: ICALLOC(2*nedgeLoc,&grid->eptr);
828: ICALLOC(nedgeLoc,&edge_bit);
829: ICALLOC(nedgeLoc,&eperm);
830: i = 0; j = 0; k = 0;
831: remEdges = nedge;
832: PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
833: currentPos = newPos;
835: PetscTime(&time_ini);
836: while (remEdges > 0) {
837: readEdges = PetscMin(remEdges,nedgeLocEst);
838: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
839: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
840: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
841: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
842: for (j = 0; j < readEdges; j++) {
843: node1 = tmp[j]-1;
844: node2 = tmp[j+readEdges]-1;
845: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
846: grid->eptr[k] = a2l[node1];
847: grid->eptr[k+nedgeLoc] = a2l[node2];
848: edge_bit[k] = i; /* Record global file index of the edge */
849: eperm[k] = k;
850: k++;
851: }
852: i++;
853: }
854: remEdges = remEdges - readEdges;
855: MPI_Barrier(comm);
856: }
857: PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
858: PetscTime(&time_fin);
859: time_fin -= time_ini;
860: PetscPrintf(comm,"Local edges stored\n");
861: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
863: PetscFree(tmp);
864: ICALLOC(2*nedgeLoc,&tmp);
865: PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
866: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
867: EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
868: #else
869: /* Now reorder the edges for better cache locality */
870: /*
871: tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
872: PetscSortIntWithPermutation(6,tmp,eperm);
873: for (i=0; i<6; i++)
874: printf("%d %d %d\n",i,tmp[i],eperm[i]);
875: */
876: flg = PETSC_FALSE;
877: PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
878: if (!flg) {
879: PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
880: }
881: #endif
882: PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
883: k = 0;
884: for (i = 0; i < nedgeLoc; i++) {
885: int cross_node=nnodesLoc/2;
886: node1 = tmp[eperm[i]] + 1;
887: node2 = tmp[nedgeLoc+eperm[i]] + 1;
888: #if defined(INTERLACING)
889: grid->eptr[k++] = node1;
890: grid->eptr[k++] = node2;
891: #else
892: grid->eptr[i] = node1;
893: grid->eptr[nedgeLoc+i] = node2;
894: #endif
895: /* if (node1 > node2)
896: printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
897: rank,i,node1,node2);*/
898: if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
899: }
900: PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
901: PetscFree(tmp);
902: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
903: /* Now make the local 'ia' and 'ja' arrays */
904: ICALLOC(nvertices+1,&grid->ia);
905: /* Use tmp for a work array */
906: ICALLOC(nvertices,&tmp);
907: f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
908: nnz = grid->ia[nvertices] - 1;
909: ICALLOC(nnz,&grid->ja);
910: f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
911: PetscFree(tmp);
912: #else
913: /* Now make the local 'ia' and 'ja' arrays */
914: ICALLOC(nnodesLoc+1,&grid->ia);
915: /* Use tmp for a work array */
916: ICALLOC(nnodesLoc,&tmp);
917: f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
918: nnz = grid->ia[nnodesLoc] - 1;
919: #if defined(BLOCKING)
920: PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
921: #else
922: PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
923: #endif
924: ICALLOC(nnz,&grid->ja);
925: f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
926: PetscFree(tmp);
927: #endif
928: ICALLOC(nvertices,&grid->loc2glo);
929: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
930: PetscFree(l2a);
931: l2a = grid->loc2glo;
932: ICALLOC(nvertices,&grid->loc2pet);
933: l2p = grid->loc2pet;
934: PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
935: AOApplicationToPetsc(ao,nvertices,l2p);
938: /* Renumber unit normals of dual face (from node1 to node2)
939: and the area of the dual mesh face */
940: FCALLOC(nedgeLocEst,&ftmp);
941: FCALLOC(nedgeLoc,&ftmp1);
942: FCALLOC(4*nedgeLoc,&grid->xyzn);
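/* With INTERLACING, xyzn[4*i..4*i+3] holds the x,y,z components of the dual-face normal
   and the dual-face area for local edge i; otherwise the four quantities are stored as
   separate blocks of length nedgeLoc. */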
943: /* Do the x-component */
944: i = 0; k = 0;
945: remEdges = nedge;
946: PetscTime(&time_ini);
947: while (remEdges > 0) {
948: readEdges = PetscMin(remEdges,nedgeLocEst);
949: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
950: for (j = 0; j < readEdges; j++)
951: if (edge_bit[k] == (i+j)) {
952: ftmp1[k] = ftmp[j];
953: k++;
954: }
955: i += readEdges;
956: remEdges = remEdges - readEdges;
957: MPI_Barrier(comm);
958: }
959: for (i = 0; i < nedgeLoc; i++)
960: #if defined(INTERLACING)
961: grid->xyzn[4*i] = ftmp1[eperm[i]];
962: #else
963: grid->xyzn[i] = ftmp1[eperm[i]];
964: #endif
965: /* Do the y-component */
966: i = 0; k = 0;
967: remEdges = nedge;
968: while (remEdges > 0) {
969: readEdges = PetscMin(remEdges,nedgeLocEst);
970: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
971: for (j = 0; j < readEdges; j++)
972: if (edge_bit[k] == (i+j)) {
973: ftmp1[k] = ftmp[j];
974: k++;
975: }
976: i += readEdges;
977: remEdges = remEdges - readEdges;
978: MPI_Barrier(comm);
979: }
980: for (i = 0; i < nedgeLoc; i++)
981: #if defined(INTERLACING)
982: grid->xyzn[4*i+1] = ftmp1[eperm[i]];
983: #else
984: grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
985: #endif
986: /* Do the z-component */
987: i = 0; k = 0;
988: remEdges = nedge;
989: while (remEdges > 0) {
990: readEdges = PetscMin(remEdges,nedgeLocEst);
991: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
992: for (j = 0; j < readEdges; j++)
993: if (edge_bit[k] == (i+j)) {
994: ftmp1[k] = ftmp[j];
995: k++;
996: }
997: i += readEdges;
998: remEdges = remEdges - readEdges;
999: MPI_Barrier(comm);
1000: }
1001: for (i = 0; i < nedgeLoc; i++)
1002: #if defined(INTERLACING)
1003: grid->xyzn[4*i+2] = ftmp1[eperm[i]];
1004: #else
1005: grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
1006: #endif
1007: /* Do the area */
1008: i = 0; k = 0;
1009: remEdges = nedge;
1010: while (remEdges > 0) {
1011: readEdges = PetscMin(remEdges,nedgeLocEst);
1012: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1013: for (j = 0; j < readEdges; j++)
1014: if (edge_bit[k] == (i+j)) {
1015: ftmp1[k] = ftmp[j];
1016: k++;
1017: }
1018: i += readEdges;
1019: remEdges = remEdges - readEdges;
1020: MPI_Barrier(comm);
1021: }
1022: for (i = 0; i < nedgeLoc; i++)
1023: #if defined(INTERLACING)
1024: grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1025: #else
1026: grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1027: #endif
1029: PetscFree(edge_bit);
1030: PetscFree(eperm);
1031: PetscFree(ftmp);
1032: PetscFree(ftmp1);
1033: PetscTime(&time_fin);
1034: time_fin -= time_ini;
1035: PetscPrintf(comm,"Edge normals partitioned\n");
1036: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1037: #if defined(_OPENMP)
1038: /*Arrange for the division of work among threads*/
1039: #if defined(HAVE_EDGE_COLORING)
1040: #elif defined(HAVE_REDUNDANT_WORK)
1041: FCALLOC(4*nnodesLoc, &grid->resd);
1042: #else
1043: {
1044: /* Get the local adjacency structure of the graph for partitioning the local
1045: graph into max_threads pieces */
1046: int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1047: int numflag = 0, wgtflag = 0, edgecut;
1048: int thr1,thr2,nedgeAllThreads,ned1,ned2;
1049: ICALLOC((nvertices+1),&ia);
1050: ICALLOC((2*nedgeLoc),&ja);
1051: ia[0] = 0;
1052: for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1053: for (i = 0; i < nvertices; i++) {
1054: int jstart,jend;
1055: jstart = grid->ia[i]-1;
1056: jend = grid->ia[i+1]-1;
1057: k = ia[i];
1058: for (j=jstart; j < jend; j++) {
1059: inode = grid->ja[j]-1;
1060: if (inode != i) ja[k++] = inode;
1061: }
1062: }
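/* ia/ja built above form a zero-based CSR adjacency with the diagonal removed
   (grid->ia/ja are one-based and include the node itself), as expected by METIS. */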
1063: ICALLOC(nvertices,&grid->part_thr);
1064: PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1065: options[0] = 0;
1066: /* Call the pmetis library routine */
1067: if (max_threads > 1)
1068: METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1069: &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1070: PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1071: /* Write the partition vector to disk */
1072: flg = PETSC_FALSE;
1073: PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1074: if (flg) {
1075: int *partv_loc, *partv_glo;
1076: int *disp,*counts,*loc2glo_glo;
1077: char part_file[PETSC_MAX_PATH_LEN];
1078: FILE *fp;
1080: ICALLOC(nnodes, &partv_glo);
1081: ICALLOC(nnodesLoc, &partv_loc);
1082: for (i = 0; i < nnodesLoc; i++)
1083: /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1084: partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1085: ICALLOC(size,&disp);
1086: ICALLOC(size,&counts);
1087: MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1088: disp[0] = 0;
1089: for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1090: ICALLOC(nnodes, &loc2glo_glo);
1091: MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1092: MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1093: if (!rank) {
1094: PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1095: sprintf(part_file,"hyb_part_vec.%d",2*size);
1096: fp = fopen(part_file,"w");
1097: for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1098: fclose(fp);
1099: }
1100: PetscFree(partv_loc);
1101: PetscFree(partv_glo);
1102: PetscFree(disp);
1103: PetscFree(counts);
1104: PetscFree(loc2glo_glo);
1105: }
1107: /* Divide the work among threads */
1108: k = 0;
1109: ICALLOC((max_threads+1),&grid->nedge_thr);
1110: PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1111: cross_edges = 0;
1112: for (i = 0; i < nedgeLoc; i++) {
1113: node1 = grid->eptr[k++]-1;
1114: node2 = grid->eptr[k++]-1;
1115: thr1 = grid->part_thr[node1];
1116: thr2 = grid->part_thr[node2];
1117: grid->nedge_thr[thr1]+=1;
1118: if (thr1 != thr2) {
1119: grid->nedge_thr[thr2]+=1;
1120: cross_edges++;
1121: }
1122: }
1123: PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
1124: ned1 = grid->nedge_thr[0];
1125: grid->nedge_thr[0] = 1;
1126: for (i = 1; i <= max_threads; i++) {
1127: ned2 = grid->nedge_thr[i];
1128: grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1129: ned1 = ned2;
1130: }
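/* nedge_thr now holds one-based starting offsets per thread (a shifted prefix sum of
   the per-thread edge counts); a cut edge was counted for both threads, so it appears
   in the edge list of each. */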
1131: /* Allocate a shared edge array. Note that a cut edge is evaluated
1132: by both the threads but updates are done only for the locally
1133: owned node */
1134: grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1135: ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1136: ICALLOC(max_threads,&tmp);
1137: FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1138: for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1139: k = 0;
1140: for (i = 0; i < nedgeLoc; i++) {
1141: int ie1,ie2,ie3;
1142: node1 = grid->eptr[k++];
1143: node2 = grid->eptr[k++];
1144: thr1 = grid->part_thr[node1-1];
1145: thr2 = grid->part_thr[node2-1];
1146: ie1 = 2*tmp[thr1];
1147: ie2 = 4*tmp[thr1];
1148: ie3 = 4*i;
1150: grid->edge_thr[ie1] = node1;
1151: grid->edge_thr[ie1+1] = node2;
1152: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1153: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1154: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1155: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1157: tmp[thr1]+=1;
1158: if (thr1 != thr2) {
1159: ie1 = 2*tmp[thr2];
1160: ie2 = 4*tmp[thr2];
1162: grid->edge_thr[ie1] = node1;
1163: grid->edge_thr[ie1+1] = node2;
1164: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1165: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1166: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1167: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1169: tmp[thr2]+=1;
1170: }
1171: }
1172: }
1173: #endif
1174: #endif
1176: /* Remap coordinates */
1177: /*nnodesLocEst = nnodes/size;*/
1178: nnodesLocEst = PetscMin(nnodes,500000);
1179: FCALLOC(nnodesLocEst,&ftmp);
1180: FCALLOC(3*nvertices,&grid->xyz);
1181: remNodes = nnodes;
1182: i = 0;
1183: PetscTime(&time_ini);
1184: while (remNodes > 0) {
1185: readNodes = PetscMin(remNodes,nnodesLocEst);
1186: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1187: for (j = 0; j < readNodes; j++) {
1188: if (a2l[i+j] >= 0) {
1189: #if defined(INTERLACING)
1190: grid->xyz[3*a2l[i+j]] = ftmp[j];
1191: #else
1192: grid->xyz[a2l[i+j]] = ftmp[j];
1193: #endif
1194: }
1195: }
1196: i += nnodesLocEst;
1197: remNodes -= nnodesLocEst;
1198: MPI_Barrier(comm);
1199: }
1201: remNodes = nnodes;
1202: i = 0;
1203: while (remNodes > 0) {
1204: readNodes = PetscMin(remNodes,nnodesLocEst);
1205: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1206: for (j = 0; j < readNodes; j++) {
1207: if (a2l[i+j] >= 0) {
1208: #if defined(INTERLACING)
1209: grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1210: #else
1211: grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1212: #endif
1213: }
1214: }
1215: i += nnodesLocEst;
1216: remNodes -= nnodesLocEst;
1217: MPI_Barrier(comm);
1218: }
1220: remNodes = nnodes;
1221: i = 0;
1222: while (remNodes > 0) {
1223: readNodes = PetscMin(remNodes,nnodesLocEst);
1224: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1225: for (j = 0; j < readNodes; j++) {
1226: if (a2l[i+j] >= 0) {
1227: #if defined(INTERLACING)
1228: grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1229: #else
1230: grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1231: #endif
1232: }
1233: }
1234: i += nnodesLocEst;
1235: remNodes -= nnodesLocEst;
1236: MPI_Barrier(comm);
1237: }
1240: /* Renumber dual volume "area" */
1241: FCALLOC(nvertices,&grid->area);
1242: remNodes = nnodes;
1243: i = 0;
1244: while (remNodes > 0) {
1245: readNodes = PetscMin(remNodes,nnodesLocEst);
1246: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1247: for (j = 0; j < readNodes; j++)
1248: if (a2l[i+j] >= 0)
1249: grid->area[a2l[i+j]] = ftmp[j];
1250: i += nnodesLocEst;
1251: remNodes -= nnodesLocEst;
1252: MPI_Barrier(comm);
1253: }
1255: PetscFree(ftmp);
1256: PetscTime(&time_fin);
1257: time_fin -= time_ini;
1258: PetscPrintf(comm,"Coordinates remapped\n");
1259: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1261: /* Now,handle all the solid boundaries - things to be done :
1262: * 1. Identify the nodes belonging to the solid
1263: * boundaries and count them.
1264: * 2. Put proper indices into f2ntn array,after making it
1265: * of suitable size.
1266: * 3. Remap the normals and areas of solid faces (sxn,syn,szn,
1267: * and sa arrays).
1268: */
1269: ICALLOC(nnbound, &grid->nntet);
1270: ICALLOC(nnbound, &grid->nnpts);
1271: ICALLOC(4*nnfacet,&grid->f2ntn);
1272: ICALLOC(nsnode,&grid->isnode);
1273: FCALLOC(nsnode,&grid->sxn);
1274: FCALLOC(nsnode,&grid->syn);
1275: FCALLOC(nsnode,&grid->szn);
1276: FCALLOC(nsnode,&grid->sa);
1277: PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1278: PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1279: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1280: PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1281: PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1282: PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1283: PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);
1285: isurf = 0;
1286: nsnodeLoc = 0;
1287: nnfacetLoc = 0;
1288: nb = 0;
1289: nte = 0;
1290: ICALLOC(3*nnfacet,&tmp);
1291: ICALLOC(nsnode,&tmp1);
1292: ICALLOC(nnodes,&tmp2);
1293: FCALLOC(4*nsnode,&ftmp);
1294: PetscMemzero(tmp,3*nnfacet*sizeof(int));
1295: PetscMemzero(tmp1,nsnode*sizeof(int));
1296: PetscMemzero(tmp2,nnodes*sizeof(int));
1298: j = 0;
1299: for (i = 0; i < nsnode; i++) {
1300: node1 = a2l[grid->isnode[i] - 1];
1301: if (node1 >= 0) {
1302: tmp1[nsnodeLoc] = node1;
1303: tmp2[node1] = nsnodeLoc;
1304: ftmp[j++] = grid->sxn[i];
1305: ftmp[j++] = grid->syn[i];
1306: ftmp[j++] = grid->szn[i];
1307: ftmp[j++] = grid->sa[i];
1308: nsnodeLoc++;
1309: }
1310: }
1311: for (i = 0; i < nnbound; i++) {
1312: for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1313: node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1314: node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1315: node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];
1317: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1318: nnfacetLoc++;
1319: nte++;
1320: tmp[nb++] = tmp2[node1];
1321: tmp[nb++] = tmp2[node2];
1322: tmp[nb++] = tmp2[node3];
1323: }
1324: }
1325: isurf += grid->nntet[i];
1326: /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1327: grid->nntet[i] = nte;
1328: /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1329: nte = 0;
1330: }
1331: PetscFree(grid->f2ntn);
1332: PetscFree(grid->isnode);
1333: PetscFree(grid->sxn);
1334: PetscFree(grid->syn);
1335: PetscFree(grid->szn);
1336: PetscFree(grid->sa);
1337: ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1338: ICALLOC(nsnodeLoc,&grid->isnode);
1339: FCALLOC(nsnodeLoc,&grid->sxn);
1340: FCALLOC(nsnodeLoc,&grid->syn);
1341: FCALLOC(nsnodeLoc,&grid->szn);
1342: FCALLOC(nsnodeLoc,&grid->sa);
1343: j = 0;
1344: for (i = 0; i < nsnodeLoc; i++) {
1345: grid->isnode[i] = tmp1[i] + 1;
1346: grid->sxn[i] = ftmp[j++];
1347: grid->syn[i] = ftmp[j++];
1348: grid->szn[i] = ftmp[j++];
1349: grid->sa[i] = ftmp[j++];
1350: }
1351: j = 0;
1352: for (i = 0; i < nnfacetLoc; i++) {
1353: grid->f2ntn[i] = tmp[j++] + 1;
1354: grid->f2ntn[nnfacetLoc+i] = tmp[j++] + 1;
1355: grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1356: }
1357: PetscFree(tmp);
1358: PetscFree(tmp1);
1359: PetscFree(tmp2);
1360: PetscFree(ftmp);
1362: /* Now identify the triangles on which the current processor
1363: would perform force calculation */
1364: ICALLOC(nnfacetLoc,&grid->sface_bit);
1365: PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
1366: for (i = 0; i < nnfacetLoc; i++) {
1367: node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1368: node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1369: node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1370: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1371: && (v2p[node3] >= rank)) &&
1372: ((v2p[node1] == rank) || (v2p[node2] == rank)
1373: || (v2p[node3] == rank)))
1374: grid->sface_bit[i] = 1;
1375: }
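/* Ownership rule: a boundary triangle is flagged on the process with the smallest rank
   among the owners of its three nodes, provided it owns at least one of them, so each
   face contributes to the force sums exactly once. */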
1376: /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1377: PetscPrintf(comm,"Solid boundaries partitioned\n");
1379: /* Now,handle all the viscous boundaries - things to be done :
1380: * 1. Identify the nodes belonging to the viscous
1381: * boundaries and count them.
1382: * 2. Put proper indices into f2ntv array,after making it
1383: * of suitable size
1384: * 3. Remap the normals and areas of viscous faces (vxn,vyn,vzn,
1385: * and va arrays).
1386: */
1387: ICALLOC(nvbound, &grid->nvtet);
1388: ICALLOC(nvbound, &grid->nvpts);
1389: ICALLOC(4*nvfacet,&grid->f2ntv);
1390: ICALLOC(nvnode,&grid->ivnode);
1391: FCALLOC(nvnode,&grid->vxn);
1392: FCALLOC(nvnode,&grid->vyn);
1393: FCALLOC(nvnode,&grid->vzn);
1394: FCALLOC(nvnode,&grid->va);
1395: PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1396: PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1397: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1398: PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1399: PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1400: PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1401: PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);
1403: isurf = 0;
1404: nvnodeLoc = 0;
1405: nvfacetLoc = 0;
1406: nb = 0;
1407: nte = 0;
1408: ICALLOC(3*nvfacet,&tmp);
1409: ICALLOC(nvnode,&tmp1);
1410: ICALLOC(nnodes,&tmp2);
1411: FCALLOC(4*nvnode,&ftmp);
1412: PetscMemzero(tmp,3*nvfacet*sizeof(int));
1413: PetscMemzero(tmp1,nvnode*sizeof(int));
1414: PetscMemzero(tmp2,nnodes*sizeof(int));
1416: j = 0;
1417: for (i = 0; i < nvnode; i++) {
1418: node1 = a2l[grid->ivnode[i] - 1];
1419: if (node1 >= 0) {
1420: tmp1[nvnodeLoc] = node1;
1421: tmp2[node1] = nvnodeLoc;
1422: ftmp[j++] = grid->vxn[i];
1423: ftmp[j++] = grid->vyn[i];
1424: ftmp[j++] = grid->vzn[i];
1425: ftmp[j++] = grid->va[i];
1426: nvnodeLoc++;
1427: }
1428: }
1429: for (i = 0; i < nvbound; i++) {
1430: for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1431: node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1432: node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1433: node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1434: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1435: nvfacetLoc++;
1436: nte++;
1437: tmp[nb++] = tmp2[node1];
1438: tmp[nb++] = tmp2[node2];
1439: tmp[nb++] = tmp2[node3];
1440: }
1441: }
1442: isurf += grid->nvtet[i];
1443: grid->nvtet[i] = nte;
1444: nte = 0;
1445: }
1446: PetscFree(grid->f2ntv);
1447: PetscFree(grid->ivnode);
1448: PetscFree(grid->vxn);
1449: PetscFree(grid->vyn);
1450: PetscFree(grid->vzn);
1451: PetscFree(grid->va);
1452: ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1453: ICALLOC(nvnodeLoc,&grid->ivnode);
1454: FCALLOC(nvnodeLoc,&grid->vxn);
1455: FCALLOC(nvnodeLoc,&grid->vyn);
1456: FCALLOC(nvnodeLoc,&grid->vzn);
1457: FCALLOC(nvnodeLoc,&grid->va);
1458: j = 0;
1459: for (i = 0; i < nvnodeLoc; i++) {
1460: grid->ivnode[i] = tmp1[i] + 1;
1461: grid->vxn[i] = ftmp[j++];
1462: grid->vyn[i] = ftmp[j++];
1463: grid->vzn[i] = ftmp[j++];
1464: grid->va[i] = ftmp[j++];
1465: }
1466: j = 0;
1467: for (i = 0; i < nvfacetLoc; i++) {
1468: grid->f2ntv[i] = tmp[j++] + 1;
1469: grid->f2ntv[nvfacetLoc+i] = tmp[j++] + 1;
1470: grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1471: }
1472: PetscFree(tmp);
1473: PetscFree(tmp1);
1474: PetscFree(tmp2);
1475: PetscFree(ftmp);
1477: /* Now identify the triangles on which the current processor
1478: would perform force calculation */
1479: ICALLOC(nvfacetLoc,&grid->vface_bit);
1480: PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
1481: for (i = 0; i < nvfacetLoc; i++) {
1482: node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1483: node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1484: node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1485: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1486: && (v2p[node3] >= rank)) &&
1487: ((v2p[node1] == rank) || (v2p[node2] == rank)
1488: || (v2p[node3] == rank))) {
1489: grid->vface_bit[i] = 1;
1490: }
1491: }
1492: PetscFree(v2p);
1493: PetscPrintf(comm,"Viscous boundaries partitioned\n");
1495: /* Now,handle all the free boundaries - things to be done :
1496: * 1. Identify the nodes belonging to the free
1497: * boundaries and count them.
1498: * 2. Put proper indices into f2ntf array,after making it
1499: * of suitable size
1500: * 3. Remap the normals and areas of free bound. faces (fxn,fyn,fzn,
1501: * and fa arrays).
1502: */
1504: ICALLOC(nfbound, &grid->nftet);
1505: ICALLOC(nfbound, &grid->nfpts);
1506: ICALLOC(4*nffacet,&grid->f2ntf);
1507: ICALLOC(nfnode,&grid->ifnode);
1508: FCALLOC(nfnode,&grid->fxn);
1509: FCALLOC(nfnode,&grid->fyn);
1510: FCALLOC(nfnode,&grid->fzn);
1511: FCALLOC(nfnode,&grid->fa);
1512: PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1513: PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1514: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1515: PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1516: PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1517: PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1518: PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);
1520: isurf = 0;
1521: nfnodeLoc = 0;
1522: nffacetLoc = 0;
1523: nb = 0;
1524: nte = 0;
1525: ICALLOC(3*nffacet,&tmp);
1526: ICALLOC(nfnode,&tmp1);
1527: ICALLOC(nnodes,&tmp2);
1528: FCALLOC(4*nfnode,&ftmp);
1529: PetscMemzero(tmp,3*nffacet*sizeof(int));
1530: PetscMemzero(tmp1,nfnode*sizeof(int));
1531: PetscMemzero(tmp2,nnodes*sizeof(int));
1533: j = 0;
1534: for (i = 0; i < nfnode; i++) {
1535: node1 = a2l[grid->ifnode[i] - 1];
1536: if (node1 >= 0) {
1537: tmp1[nfnodeLoc] = node1;
1538: tmp2[node1] = nfnodeLoc;
1539: ftmp[j++] = grid->fxn[i];
1540: ftmp[j++] = grid->fyn[i];
1541: ftmp[j++] = grid->fzn[i];
1542: ftmp[j++] = grid->fa[i];
1543: nfnodeLoc++;
1544: }
1545: }
1546: for (i = 0; i < nfbound; i++) {
1547: for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1548: node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1549: node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1550: node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1551: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1552: nffacetLoc++;
1553: nte++;
1554: tmp[nb++] = tmp2[node1];
1555: tmp[nb++] = tmp2[node2];
1556: tmp[nb++] = tmp2[node3];
1557: }
1558: }
1559: isurf += grid->nftet[i];
1560: grid->nftet[i] = nte;
1561: nte = 0;
1562: }
1563: PetscFree(grid->f2ntf);
1564: PetscFree(grid->ifnode);
1565: PetscFree(grid->fxn);
1566: PetscFree(grid->fyn);
1567: PetscFree(grid->fzn);
1568: PetscFree(grid->fa);
1569: ICALLOC(4*nffacetLoc,&grid->f2ntf);
1570: ICALLOC(nfnodeLoc,&grid->ifnode);
1571: FCALLOC(nfnodeLoc,&grid->fxn);
1572: FCALLOC(nfnodeLoc,&grid->fyn);
1573: FCALLOC(nfnodeLoc,&grid->fzn);
1574: FCALLOC(nfnodeLoc,&grid->fa);
1575: j = 0;
1576: for (i = 0; i < nfnodeLoc; i++) {
1577: grid->ifnode[i] = tmp1[i] + 1;
1578: grid->fxn[i] = ftmp[j++];
1579: grid->fyn[i] = ftmp[j++];
1580: grid->fzn[i] = ftmp[j++];
1581: grid->fa[i] = ftmp[j++];
1582: }
1583: j = 0;
1584: for (i = 0; i < nffacetLoc; i++) {
1585: grid->f2ntf[i] = tmp[j++] + 1;
1586: grid->f2ntf[nffacetLoc+i] = tmp[j++] + 1;
1587: grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1588: }
1591: PetscFree(tmp);
1592: PetscFree(tmp1);
1593: PetscFree(tmp2);
1594: PetscFree(ftmp);
1595: PetscPrintf(comm,"Free boundaries partitioned\n");
1597: flg = PETSC_FALSE;
1598: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
1599: if (flg) {
1600: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1601: }
1603: /* Put different mappings and other info into grid */
1604: /* ICALLOC(nvertices,&grid->loc2pet);
1605: ICALLOC(nvertices,&grid->loc2glo);
1606: PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1607: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1608: PetscFree(l2a);
1609: PetscFree(l2p);*/
1611: grid->nnodesLoc = nnodesLoc;
1612: grid->nedgeLoc = nedgeLoc;
1613: grid->nvertices = nvertices;
1614: grid->nsnodeLoc = nsnodeLoc;
1615: grid->nvnodeLoc = nvnodeLoc;
1616: grid->nfnodeLoc = nfnodeLoc;
1617: grid->nnfacetLoc = nnfacetLoc;
1618: grid->nvfacetLoc = nvfacetLoc;
1619: grid->nffacetLoc = nffacetLoc;
1620: /*
1621: * FCALLOC(nvertices*4, &grid->gradx);
1622: * FCALLOC(nvertices*4, &grid->grady);
1623: * FCALLOC(nvertices*4, &grid->gradz);
1624: */
1625: FCALLOC(nvertices, &grid->cdt);
1626: FCALLOC(nvertices*4, &grid->phi);
1627: /*
1628: FCALLOC(nvertices, &grid->r11);
1629: FCALLOC(nvertices, &grid->r12);
1630: FCALLOC(nvertices, &grid->r13);
1631: FCALLOC(nvertices, &grid->r22);
1632: FCALLOC(nvertices, &grid->r23);
1633: FCALLOC(nvertices, &grid->r33);
1634: */
1635: FCALLOC(7*nnodesLoc, &grid->rxy);
1637: /* Map the 'ja' array in petsc ordering */
1638: for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1639: AOApplicationToPetsc(ao,nnz,grid->ja);
1640: AODestroy(&ao);
1642: /* Print the different mappings
1643: *
1644: */
1645: {
1646: int partLoc[7],partMax[7],partMin[7],partSum[7];
1647: partLoc[0] = nnodesLoc;
1648: partLoc[1] = nvertices;
1649: partLoc[2] = nedgeLoc;
1650: partLoc[3] = nnfacetLoc;
1651: partLoc[4] = nffacetLoc;
1652: partLoc[5] = nsnodeLoc;
1653: partLoc[6] = nfnodeLoc;
1654: for (i = 0; i < 7; i++) {
1655: partMin[i] = 0;
1656: partMax[i] = 0;
1657: partSum[i] = 0;
1658: }
1660: MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1661: MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1662: MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1663: PetscPrintf(comm,"==============================\n");
1664: PetscPrintf(comm,"Partitioning quality info ....\n");
1665: PetscPrintf(comm,"==============================\n");
1666: PetscPrintf(comm,"------------------------------------------------------------\n");
1667: PetscPrintf(comm,"Item Min Max Average Total\n");
1668: PetscPrintf(comm,"------------------------------------------------------------\n");
1669: PetscPrintf(comm,"Local Nodes %9d %9d %9d %9d\n",
1670: partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1671: PetscPrintf(comm,"Local+Ghost Nodes %9d %9d %9d %9d\n",
1672: partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1673: PetscPrintf(comm,"Local Edges %9d %9d %9d %9d\n",
1674: partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1675: PetscPrintf(comm,"Local solid faces %9d %9d %9d %9d\n",
1676: partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1677: PetscPrintf(comm,"Local free faces %9d %9d %9d %9d\n",
1678: partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1679: PetscPrintf(comm,"Local solid nodes %9d %9d %9d %9d\n",
1680: partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1681: PetscPrintf(comm,"Local free nodes %9d %9d %9d %9d\n",
1682: partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1683: PetscPrintf(comm,"------------------------------------------------------------\n");
1684: }
1685: flg = PETSC_FALSE;
1686: PetscOptionsGetBool(NULL,NULL,"-partition_info",&flg,NULL);
1687: if (flg) {
1688: char part_file[PETSC_MAX_PATH_LEN];
1689: sprintf(part_file,"output.%d",rank);
1690: fptr1 = fopen(part_file,"w");
1692: fprintf(fptr1,"---------------------------------------------\n");
1693: fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1694: fprintf(fptr1,"---------------------------------------------\n");
1695: fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1696: fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1697: fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1698: fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1699: fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1700: fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1701: fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1702: fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1703: fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1704: fprintf(fptr1,"\n");
1705: fprintf(fptr1,"nvertices = %d\n",nvertices);
1706: fprintf(fptr1,"nnbound = %d\n",nnbound);
1707: fprintf(fptr1,"nvbound = %d\n",nvbound);
1708: fprintf(fptr1,"nfbound = %d\n",nfbound);
1709: fprintf(fptr1,"\n");
1711: fprintf(fptr1,"---------------------------------------------\n");
1712: fprintf(fptr1,"Different Orderings\n");
1713: fprintf(fptr1,"---------------------------------------------\n");
1714: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1715: fprintf(fptr1,"---------------------------------------------\n");
1716: for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1717: fprintf(fptr1,"\n");
1719: fprintf(fptr1,"---------------------------------------------\n");
1720: fprintf(fptr1,"Solid Boundary Nodes\n");
1721: fprintf(fptr1,"---------------------------------------------\n");
1722: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1723: fprintf(fptr1,"---------------------------------------------\n");
1724: for (i = 0; i < nsnodeLoc; i++) {
1725: j = grid->isnode[i]-1;
1726: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1727: }
1728: fprintf(fptr1,"\n");
1729: fprintf(fptr1,"---------------------------------------------\n");
1730: fprintf(fptr1,"f2ntn array\n");
1731: fprintf(fptr1,"---------------------------------------------\n");
1732: for (i = 0; i < nnfacetLoc; i++) {
1733: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1734: grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1735: }
1736: fprintf(fptr1,"\n");
1739: fprintf(fptr1,"---------------------------------------------\n");
1740: fprintf(fptr1,"Viscous Boundary Nodes\n");
1741: fprintf(fptr1,"---------------------------------------------\n");
1742: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1743: fprintf(fptr1,"---------------------------------------------\n");
1744: for (i = 0; i < nvnodeLoc; i++) {
1745: j = grid->ivnode[i]-1;
1746: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1747: }
1748: fprintf(fptr1,"\n");
1749: fprintf(fptr1,"---------------------------------------------\n");
1750: fprintf(fptr1,"f2ntv array\n");
1751: fprintf(fptr1,"---------------------------------------------\n");
1752: for (i = 0; i < nvfacetLoc; i++) {
1753: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1754: grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1755: }
1756: fprintf(fptr1,"\n");
1758: fprintf(fptr1,"---------------------------------------------\n");
1759: fprintf(fptr1,"Free Boundary Nodes\n");
1760: fprintf(fptr1,"---------------------------------------------\n");
1761: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1762: fprintf(fptr1,"---------------------------------------------\n");
1763: for (i = 0; i < nfnodeLoc; i++) {
1764: j = grid->ifnode[i]-1;
1765: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1766: }
1767: fprintf(fptr1,"\n");
1768: fprintf(fptr1,"---------------------------------------------\n");
1769: fprintf(fptr1,"f2ntf array\n");
1770: fprintf(fptr1,"---------------------------------------------\n");
1771: for (i = 0; i < nffacetLoc; i++) {
1772: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1773: grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1774: }
1775: fprintf(fptr1,"\n");
1777: fprintf(fptr1,"---------------------------------------------\n");
1778: fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1779: fprintf(fptr1,"---------------------------------------------\n");
1780: ICALLOC(nnodes,&p2l);
1781: for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1782: for (i = 0; i < nnodesLoc; i++) {
1783: jstart = grid->ia[grid->loc2glo[i]] - 1;
1784: jend = grid->ia[grid->loc2glo[i]+1] - 1;
1785: fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1786: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1787: fprintf(fptr1,"\n");
1789: fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1790: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1791: fprintf(fptr1,"\n");
1793: fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1794: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1795: fprintf(fptr1,"\n");
1797: }
1798: fprintf(fptr1,"\n");
1799: PetscFree(p2l);
1800: fclose(fptr1);
1801: }
1803: /* Free the temporary arrays */
1804: PetscFree(a2l);
1805: MPI_Barrier(comm);
1806: return(0);
1807: }
1809: /*
1810: encode 3 8-bit binary bytes as 4 '6-bit' characters, len is the number of bytes remaining, at most 3 are used
1811: */
1812: void *base64_encodeblock(void *vout,const void *vin,int len)
1813: {
1814: unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1815: /* Translation Table as described in RFC1113 */
1816: static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1817: memcpy(in,vin,PetscMin(len,3));
1818: out[0] = cb64[in[0] >> 2];
1819: out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1820: out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1821: out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1822: return (void*)(out+4);
1823: }
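/*
   Illustrative sketch (not part of the original FUN3D source): a quick check of base64_encodeblock() above.
   The classic test vector "Man" encodes to "TWFu"; a short final block (len < 3) is padded with '='.
   The helper name base64_demo is hypothetical.
*/
static void base64_demo(void)
{
  const char *msg = "Man";
  char enc[5];
  base64_encodeblock(enc,msg,3); /* one full 3-byte block produces 4 output characters */
  enc[4] = '\0';
  PetscPrintf(PETSC_COMM_SELF,"%s -> %s\n",msg,enc); /* expected: Man -> TWFu */
}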
1825: /* Write binary data, does not do byte swapping. */
1826: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1827: {
1829: PetscMPIInt rank;
1832: if (n < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Trying to write a negative amount of data %D",n);
1833: if (!n) return(0);
1834: MPI_Comm_rank(comm,&rank);
1835: if (!rank) {
1836: size_t count;
1837: int bytes,size;
1838: switch (dtype) {
1839: case PETSC_DOUBLE:
1840: size = sizeof(double);
1841: break;
1842: case PETSC_FLOAT:
1843: size = sizeof(float);
1844: break;
1845: case PETSC_INT:
1846: size = sizeof(PetscInt);
1847: break;
1848: case PETSC_CHAR:
1849: size = sizeof(char);
1850: break;
1851: default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1852: }
1853: bytes = size*n;
1854: if (base64) {
1855: unsigned char *buf,*ptr;
1856: int i;
1857: size_t b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1858: PetscMalloc(b64alloc,&buf);
1859: ptr = buf;
1860: ptr = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1861: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1862: for (i=0; i<bytes; i+=3) {
1863: int left = bytes - i;
1864: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1865: }
1866: *ptr++ = '\n';
1867: /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1868: count = fwrite(buf,1,ptr-buf,fp);
1869: if (count < (size_t)(ptr-buf)) {
1870: perror("");
1871: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D of %D bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1872: }
1873: PetscFree(buf);
1874: } else {
1875: count = fwrite(&bytes,sizeof(int),1,fp);
1876: if (count != 1) {
1877: perror("");
1878: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1879: }
1880: count = fwrite(data,size,(size_t)n,fp);
1881: if ((int)count != n) {
1882: perror("");
1883: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D/%D array members of size %D",(PetscInt)count,n,(PetscInt)size);
1884: }
1885: }
1886: }
1887: return(0);
1888: }
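/*
   Usage sketch (illustrative, not part of the original source): on rank 0 the raw mode writes a 4-byte
   length header followed by the bytes of the array, and the base64 mode writes the same payload encoded
   as text; this matches the offset accounting used for the VTK "appended" data section in WritePVTU()
   below. The helper name FWriteDemo and the file name demo.bin are hypothetical.
*/
static PetscErrorCode FWriteDemo(MPI_Comm comm)
{
  FILE *fp;
  double vals[3] = {1.0,2.0,3.0};
  PetscFOpen(comm,"demo.bin","w",&fp);
  PetscFWrite_FUN3D(comm,fp,vals,3,PETSC_DOUBLE,PETSC_FALSE); /* int byte count, then 24 raw bytes */
  PetscFWrite_FUN3D(comm,fp,vals,3,PETSC_DOUBLE,PETSC_TRUE);  /* same data, base64-encoded text    */
  PetscFClose(comm,fp);
  return(0);
}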
1890: static void SortInt2(PetscInt *a,PetscInt *b)
1891: {
1892: if (*b < *a) {
1893: PetscInt c = *b;
1894: *b = *a;
1895: *a = c;
1896: }
1897: }
1899: /* b = intersection(a,b) */
1900: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1901: {
1902: PetscInt i,n,j;
1905: j = 0;
1906: n = 0;
1907: for (i=0; i<*nb; i++) {
1908: while (j<na && a[j]<b[i]) j++;
1909: if (j<na && a[j]==b[i]) {
1910: b[n++] = b[i];
1911: j++;
1912: }
1913: }
1914: *nb = n;
1915: return(0);
1916: }
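/*
   Minimal sketch (not part of the original source): IntersectInt() assumes both lists are sorted
   ascending and overwrites b in place with the common entries. The helper name IntersectDemo is
   hypothetical.
*/
static PetscErrorCode IntersectDemo(void)
{
  const PetscInt a[5] = {1,3,5,7,9};
  PetscInt b[5] = {2,3,4,5,9};
  PetscInt nb = 5;
  IntersectInt(5,a,&nb,b); /* afterwards nb == 3 and b begins with {3,5,9} */
  return(0);
}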
1918: /*
1919: This function currently has a semantic bug: it only produces cells whose edges all appear in the local edge list. Since the local mesh
1920: does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1921: be constructed. This causes visualization artifacts.
1923: This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1924: layer of edges in this function.
1925: */
1926: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1927: {
1929: PetscInt ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1930: PetscInt *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1931: #if defined(INTERLACING)
1932: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1933: #else
1934: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1935: #endif
1938: *incell = -1;
1939: *iconn = NULL;
1940: acell = 100000; /* allocate for this many cells */
1941: PetscMalloc1(acell,&conn);
1942: PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1943: PetscCalloc1(nnodes,&utmp);
1944: /* count the number of edges in the upper-triangular matrix u */
1945: for (i=0; i<nedge; i++) { /* count number of nonzeros in upper triangular matrix */
1946: GetEdge(eptr,i,node0,node1);
1947: utmp[PetscMin(node0,node1)]++;
1948: }
1949: rowmax = 0;
1950: ui[0] = 0;
1951: for (i=0; i<nnodes; i++) {
1952: rowmax = PetscMax(rowmax,utmp[i]);
1953: ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1954: utmp[i] = 0;
1955: }
1956: for (i=0; i<nedge; i++) { /* assemble upper triangular matrix U */
1957: GetEdge(eptr,i,node0,node1);
1958: SortInt2(&node0,&node1);
1959: uj[ui[node0] + utmp[node0]++] = node1;
1960: }
1961: PetscFree(utmp);
1962: for (i=0; i<nnodes; i++) { /* sort every row */
1963: PetscInt n = ui[i+1] - ui[i];
1964: PetscSortInt(n,&uj[ui[i]]);
1965: }
1967: /* Infer cells */
1968: ncell = 0;
1969: PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1970: for (i=0; i<nnodes; i++) {
1971: node0 = i;
1972: ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1973: PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1974: for (j=0; j<ntmp1; j++) {
1975: node1 = tmp1[j];
1976: if (node1 < 0 || nnodes <= node1) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node1,nnodes);
1977: if (node1 <= node0) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node0,node1);
1978: ntmp2 = ui[node1+1] - ui[node1];
1979: PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1980: IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1981: for (k=0; k<ntmp2; k++) {
1982: node2 = tmp2[k];
1983: if (node2 < 0 || nnodes <= node2) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node2,nnodes);
1984: if (node2 <= node1) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node1,node2);
1985: ntmp3 = ui[node2+1] - ui[node2];
1986: PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1987: IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1988: for (l=0; l<ntmp3; l++) {
1989: node3 = tmp3[l];
1990: if (node3 < 0 || nnodes <= node3) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node3,nnodes);
1991: if (node3 <= node2) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node2,node3);
1992: if (ncell > acell) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer exceeded");
1994: conn[ncell][0] = node0;
1995: conn[ncell][1] = node1;
1996: conn[ncell][2] = node2;
1997: conn[ncell][3] = node3;
1998: if (0) {
1999: PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
2000: PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
2001: PetscIntView(ntmp1,tmp1,viewer);
2002: PetscIntView(ntmp2,tmp2,viewer);
2003: PetscIntView(ntmp3,tmp3,viewer);
2004: /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
2005: * can tell, that means it is an invalid mesh. I don't know what the intent was. */
2006: if (ntmp3 > 2) SETERRQ(PETSC_COMM_SELF,1,"More than two ways to complete a tetrahedron using a common triangle");
2007: }
2008: ncell++;
2009: }
2010: }
2011: }
2012: }
2013: PetscFree3(tmp1,tmp2,tmp3);
2014: PetscFree2(ui,uj);
2016: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %D cells with nnodes=%D nedge=%D\n",ncell,nnodes,nedge);
2017: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2018: *incell = ncell;
2019: *iconn = (PetscInt*)conn;
2020: return(0);
2021: }
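/*
   Illustrative check (not part of the original source): with interlaced edge storage, a single
   tetrahedron on nodes 1..4 (1-based, as in eptr) has six edges, and the routine above should infer
   exactly one cell, {0,1,2,3} in 0-based numbering. The helper name InferCellsDemo is hypothetical.
*/
static PetscErrorCode InferCellsDemo(void)
{
  PetscInt eptr[12] = {1,2, 1,3, 1,4, 2,3, 2,4, 3,4}; /* edges 1-2,1-3,1-4,2-3,2-4,3-4 */
  PetscInt ncell,*conn;
  InferLocalCellConnectivity(4,6,eptr,&ncell,&conn);
  /* expect ncell == 1 and conn[0..3] == {0,1,2,3} */
  PetscFree(conn);
  return(0);
}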
2023: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2024: {
2026: PetscInt nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2027: PetscInt *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2028: Vec VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2029: PetscScalar *vne,*vnei;
2030: IS isglobal,isedgeOv;
2031: VecScatter nescat,neiscat;
2032: PetscBool flg;
2035: nnodes = grid->nnodes; /* Total number of global nodes */
2036: nnodesLoc = grid->nnodesLoc; /* Number of owned nodes */
2037: nvertices = grid->nvertices; /* Number of owned+ghosted nodes */
2038: nedgeLoc = grid->nedgeLoc; /* Number of edges connected to owned nodes */
2040: /* Count the number of neighbors of each owned node */
2041: MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2042: rstart -= nnodesLoc;
2043: PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2044: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2045: for (i=0; i<nedgeLoc; i++) {
2046: GetEdge(grid->eptr,i,node0,node1);
2047: node0p = grid->loc2pet[node0];
2048: node1p = grid->loc2pet[node1];
2049: if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2050: if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2051: }
2052: /* Get the offset in the node-based edge array */
2053: nodeEdgeOffset[0] = 0;
2054: for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2055: nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];
2057: /* Pack a Vec by node of all the edges for that node. The nodes are stored by global index */
2058: VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2059: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2060: VecGetArray(VNodeEdge,&vne);
2061: for (i=0; i<nedgeLoc; i++) {
2062: GetEdge(grid->eptr,i,node0,node1);
2063: node0p = grid->loc2pet[node0];
2064: node1p = grid->loc2pet[node1];
2065: if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2066: if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2067: }
2068: VecRestoreArray(VNodeEdge,&vne);
2069: VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);
2071: /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2072: VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2073: VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2074: VecSetBlockSize(VNodeEdgeInfo,2);
2075: VecSetType(VNodeEdgeInfo,VECMPI);
2077: VecGetArray(VNodeEdgeInfo,&vnei);
2078: for (i=0; i<nnodesLoc; i++) {
2079: vnei[i*2+0] = nodeEdgeCount[i]; /* Total number of edges from this vertex */
2080: vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2081: }
2082: VecRestoreArray(VNodeEdgeInfo,&vnei);
2083: PetscFree2(nodeEdgeCount,nodeEdgeOffset);
2085: /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2086: VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2087: VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2088: VecSetBlockSize(VNodeEdgeInfoOv,2);
2089: VecSetType(VNodeEdgeInfoOv,VECSEQ);
2091: ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2092: VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2093: VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2094: VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2095: VecScatterDestroy(&neiscat);
2096: VecDestroy(&VNodeEdgeInfo);
2097: ISDestroy(&isglobal);
2099: /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2100: nedgeOv = 0; /* First count the number of edges in the complete overlap */
2101: VecGetArray(VNodeEdgeInfoOv,&vnei);
2102: for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2103: /* Allocate for the global indices in VNodeEdge of the edges to receive */
2104: PetscMalloc1(nedgeOv,&eIdxOv);
2105: for (i=0,cnt=0; i<nvertices; i++) {
2106: for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2107: }
2108: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2109: ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2110: VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2111: VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2112: VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2113: VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2114: VecScatterDestroy(&nescat);
2115: VecDestroy(&VNodeEdge);
2116: ISDestroy(&isedgeOv);
2117: PetscFree(eIdxOv);
2119: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %D, half=%D\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);
2121: /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2122: PetscMalloc1(nnodes,&p2l);
2123: for (i=0; i<nnodes; i++) p2l[i] = -1;
2124: for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2125: if (1) {
2126: PetscInt m = 0;
2127: for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2128: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %D; nvertices=%D nnodesLoc=%D nnodes=%D\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2129: }
2131: /* Log each edge connecting nodes in owned+ghosted exactly once */
2132: VecGetArray(VNodeEdgeInfoOv,&vnei);
2133: VecGetArray(VNodeEdgeOv,&vne);
2134: /* First count the number of edges to keep */
2135: nedgeOv = 0;
2136: for (i=0,cnt=0; i<nvertices; i++) {
2137: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2138: node0 = i;
2139: for (j=0; j<n; j++) {
2140: node1p = vne[cnt++];
2141: node1 = p2l[node1p];
2142: if (node0 < node1) nedgeOv++;
2143: }
2144: }
2145: /* Array of edges to keep */
2146: PetscMalloc1(2*nedgeOv,&eptrOv);
2147: nedgeOv = 0;
2148: for (i=0,cnt=0; i<nvertices; i++) {
2149: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2150: node0 = i;
2151: for (j=0; j<n; j++) {
2152: node1p = vne[cnt++];
2153: node1 = p2l[node1p];
2154: if (node0 < node1) {
2155: eptrOv[2*nedgeOv+0] = node0;
2156: eptrOv[2*nedgeOv+1] = node1;
2157: nedgeOv++;
2158: }
2159: }
2160: }
2161: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2162: VecRestoreArray(VNodeEdgeOv,&vne);
2163: VecDestroy(&VNodeEdgeInfoOv);
2164: VecDestroy(&VNodeEdgeOv);
2165: PetscFree(p2l);
2167: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%D nedgeOv=%D\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2168: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2170: flg = PETSC_TRUE;
2171: PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2172: if (flg) {
2173: *invertices = grid->nvertices; /* We did not change the number of vertices */
2174: *inedgeOv = nedgeOv;
2175: *ieptrOv = eptrOv;
2176: } else {
2177: *invertices = grid->nvertices;
2178: *inedgeOv = nedgeLoc;
2179: PetscFree(eptrOv);
2180: PetscMalloc1(2*nedgeLoc,&eptrOv);
2181: PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2182: *ieptrOv = eptrOv;
2183: }
2184: return(0);
2185: }
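/*
   Minimal sketch (illustrative, not part of the original source) of the count-to-offset step used for
   nodeEdgeCount/nodeEdgeOffset above: offsets are the exclusive prefix sum of the per-node counts, so
   the edges of owned node i occupy slots [nodeEdgeOffset[i], nodeEdgeOffset[i]+nodeEdgeCount[i]) of the
   packed vector. The helper name PrefixSumDemo is hypothetical.
*/
static void PrefixSumDemo(void)
{
  PetscInt count[4] = {2,0,3,1};
  PetscInt offset[4];
  PetscInt i;
  offset[0] = 0;
  for (i=0; i<3; i++) offset[i+1] = offset[i] + count[i]; /* offset becomes {0,2,2,5} */
  PetscPrintf(PETSC_COMM_SELF,"total packed length = %D\n",offset[3]+count[3]); /* 6 */
}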
2187: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2188: {
2189: GRID *grid = user->grid;
2190: TstepCtx *tsCtx = user->tsCtx;
2191: FILE *vtu,*pvtu;
2192: char pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2193: MPI_Comm comm;
2194: PetscMPIInt rank,size;
2195: PetscInt i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2196: PetscErrorCode ierr;
2197: Vec Xloc,Xploc,Xuloc;
2198: unsigned char *celltype;
2199: int *celloffset,*conn,*cellrank;
2200: const PetscScalar *x;
2201: PetscScalar *xu,*xp;
2202: const char *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";
2205: GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2206: comm = PETSC_COMM_WORLD;
2207: MPI_Comm_rank(comm,&rank);
2208: MPI_Comm_size(comm,&size);
2209: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2210: SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2211: #endif
2212: PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%D.pvtu",fname,tsCtx->itstep);
2213: PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%D-%D.vtu",fname,tsCtx->itstep,rank);
2214: PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2215: PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2216: PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2217: PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2218: PetscFPrintf(comm,pvtu," <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2219: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2220: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2221: PetscFPrintf(comm,pvtu," </PPointData>\n");
2222: PetscFPrintf(comm,pvtu," <PCellData>\n");
2223: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2224: PetscFPrintf(comm,pvtu," </PCellData>\n");
2225: PetscFPrintf(comm,pvtu," <PPoints>\n");
2226: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2227: PetscFPrintf(comm,pvtu," </PPoints>\n");
2228: PetscFPrintf(comm,pvtu," <PCells>\n");
2229: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2230: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" />\n");
2231: PetscFPrintf(comm,pvtu," <PDataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" />\n");
2232: PetscFPrintf(comm,pvtu," </PCells>\n");
2233: for (i=0; i<size; i++) {
2234: PetscFPrintf(comm,pvtu," <Piece Source=\"%s-%D-%D.vtu\" />\n",fname,tsCtx->itstep,i);
2235: }
2236: PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2237: PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2238: PetscFClose(comm,pvtu);
2240: Xloc = grid->qnodeLoc;
2241: VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2242: VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2243: VecGetBlockSize(Xloc,&bs);
2244: if (bs != 4) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"expected block size 4, got %D",bs);
2245: VecGetSize(Xloc,&nloc);
2246: if (nloc/bs != nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nloc/bs=%D to match nvertices=%D",nloc/bs,nvertices);
2247: if (nvertices != grid->nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nvertices=%D to match grid->nvertices=%D",nvertices,grid->nvertices);
2248: VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);
2250: VecCreate(PETSC_COMM_SELF,&Xuloc);
2251: VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2252: VecSetBlockSize(Xuloc,3);
2253: VecSetType(Xuloc,VECSEQ);
2255: VecGetArrayRead(Xloc,&x);
2256: VecGetArray(Xploc,&xp);
2257: VecGetArray(Xuloc,&xu);
2258: for (i=0; i<nvertices; i++) {
2259: xp[i] = x[i*4+0];
2260: xu[i*3+0] = x[i*4+1];
2261: xu[i*3+1] = x[i*4+2];
2262: xu[i*3+2] = x[i*4+3];
2263: }
2264: VecRestoreArrayRead(Xloc,&x);
2266: InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);
2268: PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2269: PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2270: PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2271: PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2272: PetscFPrintf(PETSC_COMM_SELF,vtu," <Piece NumberOfPoints=\"%D\" NumberOfCells=\"%D\">\n",nvertices,ncells);
2274: /* Solution fields */
2275: PetscFPrintf(PETSC_COMM_SELF,vtu," <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2276: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2277: boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2278: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2279: boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2280: PetscFPrintf(PETSC_COMM_SELF,vtu," </PointData>\n");
2281: PetscFPrintf(PETSC_COMM_SELF,vtu," <CellData>\n");
2282: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2283: boffset += ncells*sizeof(int) + sizeof(int);
2284: PetscFPrintf(PETSC_COMM_SELF,vtu," </CellData>\n");
2285: /* Coordinate positions */
2286: PetscFPrintf(PETSC_COMM_SELF,vtu," <Points>\n");
2287: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2288: boffset += nvertices*3*sizeof(double) + sizeof(int);
2289: PetscFPrintf(PETSC_COMM_SELF,vtu," </Points>\n");
2290: /* Cell connectivity */
2291: PetscFPrintf(PETSC_COMM_SELF,vtu," <Cells>\n");
2292: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2293: boffset += ncells*4*sizeof(int) + sizeof(int);
2294: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2295: boffset += ncells*sizeof(int) + sizeof(int);
2296: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2297: boffset += ncells*sizeof(unsigned char) + sizeof(int);
2298: PetscFPrintf(PETSC_COMM_SELF,vtu," </Cells>\n");
2299: PetscFPrintf(PETSC_COMM_SELF,vtu," </Piece>\n");
2300: PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2301: PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2302: PetscFPrintf(PETSC_COMM_SELF,vtu,"_");
2304: /* Write pressure */
2305: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);
2307: /* Write velocity */
2308: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);
2310: /* Label cell rank, not a measure of computation because nothing is actually computed at cells. This is written
2311: * primarily to aid in debugging. The partition for computation should label vertices. */
2312: PetscMalloc1(ncells,&cellrank);
2313: for (i=0; i<ncells; i++) cellrank[i] = rank;
2314: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2315: PetscFree(cellrank);
2317: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2318: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2319: PetscFree(conn);
2321: PetscMalloc1(ncells,&celloffset);
2322: for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2323: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2324: PetscFree(celloffset);
2326: PetscMalloc1(ncells,&celltype);
2327: for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2328: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2329: PetscFree(celltype);
2331: PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2332: PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2333: PetscFClose(PETSC_COMM_SELF,vtu);
2335: VecRestoreArray(Xploc,&xp);
2336: VecRestoreArray(Xuloc,&xu);
2337: VecDestroy(&Xploc);
2338: VecDestroy(&Xuloc);
2339: PetscFree(eptr);
2340: return(0);
2341: }
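/*
   Offset bookkeeping sketch (illustrative, not part of the original source): each appended VTU array is
   stored as a 4-byte length followed by its payload, which is why every boffset increment above adds
   sizeof(int) to the size of the data itself. The helper name VTUOffsetDemo is hypothetical.
*/
static void VTUOffsetDemo(void)
{
  PetscInt nvertices = 10,ncells = 4;
  PetscInt boffset = 0;
  boffset += nvertices*sizeof(PetscScalar) + sizeof(int);   /* Pressure: 1 scalar per vertex  */
  boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int); /* Velocity: 3 scalars per vertex */
  boffset += ncells*sizeof(int) + sizeof(int);              /* Rank: 1 int per cell           */
  PetscPrintf(PETSC_COMM_SELF,"next array would start at appended offset %D\n",boffset);
}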
2343: /*---------------------------------------------------------------------*/
2344: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2345: /*---------------------------------------------------------------------*/
2346: {
2347: int ierr,i,j,bs;
2348: int nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2349: int nnodesLoc,nvertices;
2350: int *val_diag,*val_offd,*svertices,*loc2pet;
2351: IS isglobal,islocal;
2352: ISLocalToGlobalMapping isl2g;
2353: PetscBool flg;
2354: MPI_Comm comm = PETSC_COMM_WORLD;
2357: nnodes = grid->nnodes;
2358: nnodesLoc = grid->nnodesLoc;
2359: nvertices = grid->nvertices;
2360: loc2pet = grid->loc2pet;
2361: bs = 4;
2363: /* Set up the PETSc datastructures */
2365: VecCreate(comm,&grid->qnode);
2366: VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2367: VecSetBlockSize(grid->qnode,bs);
2368: VecSetType(grid->qnode,VECMPI);
2370: VecDuplicate(grid->qnode,&grid->res);
2371: VecDuplicate(grid->qnode,&tsCtx->qold);
2372: VecDuplicate(grid->qnode,&tsCtx->func);
2374: VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2375: VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2376: VecSetBlockSize(grid->qnodeLoc,bs);
2377: VecSetType(grid->qnodeLoc,VECSEQ);
2379: VecCreate(comm,&grid->grad);
2380: VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2381: VecSetBlockSize(grid->grad,3*bs);
2382: VecSetType(grid->grad,VECMPI);
2384: VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2385: VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2386: VecSetBlockSize(grid->gradLoc,3*bs);
2387: VecSetType(grid->gradLoc,VECSEQ);
2390: /* Create Scatter between the local and global vectors */
2391: /* First create scatter for qnode */
2392: ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2393: #if defined(INTERLACING)
2394: #if defined(BLOCKING)
2395: ICALLOC(nvertices,&svertices);
2396: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2397: ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2398: #else
2399: ICALLOC(bs*nvertices,&svertices);
2400: for (i = 0; i < nvertices; i++)
2401: for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2402: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2403: #endif
2404: #else
2405: ICALLOC(bs*nvertices,&svertices);
2406: for (j = 0; j < bs; j++)
2407: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2408: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2409: #endif
2410: PetscFree(svertices);
2411: VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2412: ISDestroy(&isglobal);
2413: ISDestroy(&islocal);
2415: /* Now create scatter for gradient vector of qnode */
2416: ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2417: #if defined(INTERLACING)
2418: #if defined(BLOCKING)
2419: ICALLOC(nvertices,&svertices);
2420: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2421: ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2422: #else
2423: ICALLOC(3*bs*nvertices,&svertices);
2424: for (i = 0; i < nvertices; i++)
2425: for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2426: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2427: #endif
2428: #else
2429: ICALLOC(3*bs*nvertices,&svertices);
2430: for (j = 0; j < 3*bs; j++)
2431: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2432: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2433: #endif
2434: PetscFree(svertices);
2435: VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2436: ISDestroy(&isglobal);
2437: ISDestroy(&islocal);
2439: /* Store the number of non-zeroes per row */
2440: #if defined(INTERLACING)
2441: #if defined(BLOCKING)
2442: ICALLOC(nnodesLoc,&val_diag);
2443: ICALLOC(nnodesLoc,&val_offd);
2444: for (i = 0; i < nnodesLoc; i++) {
2445: jstart = grid->ia[i] - 1;
2446: jend = grid->ia[i+1] - 1;
2447: nbrs_diag = 0;
2448: nbrs_offd = 0;
2449: for (j = jstart; j < jend; j++) {
2450: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2451: else nbrs_offd++;
2452: }
2453: val_diag[i] = nbrs_diag;
2454: val_offd[i] = nbrs_offd;
2455: }
2456: MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2457: bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2458: PETSC_DEFAULT,val_offd,&grid->A);
2459: #else
2460: ICALLOC(nnodesLoc*4,&val_diag);
2461: ICALLOC(nnodesLoc*4,&val_offd);
2462: for (i = 0; i < nnodesLoc; i++) {
2463: jstart = grid->ia[i] - 1;
2464: jend = grid->ia[i+1] - 1;
2465: nbrs_diag = 0;
2466: nbrs_offd = 0;
2467: for (j = jstart; j < jend; j++) {
2468: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2469: else nbrs_offd++;
2470: }
2471: for (j = 0; j < 4; j++) {
2472: int row = 4*i + j;
2473: val_diag[row] = nbrs_diag*4;
2474: val_offd[row] = nbrs_offd*4;
2475: }
2476: }
2477: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2478: bs*nnodes,bs*nnodes,NULL,val_diag,
2479: NULL,val_offd,&grid->A);
2480: #endif
2481: PetscFree(val_diag);
2482: PetscFree(val_offd);
2484: #else
2485: if (size > 1) SETERRQ(PETSC_COMM_SELF,1,"Parallel case not supported in non-interlaced case\n");
2486: ICALLOC(nnodes*4,&val_diag);
2487: ICALLOC(nnodes*4,&val_offd);
2488: for (j = 0; j < 4; j++)
2489: for (i = 0; i < nnodes; i++) {
2490: int row;
2491: row = i + j*nnodes;
2492: jstart = grid->ia[i] - 1;
2493: jend = grid->ia[i+1] - 1;
2494: nbrs_diag = jend - jstart;
2495: val_diag[row] = nbrs_diag*4;
2496: val_offd[row] = 0;
2497: }
2498: /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2499: val,&grid->A);*/
2500: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2501: bs*nnodes,bs*nnodes,NULL,val_diag,
2502: NULL,val_offd,&grid->A);
2503: MatSetBlockSize(grid->A,bs);
2504: PetscFree(val_diag);
2505: PetscFree(val_offd);
2506: #endif
2508: flg = PETSC_FALSE;
2509: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
2510: if (flg) {
2511: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2512: }
2514: /* Set local to global mapping for setting the matrix elements in
2515: * local ordering : first set row by row mapping
2516: */
2517: ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2518: MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2519: ISLocalToGlobalMappingDestroy(&isl2g);
2520: return(0);
2521: }
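/*
   Preallocation sketch (illustrative, not part of the original source): for local node i, val_diag[i]
   counts neighbors whose PETSc index lies in [rstart, rstart+nnodesLoc) (the diagonal block of the
   parallel matrix) and val_offd[i] counts the rest, so MatCreateBAIJ() above reserves exactly that many
   bs-by-bs blocks per block row. The helper name CountBlockNnz is hypothetical.
*/
static void CountBlockNnz(int rstart,int nnodesLoc,int nnbrs,const int *nbrs,int *ndiag,int *noffd)
{
  int j;
  *ndiag = 0;
  *noffd = 0;
  for (j = 0; j < nnbrs; j++) {
    if (nbrs[j] >= rstart && nbrs[j] < rstart+nnodesLoc) (*ndiag)++;
    else (*noffd)++;
  }
}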
2523: /*================================= CLINK ===================================*/
2524: /* */
2525: /* Used in establishing the links between FORTRAN common blocks and C */
2526: /* */
2527: /*===========================================================================*/
2528: EXTERN_C_BEGIN
2529: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2530: {
2531: c_info = p1;
2532: c_runge = p2;
2533: c_gmcom = p3;
2534: }
2535: EXTERN_C_END
2537: /*========================== SET_UP_GRID====================================*/
2538: /* */
2539: /* Allocates the memory for the fine grid */
2540: /* */
2541: /*==========================================================================*/
2542: int set_up_grid(GRID *grid)
2543: {
2544: int nnodes,nedge;
2545: int nsface,nvface,nfface,nbface;
2546: int tnode,ierr;
2547: /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2548: /*int nsnode,nvnode,nfnode; */
2549: /*int mgzero=0;*/ /* Variable so we dont allocate memory for multigrid */
2550: /*int jalloc;*/ /* If jalloc=1 allocate space for dfp and dfm */
2551: /*
2552: * stuff to read in dave's grids
2553: */
2554: /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2555: /* end of stuff */
2558: nnodes = grid->nnodes;
2559: tnode = grid->nnodes;
2560: nedge = grid->nedge;
2561: nsface = grid->nsface;
2562: nvface = grid->nvface;
2563: nfface = grid->nfface;
2564: nbface = nsface + nvface + nfface;
2566: /*ncell = grid->ncell;
2567: vface = grid->nedge;
2568: lnodes = grid->nnodes;
2569: nsnode = grid->nsnode;
2570: nvnode = grid->nvnode;
2571: nfnode = grid->nfnode;
2572: nsrch = c_gmcom->nsrch;
2573: ilu0 = c_gmcom->ilu0;
2574: ileast = grid->ileast;
2575: ifcn = c_gmcom->ifcn;
2576: jalloc = 0;
2577: kvisc = grid->jvisc;*/
2579: /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/
2581: /*
2582: * stuff to read in dave's grids
2583: */
2584: /*nnbound = grid->nnbound;
2585: nvbound = grid->nvbound;
2586: nfbound = grid->nfbound;
2587: nnfacet = grid->nnfacet;
2588: nvfacet = grid->nvfacet;
2589: nffacet = grid->nffacet;
2590: ntte = grid->ntte;*/
2591: /* end of stuff */
2594: /* if (!ileast) lnodes = 1;
2595: printf("In set_up_grid->jvisc = %d\n",grid->jvisc);
2597: if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2598: printf(" vface = %d \n",vface);
2599: if (grid->jvisc < 3) tnode = 1;
2600: valloc = 1;
2601: if (grid->jvisc == 0)valloc = 0;*/
2603: /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2604: /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2605: PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2606: /* Now allocate memory for the other grid arrays */
2607: /* ICALLOC(nedge*2, &grid->eptr); */
2608: ICALLOC(nsface, &grid->isface);
2609: ICALLOC(nvface, &grid->ivface);
2610: ICALLOC(nfface, &grid->ifface);
2611: /* ICALLOC(nsnode, &grid->isnode);
2612: ICALLOC(nvnode, &grid->ivnode);
2613: ICALLOC(nfnode, &grid->ifnode);*/
2614: /*ICALLOC(nnodes, &grid->clist);
2615: ICALLOC(nnodes, &grid->iupdate);
2616: ICALLOC(nsface*2, &grid->sface);
2617: ICALLOC(nvface*2, &grid->vface);
2618: ICALLOC(nfface*2, &grid->fface);
2619: ICALLOC(lnodes, &grid->icount);*/
/*FCALLOC(nnodes, &grid->x);
FCALLOC(nnodes, &grid->y);
FCALLOC(nnodes, &grid->z);
FCALLOC(nnodes, &grid->area);*/
/*
* FCALLOC(nnodes*4, &grid->gradx);
* FCALLOC(nnodes*4, &grid->grady);
* FCALLOC(nnodes*4, &grid->gradz);
* FCALLOC(nnodes, &grid->cdt);
*/
/*
* FCALLOC(nnodes*4, &grid->qnode);
* FCALLOC(nnodes*4, &grid->dq);
* FCALLOC(nnodes*4, &grid->res);
* FCALLOC(jalloc*nnodes*4*4,&grid->A);
* FCALLOC(nnodes*4, &grid->B);
* FCALLOC(jalloc*nedge*4*4,&grid->dfp);
* FCALLOC(jalloc*nedge*4*4,&grid->dfm);
*/
/*FCALLOC(nsnode, &grid->sxn);
FCALLOC(nsnode, &grid->syn);
FCALLOC(nsnode, &grid->szn);
FCALLOC(nsnode, &grid->sa);
FCALLOC(nvnode, &grid->vxn);
FCALLOC(nvnode, &grid->vyn);
FCALLOC(nvnode, &grid->vzn);
FCALLOC(nvnode, &grid->va);
FCALLOC(nfnode, &grid->fxn);
FCALLOC(nfnode, &grid->fyn);
FCALLOC(nfnode, &grid->fzn);
FCALLOC(nfnode, &grid->fa);
FCALLOC(nedge, &grid->xn);
FCALLOC(nedge, &grid->yn);
FCALLOC(nedge, &grid->zn);
FCALLOC(nedge, &grid->rl);*/
line2656">2656: FCALLOC(nbface*15,&grid-&
line2657">2657: FCALLOC(nbface*15,&grid-&
line2658">2658: FCALLOC(nbface*15,&grid-&
/*
* FCALLOC(nnodes*4, &grid->phi);
* FCALLOC(nnodes, &grid->r11);
* FCALLOC(nnodes, &grid->r12);
* FCALLOC(nnodes, &grid->r13);
* FCALLOC(nnodes, &grid->r22);
* FCALLOC(nnodes, &grid->r23);
* FCALLOC(nnodes, &grid->r33);
*/
/*
* Allocate memory for viscous length scale if turbulent
*/
line2671">2671: if (grid->jvisc >
line2672">2672: FCALLOC(tnode, &grid->
line2673">2673: FCALLOC(nnodes, &grid->t
line2674">2674: FCALLOC(nnodes, &grid->
line2675">2675: FCALLOC(tnode, &grid->tu
line2676">2676: FCALLOC(nedge, &grid->
line2677">2677: FCALLOC(nedge, &grid->
line2678">2678:
/*
* Allocate memory for MG transfer
*/
/*
ICALLOC(mgzero*nsface, &grid->isford);
ICALLOC(mgzero*nvface, &grid->ivford);
ICALLOC(mgzero*nfface, &grid->ifford);
ICALLOC(mgzero*nnodes, &grid->nflag);
ICALLOC(mgzero*nnodes, &grid->nnext);
ICALLOC(mgzero*nnodes, &grid->nneigh);
ICALLOC(mgzero*ncell, &grid->ctag);
ICALLOC(mgzero*ncell, &grid->csearch);
ICALLOC(valloc*ncell*4, &grid->c2n);
ICALLOC(valloc*ncell*6, &grid->c2e);
grid->c2c = (int*)grid->dfp;
ICALLOC(ncell*4, &grid->c2c);
ICALLOC(nnodes, &grid->cenc);
if (grid->iup == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefup);
FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
}
if (grid->idown == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
}
FCALLOC(nnodes*4, &grid->ff);
FCALLOC(tnode, &grid->turbff);
*/
/*
* If using GMRES (nsrch>0) allocate memory
*/
/* NoEq = 0;
if (nsrch > 0)NoEq = 4*nnodes;
if (nsrch < 0)NoEq = nnodes;
FCALLOC(NoEq, &grid->AP);
FCALLOC(NoEq, &grid->Xgm);
FCALLOC(NoEq, &grid->temr);
FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
*/
/*
* stuff to read in dave's grids
*/
/*
ICALLOC(nnbound, &grid->ncolorn);
ICALLOC(nnbound*100,&grid->countn);
ICALLOC(nvbound, &grid->ncolorv);
ICALLOC(nvbound*100,&grid->countv);
ICALLOC(nfbound, &grid->ncolorf);
ICALLOC(nfbound*100,&grid->countf);
*/
/*ICALLOC(nnbound, &grid->nntet);
ICALLOC(nnbound, &grid->nnpts);
ICALLOC(nvbound, &grid->nvtet);
ICALLOC(nvbound, &grid->nvpts);
ICALLOC(nfbound, &grid->nftet);
ICALLOC(nfbound, &grid->nfpts);
ICALLOC(nnfacet*4,&grid->f2ntn);
ICALLOC(nvfacet*4,&grid->f2ntv);
ICALLOC(nffacet*4,&grid->f2ntf);*/
line2738">2738: return
line2739">2739
/*========================== WRITE_FINE_GRID ================================*/
/* */
/* Write memory locations and other information for the fine grid */
/* */
/*===========================================================================*/
line2747">2747: int write_fine_grid(GRID *grid)
line2748">2748
line2749">2749: FILE *
/* open file for output */
/* call the output frame.out */
line2755">2755: if (!(output = fopen("frame.out","a"))) SETERRQ(PETSC_COMM_SELF,1,"can't open frame.out"
line2756">2756: fprintf(output,"information for fine grid\n"
line2757">2757: fprintf(output,"\n"
line2758">2758: fprintf(output," address of fine grid = %p\n",(void*
line2760">2760: fprintf(output,"grid.nnodes = %d\n",grid->n
line2761">2761: fprintf(output,"grid.ncell = %d\n",grid->
line2762">2762: fprintf(output,"grid.nedge = %d\n",grid->
line2763">2763: fprintf(output,"grid.nsface = %d\n",grid->n
line2764">2764: fprintf(output,"grid.nvface = %d\n",grid->n
line2765">2765: fprintf(output,"grid.nfface = %d\n",grid->n
line2766">2766: fprintf(output,"grid.nsnode = %d\n",grid->n
line2767">2767: fprintf(output,"grid.nvnode = %d\n",grid->n
line2768">2768: fprintf(output,"grid.nfnode = %d\n",grid->n
/*
fprintf(output,"grid.eptr = %p\n",grid->eptr);
fprintf(output,"grid.isface = %p\n",grid->isface);
fprintf(output,"grid.ivface = %p\n",grid->ivface);
fprintf(output,"grid.ifface = %p\n",grid->ifface);
fprintf(output,"grid.isnode = %p\n",grid->isnode);
fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
fprintf(output,"grid.c2n = %p\n",grid->c2n);
fprintf(output,"grid.c2e = %p\n",grid->c2e);
fprintf(output,"grid.xyz = %p\n",grid->xyz);
*/
/*fprintf(output,"grid.y = %p\n",grid->xyz);
fprintf(output,"grid.z = %p\n",grid->z);*/
/*
fprintf(output,"grid.area = %p\n",grid->area);
fprintf(output,"grid.qnode = %p\n",grid->qnode);
*/
/*
fprintf(output,"grid.gradx = %p\n",grid->gradx);
fprintf(output,"grid.grady = %p\n",grid->grady);
fprintf(output,"grid.gradz = %p\n",grid->gradz);
*/
/*
fprintf(output,"grid.cdt = %p\n",grid->cdt);
fprintf(output,"grid.sxn = %p\n",grid->sxn);
fprintf(output,"grid.syn = %p\n",grid->syn);
fprintf(output,"grid.szn = %p\n",grid->szn);
fprintf(output,"grid.vxn = %p\n",grid->vxn);
fprintf(output,"grid.vyn = %p\n",grid->vyn);
fprintf(output,"grid.vzn = %p\n",grid->vzn);
fprintf(output,"grid.fxn = %p\n",grid->fxn);
fprintf(output,"grid.fyn = %p\n",grid->fyn);
fprintf(output,"grid.fzn = %p\n",grid->fzn);
fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
*/
/*fprintf(output,"grid.yn = %p\n",grid->yn);
fprintf(output,"grid.zn = %p\n",grid->zn);
fprintf(output,"grid.rl = %p\n",grid->rl);*/
/*
* close output file
*/
line2811">2811: fclose(o
line2812">2812: return
line2813">2813
line2815">2815: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
line2816">2816: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte)
line2817">2817
line2818">2818: int ncolore = *nc
line2819">2819: int iedg = 0,ib = 0,ie = nedge,ta
line2820">2820: int i
line2821">2821: in
line2822">2822: ICALLOC(nnodes,&am
line2823">2823: while (ib <
line2824">2824: for (i = 0; i < nnodes; i++) tag[
line2825">2825: counte[ncolor
line2826">2826: for (i = ib; i < ie;
line2827">2827: n1 =
line2828">2828: n2 = e2n[i+
line2829">2829: tagcount = tag[n1]+t
/* If tagcount = 0 then this edge belongs in this color */
line2831">2831: if (!tagc
line2832">2832: tag[n1]
line2833">2833: tag[n2]
line2834">2834: e2n[i] = e2n
line2835">2835: e2n[i+nedge] = e2n[iedg+
line2836">2836: e2n[iedg]
line2837">2837: e2n[iedg+nedge
line2838">2838: n1 = ep
line2839">2839: eperm[i] = eperm
line2840">2840: eperm[iedg]
line2841">2841:
line2842">2842: counte[ncolor
line2843">2843:
line2844">2844:
line2845">2845: ib
line2846">2846: nco
line2847">2847:
line2848">2848: *ncle = n
line2849">2849: return
line2850">2850
line2851">2851: #endif
line2852">2852: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
line2853">2853: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters)
line2854">2854
line2856">2856: if ((*gen_start = start_counters(event0,event1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n"
line2857">2857: PetscTime(&time_start_cou
line2858">2858: return
line2859">2859
line2861">2861: int EventCountersEnd(int gen_start,PetscScalar time_start_counters)
line2862">2862
line2863">2863: int gen_rea
line2864">2864: PetscScalar time_read_co
line2865">2865: long long _counter0,_co
line2867">2867: if ((gen_read = read_counters(event0,&_counter0,event1,&_counter1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n"
line2868">2868: PetscTime(&&time_read_cou
line2869">2869: if (gen_read != gen_start) SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n"
line2870">2870: counter0 += _co
line2871">2871: counter1 += _co
line2872">2872: time_counters += time_read_counters-time_start_co
line2873">2873: return
line2874">2874
line2875">2875: #endif