Actual source code: aijfact.c
petsc-3.12.3 2020-01-03
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscbt.h>
5: #include <../src/mat/utils/freespace.h>
7: /*
8: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
10: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11: */
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
15: PetscErrorCode ierr;
16: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17: const PetscInt *ai = a->i, *aj = a->j;
18: const PetscScalar *aa = a->a;
19: PetscBool *done;
20: PetscReal best,past = 0,future;
23: /* pick initial row */
24: best = -1;
25: for (i=0; i<n; i++) {
26: future = 0.0;
27: for (j=ai[i]; j<ai[i+1]; j++) {
28: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29: else past = PetscAbsScalar(aa[j]);
30: }
31: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32: if (past/future > best) {
33: best = past/future;
34: current = i;
35: }
36: }
38: PetscMalloc1(n,&done);
39: PetscArrayzero(done,n);
40: PetscMalloc1(n,&order);
41: order[0] = current;
42: for (i=0; i<n-1; i++) {
43: done[current] = PETSC_TRUE;
44: best = -1;
45: /* loop over all neighbors of current pivot */
46: for (j=ai[current]; j<ai[current+1]; j++) {
47: jj = aj[j];
48: if (done[jj]) continue;
49: /* loop over columns of potential next row computing weights for below and above diagonal */
50: past = future = 0.0;
51: for (k=ai[jj]; k<ai[jj+1]; k++) {
52: kk = aj[k];
53: if (done[kk]) past += PetscAbsScalar(aa[k]);
54: else if (kk != jj) future += PetscAbsScalar(aa[k]);
55: }
56: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57: if (past/future > best) {
58: best = past/future;
59: newcurrent = jj;
60: }
61: }
62: if (best == -1) { /* no neighbors to select from so select best of all that remain */
63: best = -1;
64: for (k=0; k<n; k++) {
65: if (done[k]) continue;
66: future = 0.0;
67: past = 0.0;
68: for (j=ai[k]; j<ai[k+1]; j++) {
69: kk = aj[j];
70: if (done[kk]) past += PetscAbsScalar(aa[j]);
71: else if (kk != k) future += PetscAbsScalar(aa[j]);
72: }
73: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74: if (past/future > best) {
75: best = past/future;
76: newcurrent = k;
77: }
78: }
79: }
80: if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81: current = newcurrent;
82: order[i+1] = current;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);
85: *icol = *irow;
86: PetscObjectReference((PetscObject)*irow);
87: PetscFree(done);
88: PetscFree(order);
89: return(0);
90: }
92: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
93: {
94: PetscInt n = A->rmap->n;
98: #if defined(PETSC_USE_COMPLEX)
99: if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported");
100: #endif
101: MatCreate(PetscObjectComm((PetscObject)A),B);
102: MatSetSizes(*B,n,n,n,n);
103: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
104: MatSetType(*B,MATSEQAIJ);
106: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
107: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
109: MatSetBlockSizesFromMats(*B,A,A);
110: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
111: MatSetType(*B,MATSEQSBAIJ);
112: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);
114: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
115: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
116: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
117: (*B)->factortype = ftype;
119: PetscFree((*B)->solvertype);
120: PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);
121: return(0);
122: }
124: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
125: {
126: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
127: IS isicol;
128: PetscErrorCode ierr;
129: const PetscInt *r,*ic;
130: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
131: PetscInt *bi,*bj,*ajtmp;
132: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
133: PetscReal f;
134: PetscInt nlnk,*lnk,k,**bi_ptr;
135: PetscFreeSpaceList free_space=NULL,current_space=NULL;
136: PetscBT lnkbt;
137: PetscBool missing;
140: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
141: MatMissingDiagonal(A,&missing,&i);
142: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
144: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
145: ISGetIndices(isrow,&r);
146: ISGetIndices(isicol,&ic);
148: /* get new row pointers */
149: PetscMalloc1(n+1,&bi);
150: bi[0] = 0;
152: /* bdiag is location of diagonal in factor */
153: PetscMalloc1(n+1,&bdiag);
154: bdiag[0] = 0;
156: /* linked list for storing column indices of the active row */
157: nlnk = n + 1;
158: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
160: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
162: /* initial FreeSpace size is f*(ai[n]+1) */
163: f = info->fill;
164: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
165: current_space = free_space;
167: for (i=0; i<n; i++) {
168: /* copy previous fill into linked list */
169: nzi = 0;
170: nnz = ai[r[i]+1] - ai[r[i]];
171: ajtmp = aj + ai[r[i]];
172: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
173: nzi += nlnk;
175: /* add pivot rows into linked list */
176: row = lnk[n];
177: while (row < i) {
178: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
179: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
180: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
181: nzi += nlnk;
182: row = lnk[row];
183: }
184: bi[i+1] = bi[i] + nzi;
185: im[i] = nzi;
187: /* mark bdiag */
188: nzbd = 0;
189: nnz = nzi;
190: k = lnk[n];
191: while (nnz-- && k < i) {
192: nzbd++;
193: k = lnk[k];
194: }
195: bdiag[i] = bi[i] + nzbd;
197: /* if free space is not available, make more free space */
198: if (current_space->local_remaining<nzi) {
199: nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
200: PetscFreeSpaceGet(nnz,¤t_space);
201: reallocs++;
202: }
204: /* copy data into free space, then initialize lnk */
205: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
207: bi_ptr[i] = current_space->array;
208: current_space->array += nzi;
209: current_space->local_used += nzi;
210: current_space->local_remaining -= nzi;
211: }
212: #if defined(PETSC_USE_INFO)
213: if (ai[n] != 0) {
214: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
215: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
216: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
217: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
218: PetscInfo(A,"for best performance.\n");
219: } else {
220: PetscInfo(A,"Empty matrix\n");
221: }
222: #endif
224: ISRestoreIndices(isrow,&r);
225: ISRestoreIndices(isicol,&ic);
227: /* destroy list of free space and other temporary array(s) */
228: PetscMalloc1(bi[n]+1,&bj);
229: PetscFreeSpaceContiguous(&free_space,bj);
230: PetscLLDestroy(lnk,lnkbt);
231: PetscFree2(bi_ptr,im);
233: /* put together the new matrix */
234: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
235: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
236: b = (Mat_SeqAIJ*)(B)->data;
238: b->free_a = PETSC_TRUE;
239: b->free_ij = PETSC_TRUE;
240: b->singlemalloc = PETSC_FALSE;
242: PetscMalloc1(bi[n]+1,&b->a);
243: b->j = bj;
244: b->i = bi;
245: b->diag = bdiag;
246: b->ilen = 0;
247: b->imax = 0;
248: b->row = isrow;
249: b->col = iscol;
250: PetscObjectReference((PetscObject)isrow);
251: PetscObjectReference((PetscObject)iscol);
252: b->icol = isicol;
253: PetscMalloc1(n+1,&b->solve_work);
255: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
256: PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
257: b->maxnz = b->nz = bi[n];
259: (B)->factortype = MAT_FACTOR_LU;
260: (B)->info.factor_mallocs = reallocs;
261: (B)->info.fill_ratio_given = f;
263: if (ai[n]) {
264: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
265: } else {
266: (B)->info.fill_ratio_needed = 0.0;
267: }
268: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
269: if (a->inode.size) {
270: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
271: }
272: return(0);
273: }
275: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
276: {
277: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
278: IS isicol;
279: PetscErrorCode ierr;
280: const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
281: PetscInt i,n=A->rmap->n;
282: PetscInt *bi,*bj;
283: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
284: PetscReal f;
285: PetscInt nlnk,*lnk,k,**bi_ptr;
286: PetscFreeSpaceList free_space=NULL,current_space=NULL;
287: PetscBT lnkbt;
288: PetscBool missing;
291: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
292: MatMissingDiagonal(A,&missing,&i);
293: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
294:
295: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
296: ISGetIndices(isrow,&r);
297: ISGetIndices(isicol,&ic);
299: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
300: PetscMalloc1(n+1,&bi);
301: PetscMalloc1(n+1,&bdiag);
302: bi[0] = bdiag[0] = 0;
304: /* linked list for storing column indices of the active row */
305: nlnk = n + 1;
306: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
308: PetscMalloc2(n+1,&bi_ptr,n+1,&im);
310: /* initial FreeSpace size is f*(ai[n]+1) */
311: f = info->fill;
312: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
313: current_space = free_space;
315: for (i=0; i<n; i++) {
316: /* copy previous fill into linked list */
317: nzi = 0;
318: nnz = ai[r[i]+1] - ai[r[i]];
319: ajtmp = aj + ai[r[i]];
320: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
321: nzi += nlnk;
323: /* add pivot rows into linked list */
324: row = lnk[n];
325: while (row < i) {
326: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
327: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
328: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
329: nzi += nlnk;
330: row = lnk[row];
331: }
332: bi[i+1] = bi[i] + nzi;
333: im[i] = nzi;
335: /* mark bdiag */
336: nzbd = 0;
337: nnz = nzi;
338: k = lnk[n];
339: while (nnz-- && k < i) {
340: nzbd++;
341: k = lnk[k];
342: }
343: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
345: /* if free space is not available, make more free space */
346: if (current_space->local_remaining<nzi) {
347: /* estimated additional space needed */
348: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
349: PetscFreeSpaceGet(nnz,¤t_space);
350: reallocs++;
351: }
353: /* copy data into free space, then initialize lnk */
354: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
356: bi_ptr[i] = current_space->array;
357: current_space->array += nzi;
358: current_space->local_used += nzi;
359: current_space->local_remaining -= nzi;
360: }
362: ISRestoreIndices(isrow,&r);
363: ISRestoreIndices(isicol,&ic);
365: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
366: PetscMalloc1(bi[n]+1,&bj);
367: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
368: PetscLLDestroy(lnk,lnkbt);
369: PetscFree2(bi_ptr,im);
371: /* put together the new matrix */
372: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
373: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
374: b = (Mat_SeqAIJ*)(B)->data;
376: b->free_a = PETSC_TRUE;
377: b->free_ij = PETSC_TRUE;
378: b->singlemalloc = PETSC_FALSE;
380: PetscMalloc1(bdiag[0]+1,&b->a);
382: b->j = bj;
383: b->i = bi;
384: b->diag = bdiag;
385: b->ilen = 0;
386: b->imax = 0;
387: b->row = isrow;
388: b->col = iscol;
389: PetscObjectReference((PetscObject)isrow);
390: PetscObjectReference((PetscObject)iscol);
391: b->icol = isicol;
392: PetscMalloc1(n+1,&b->solve_work);
394: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
395: PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
396: b->maxnz = b->nz = bdiag[0]+1;
398: B->factortype = MAT_FACTOR_LU;
399: B->info.factor_mallocs = reallocs;
400: B->info.fill_ratio_given = f;
402: if (ai[n]) {
403: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
404: } else {
405: B->info.fill_ratio_needed = 0.0;
406: }
407: #if defined(PETSC_USE_INFO)
408: if (ai[n] != 0) {
409: PetscReal af = B->info.fill_ratio_needed;
410: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
411: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
412: PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
413: PetscInfo(A,"for best performance.\n");
414: } else {
415: PetscInfo(A,"Empty matrix\n");
416: }
417: #endif
418: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
419: if (a->inode.size) {
420: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
421: }
422: MatSeqAIJCheckInode_FactorLU(B);
423: return(0);
424: }
426: /*
427: Trouble in factorization, should we dump the original matrix?
428: */
429: PetscErrorCode MatFactorDumpMatrix(Mat A)
430: {
432: PetscBool flg = PETSC_FALSE;
435: PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);
436: if (flg) {
437: PetscViewer viewer;
438: char filename[PETSC_MAX_PATH_LEN];
440: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
441: PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);
442: MatView(A,viewer);
443: PetscViewerDestroy(&viewer);
444: }
445: return(0);
446: }
448: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
449: {
450: Mat C =B;
451: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
452: IS isrow = b->row,isicol = b->icol;
453: PetscErrorCode ierr;
454: const PetscInt *r,*ic,*ics;
455: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
456: PetscInt i,j,k,nz,nzL,row,*pj;
457: const PetscInt *ajtmp,*bjtmp;
458: MatScalar *rtmp,*pc,multiplier,*pv;
459: const MatScalar *aa=a->a,*v;
460: PetscBool row_identity,col_identity;
461: FactorShiftCtx sctx;
462: const PetscInt *ddiag;
463: PetscReal rs;
464: MatScalar d;
467: /* MatPivotSetUp(): initialize shift context sctx */
468: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
470: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
471: ddiag = a->diag;
472: sctx.shift_top = info->zeropivot;
473: for (i=0; i<n; i++) {
474: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
475: d = (aa)[ddiag[i]];
476: rs = -PetscAbsScalar(d) - PetscRealPart(d);
477: v = aa+ai[i];
478: nz = ai[i+1] - ai[i];
479: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
480: if (rs>sctx.shift_top) sctx.shift_top = rs;
481: }
482: sctx.shift_top *= 1.1;
483: sctx.nshift_max = 5;
484: sctx.shift_lo = 0.;
485: sctx.shift_hi = 1.;
486: }
488: ISGetIndices(isrow,&r);
489: ISGetIndices(isicol,&ic);
490: PetscMalloc1(n+1,&rtmp);
491: ics = ic;
493: do {
494: sctx.newshift = PETSC_FALSE;
495: for (i=0; i<n; i++) {
496: /* zero rtmp */
497: /* L part */
498: nz = bi[i+1] - bi[i];
499: bjtmp = bj + bi[i];
500: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
502: /* U part */
503: nz = bdiag[i]-bdiag[i+1];
504: bjtmp = bj + bdiag[i+1]+1;
505: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
507: /* load in initial (unfactored row) */
508: nz = ai[r[i]+1] - ai[r[i]];
509: ajtmp = aj + ai[r[i]];
510: v = aa + ai[r[i]];
511: for (j=0; j<nz; j++) {
512: rtmp[ics[ajtmp[j]]] = v[j];
513: }
514: /* ZeropivotApply() */
515: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
517: /* elimination */
518: bjtmp = bj + bi[i];
519: row = *bjtmp++;
520: nzL = bi[i+1] - bi[i];
521: for (k=0; k < nzL; k++) {
522: pc = rtmp + row;
523: if (*pc != 0.0) {
524: pv = b->a + bdiag[row];
525: multiplier = *pc * (*pv);
526: *pc = multiplier;
528: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
529: pv = b->a + bdiag[row+1]+1;
530: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
532: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
533: PetscLogFlops(1+2*nz);
534: }
535: row = *bjtmp++;
536: }
538: /* finished row so stick it into b->a */
539: rs = 0.0;
540: /* L part */
541: pv = b->a + bi[i];
542: pj = b->j + bi[i];
543: nz = bi[i+1] - bi[i];
544: for (j=0; j<nz; j++) {
545: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
546: }
548: /* U part */
549: pv = b->a + bdiag[i+1]+1;
550: pj = b->j + bdiag[i+1]+1;
551: nz = bdiag[i] - bdiag[i+1]-1;
552: for (j=0; j<nz; j++) {
553: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
554: }
556: sctx.rs = rs;
557: sctx.pv = rtmp[i];
558: MatPivotCheck(B,A,info,&sctx,i);
559: if (sctx.newshift) break; /* break for-loop */
560: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
562: /* Mark diagonal and invert diagonal for simplier triangular solves */
563: pv = b->a + bdiag[i];
564: *pv = 1.0/rtmp[i];
566: } /* endof for (i=0; i<n; i++) { */
568: /* MatPivotRefine() */
569: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
570: /*
571: * if no shift in this attempt & shifting & started shifting & can refine,
572: * then try lower shift
573: */
574: sctx.shift_hi = sctx.shift_fraction;
575: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
576: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
577: sctx.newshift = PETSC_TRUE;
578: sctx.nshift++;
579: }
580: } while (sctx.newshift);
582: PetscFree(rtmp);
583: ISRestoreIndices(isicol,&ic);
584: ISRestoreIndices(isrow,&r);
586: ISIdentity(isrow,&row_identity);
587: ISIdentity(isicol,&col_identity);
588: if (b->inode.size) {
589: C->ops->solve = MatSolve_SeqAIJ_Inode;
590: } else if (row_identity && col_identity) {
591: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
592: } else {
593: C->ops->solve = MatSolve_SeqAIJ;
594: }
595: C->ops->solveadd = MatSolveAdd_SeqAIJ;
596: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
597: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
598: C->ops->matsolve = MatMatSolve_SeqAIJ;
599: C->assembled = PETSC_TRUE;
600: C->preallocated = PETSC_TRUE;
602: PetscLogFlops(C->cmap->n);
604: /* MatShiftView(A,info,&sctx) */
605: if (sctx.nshift) {
606: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
607: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
608: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
609: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
610: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
611: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
612: }
613: }
614: return(0);
615: }
617: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
618: {
619: Mat C =B;
620: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
621: IS isrow = b->row,isicol = b->icol;
622: PetscErrorCode ierr;
623: const PetscInt *r,*ic,*ics;
624: PetscInt nz,row,i,j,n=A->rmap->n,diag;
625: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
626: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
627: MatScalar *pv,*rtmp,*pc,multiplier,d;
628: const MatScalar *v,*aa=a->a;
629: PetscReal rs=0.0;
630: FactorShiftCtx sctx;
631: const PetscInt *ddiag;
632: PetscBool row_identity, col_identity;
635: /* MatPivotSetUp(): initialize shift context sctx */
636: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
638: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
639: ddiag = a->diag;
640: sctx.shift_top = info->zeropivot;
641: for (i=0; i<n; i++) {
642: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
643: d = (aa)[ddiag[i]];
644: rs = -PetscAbsScalar(d) - PetscRealPart(d);
645: v = aa+ai[i];
646: nz = ai[i+1] - ai[i];
647: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
648: if (rs>sctx.shift_top) sctx.shift_top = rs;
649: }
650: sctx.shift_top *= 1.1;
651: sctx.nshift_max = 5;
652: sctx.shift_lo = 0.;
653: sctx.shift_hi = 1.;
654: }
656: ISGetIndices(isrow,&r);
657: ISGetIndices(isicol,&ic);
658: PetscMalloc1(n+1,&rtmp);
659: ics = ic;
661: do {
662: sctx.newshift = PETSC_FALSE;
663: for (i=0; i<n; i++) {
664: nz = bi[i+1] - bi[i];
665: bjtmp = bj + bi[i];
666: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
668: /* load in initial (unfactored row) */
669: nz = ai[r[i]+1] - ai[r[i]];
670: ajtmp = aj + ai[r[i]];
671: v = aa + ai[r[i]];
672: for (j=0; j<nz; j++) {
673: rtmp[ics[ajtmp[j]]] = v[j];
674: }
675: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
677: row = *bjtmp++;
678: while (row < i) {
679: pc = rtmp + row;
680: if (*pc != 0.0) {
681: pv = b->a + diag_offset[row];
682: pj = b->j + diag_offset[row] + 1;
683: multiplier = *pc / *pv++;
684: *pc = multiplier;
685: nz = bi[row+1] - diag_offset[row] - 1;
686: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
687: PetscLogFlops(1+2*nz);
688: }
689: row = *bjtmp++;
690: }
691: /* finished row so stick it into b->a */
692: pv = b->a + bi[i];
693: pj = b->j + bi[i];
694: nz = bi[i+1] - bi[i];
695: diag = diag_offset[i] - bi[i];
696: rs = 0.0;
697: for (j=0; j<nz; j++) {
698: pv[j] = rtmp[pj[j]];
699: rs += PetscAbsScalar(pv[j]);
700: }
701: rs -= PetscAbsScalar(pv[diag]);
703: sctx.rs = rs;
704: sctx.pv = pv[diag];
705: MatPivotCheck(B,A,info,&sctx,i);
706: if (sctx.newshift) break;
707: pv[diag] = sctx.pv;
708: }
710: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
711: /*
712: * if no shift in this attempt & shifting & started shifting & can refine,
713: * then try lower shift
714: */
715: sctx.shift_hi = sctx.shift_fraction;
716: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
717: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
718: sctx.newshift = PETSC_TRUE;
719: sctx.nshift++;
720: }
721: } while (sctx.newshift);
723: /* invert diagonal entries for simplier triangular solves */
724: for (i=0; i<n; i++) {
725: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
726: }
727: PetscFree(rtmp);
728: ISRestoreIndices(isicol,&ic);
729: ISRestoreIndices(isrow,&r);
731: ISIdentity(isrow,&row_identity);
732: ISIdentity(isicol,&col_identity);
733: if (row_identity && col_identity) {
734: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
735: } else {
736: C->ops->solve = MatSolve_SeqAIJ_inplace;
737: }
738: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
739: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
740: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
741: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
743: C->assembled = PETSC_TRUE;
744: C->preallocated = PETSC_TRUE;
746: PetscLogFlops(C->cmap->n);
747: if (sctx.nshift) {
748: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
749: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
750: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
751: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
752: }
753: }
754: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
755: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
757: MatSeqAIJCheckInode(C);
758: return(0);
759: }
761: /*
762: This routine implements inplace ILU(0) with row or/and column permutations.
763: Input:
764: A - original matrix
765: Output;
766: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
767: a->j (col index) is permuted by the inverse of colperm, then sorted
768: a->a reordered accordingly with a->j
769: a->diag (ptr to diagonal elements) is updated.
770: */
771: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
772: {
773: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
774: IS isrow = a->row,isicol = a->icol;
775: PetscErrorCode ierr;
776: const PetscInt *r,*ic,*ics;
777: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
778: PetscInt *ajtmp,nz,row;
779: PetscInt *diag = a->diag,nbdiag,*pj;
780: PetscScalar *rtmp,*pc,multiplier,d;
781: MatScalar *pv,*v;
782: PetscReal rs;
783: FactorShiftCtx sctx;
784: const MatScalar *aa=a->a,*vtmp;
787: if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
789: /* MatPivotSetUp(): initialize shift context sctx */
790: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
792: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
793: const PetscInt *ddiag = a->diag;
794: sctx.shift_top = info->zeropivot;
795: for (i=0; i<n; i++) {
796: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
797: d = (aa)[ddiag[i]];
798: rs = -PetscAbsScalar(d) - PetscRealPart(d);
799: vtmp = aa+ai[i];
800: nz = ai[i+1] - ai[i];
801: for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
802: if (rs>sctx.shift_top) sctx.shift_top = rs;
803: }
804: sctx.shift_top *= 1.1;
805: sctx.nshift_max = 5;
806: sctx.shift_lo = 0.;
807: sctx.shift_hi = 1.;
808: }
810: ISGetIndices(isrow,&r);
811: ISGetIndices(isicol,&ic);
812: PetscMalloc1(n+1,&rtmp);
813: PetscArrayzero(rtmp,n+1);
814: ics = ic;
816: #if defined(MV)
817: sctx.shift_top = 0.;
818: sctx.nshift_max = 0;
819: sctx.shift_lo = 0.;
820: sctx.shift_hi = 0.;
821: sctx.shift_fraction = 0.;
823: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
824: sctx.shift_top = 0.;
825: for (i=0; i<n; i++) {
826: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
827: d = (a->a)[diag[i]];
828: rs = -PetscAbsScalar(d) - PetscRealPart(d);
829: v = a->a+ai[i];
830: nz = ai[i+1] - ai[i];
831: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
832: if (rs>sctx.shift_top) sctx.shift_top = rs;
833: }
834: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
835: sctx.shift_top *= 1.1;
836: sctx.nshift_max = 5;
837: sctx.shift_lo = 0.;
838: sctx.shift_hi = 1.;
839: }
841: sctx.shift_amount = 0.;
842: sctx.nshift = 0;
843: #endif
845: do {
846: sctx.newshift = PETSC_FALSE;
847: for (i=0; i<n; i++) {
848: /* load in initial unfactored row */
849: nz = ai[r[i]+1] - ai[r[i]];
850: ajtmp = aj + ai[r[i]];
851: v = a->a + ai[r[i]];
852: /* sort permuted ajtmp and values v accordingly */
853: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
854: PetscSortIntWithScalarArray(nz,ajtmp,v);
856: diag[r[i]] = ai[r[i]];
857: for (j=0; j<nz; j++) {
858: rtmp[ajtmp[j]] = v[j];
859: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
860: }
861: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
863: row = *ajtmp++;
864: while (row < i) {
865: pc = rtmp + row;
866: if (*pc != 0.0) {
867: pv = a->a + diag[r[row]];
868: pj = aj + diag[r[row]] + 1;
870: multiplier = *pc / *pv++;
871: *pc = multiplier;
872: nz = ai[r[row]+1] - diag[r[row]] - 1;
873: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
874: PetscLogFlops(1+2*nz);
875: }
876: row = *ajtmp++;
877: }
878: /* finished row so overwrite it onto a->a */
879: pv = a->a + ai[r[i]];
880: pj = aj + ai[r[i]];
881: nz = ai[r[i]+1] - ai[r[i]];
882: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
884: rs = 0.0;
885: for (j=0; j<nz; j++) {
886: pv[j] = rtmp[pj[j]];
887: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
888: }
890: sctx.rs = rs;
891: sctx.pv = pv[nbdiag];
892: MatPivotCheck(B,A,info,&sctx,i);
893: if (sctx.newshift) break;
894: pv[nbdiag] = sctx.pv;
895: }
897: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
898: /*
899: * if no shift in this attempt & shifting & started shifting & can refine,
900: * then try lower shift
901: */
902: sctx.shift_hi = sctx.shift_fraction;
903: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
904: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
905: sctx.newshift = PETSC_TRUE;
906: sctx.nshift++;
907: }
908: } while (sctx.newshift);
910: /* invert diagonal entries for simplier triangular solves */
911: for (i=0; i<n; i++) {
912: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
913: }
915: PetscFree(rtmp);
916: ISRestoreIndices(isicol,&ic);
917: ISRestoreIndices(isrow,&r);
919: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
920: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
921: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
922: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
924: A->assembled = PETSC_TRUE;
925: A->preallocated = PETSC_TRUE;
927: PetscLogFlops(A->cmap->n);
928: if (sctx.nshift) {
929: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
930: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
931: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
932: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
933: }
934: }
935: return(0);
936: }
938: /* ----------------------------------------------------------- */
939: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
940: {
942: Mat C;
945: MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);
946: MatLUFactorSymbolic(C,A,row,col,info);
947: MatLUFactorNumeric(C,A,info);
949: A->ops->solve = C->ops->solve;
950: A->ops->solvetranspose = C->ops->solvetranspose;
952: MatHeaderMerge(A,&C);
953: PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);
954: return(0);
955: }
956: /* ----------------------------------------------------------- */
959: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
960: {
961: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
962: IS iscol = a->col,isrow = a->row;
963: PetscErrorCode ierr;
964: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
965: PetscInt nz;
966: const PetscInt *rout,*cout,*r,*c;
967: PetscScalar *x,*tmp,*tmps,sum;
968: const PetscScalar *b;
969: const MatScalar *aa = a->a,*v;
972: if (!n) return(0);
974: VecGetArrayRead(bb,&b);
975: VecGetArrayWrite(xx,&x);
976: tmp = a->solve_work;
978: ISGetIndices(isrow,&rout); r = rout;
979: ISGetIndices(iscol,&cout); c = cout + (n-1);
981: /* forward solve the lower triangular */
982: tmp[0] = b[*r++];
983: tmps = tmp;
984: for (i=1; i<n; i++) {
985: v = aa + ai[i];
986: vi = aj + ai[i];
987: nz = a->diag[i] - ai[i];
988: sum = b[*r++];
989: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
990: tmp[i] = sum;
991: }
993: /* backward solve the upper triangular */
994: for (i=n-1; i>=0; i--) {
995: v = aa + a->diag[i] + 1;
996: vi = aj + a->diag[i] + 1;
997: nz = ai[i+1] - a->diag[i] - 1;
998: sum = tmp[i];
999: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1000: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1001: }
1003: ISRestoreIndices(isrow,&rout);
1004: ISRestoreIndices(iscol,&cout);
1005: VecRestoreArrayRead(bb,&b);
1006: VecRestoreArrayWrite(xx,&x);
1007: PetscLogFlops(2.0*a->nz - A->cmap->n);
1008: return(0);
1009: }
1011: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1012: {
1013: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1014: IS iscol = a->col,isrow = a->row;
1015: PetscErrorCode ierr;
1016: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1017: PetscInt nz,neq;
1018: const PetscInt *rout,*cout,*r,*c;
1019: PetscScalar *x,*tmp,*tmps,sum;
1020: const PetscScalar *aa = a->a,*v;
1021: const PetscScalar *b;
1022: PetscBool bisdense,xisdense;
1025: if (!n) return(0);
1027: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1028: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1029: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1030: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1032: MatDenseGetArrayRead(B,&b);
1033: MatDenseGetArray(X,&x);
1035: tmp = a->solve_work;
1036: ISGetIndices(isrow,&rout); r = rout;
1037: ISGetIndices(iscol,&cout); c = cout;
1039: for (neq=0; neq<B->cmap->n; neq++) {
1040: /* forward solve the lower triangular */
1041: tmp[0] = b[r[0]];
1042: tmps = tmp;
1043: for (i=1; i<n; i++) {
1044: v = aa + ai[i];
1045: vi = aj + ai[i];
1046: nz = a->diag[i] - ai[i];
1047: sum = b[r[i]];
1048: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1049: tmp[i] = sum;
1050: }
1051: /* backward solve the upper triangular */
1052: for (i=n-1; i>=0; i--) {
1053: v = aa + a->diag[i] + 1;
1054: vi = aj + a->diag[i] + 1;
1055: nz = ai[i+1] - a->diag[i] - 1;
1056: sum = tmp[i];
1057: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1059: }
1061: b += n;
1062: x += n;
1063: }
1064: ISRestoreIndices(isrow,&rout);
1065: ISRestoreIndices(iscol,&cout);
1066: MatDenseRestoreArrayRead(B,&b);
1067: MatDenseRestoreArray(X,&x);
1068: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1069: return(0);
1070: }
1072: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1073: {
1074: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1075: IS iscol = a->col,isrow = a->row;
1076: PetscErrorCode ierr;
1077: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1078: PetscInt nz,neq;
1079: const PetscInt *rout,*cout,*r,*c;
1080: PetscScalar *x,*tmp,sum;
1081: const PetscScalar *b;
1082: const PetscScalar *aa = a->a,*v;
1083: PetscBool bisdense,xisdense;
1086: if (!n) return(0);
1088: PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1089: if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1090: PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1091: if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1093: MatDenseGetArrayRead(B,&b);
1094: MatDenseGetArray(X,&x);
1096: tmp = a->solve_work;
1097: ISGetIndices(isrow,&rout); r = rout;
1098: ISGetIndices(iscol,&cout); c = cout;
1100: for (neq=0; neq<B->cmap->n; neq++) {
1101: /* forward solve the lower triangular */
1102: tmp[0] = b[r[0]];
1103: v = aa;
1104: vi = aj;
1105: for (i=1; i<n; i++) {
1106: nz = ai[i+1] - ai[i];
1107: sum = b[r[i]];
1108: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1109: tmp[i] = sum;
1110: v += nz; vi += nz;
1111: }
1113: /* backward solve the upper triangular */
1114: for (i=n-1; i>=0; i--) {
1115: v = aa + adiag[i+1]+1;
1116: vi = aj + adiag[i+1]+1;
1117: nz = adiag[i]-adiag[i+1]-1;
1118: sum = tmp[i];
1119: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1120: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1121: }
1123: b += n;
1124: x += n;
1125: }
1126: ISRestoreIndices(isrow,&rout);
1127: ISRestoreIndices(iscol,&cout);
1128: MatDenseRestoreArrayRead(B,&b);
1129: MatDenseRestoreArray(X,&x);
1130: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1131: return(0);
1132: }
1134: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1135: {
1136: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1137: IS iscol = a->col,isrow = a->row;
1138: PetscErrorCode ierr;
1139: const PetscInt *r,*c,*rout,*cout;
1140: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1141: PetscInt nz,row;
1142: PetscScalar *x,*tmp,*tmps,sum;
1143: const PetscScalar *b;
1144: const MatScalar *aa = a->a,*v;
1147: if (!n) return(0);
1149: VecGetArrayRead(bb,&b);
1150: VecGetArrayWrite(xx,&x);
1151: tmp = a->solve_work;
1153: ISGetIndices(isrow,&rout); r = rout;
1154: ISGetIndices(iscol,&cout); c = cout + (n-1);
1156: /* forward solve the lower triangular */
1157: tmp[0] = b[*r++];
1158: tmps = tmp;
1159: for (row=1; row<n; row++) {
1160: i = rout[row]; /* permuted row */
1161: v = aa + ai[i];
1162: vi = aj + ai[i];
1163: nz = a->diag[i] - ai[i];
1164: sum = b[*r++];
1165: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1166: tmp[row] = sum;
1167: }
1169: /* backward solve the upper triangular */
1170: for (row=n-1; row>=0; row--) {
1171: i = rout[row]; /* permuted row */
1172: v = aa + a->diag[i] + 1;
1173: vi = aj + a->diag[i] + 1;
1174: nz = ai[i+1] - a->diag[i] - 1;
1175: sum = tmp[row];
1176: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1177: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1178: }
1180: ISRestoreIndices(isrow,&rout);
1181: ISRestoreIndices(iscol,&cout);
1182: VecRestoreArrayRead(bb,&b);
1183: VecRestoreArrayWrite(xx,&x);
1184: PetscLogFlops(2.0*a->nz - A->cmap->n);
1185: return(0);
1186: }
1188: /* ----------------------------------------------------------- */
1189: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1190: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1191: {
1192: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1193: PetscErrorCode ierr;
1194: PetscInt n = A->rmap->n;
1195: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1196: PetscScalar *x;
1197: const PetscScalar *b;
1198: const MatScalar *aa = a->a;
1199: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1200: PetscInt adiag_i,i,nz,ai_i;
1201: const PetscInt *vi;
1202: const MatScalar *v;
1203: PetscScalar sum;
1204: #endif
1207: if (!n) return(0);
1209: VecGetArrayRead(bb,&b);
1210: VecGetArrayWrite(xx,&x);
1212: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1213: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1214: #else
1215: /* forward solve the lower triangular */
1216: x[0] = b[0];
1217: for (i=1; i<n; i++) {
1218: ai_i = ai[i];
1219: v = aa + ai_i;
1220: vi = aj + ai_i;
1221: nz = adiag[i] - ai_i;
1222: sum = b[i];
1223: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1224: x[i] = sum;
1225: }
1227: /* backward solve the upper triangular */
1228: for (i=n-1; i>=0; i--) {
1229: adiag_i = adiag[i];
1230: v = aa + adiag_i + 1;
1231: vi = aj + adiag_i + 1;
1232: nz = ai[i+1] - adiag_i - 1;
1233: sum = x[i];
1234: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1235: x[i] = sum*aa[adiag_i];
1236: }
1237: #endif
1238: PetscLogFlops(2.0*a->nz - A->cmap->n);
1239: VecRestoreArrayRead(bb,&b);
1240: VecRestoreArrayWrite(xx,&x);
1241: return(0);
1242: }
1244: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1245: {
1246: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1247: IS iscol = a->col,isrow = a->row;
1248: PetscErrorCode ierr;
1249: PetscInt i, n = A->rmap->n,j;
1250: PetscInt nz;
1251: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1252: PetscScalar *x,*tmp,sum;
1253: const PetscScalar *b;
1254: const MatScalar *aa = a->a,*v;
1257: if (yy != xx) {VecCopy(yy,xx);}
1259: VecGetArrayRead(bb,&b);
1260: VecGetArray(xx,&x);
1261: tmp = a->solve_work;
1263: ISGetIndices(isrow,&rout); r = rout;
1264: ISGetIndices(iscol,&cout); c = cout + (n-1);
1266: /* forward solve the lower triangular */
1267: tmp[0] = b[*r++];
1268: for (i=1; i<n; i++) {
1269: v = aa + ai[i];
1270: vi = aj + ai[i];
1271: nz = a->diag[i] - ai[i];
1272: sum = b[*r++];
1273: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1274: tmp[i] = sum;
1275: }
1277: /* backward solve the upper triangular */
1278: for (i=n-1; i>=0; i--) {
1279: v = aa + a->diag[i] + 1;
1280: vi = aj + a->diag[i] + 1;
1281: nz = ai[i+1] - a->diag[i] - 1;
1282: sum = tmp[i];
1283: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1284: tmp[i] = sum*aa[a->diag[i]];
1285: x[*c--] += tmp[i];
1286: }
1288: ISRestoreIndices(isrow,&rout);
1289: ISRestoreIndices(iscol,&cout);
1290: VecRestoreArrayRead(bb,&b);
1291: VecRestoreArray(xx,&x);
1292: PetscLogFlops(2.0*a->nz);
1293: return(0);
1294: }
1296: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1297: {
1298: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1299: IS iscol = a->col,isrow = a->row;
1300: PetscErrorCode ierr;
1301: PetscInt i, n = A->rmap->n,j;
1302: PetscInt nz;
1303: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1304: PetscScalar *x,*tmp,sum;
1305: const PetscScalar *b;
1306: const MatScalar *aa = a->a,*v;
1309: if (yy != xx) {VecCopy(yy,xx);}
1311: VecGetArrayRead(bb,&b);
1312: VecGetArray(xx,&x);
1313: tmp = a->solve_work;
1315: ISGetIndices(isrow,&rout); r = rout;
1316: ISGetIndices(iscol,&cout); c = cout;
1318: /* forward solve the lower triangular */
1319: tmp[0] = b[r[0]];
1320: v = aa;
1321: vi = aj;
1322: for (i=1; i<n; i++) {
1323: nz = ai[i+1] - ai[i];
1324: sum = b[r[i]];
1325: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1326: tmp[i] = sum;
1327: v += nz;
1328: vi += nz;
1329: }
1331: /* backward solve the upper triangular */
1332: v = aa + adiag[n-1];
1333: vi = aj + adiag[n-1];
1334: for (i=n-1; i>=0; i--) {
1335: nz = adiag[i] - adiag[i+1] - 1;
1336: sum = tmp[i];
1337: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1338: tmp[i] = sum*v[nz];
1339: x[c[i]] += tmp[i];
1340: v += nz+1; vi += nz+1;
1341: }
1343: ISRestoreIndices(isrow,&rout);
1344: ISRestoreIndices(iscol,&cout);
1345: VecRestoreArrayRead(bb,&b);
1346: VecRestoreArray(xx,&x);
1347: PetscLogFlops(2.0*a->nz);
1348: return(0);
1349: }
1351: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1352: {
1353: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1354: IS iscol = a->col,isrow = a->row;
1355: PetscErrorCode ierr;
1356: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1357: PetscInt i,n = A->rmap->n,j;
1358: PetscInt nz;
1359: PetscScalar *x,*tmp,s1;
1360: const MatScalar *aa = a->a,*v;
1361: const PetscScalar *b;
1364: VecGetArrayRead(bb,&b);
1365: VecGetArrayWrite(xx,&x);
1366: tmp = a->solve_work;
1368: ISGetIndices(isrow,&rout); r = rout;
1369: ISGetIndices(iscol,&cout); c = cout;
1371: /* copy the b into temp work space according to permutation */
1372: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1374: /* forward solve the U^T */
1375: for (i=0; i<n; i++) {
1376: v = aa + diag[i];
1377: vi = aj + diag[i] + 1;
1378: nz = ai[i+1] - diag[i] - 1;
1379: s1 = tmp[i];
1380: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1381: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1382: tmp[i] = s1;
1383: }
1385: /* backward solve the L^T */
1386: for (i=n-1; i>=0; i--) {
1387: v = aa + diag[i] - 1;
1388: vi = aj + diag[i] - 1;
1389: nz = diag[i] - ai[i];
1390: s1 = tmp[i];
1391: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1392: }
1394: /* copy tmp into x according to permutation */
1395: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1397: ISRestoreIndices(isrow,&rout);
1398: ISRestoreIndices(iscol,&cout);
1399: VecRestoreArrayRead(bb,&b);
1400: VecRestoreArrayWrite(xx,&x);
1402: PetscLogFlops(2.0*a->nz-A->cmap->n);
1403: return(0);
1404: }
1406: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1407: {
1408: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1409: IS iscol = a->col,isrow = a->row;
1410: PetscErrorCode ierr;
1411: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1412: PetscInt i,n = A->rmap->n,j;
1413: PetscInt nz;
1414: PetscScalar *x,*tmp,s1;
1415: const MatScalar *aa = a->a,*v;
1416: const PetscScalar *b;
1419: VecGetArrayRead(bb,&b);
1420: VecGetArrayWrite(xx,&x);
1421: tmp = a->solve_work;
1423: ISGetIndices(isrow,&rout); r = rout;
1424: ISGetIndices(iscol,&cout); c = cout;
1426: /* copy the b into temp work space according to permutation */
1427: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1429: /* forward solve the U^T */
1430: for (i=0; i<n; i++) {
1431: v = aa + adiag[i+1] + 1;
1432: vi = aj + adiag[i+1] + 1;
1433: nz = adiag[i] - adiag[i+1] - 1;
1434: s1 = tmp[i];
1435: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1436: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1437: tmp[i] = s1;
1438: }
1440: /* backward solve the L^T */
1441: for (i=n-1; i>=0; i--) {
1442: v = aa + ai[i];
1443: vi = aj + ai[i];
1444: nz = ai[i+1] - ai[i];
1445: s1 = tmp[i];
1446: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1447: }
1449: /* copy tmp into x according to permutation */
1450: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1452: ISRestoreIndices(isrow,&rout);
1453: ISRestoreIndices(iscol,&cout);
1454: VecRestoreArrayRead(bb,&b);
1455: VecRestoreArrayWrite(xx,&x);
1457: PetscLogFlops(2.0*a->nz-A->cmap->n);
1458: return(0);
1459: }
1461: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1462: {
1463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1464: IS iscol = a->col,isrow = a->row;
1465: PetscErrorCode ierr;
1466: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1467: PetscInt i,n = A->rmap->n,j;
1468: PetscInt nz;
1469: PetscScalar *x,*tmp,s1;
1470: const MatScalar *aa = a->a,*v;
1471: const PetscScalar *b;
1474: if (zz != xx) {VecCopy(zz,xx);}
1475: VecGetArrayRead(bb,&b);
1476: VecGetArray(xx,&x);
1477: tmp = a->solve_work;
1479: ISGetIndices(isrow,&rout); r = rout;
1480: ISGetIndices(iscol,&cout); c = cout;
1482: /* copy the b into temp work space according to permutation */
1483: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1485: /* forward solve the U^T */
1486: for (i=0; i<n; i++) {
1487: v = aa + diag[i];
1488: vi = aj + diag[i] + 1;
1489: nz = ai[i+1] - diag[i] - 1;
1490: s1 = tmp[i];
1491: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1492: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1493: tmp[i] = s1;
1494: }
1496: /* backward solve the L^T */
1497: for (i=n-1; i>=0; i--) {
1498: v = aa + diag[i] - 1;
1499: vi = aj + diag[i] - 1;
1500: nz = diag[i] - ai[i];
1501: s1 = tmp[i];
1502: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1503: }
1505: /* copy tmp into x according to permutation */
1506: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1508: ISRestoreIndices(isrow,&rout);
1509: ISRestoreIndices(iscol,&cout);
1510: VecRestoreArrayRead(bb,&b);
1511: VecRestoreArray(xx,&x);
1513: PetscLogFlops(2.0*a->nz-A->cmap->n);
1514: return(0);
1515: }
1517: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1518: {
1519: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1520: IS iscol = a->col,isrow = a->row;
1521: PetscErrorCode ierr;
1522: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1523: PetscInt i,n = A->rmap->n,j;
1524: PetscInt nz;
1525: PetscScalar *x,*tmp,s1;
1526: const MatScalar *aa = a->a,*v;
1527: const PetscScalar *b;
1530: if (zz != xx) {VecCopy(zz,xx);}
1531: VecGetArrayRead(bb,&b);
1532: VecGetArray(xx,&x);
1533: tmp = a->solve_work;
1535: ISGetIndices(isrow,&rout); r = rout;
1536: ISGetIndices(iscol,&cout); c = cout;
1538: /* copy the b into temp work space according to permutation */
1539: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1541: /* forward solve the U^T */
1542: for (i=0; i<n; i++) {
1543: v = aa + adiag[i+1] + 1;
1544: vi = aj + adiag[i+1] + 1;
1545: nz = adiag[i] - adiag[i+1] - 1;
1546: s1 = tmp[i];
1547: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1548: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1549: tmp[i] = s1;
1550: }
1553: /* backward solve the L^T */
1554: for (i=n-1; i>=0; i--) {
1555: v = aa + ai[i];
1556: vi = aj + ai[i];
1557: nz = ai[i+1] - ai[i];
1558: s1 = tmp[i];
1559: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1560: }
1562: /* copy tmp into x according to permutation */
1563: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1565: ISRestoreIndices(isrow,&rout);
1566: ISRestoreIndices(iscol,&cout);
1567: VecRestoreArrayRead(bb,&b);
1568: VecRestoreArray(xx,&x);
1570: PetscLogFlops(2.0*a->nz-A->cmap->n);
1571: return(0);
1572: }
1574: /* ----------------------------------------------------------------*/
1576: /*
1577: ilu() under revised new data structure.
1578: Factored arrays bj and ba are stored as
1579: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1581: bi=fact->i is an array of size n+1, in which
1582: bi+
1583: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1584: bi[n]: points to L(n-1,n-1)+1
1586: bdiag=fact->diag is an array of size n+1,in which
1587: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1588: bdiag[n]: points to entry of U(n-1,0)-1
1590: U(i,:) contains bdiag[i] as its last entry, i.e.,
1591: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1592: */
1593: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1594: {
1595: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1597: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1598: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1599: IS isicol;
1602: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1603: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1604: b = (Mat_SeqAIJ*)(fact)->data;
1606: /* allocate matrix arrays for new data structure */
1607: PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);
1608: PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1610: b->singlemalloc = PETSC_TRUE;
1611: if (!b->diag) {
1612: PetscMalloc1(n+1,&b->diag);
1613: PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));
1614: }
1615: bdiag = b->diag;
1617: if (n > 0) {
1618: PetscArrayzero(b->a,ai[n]);
1619: }
1621: /* set bi and bj with new data structure */
1622: bi = b->i;
1623: bj = b->j;
1625: /* L part */
1626: bi[0] = 0;
1627: for (i=0; i<n; i++) {
1628: nz = adiag[i] - ai[i];
1629: bi[i+1] = bi[i] + nz;
1630: aj = a->j + ai[i];
1631: for (j=0; j<nz; j++) {
1632: /* *bj = aj[j]; bj++; */
1633: bj[k++] = aj[j];
1634: }
1635: }
1637: /* U part */
1638: bdiag[n] = bi[n]-1;
1639: for (i=n-1; i>=0; i--) {
1640: nz = ai[i+1] - adiag[i] - 1;
1641: aj = a->j + adiag[i] + 1;
1642: for (j=0; j<nz; j++) {
1643: /* *bj = aj[j]; bj++; */
1644: bj[k++] = aj[j];
1645: }
1646: /* diag[i] */
1647: /* *bj = i; bj++; */
1648: bj[k++] = i;
1649: bdiag[i] = bdiag[i+1] + nz + 1;
1650: }
1652: fact->factortype = MAT_FACTOR_ILU;
1653: fact->info.factor_mallocs = 0;
1654: fact->info.fill_ratio_given = info->fill;
1655: fact->info.fill_ratio_needed = 1.0;
1656: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1657: MatSeqAIJCheckInode_FactorLU(fact);
1659: b = (Mat_SeqAIJ*)(fact)->data;
1660: b->row = isrow;
1661: b->col = iscol;
1662: b->icol = isicol;
1663: PetscMalloc1(fact->rmap->n+1,&b->solve_work);
1664: PetscObjectReference((PetscObject)isrow);
1665: PetscObjectReference((PetscObject)iscol);
1666: return(0);
1667: }
1669: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1670: {
1671: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1672: IS isicol;
1673: PetscErrorCode ierr;
1674: const PetscInt *r,*ic;
1675: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1676: PetscInt *bi,*cols,nnz,*cols_lvl;
1677: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1678: PetscInt i,levels,diagonal_fill;
1679: PetscBool col_identity,row_identity,missing;
1680: PetscReal f;
1681: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1682: PetscBT lnkbt;
1683: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1684: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1685: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1688: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1689: MatMissingDiagonal(A,&missing,&i);
1690: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1691:
1692: levels = (PetscInt)info->levels;
1693: ISIdentity(isrow,&row_identity);
1694: ISIdentity(iscol,&col_identity);
1695: if (!levels && row_identity && col_identity) {
1696: /* special case: ilu(0) with natural ordering */
1697: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1698: if (a->inode.size) {
1699: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1700: }
1701: return(0);
1702: }
1704: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1705: ISGetIndices(isrow,&r);
1706: ISGetIndices(isicol,&ic);
1708: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1709: PetscMalloc1(n+1,&bi);
1710: PetscMalloc1(n+1,&bdiag);
1711: bi[0] = bdiag[0] = 0;
1712: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1714: /* create a linked list for storing column indices of the active row */
1715: nlnk = n + 1;
1716: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1718: /* initial FreeSpace size is f*(ai[n]+1) */
1719: f = info->fill;
1720: diagonal_fill = (PetscInt)info->diagonal_fill;
1721: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1722: current_space = free_space;
1723: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1724: current_space_lvl = free_space_lvl;
1725: for (i=0; i<n; i++) {
1726: nzi = 0;
1727: /* copy current row into linked list */
1728: nnz = ai[r[i]+1] - ai[r[i]];
1729: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1730: cols = aj + ai[r[i]];
1731: lnk[i] = -1; /* marker to indicate if diagonal exists */
1732: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1733: nzi += nlnk;
1735: /* make sure diagonal entry is included */
1736: if (diagonal_fill && lnk[i] == -1) {
1737: fm = n;
1738: while (lnk[fm] < i) fm = lnk[fm];
1739: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1740: lnk[fm] = i;
1741: lnk_lvl[i] = 0;
1742: nzi++; dcount++;
1743: }
1745: /* add pivot rows into the active row */
1746: nzbd = 0;
1747: prow = lnk[n];
1748: while (prow < i) {
1749: nnz = bdiag[prow];
1750: cols = bj_ptr[prow] + nnz + 1;
1751: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1752: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1753: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1754: nzi += nlnk;
1755: prow = lnk[prow];
1756: nzbd++;
1757: }
1758: bdiag[i] = nzbd;
1759: bi[i+1] = bi[i] + nzi;
1760: /* if free space is not available, make more free space */
1761: if (current_space->local_remaining<nzi) {
1762: nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1763: PetscFreeSpaceGet(nnz,¤t_space);
1764: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1765: reallocs++;
1766: }
1768: /* copy data into free_space and free_space_lvl, then initialize lnk */
1769: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1770: bj_ptr[i] = current_space->array;
1771: bjlvl_ptr[i] = current_space_lvl->array;
1773: /* make sure the active row i has diagonal entry */
1774: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1776: current_space->array += nzi;
1777: current_space->local_used += nzi;
1778: current_space->local_remaining -= nzi;
1779: current_space_lvl->array += nzi;
1780: current_space_lvl->local_used += nzi;
1781: current_space_lvl->local_remaining -= nzi;
1782: }
1784: ISRestoreIndices(isrow,&r);
1785: ISRestoreIndices(isicol,&ic);
1786: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1787: PetscMalloc1(bi[n]+1,&bj);
1788: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1790: PetscIncompleteLLDestroy(lnk,lnkbt);
1791: PetscFreeSpaceDestroy(free_space_lvl);
1792: PetscFree2(bj_ptr,bjlvl_ptr);
1794: #if defined(PETSC_USE_INFO)
1795: {
1796: PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1797: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1798: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1799: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1800: PetscInfo(A,"for best performance.\n");
1801: if (diagonal_fill) {
1802: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1803: }
1804: }
1805: #endif
1806: /* put together the new matrix */
1807: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
1808: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
1809: b = (Mat_SeqAIJ*)(fact)->data;
1811: b->free_a = PETSC_TRUE;
1812: b->free_ij = PETSC_TRUE;
1813: b->singlemalloc = PETSC_FALSE;
1815: PetscMalloc1(bdiag[0]+1,&b->a);
1817: b->j = bj;
1818: b->i = bi;
1819: b->diag = bdiag;
1820: b->ilen = 0;
1821: b->imax = 0;
1822: b->row = isrow;
1823: b->col = iscol;
1824: PetscObjectReference((PetscObject)isrow);
1825: PetscObjectReference((PetscObject)iscol);
1826: b->icol = isicol;
1828: PetscMalloc1(n+1,&b->solve_work);
1829: /* In b structure: Free imax, ilen, old a, old j.
1830: Allocate bdiag, solve_work, new a, new j */
1831: PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1832: b->maxnz = b->nz = bdiag[0]+1;
1834: (fact)->info.factor_mallocs = reallocs;
1835: (fact)->info.fill_ratio_given = f;
1836: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1837: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1838: if (a->inode.size) {
1839: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1840: }
1841: MatSeqAIJCheckInode_FactorLU(fact);
1842: return(0);
1843: }
1845: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1846: {
1847: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1848: IS isicol;
1849: PetscErrorCode ierr;
1850: const PetscInt *r,*ic;
1851: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1852: PetscInt *bi,*cols,nnz,*cols_lvl;
1853: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1854: PetscInt i,levels,diagonal_fill;
1855: PetscBool col_identity,row_identity;
1856: PetscReal f;
1857: PetscInt nlnk,*lnk,*lnk_lvl=NULL;
1858: PetscBT lnkbt;
1859: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1860: PetscFreeSpaceList free_space =NULL,current_space=NULL;
1861: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1862: PetscBool missing;
1865: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1866: MatMissingDiagonal(A,&missing,&i);
1867: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1869: f = info->fill;
1870: levels = (PetscInt)info->levels;
1871: diagonal_fill = (PetscInt)info->diagonal_fill;
1873: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1875: ISIdentity(isrow,&row_identity);
1876: ISIdentity(iscol,&col_identity);
1877: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1878: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1880: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1881: if (a->inode.size) {
1882: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1883: }
1884: fact->factortype = MAT_FACTOR_ILU;
1885: (fact)->info.factor_mallocs = 0;
1886: (fact)->info.fill_ratio_given = info->fill;
1887: (fact)->info.fill_ratio_needed = 1.0;
1889: b = (Mat_SeqAIJ*)(fact)->data;
1890: b->row = isrow;
1891: b->col = iscol;
1892: b->icol = isicol;
1893: PetscMalloc1((fact)->rmap->n+1,&b->solve_work);
1894: PetscObjectReference((PetscObject)isrow);
1895: PetscObjectReference((PetscObject)iscol);
1896: return(0);
1897: }
1899: ISGetIndices(isrow,&r);
1900: ISGetIndices(isicol,&ic);
1902: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1903: PetscMalloc1(n+1,&bi);
1904: PetscMalloc1(n+1,&bdiag);
1905: bi[0] = bdiag[0] = 0;
1907: PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);
1909: /* create a linked list for storing column indices of the active row */
1910: nlnk = n + 1;
1911: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1913: /* initial FreeSpace size is f*(ai[n]+1) */
1914: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
1915: current_space = free_space;
1916: PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);
1917: current_space_lvl = free_space_lvl;
1919: for (i=0; i<n; i++) {
1920: nzi = 0;
1921: /* copy current row into linked list */
1922: nnz = ai[r[i]+1] - ai[r[i]];
1923: if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1924: cols = aj + ai[r[i]];
1925: lnk[i] = -1; /* marker to indicate if diagonal exists */
1926: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1927: nzi += nlnk;
1929: /* make sure diagonal entry is included */
1930: if (diagonal_fill && lnk[i] == -1) {
1931: fm = n;
1932: while (lnk[fm] < i) fm = lnk[fm];
1933: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1934: lnk[fm] = i;
1935: lnk_lvl[i] = 0;
1936: nzi++; dcount++;
1937: }
1939: /* add pivot rows into the active row */
1940: nzbd = 0;
1941: prow = lnk[n];
1942: while (prow < i) {
1943: nnz = bdiag[prow];
1944: cols = bj_ptr[prow] + nnz + 1;
1945: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1946: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1947: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1948: nzi += nlnk;
1949: prow = lnk[prow];
1950: nzbd++;
1951: }
1952: bdiag[i] = nzbd;
1953: bi[i+1] = bi[i] + nzi;
1955: /* if free space is not available, make more free space */
1956: if (current_space->local_remaining<nzi) {
1957: nnz = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1958: PetscFreeSpaceGet(nnz,¤t_space);
1959: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1960: reallocs++;
1961: }
1963: /* copy data into free_space and free_space_lvl, then initialize lnk */
1964: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1965: bj_ptr[i] = current_space->array;
1966: bjlvl_ptr[i] = current_space_lvl->array;
1968: /* make sure the active row i has diagonal entry */
1969: if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1971: current_space->array += nzi;
1972: current_space->local_used += nzi;
1973: current_space->local_remaining -= nzi;
1974: current_space_lvl->array += nzi;
1975: current_space_lvl->local_used += nzi;
1976: current_space_lvl->local_remaining -= nzi;
1977: }
1979: ISRestoreIndices(isrow,&r);
1980: ISRestoreIndices(isicol,&ic);
1982: /* destroy list of free space and other temporary arrays */
1983: PetscMalloc1(bi[n]+1,&bj);
1984: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
1985: PetscIncompleteLLDestroy(lnk,lnkbt);
1986: PetscFreeSpaceDestroy(free_space_lvl);
1987: PetscFree2(bj_ptr,bjlvl_ptr);
1989: #if defined(PETSC_USE_INFO)
1990: {
1991: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1992: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
1993: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);
1994: PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);
1995: PetscInfo(A,"for best performance.\n");
1996: if (diagonal_fill) {
1997: PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);
1998: }
1999: }
2000: #endif
2002: /* put together the new matrix */
2003: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);
2004: PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);
2005: b = (Mat_SeqAIJ*)(fact)->data;
2007: b->free_a = PETSC_TRUE;
2008: b->free_ij = PETSC_TRUE;
2009: b->singlemalloc = PETSC_FALSE;
2011: PetscMalloc1(bi[n],&b->a);
2012: b->j = bj;
2013: b->i = bi;
2014: for (i=0; i<n; i++) bdiag[i] += bi[i];
2015: b->diag = bdiag;
2016: b->ilen = 0;
2017: b->imax = 0;
2018: b->row = isrow;
2019: b->col = iscol;
2020: PetscObjectReference((PetscObject)isrow);
2021: PetscObjectReference((PetscObject)iscol);
2022: b->icol = isicol;
2023: PetscMalloc1(n+1,&b->solve_work);
2024: /* In b structure: Free imax, ilen, old a, old j.
2025: Allocate bdiag, solve_work, new a, new j */
2026: PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2027: b->maxnz = b->nz = bi[n];
2029: (fact)->info.factor_mallocs = reallocs;
2030: (fact)->info.fill_ratio_given = f;
2031: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2032: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2033: if (a->inode.size) {
2034: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2035: }
2036: return(0);
2037: }
2039: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2040: {
2041: Mat C = B;
2042: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2043: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2044: IS ip=b->row,iip = b->icol;
2046: const PetscInt *rip,*riip;
2047: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2048: PetscInt *ai=a->i,*aj=a->j;
2049: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2050: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2051: PetscBool perm_identity;
2052: FactorShiftCtx sctx;
2053: PetscReal rs;
2054: MatScalar d,*v;
2057: /* MatPivotSetUp(): initialize shift context sctx */
2058: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2060: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2061: sctx.shift_top = info->zeropivot;
2062: for (i=0; i<mbs; i++) {
2063: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2064: d = (aa)[a->diag[i]];
2065: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2066: v = aa+ai[i];
2067: nz = ai[i+1] - ai[i];
2068: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2069: if (rs>sctx.shift_top) sctx.shift_top = rs;
2070: }
2071: sctx.shift_top *= 1.1;
2072: sctx.nshift_max = 5;
2073: sctx.shift_lo = 0.;
2074: sctx.shift_hi = 1.;
2075: }
2077: ISGetIndices(ip,&rip);
2078: ISGetIndices(iip,&riip);
2080: /* allocate working arrays
2081: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2082: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2083: */
2084: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);
2086: do {
2087: sctx.newshift = PETSC_FALSE;
2089: for (i=0; i<mbs; i++) c2r[i] = mbs;
2090: if (mbs) il[0] = 0;
2092: for (k = 0; k<mbs; k++) {
2093: /* zero rtmp */
2094: nz = bi[k+1] - bi[k];
2095: bjtmp = bj + bi[k];
2096: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2098: /* load in initial unfactored row */
2099: bval = ba + bi[k];
2100: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2101: for (j = jmin; j < jmax; j++) {
2102: col = riip[aj[j]];
2103: if (col >= k) { /* only take upper triangular entry */
2104: rtmp[col] = aa[j];
2105: *bval++ = 0.0; /* for in-place factorization */
2106: }
2107: }
2108: /* shift the diagonal of the matrix: ZeropivotApply() */
2109: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2111: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2112: dk = rtmp[k];
2113: i = c2r[k]; /* first row to be added to k_th row */
2115: while (i < k) {
2116: nexti = c2r[i]; /* next row to be added to k_th row */
2118: /* compute multiplier, update diag(k) and U(i,k) */
2119: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2120: uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2121: dk += uikdi*ba[ili]; /* update diag[k] */
2122: ba[ili] = uikdi; /* -U(i,k) */
2124: /* add multiple of row i to k-th row */
2125: jmin = ili + 1; jmax = bi[i+1];
2126: if (jmin < jmax) {
2127: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2128: /* update il and c2r for row i */
2129: il[i] = jmin;
2130: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2131: }
2132: i = nexti;
2133: }
2135: /* copy data into U(k,:) */
2136: rs = 0.0;
2137: jmin = bi[k]; jmax = bi[k+1]-1;
2138: if (jmin < jmax) {
2139: for (j=jmin; j<jmax; j++) {
2140: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2141: }
2142: /* add the k-th row into il and c2r */
2143: il[k] = jmin;
2144: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2145: }
2147: /* MatPivotCheck() */
2148: sctx.rs = rs;
2149: sctx.pv = dk;
2150: MatPivotCheck(B,A,info,&sctx,i);
2151: if (sctx.newshift) break;
2152: dk = sctx.pv;
2154: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2155: }
2156: } while (sctx.newshift);
2158: PetscFree3(rtmp,il,c2r);
2159: ISRestoreIndices(ip,&rip);
2160: ISRestoreIndices(iip,&riip);
2162: ISIdentity(ip,&perm_identity);
2163: if (perm_identity) {
2164: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2165: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2166: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2167: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2168: } else {
2169: B->ops->solve = MatSolve_SeqSBAIJ_1;
2170: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2171: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2172: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2173: }
2175: C->assembled = PETSC_TRUE;
2176: C->preallocated = PETSC_TRUE;
2178: PetscLogFlops(C->rmap->n);
2180: /* MatPivotView() */
2181: if (sctx.nshift) {
2182: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2183: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
2184: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2185: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2186: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2187: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
2188: }
2189: }
2190: return(0);
2191: }
2193: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2194: {
2195: Mat C = B;
2196: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2197: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2198: IS ip=b->row,iip = b->icol;
2200: const PetscInt *rip,*riip;
2201: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2202: PetscInt *ai=a->i,*aj=a->j;
2203: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2204: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2205: PetscBool perm_identity;
2206: FactorShiftCtx sctx;
2207: PetscReal rs;
2208: MatScalar d,*v;
2211: /* MatPivotSetUp(): initialize shift context sctx */
2212: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2214: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2215: sctx.shift_top = info->zeropivot;
2216: for (i=0; i<mbs; i++) {
2217: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2218: d = (aa)[a->diag[i]];
2219: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2220: v = aa+ai[i];
2221: nz = ai[i+1] - ai[i];
2222: for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2223: if (rs>sctx.shift_top) sctx.shift_top = rs;
2224: }
2225: sctx.shift_top *= 1.1;
2226: sctx.nshift_max = 5;
2227: sctx.shift_lo = 0.;
2228: sctx.shift_hi = 1.;
2229: }
2231: ISGetIndices(ip,&rip);
2232: ISGetIndices(iip,&riip);
2234: /* initialization */
2235: PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);
2237: do {
2238: sctx.newshift = PETSC_FALSE;
2240: for (i=0; i<mbs; i++) jl[i] = mbs;
2241: il[0] = 0;
2243: for (k = 0; k<mbs; k++) {
2244: /* zero rtmp */
2245: nz = bi[k+1] - bi[k];
2246: bjtmp = bj + bi[k];
2247: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2249: bval = ba + bi[k];
2250: /* initialize k-th row by the perm[k]-th row of A */
2251: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2252: for (j = jmin; j < jmax; j++) {
2253: col = riip[aj[j]];
2254: if (col >= k) { /* only take upper triangular entry */
2255: rtmp[col] = aa[j];
2256: *bval++ = 0.0; /* for in-place factorization */
2257: }
2258: }
2259: /* shift the diagonal of the matrix */
2260: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2262: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2263: dk = rtmp[k];
2264: i = jl[k]; /* first row to be added to k_th row */
2266: while (i < k) {
2267: nexti = jl[i]; /* next row to be added to k_th row */
2269: /* compute multiplier, update diag(k) and U(i,k) */
2270: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2271: uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2272: dk += uikdi*ba[ili];
2273: ba[ili] = uikdi; /* -U(i,k) */
2275: /* add multiple of row i to k-th row */
2276: jmin = ili + 1; jmax = bi[i+1];
2277: if (jmin < jmax) {
2278: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2279: /* update il and jl for row i */
2280: il[i] = jmin;
2281: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2282: }
2283: i = nexti;
2284: }
2286: /* shift the diagonals when zero pivot is detected */
2287: /* compute rs=sum of abs(off-diagonal) */
2288: rs = 0.0;
2289: jmin = bi[k]+1;
2290: nz = bi[k+1] - jmin;
2291: bcol = bj + jmin;
2292: for (j=0; j<nz; j++) {
2293: rs += PetscAbsScalar(rtmp[bcol[j]]);
2294: }
2296: sctx.rs = rs;
2297: sctx.pv = dk;
2298: MatPivotCheck(B,A,info,&sctx,k);
2299: if (sctx.newshift) break;
2300: dk = sctx.pv;
2302: /* copy data into U(k,:) */
2303: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2304: jmin = bi[k]+1; jmax = bi[k+1];
2305: if (jmin < jmax) {
2306: for (j=jmin; j<jmax; j++) {
2307: col = bj[j]; ba[j] = rtmp[col];
2308: }
2309: /* add the k-th row into il and jl */
2310: il[k] = jmin;
2311: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2312: }
2313: }
2314: } while (sctx.newshift);
2316: PetscFree3(rtmp,il,jl);
2317: ISRestoreIndices(ip,&rip);
2318: ISRestoreIndices(iip,&riip);
2320: ISIdentity(ip,&perm_identity);
2321: if (perm_identity) {
2322: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2323: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2324: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2325: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2326: } else {
2327: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2328: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2329: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2330: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2331: }
2333: C->assembled = PETSC_TRUE;
2334: C->preallocated = PETSC_TRUE;
2336: PetscLogFlops(C->rmap->n);
2337: if (sctx.nshift) {
2338: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2339: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2340: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2341: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
2342: }
2343: }
2344: return(0);
2345: }
2347: /*
2348: icc() under revised new data structure.
2349: Factored arrays bj and ba are stored as
2350: U(0,:),...,U(i,:),U(n-1,:)
2352: ui=fact->i is an array of size n+1, in which
2353: ui+
2354: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2355: ui[n]: points to U(n-1,n-1)+1
2357: udiag=fact->diag is an array of size n,in which
2358: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2360: U(i,:) contains udiag[i] as its last entry, i.e.,
2361: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2362: */
2364: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2365: {
2366: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2367: Mat_SeqSBAIJ *b;
2368: PetscErrorCode ierr;
2369: PetscBool perm_identity,missing;
2370: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2371: const PetscInt *rip,*riip;
2372: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2373: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2374: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2375: PetscReal fill =info->fill,levels=info->levels;
2376: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2377: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2378: PetscBT lnkbt;
2379: IS iperm;
2382: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2383: MatMissingDiagonal(A,&missing,&d);
2384: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2385: ISIdentity(perm,&perm_identity);
2386: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2388: PetscMalloc1(am+1,&ui);
2389: PetscMalloc1(am+1,&udiag);
2390: ui[0] = 0;
2392: /* ICC(0) without matrix ordering: simply rearrange column indices */
2393: if (!levels && perm_identity) {
2394: for (i=0; i<am; i++) {
2395: ncols = ai[i+1] - a->diag[i];
2396: ui[i+1] = ui[i] + ncols;
2397: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2398: }
2399: PetscMalloc1(ui[am]+1,&uj);
2400: cols = uj;
2401: for (i=0; i<am; i++) {
2402: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2403: ncols = ai[i+1] - a->diag[i] -1;
2404: for (j=0; j<ncols; j++) *cols++ = aj[j];
2405: *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */
2406: }
2407: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2408: ISGetIndices(iperm,&riip);
2409: ISGetIndices(perm,&rip);
2411: /* initialization */
2412: PetscMalloc1(am+1,&ajtmp);
2414: /* jl: linked list for storing indices of the pivot rows
2415: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2416: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2417: for (i=0; i<am; i++) {
2418: jl[i] = am; il[i] = 0;
2419: }
2421: /* create and initialize a linked list for storing column indices of the active row k */
2422: nlnk = am + 1;
2423: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2425: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2426: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2427: current_space = free_space;
2428: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);
2429: current_space_lvl = free_space_lvl;
2431: for (k=0; k<am; k++) { /* for each active row k */
2432: /* initialize lnk by the column indices of row rip[k] of A */
2433: nzk = 0;
2434: ncols = ai[rip[k]+1] - ai[rip[k]];
2435: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2436: ncols_upper = 0;
2437: for (j=0; j<ncols; j++) {
2438: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2439: if (riip[i] >= k) { /* only take upper triangular entry */
2440: ajtmp[ncols_upper] = i;
2441: ncols_upper++;
2442: }
2443: }
2444: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2445: nzk += nlnk;
2447: /* update lnk by computing fill-in for each pivot row to be merged in */
2448: prow = jl[k]; /* 1st pivot row */
2450: while (prow < k) {
2451: nextprow = jl[prow];
2453: /* merge prow into k-th row */
2454: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2455: jmax = ui[prow+1];
2456: ncols = jmax-jmin;
2457: i = jmin - ui[prow];
2458: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2459: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2460: j = *(uj - 1);
2461: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2462: nzk += nlnk;
2464: /* update il and jl for prow */
2465: if (jmin < jmax) {
2466: il[prow] = jmin;
2467: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2468: }
2469: prow = nextprow;
2470: }
2472: /* if free space is not available, make more free space */
2473: if (current_space->local_remaining<nzk) {
2474: i = am - k + 1; /* num of unfactored rows */
2475: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2476: PetscFreeSpaceGet(i,¤t_space);
2477: PetscFreeSpaceGet(i,¤t_space_lvl);
2478: reallocs++;
2479: }
2481: /* copy data into free_space and free_space_lvl, then initialize lnk */
2482: if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2483: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2485: /* add the k-th row into il and jl */
2486: if (nzk > 1) {
2487: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2488: jl[k] = jl[i]; jl[i] = k;
2489: il[k] = ui[k] + 1;
2490: }
2491: uj_ptr[k] = current_space->array;
2492: uj_lvl_ptr[k] = current_space_lvl->array;
2494: current_space->array += nzk;
2495: current_space->local_used += nzk;
2496: current_space->local_remaining -= nzk;
2498: current_space_lvl->array += nzk;
2499: current_space_lvl->local_used += nzk;
2500: current_space_lvl->local_remaining -= nzk;
2502: ui[k+1] = ui[k] + nzk;
2503: }
2505: ISRestoreIndices(perm,&rip);
2506: ISRestoreIndices(iperm,&riip);
2507: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2508: PetscFree(ajtmp);
2510: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2511: PetscMalloc1(ui[am]+1,&uj);
2512: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2513: PetscIncompleteLLDestroy(lnk,lnkbt);
2514: PetscFreeSpaceDestroy(free_space_lvl);
2516: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2518: /* put together the new matrix in MATSEQSBAIJ format */
2519: b = (Mat_SeqSBAIJ*)(fact)->data;
2520: b->singlemalloc = PETSC_FALSE;
2522: PetscMalloc1(ui[am]+1,&b->a);
2524: b->j = uj;
2525: b->i = ui;
2526: b->diag = udiag;
2527: b->free_diag = PETSC_TRUE;
2528: b->ilen = 0;
2529: b->imax = 0;
2530: b->row = perm;
2531: b->col = perm;
2532: PetscObjectReference((PetscObject)perm);
2533: PetscObjectReference((PetscObject)perm);
2534: b->icol = iperm;
2535: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2537: PetscMalloc1(am+1,&b->solve_work);
2538: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2540: b->maxnz = b->nz = ui[am];
2541: b->free_a = PETSC_TRUE;
2542: b->free_ij = PETSC_TRUE;
2544: fact->info.factor_mallocs = reallocs;
2545: fact->info.fill_ratio_given = fill;
2546: if (ai[am] != 0) {
2547: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2548: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2549: } else {
2550: fact->info.fill_ratio_needed = 0.0;
2551: }
2552: #if defined(PETSC_USE_INFO)
2553: if (ai[am] != 0) {
2554: PetscReal af = fact->info.fill_ratio_needed;
2555: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2556: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2557: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2558: } else {
2559: PetscInfo(A,"Empty matrix.\n");
2560: }
2561: #endif
2562: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2563: return(0);
2564: }
2566: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2567: {
2568: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2569: Mat_SeqSBAIJ *b;
2570: PetscErrorCode ierr;
2571: PetscBool perm_identity,missing;
2572: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2573: const PetscInt *rip,*riip;
2574: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2575: PetscInt nlnk,*lnk,*lnk_lvl=NULL,d;
2576: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2577: PetscReal fill =info->fill,levels=info->levels;
2578: PetscFreeSpaceList free_space =NULL,current_space=NULL;
2579: PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2580: PetscBT lnkbt;
2581: IS iperm;
2584: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2585: MatMissingDiagonal(A,&missing,&d);
2586: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2587: ISIdentity(perm,&perm_identity);
2588: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2590: PetscMalloc1(am+1,&ui);
2591: PetscMalloc1(am+1,&udiag);
2592: ui[0] = 0;
2594: /* ICC(0) without matrix ordering: simply copies fill pattern */
2595: if (!levels && perm_identity) {
2597: for (i=0; i<am; i++) {
2598: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
2599: udiag[i] = ui[i];
2600: }
2601: PetscMalloc1(ui[am]+1,&uj);
2602: cols = uj;
2603: for (i=0; i<am; i++) {
2604: aj = a->j + a->diag[i];
2605: ncols = ui[i+1] - ui[i];
2606: for (j=0; j<ncols; j++) *cols++ = *aj++;
2607: }
2608: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2609: ISGetIndices(iperm,&riip);
2610: ISGetIndices(perm,&rip);
2612: /* initialization */
2613: PetscMalloc1(am+1,&ajtmp);
2615: /* jl: linked list for storing indices of the pivot rows
2616: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2617: PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);
2618: for (i=0; i<am; i++) {
2619: jl[i] = am; il[i] = 0;
2620: }
2622: /* create and initialize a linked list for storing column indices of the active row k */
2623: nlnk = am + 1;
2624: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2626: /* initial FreeSpace size is fill*(ai[am]+1) */
2627: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2628: current_space = free_space;
2629: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);
2630: current_space_lvl = free_space_lvl;
2632: for (k=0; k<am; k++) { /* for each active row k */
2633: /* initialize lnk by the column indices of row rip[k] of A */
2634: nzk = 0;
2635: ncols = ai[rip[k]+1] - ai[rip[k]];
2636: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2637: ncols_upper = 0;
2638: for (j=0; j<ncols; j++) {
2639: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2640: if (riip[i] >= k) { /* only take upper triangular entry */
2641: ajtmp[ncols_upper] = i;
2642: ncols_upper++;
2643: }
2644: }
2645: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2646: nzk += nlnk;
2648: /* update lnk by computing fill-in for each pivot row to be merged in */
2649: prow = jl[k]; /* 1st pivot row */
2651: while (prow < k) {
2652: nextprow = jl[prow];
2654: /* merge prow into k-th row */
2655: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2656: jmax = ui[prow+1];
2657: ncols = jmax-jmin;
2658: i = jmin - ui[prow];
2659: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2660: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2661: j = *(uj - 1);
2662: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2663: nzk += nlnk;
2665: /* update il and jl for prow */
2666: if (jmin < jmax) {
2667: il[prow] = jmin;
2668: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2669: }
2670: prow = nextprow;
2671: }
2673: /* if free space is not available, make more free space */
2674: if (current_space->local_remaining<nzk) {
2675: i = am - k + 1; /* num of unfactored rows */
2676: i = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2677: PetscFreeSpaceGet(i,¤t_space);
2678: PetscFreeSpaceGet(i,¤t_space_lvl);
2679: reallocs++;
2680: }
2682: /* copy data into free_space and free_space_lvl, then initialize lnk */
2683: if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2684: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2686: /* add the k-th row into il and jl */
2687: if (nzk > 1) {
2688: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2689: jl[k] = jl[i]; jl[i] = k;
2690: il[k] = ui[k] + 1;
2691: }
2692: uj_ptr[k] = current_space->array;
2693: uj_lvl_ptr[k] = current_space_lvl->array;
2695: current_space->array += nzk;
2696: current_space->local_used += nzk;
2697: current_space->local_remaining -= nzk;
2699: current_space_lvl->array += nzk;
2700: current_space_lvl->local_used += nzk;
2701: current_space_lvl->local_remaining -= nzk;
2703: ui[k+1] = ui[k] + nzk;
2704: }
2706: #if defined(PETSC_USE_INFO)
2707: if (ai[am] != 0) {
2708: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2709: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2710: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2711: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2712: } else {
2713: PetscInfo(A,"Empty matrix.\n");
2714: }
2715: #endif
2717: ISRestoreIndices(perm,&rip);
2718: ISRestoreIndices(iperm,&riip);
2719: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2720: PetscFree(ajtmp);
2722: /* destroy list of free space and other temporary array(s) */
2723: PetscMalloc1(ui[am]+1,&uj);
2724: PetscFreeSpaceContiguous(&free_space,uj);
2725: PetscIncompleteLLDestroy(lnk,lnkbt);
2726: PetscFreeSpaceDestroy(free_space_lvl);
2728: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2730: /* put together the new matrix in MATSEQSBAIJ format */
2732: b = (Mat_SeqSBAIJ*)fact->data;
2733: b->singlemalloc = PETSC_FALSE;
2735: PetscMalloc1(ui[am]+1,&b->a);
2737: b->j = uj;
2738: b->i = ui;
2739: b->diag = udiag;
2740: b->free_diag = PETSC_TRUE;
2741: b->ilen = 0;
2742: b->imax = 0;
2743: b->row = perm;
2744: b->col = perm;
2746: PetscObjectReference((PetscObject)perm);
2747: PetscObjectReference((PetscObject)perm);
2749: b->icol = iperm;
2750: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2751: PetscMalloc1(am+1,&b->solve_work);
2752: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2753: b->maxnz = b->nz = ui[am];
2754: b->free_a = PETSC_TRUE;
2755: b->free_ij = PETSC_TRUE;
2757: fact->info.factor_mallocs = reallocs;
2758: fact->info.fill_ratio_given = fill;
2759: if (ai[am] != 0) {
2760: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2761: } else {
2762: fact->info.fill_ratio_needed = 0.0;
2763: }
2764: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2765: return(0);
2766: }
2768: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2769: {
2770: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2771: Mat_SeqSBAIJ *b;
2772: PetscErrorCode ierr;
2773: PetscBool perm_identity,missing;
2774: PetscReal fill = info->fill;
2775: const PetscInt *rip,*riip;
2776: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2777: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2778: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2779: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2780: PetscBT lnkbt;
2781: IS iperm;
2784: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2785: MatMissingDiagonal(A,&missing,&i);
2786: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2788: /* check whether perm is the identity mapping */
2789: ISIdentity(perm,&perm_identity);
2790: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2791: ISGetIndices(iperm,&riip);
2792: ISGetIndices(perm,&rip);
2794: /* initialization */
2795: PetscMalloc1(am+1,&ui);
2796: PetscMalloc1(am+1,&udiag);
2797: ui[0] = 0;
2799: /* jl: linked list for storing indices of the pivot rows
2800: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2801: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2802: for (i=0; i<am; i++) {
2803: jl[i] = am; il[i] = 0;
2804: }
2806: /* create and initialize a linked list for storing column indices of the active row k */
2807: nlnk = am + 1;
2808: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2810: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2811: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);
2812: current_space = free_space;
2814: for (k=0; k<am; k++) { /* for each active row k */
2815: /* initialize lnk by the column indices of row rip[k] of A */
2816: nzk = 0;
2817: ncols = ai[rip[k]+1] - ai[rip[k]];
2818: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2819: ncols_upper = 0;
2820: for (j=0; j<ncols; j++) {
2821: i = riip[*(aj + ai[rip[k]] + j)];
2822: if (i >= k) { /* only take upper triangular entry */
2823: cols[ncols_upper] = i;
2824: ncols_upper++;
2825: }
2826: }
2827: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2828: nzk += nlnk;
2830: /* update lnk by computing fill-in for each pivot row to be merged in */
2831: prow = jl[k]; /* 1st pivot row */
2833: while (prow < k) {
2834: nextprow = jl[prow];
2835: /* merge prow into k-th row */
2836: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2837: jmax = ui[prow+1];
2838: ncols = jmax-jmin;
2839: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2840: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2841: nzk += nlnk;
2843: /* update il and jl for prow */
2844: if (jmin < jmax) {
2845: il[prow] = jmin;
2846: j = *uj_ptr;
2847: jl[prow] = jl[j];
2848: jl[j] = prow;
2849: }
2850: prow = nextprow;
2851: }
2853: /* if free space is not available, make more free space */
2854: if (current_space->local_remaining<nzk) {
2855: i = am - k + 1; /* num of unfactored rows */
2856: i = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2857: PetscFreeSpaceGet(i,¤t_space);
2858: reallocs++;
2859: }
2861: /* copy data into free space, then initialize lnk */
2862: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2864: /* add the k-th row into il and jl */
2865: if (nzk > 1) {
2866: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2867: jl[k] = jl[i]; jl[i] = k;
2868: il[k] = ui[k] + 1;
2869: }
2870: ui_ptr[k] = current_space->array;
2872: current_space->array += nzk;
2873: current_space->local_used += nzk;
2874: current_space->local_remaining -= nzk;
2876: ui[k+1] = ui[k] + nzk;
2877: }
2879: ISRestoreIndices(perm,&rip);
2880: ISRestoreIndices(iperm,&riip);
2881: PetscFree4(ui_ptr,jl,il,cols);
2883: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2884: PetscMalloc1(ui[am]+1,&uj);
2885: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2886: PetscLLDestroy(lnk,lnkbt);
2888: /* put together the new matrix in MATSEQSBAIJ format */
2890: b = (Mat_SeqSBAIJ*)fact->data;
2891: b->singlemalloc = PETSC_FALSE;
2892: b->free_a = PETSC_TRUE;
2893: b->free_ij = PETSC_TRUE;
2895: PetscMalloc1(ui[am]+1,&b->a);
2897: b->j = uj;
2898: b->i = ui;
2899: b->diag = udiag;
2900: b->free_diag = PETSC_TRUE;
2901: b->ilen = 0;
2902: b->imax = 0;
2903: b->row = perm;
2904: b->col = perm;
2906: PetscObjectReference((PetscObject)perm);
2907: PetscObjectReference((PetscObject)perm);
2909: b->icol = iperm;
2910: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2912: PetscMalloc1(am+1,&b->solve_work);
2913: PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2915: b->maxnz = b->nz = ui[am];
2917: fact->info.factor_mallocs = reallocs;
2918: fact->info.fill_ratio_given = fill;
2919: if (ai[am] != 0) {
2920: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2921: fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2922: } else {
2923: fact->info.fill_ratio_needed = 0.0;
2924: }
2925: #if defined(PETSC_USE_INFO)
2926: if (ai[am] != 0) {
2927: PetscReal af = fact->info.fill_ratio_needed;
2928: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
2929: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
2930: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
2931: } else {
2932: PetscInfo(A,"Empty matrix.\n");
2933: }
2934: #endif
2935: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2936: return(0);
2937: }
2939: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2940: {
2941: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2942: Mat_SeqSBAIJ *b;
2943: PetscErrorCode ierr;
2944: PetscBool perm_identity,missing;
2945: PetscReal fill = info->fill;
2946: const PetscInt *rip,*riip;
2947: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2948: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2949: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2950: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2951: PetscBT lnkbt;
2952: IS iperm;
2955: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2956: MatMissingDiagonal(A,&missing,&i);
2957: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2959: /* check whether perm is the identity mapping */
2960: ISIdentity(perm,&perm_identity);
2961: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2962: ISGetIndices(iperm,&riip);
2963: ISGetIndices(perm,&rip);
2965: /* initialization */
2966: PetscMalloc1(am+1,&ui);
2967: ui[0] = 0;
2969: /* jl: linked list for storing indices of the pivot rows
2970: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2971: PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);
2972: for (i=0; i<am; i++) {
2973: jl[i] = am; il[i] = 0;
2974: }
2976: /* create and initialize a linked list for storing column indices of the active row k */
2977: nlnk = am + 1;
2978: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2980: /* initial FreeSpace size is fill*(ai[am]+1) */
2981: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);
2982: current_space = free_space;
2984: for (k=0; k<am; k++) { /* for each active row k */
2985: /* initialize lnk by the column indices of row rip[k] of A */
2986: nzk = 0;
2987: ncols = ai[rip[k]+1] - ai[rip[k]];
2988: if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2989: ncols_upper = 0;
2990: for (j=0; j<ncols; j++) {
2991: i = riip[*(aj + ai[rip[k]] + j)];
2992: if (i >= k) { /* only take upper triangular entry */
2993: cols[ncols_upper] = i;
2994: ncols_upper++;
2995: }
2996: }
2997: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2998: nzk += nlnk;
3000: /* update lnk by computing fill-in for each pivot row to be merged in */
3001: prow = jl[k]; /* 1st pivot row */
3003: while (prow < k) {
3004: nextprow = jl[prow];
3005: /* merge prow into k-th row */
3006: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3007: jmax = ui[prow+1];
3008: ncols = jmax-jmin;
3009: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3010: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3011: nzk += nlnk;
3013: /* update il and jl for prow */
3014: if (jmin < jmax) {
3015: il[prow] = jmin;
3016: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3017: }
3018: prow = nextprow;
3019: }
3021: /* if free space is not available, make more free space */
3022: if (current_space->local_remaining<nzk) {
3023: i = am - k + 1; /* num of unfactored rows */
3024: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3025: PetscFreeSpaceGet(i,¤t_space);
3026: reallocs++;
3027: }
3029: /* copy data into free space, then initialize lnk */
3030: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3032: /* add the k-th row into il and jl */
3033: if (nzk-1 > 0) {
3034: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3035: jl[k] = jl[i]; jl[i] = k;
3036: il[k] = ui[k] + 1;
3037: }
3038: ui_ptr[k] = current_space->array;
3040: current_space->array += nzk;
3041: current_space->local_used += nzk;
3042: current_space->local_remaining -= nzk;
3044: ui[k+1] = ui[k] + nzk;
3045: }
3047: #if defined(PETSC_USE_INFO)
3048: if (ai[am] != 0) {
3049: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3050: PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
3051: PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
3052: PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
3053: } else {
3054: PetscInfo(A,"Empty matrix.\n");
3055: }
3056: #endif
3058: ISRestoreIndices(perm,&rip);
3059: ISRestoreIndices(iperm,&riip);
3060: PetscFree4(ui_ptr,jl,il,cols);
3062: /* destroy list of free space and other temporary array(s) */
3063: PetscMalloc1(ui[am]+1,&uj);
3064: PetscFreeSpaceContiguous(&free_space,uj);
3065: PetscLLDestroy(lnk,lnkbt);
3067: /* put together the new matrix in MATSEQSBAIJ format */
3069: b = (Mat_SeqSBAIJ*)fact->data;
3070: b->singlemalloc = PETSC_FALSE;
3071: b->free_a = PETSC_TRUE;
3072: b->free_ij = PETSC_TRUE;
3074: PetscMalloc1(ui[am]+1,&b->a);
3076: b->j = uj;
3077: b->i = ui;
3078: b->diag = 0;
3079: b->ilen = 0;
3080: b->imax = 0;
3081: b->row = perm;
3082: b->col = perm;
3084: PetscObjectReference((PetscObject)perm);
3085: PetscObjectReference((PetscObject)perm);
3087: b->icol = iperm;
3088: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3090: PetscMalloc1(am+1,&b->solve_work);
3091: PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3092: b->maxnz = b->nz = ui[am];
3094: fact->info.factor_mallocs = reallocs;
3095: fact->info.fill_ratio_given = fill;
3096: if (ai[am] != 0) {
3097: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3098: } else {
3099: fact->info.fill_ratio_needed = 0.0;
3100: }
3101: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3102: return(0);
3103: }
3105: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3106: {
3107: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3108: PetscErrorCode ierr;
3109: PetscInt n = A->rmap->n;
3110: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3111: PetscScalar *x,sum;
3112: const PetscScalar *b;
3113: const MatScalar *aa = a->a,*v;
3114: PetscInt i,nz;
3117: if (!n) return(0);
3119: VecGetArrayRead(bb,&b);
3120: VecGetArrayWrite(xx,&x);
3122: /* forward solve the lower triangular */
3123: x[0] = b[0];
3124: v = aa;
3125: vi = aj;
3126: for (i=1; i<n; i++) {
3127: nz = ai[i+1] - ai[i];
3128: sum = b[i];
3129: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3130: v += nz;
3131: vi += nz;
3132: x[i] = sum;
3133: }
3135: /* backward solve the upper triangular */
3136: for (i=n-1; i>=0; i--) {
3137: v = aa + adiag[i+1] + 1;
3138: vi = aj + adiag[i+1] + 1;
3139: nz = adiag[i] - adiag[i+1]-1;
3140: sum = x[i];
3141: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3142: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3143: }
3145: PetscLogFlops(2.0*a->nz - A->cmap->n);
3146: VecRestoreArrayRead(bb,&b);
3147: VecRestoreArrayWrite(xx,&x);
3148: return(0);
3149: }
3151: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3152: {
3153: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3154: IS iscol = a->col,isrow = a->row;
3155: PetscErrorCode ierr;
3156: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3157: const PetscInt *rout,*cout,*r,*c;
3158: PetscScalar *x,*tmp,sum;
3159: const PetscScalar *b;
3160: const MatScalar *aa = a->a,*v;
3163: if (!n) return(0);
3165: VecGetArrayRead(bb,&b);
3166: VecGetArrayWrite(xx,&x);
3167: tmp = a->solve_work;
3169: ISGetIndices(isrow,&rout); r = rout;
3170: ISGetIndices(iscol,&cout); c = cout;
3172: /* forward solve the lower triangular */
3173: tmp[0] = b[r[0]];
3174: v = aa;
3175: vi = aj;
3176: for (i=1; i<n; i++) {
3177: nz = ai[i+1] - ai[i];
3178: sum = b[r[i]];
3179: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3180: tmp[i] = sum;
3181: v += nz; vi += nz;
3182: }
3184: /* backward solve the upper triangular */
3185: for (i=n-1; i>=0; i--) {
3186: v = aa + adiag[i+1]+1;
3187: vi = aj + adiag[i+1]+1;
3188: nz = adiag[i]-adiag[i+1]-1;
3189: sum = tmp[i];
3190: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3191: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3192: }
3194: ISRestoreIndices(isrow,&rout);
3195: ISRestoreIndices(iscol,&cout);
3196: VecRestoreArrayRead(bb,&b);
3197: VecRestoreArrayWrite(xx,&x);
3198: PetscLogFlops(2*a->nz - A->cmap->n);
3199: return(0);
3200: }
3202: /*
3203: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3204: */
3205: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3206: {
3207: Mat B = *fact;
3208: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3209: IS isicol;
3211: const PetscInt *r,*ic;
3212: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3213: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3214: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3215: PetscInt nlnk,*lnk;
3216: PetscBT lnkbt;
3217: PetscBool row_identity,icol_identity;
3218: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3219: const PetscInt *ics;
3220: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3221: PetscReal dt =info->dt,shift=info->shiftamount;
3222: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3223: PetscBool missing;
3226: if (dt == PETSC_DEFAULT) dt = 0.005;
3227: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3229: /* ------- symbolic factorization, can be reused ---------*/
3230: MatMissingDiagonal(A,&missing,&i);
3231: if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3232: adiag=a->diag;
3234: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3236: /* bdiag is location of diagonal in factor */
3237: PetscMalloc1(n+1,&bdiag); /* becomes b->diag */
3238: PetscMalloc1(n+1,&bdiag_rev); /* temporary */
3240: /* allocate row pointers bi */
3241: PetscMalloc1(2*n+2,&bi);
3243: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3244: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3245: nnz_max = ai[n]+2*n*dtcount+2;
3247: PetscMalloc1(nnz_max+1,&bj);
3248: PetscMalloc1(nnz_max+1,&ba);
3250: /* put together the new matrix */
3251: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);
3252: PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
3253: b = (Mat_SeqAIJ*)B->data;
3255: b->free_a = PETSC_TRUE;
3256: b->free_ij = PETSC_TRUE;
3257: b->singlemalloc = PETSC_FALSE;
3259: b->a = ba;
3260: b->j = bj;
3261: b->i = bi;
3262: b->diag = bdiag;
3263: b->ilen = 0;
3264: b->imax = 0;
3265: b->row = isrow;
3266: b->col = iscol;
3267: PetscObjectReference((PetscObject)isrow);
3268: PetscObjectReference((PetscObject)iscol);
3269: b->icol = isicol;
3271: PetscMalloc1(n+1,&b->solve_work);
3272: PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3273: b->maxnz = nnz_max;
3275: B->factortype = MAT_FACTOR_ILUDT;
3276: B->info.factor_mallocs = 0;
3277: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3278: /* ------- end of symbolic factorization ---------*/
3280: ISGetIndices(isrow,&r);
3281: ISGetIndices(isicol,&ic);
3282: ics = ic;
3284: /* linked list for storing column indices of the active row */
3285: nlnk = n + 1;
3286: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3288: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3289: PetscMalloc2(n,&im,n,&jtmp);
3290: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3291: PetscMalloc2(n,&rtmp,n,&vtmp);
3292: PetscArrayzero(rtmp,n);
3294: bi[0] = 0;
3295: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3296: bdiag_rev[n] = bdiag[0];
3297: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3298: for (i=0; i<n; i++) {
3299: /* copy initial fill into linked list */
3300: nzi = ai[r[i]+1] - ai[r[i]];
3301: if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3302: nzi_al = adiag[r[i]] - ai[r[i]];
3303: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3304: ajtmp = aj + ai[r[i]];
3305: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3307: /* load in initial (unfactored row) */
3308: aatmp = a->a + ai[r[i]];
3309: for (j=0; j<nzi; j++) {
3310: rtmp[ics[*ajtmp++]] = *aatmp++;
3311: }
3313: /* add pivot rows into linked list */
3314: row = lnk[n];
3315: while (row < i) {
3316: nzi_bl = bi[row+1] - bi[row] + 1;
3317: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3318: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3319: nzi += nlnk;
3320: row = lnk[row];
3321: }
3323: /* copy data from lnk into jtmp, then initialize lnk */
3324: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3326: /* numerical factorization */
3327: bjtmp = jtmp;
3328: row = *bjtmp++; /* 1st pivot row */
3329: while (row < i) {
3330: pc = rtmp + row;
3331: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3332: multiplier = (*pc) * (*pv);
3333: *pc = multiplier;
3334: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3335: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3336: pv = ba + bdiag[row+1] + 1;
3337: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3338: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3339: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3340: PetscLogFlops(1+2*nz);
3341: }
3342: row = *bjtmp++;
3343: }
3345: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3346: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3347: nzi_bl = 0; j = 0;
3348: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3349: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3350: nzi_bl++; j++;
3351: }
3352: nzi_bu = nzi - nzi_bl -1;
3353: while (j < nzi) {
3354: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3355: j++;
3356: }
3358: bjtmp = bj + bi[i];
3359: batmp = ba + bi[i];
3360: /* apply level dropping rule to L part */
3361: ncut = nzi_al + dtcount;
3362: if (ncut < nzi_bl) {
3363: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3364: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3365: } else {
3366: ncut = nzi_bl;
3367: }
3368: for (j=0; j<ncut; j++) {
3369: bjtmp[j] = jtmp[j];
3370: batmp[j] = vtmp[j];
3371: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3372: }
3373: bi[i+1] = bi[i] + ncut;
3374: nzi = ncut + 1;
3376: /* apply level dropping rule to U part */
3377: ncut = nzi_au + dtcount;
3378: if (ncut < nzi_bu) {
3379: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3380: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3381: } else {
3382: ncut = nzi_bu;
3383: }
3384: nzi += ncut;
3386: /* mark bdiagonal */
3387: bdiag[i+1] = bdiag[i] - (ncut + 1);
3388: bdiag_rev[n-i-1] = bdiag[i+1];
3389: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3390: bjtmp = bj + bdiag[i];
3391: batmp = ba + bdiag[i];
3392: *bjtmp = i;
3393: *batmp = diag_tmp; /* rtmp[i]; */
3394: if (*batmp == 0.0) {
3395: *batmp = dt+shift;
3396: /* printf(" row %d add shift %g\n",i,shift); */
3397: }
3398: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3399: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3401: bjtmp = bj + bdiag[i+1]+1;
3402: batmp = ba + bdiag[i+1]+1;
3403: for (k=0; k<ncut; k++) {
3404: bjtmp[k] = jtmp[nzi_bl+1+k];
3405: batmp[k] = vtmp[nzi_bl+1+k];
3406: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3407: }
3408: /* printf("\n"); */
3410: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3411: /*
3412: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3413: printf(" ----------------------------\n");
3414: */
3415: } /* for (i=0; i<n; i++) */
3416: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
3417: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3419: ISRestoreIndices(isrow,&r);
3420: ISRestoreIndices(isicol,&ic);
3422: PetscLLDestroy(lnk,lnkbt);
3423: PetscFree2(im,jtmp);
3424: PetscFree2(rtmp,vtmp);
3425: PetscFree(bdiag_rev);
3427: PetscLogFlops(B->cmap->n);
3428: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3430: ISIdentity(isrow,&row_identity);
3431: ISIdentity(isicol,&icol_identity);
3432: if (row_identity && icol_identity) {
3433: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3434: } else {
3435: B->ops->solve = MatSolve_SeqAIJ;
3436: }
3438: B->ops->solveadd = 0;
3439: B->ops->solvetranspose = 0;
3440: B->ops->solvetransposeadd = 0;
3441: B->ops->matsolve = 0;
3442: B->assembled = PETSC_TRUE;
3443: B->preallocated = PETSC_TRUE;
3444: return(0);
3445: }
3447: /* a wraper of MatILUDTFactor_SeqAIJ() */
3448: /*
3449: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3450: */
3452: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3453: {
3457: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3458: return(0);
3459: }
3461: /*
3462: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3463: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3464: */
3465: /*
3466: This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3467: */
3469: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3470: {
3471: Mat C =fact;
3472: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3473: IS isrow = b->row,isicol = b->icol;
3475: const PetscInt *r,*ic,*ics;
3476: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3477: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3478: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3479: PetscReal dt=info->dt,shift=info->shiftamount;
3480: PetscBool row_identity, col_identity;
3483: ISGetIndices(isrow,&r);
3484: ISGetIndices(isicol,&ic);
3485: PetscMalloc1(n+1,&rtmp);
3486: ics = ic;
3488: for (i=0; i<n; i++) {
3489: /* initialize rtmp array */
3490: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3491: bjtmp = bj + bi[i];
3492: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3493: rtmp[i] = 0.0;
3494: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3495: bjtmp = bj + bdiag[i+1] + 1;
3496: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3498: /* load in initial unfactored row of A */
3499: /* printf("row %d\n",i); */
3500: nz = ai[r[i]+1] - ai[r[i]];
3501: ajtmp = aj + ai[r[i]];
3502: v = aa + ai[r[i]];
3503: for (j=0; j<nz; j++) {
3504: rtmp[ics[*ajtmp++]] = v[j];
3505: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3506: }
3507: /* printf("\n"); */
3509: /* numerical factorization */
3510: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3511: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3512: k = 0;
3513: while (k < nzl) {
3514: row = *bjtmp++;
3515: /* printf(" prow %d\n",row); */
3516: pc = rtmp + row;
3517: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3518: multiplier = (*pc) * (*pv);
3519: *pc = multiplier;
3520: if (PetscAbsScalar(multiplier) > dt) {
3521: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3522: pv = b->a + bdiag[row+1] + 1;
3523: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3524: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3525: PetscLogFlops(1+2*nz);
3526: }
3527: k++;
3528: }
3530: /* finished row so stick it into b->a */
3531: /* L-part */
3532: pv = b->a + bi[i];
3533: pj = bj + bi[i];
3534: nzl = bi[i+1] - bi[i];
3535: for (j=0; j<nzl; j++) {
3536: pv[j] = rtmp[pj[j]];
3537: /* printf(" (%d,%g),",pj[j],pv[j]); */
3538: }
3540: /* diagonal: invert diagonal entries for simplier triangular solves */
3541: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3542: b->a[bdiag[i]] = 1.0/rtmp[i];
3543: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3545: /* U-part */
3546: pv = b->a + bdiag[i+1] + 1;
3547: pj = bj + bdiag[i+1] + 1;
3548: nzu = bdiag[i] - bdiag[i+1] - 1;
3549: for (j=0; j<nzu; j++) {
3550: pv[j] = rtmp[pj[j]];
3551: /* printf(" (%d,%g),",pj[j],pv[j]); */
3552: }
3553: /* printf("\n"); */
3554: }
3556: PetscFree(rtmp);
3557: ISRestoreIndices(isicol,&ic);
3558: ISRestoreIndices(isrow,&r);
3560: ISIdentity(isrow,&row_identity);
3561: ISIdentity(isicol,&col_identity);
3562: if (row_identity && col_identity) {
3563: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3564: } else {
3565: C->ops->solve = MatSolve_SeqAIJ;
3566: }
3567: C->ops->solveadd = 0;
3568: C->ops->solvetranspose = 0;
3569: C->ops->solvetransposeadd = 0;
3570: C->ops->matsolve = 0;
3571: C->assembled = PETSC_TRUE;
3572: C->preallocated = PETSC_TRUE;
3574: PetscLogFlops(C->cmap->n);
3575: return(0);
3576: }