Actual source code: mpiaij.c
petsc-3.9.1 2018-04-29
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <petsc/private/vecimpl.h>
5: #include <petsc/private/isimpl.h>
6: #include <petscblaslapack.h>
7: #include <petscsf.h>
9: /*MC
10: MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
12: This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13: and MATMPIAIJ otherwise. As a result, for single process communicators,
14: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15: for communicators controlling multiple processes. It is recommended that you call both of
16: the above preallocation routines for simplicity.
18: Options Database Keys:
19: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
21: Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The AIJ format also automatically
22: switches over to use inodes when enough of them exist.
24: Level: beginner
26: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27: M*/
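/*
   Illustrative usage sketch (not part of the library source; the local size n and the per-row
   nonzero estimates 5 and 2 are hypothetical placeholders): creating an AIJ matrix and, as
   recommended above, calling both preallocation routines so the same code runs on one or many
   MPI processes.

       Mat      A;
       PetscInt n = 100;                               local rows and columns (assumed)

       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(A,MATAIJ);
       MatSetFromOptions(A);                           honors -mat_type aij
       MatSeqAIJSetPreallocation(A,5,NULL);            used on a single-process communicator
       MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);     used on a multi-process communicator
       ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
       MatDestroy(&A);
*/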
29: /*MC
30: MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32: This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33: and MATMPIAIJCRL otherwise. As a result, for single process communicators,
34: MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35: for communicators controlling multiple processes. It is recommended that you call both of
36: the above preallocation routines for simplicity.
38: Options Database Keys:
39: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41: Level: beginner
43: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44: M*/
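/*
   Illustrative sketch (assumed workflow, not library source; A and n are as in the sketch
   above): the AIJCRL format can be selected in code, or at run time via -mat_type aijcrl once
   MatSetFromOptions() is in place.

       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(A,MATAIJCRL);                        or rely on -mat_type aijcrl
       MatSetFromOptions(A);
       MatSeqAIJSetPreallocation(A,5,NULL);
       MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
*/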
46: PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47: {
49: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
52: if (mat->A) {
53: MatSetBlockSizes(mat->A,rbs,cbs);
54: MatSetBlockSizes(mat->B,rbs,1);
55: }
56: return(0);
57: }
59: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60: {
61: PetscErrorCode ierr;
62: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
63: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
64: Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
65: const PetscInt *ia,*ib;
66: const MatScalar *aa,*bb;
67: PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
68: PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
71: *keptrows = 0;
72: ia = a->i;
73: ib = b->i;
74: for (i=0; i<m; i++) {
75: na = ia[i+1] - ia[i];
76: nb = ib[i+1] - ib[i];
77: if (!na && !nb) {
78: cnt++;
79: goto ok1;
80: }
81: aa = a->a + ia[i];
82: for (j=0; j<na; j++) {
83: if (aa[j] != 0.0) goto ok1;
84: }
85: bb = b->a + ib[i];
86: for (j=0; j <nb; j++) {
87: if (bb[j] != 0.0) goto ok1;
88: }
89: cnt++;
90: ok1:;
91: }
92: MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));
93: if (!n0rows) return(0);
94: PetscMalloc1(M->rmap->n-cnt,&rows);
95: cnt = 0;
96: for (i=0; i<m; i++) {
97: na = ia[i+1] - ia[i];
98: nb = ib[i+1] - ib[i];
99: if (!na && !nb) continue;
100: aa = a->a + ia[i];
101: for (j=0; j<na;j++) {
102: if (aa[j] != 0.0) {
103: rows[cnt++] = rstart + i;
104: goto ok2;
105: }
106: }
107: bb = b->a + ib[i];
108: for (j=0; j<nb; j++) {
109: if (bb[j] != 0.0) {
110: rows[cnt++] = rstart + i;
111: goto ok2;
112: }
113: }
114: ok2:;
115: }
116: ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
117: return(0);
118: }
120: PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121: {
122: PetscErrorCode ierr;
123: Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
126: if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127: MatDiagonalSet(aij->A,D,is);
128: } else {
129: MatDiagonalSet_Default(Y,D,is);
130: }
131: return(0);
132: }
134: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135: {
136: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
138: PetscInt i,rstart,nrows,*rows;
141: *zrows = NULL;
142: MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
143: MatGetOwnershipRange(M,&rstart,NULL);
144: for (i=0; i<nrows; i++) rows[i] += rstart;
145: ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
146: return(0);
147: }
149: PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150: {
152: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
153: PetscInt i,n,*garray = aij->garray;
154: Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
155: Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
156: PetscReal *work;
159: MatGetSize(A,NULL,&n);
160: PetscCalloc1(n,&work);
161: if (type == NORM_2) {
162: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164: }
165: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167: }
168: } else if (type == NORM_1) {
169: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170: work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171: }
172: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173: work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174: }
175: } else if (type == NORM_INFINITY) {
176: for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177: work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178: }
179: for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180: work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181: }
183: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184: if (type == NORM_INFINITY) {
185: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
186: } else {
187: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
188: }
189: PetscFree(work);
190: if (type == NORM_2) {
191: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192: }
193: return(0);
194: }
196: PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197: {
198: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
199: IS sis,gis;
200: PetscErrorCode ierr;
201: const PetscInt *isis,*igis;
202: PetscInt n,*iis,nsis,ngis,rstart,i;
205: MatFindOffBlockDiagonalEntries(a->A,&sis);
206: MatFindNonzeroRows(a->B,&gis);
207: ISGetSize(gis,&ngis);
208: ISGetSize(sis,&nsis);
209: ISGetIndices(sis,&isis);
210: ISGetIndices(gis,&igis);
212: PetscMalloc1(ngis+nsis,&iis);
213: PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));
214: PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));
215: n = ngis + nsis;
216: PetscSortRemoveDupsInt(&n,iis);
217: MatGetOwnershipRange(A,&rstart,NULL);
218: for (i=0; i<n; i++) iis[i] += rstart;
219: ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);
221: ISRestoreIndices(sis,&isis);
222: ISRestoreIndices(gis,&igis);
223: ISDestroy(&sis);
224: ISDestroy(&gis);
225: return(0);
226: }
228: /*
229: Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230: MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
232: Only for square matrices
234: Used by a preconditioner, hence PETSC_EXTERN
235: */
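/*
   Illustrative call sequence (hypothetical caller, not part of this file): gmat is a square
   SeqAIJ matrix held on process 0, m is the number of local rows each process wants, and dist
   receives the distributed matrix; a second call with MAT_REUSE_MATRIX only refreshes the
   numerical values from process 0.

       Mat dist;
       MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);
       ...
       MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);
*/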
236: PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237: {
238: PetscMPIInt rank,size;
239: PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
241: Mat mat;
242: Mat_SeqAIJ *gmata;
243: PetscMPIInt tag;
244: MPI_Status status;
245: PetscBool aij;
246: MatScalar *gmataa,*ao,*ad,*gmataarestore=0;
249: MPI_Comm_rank(comm,&rank);
250: MPI_Comm_size(comm,&size);
251: if (!rank) {
252: PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
253: if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254: }
255: if (reuse == MAT_INITIAL_MATRIX) {
256: MatCreate(comm,&mat);
257: MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
258: MatGetBlockSizes(gmat,&bses[0],&bses[1]);
259: MPI_Bcast(bses,2,MPIU_INT,0,comm);
260: MatSetBlockSizes(mat,bses[0],bses[1]);
261: MatSetType(mat,MATAIJ);
262: PetscMalloc1(size+1,&rowners);
263: PetscMalloc2(m,&dlens,m,&olens);
264: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
266: rowners[0] = 0;
267: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268: rstart = rowners[rank];
269: rend = rowners[rank+1];
270: PetscObjectGetNewTag((PetscObject)mat,&tag);
271: if (!rank) {
272: gmata = (Mat_SeqAIJ*) gmat->data;
273: /* send row lengths to all processors */
274: for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275: for (i=1; i<size; i++) {
276: MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
277: }
278: /* determine the number of diagonal and off-diagonal nonzeros in each row */
279: PetscMemzero(olens,m*sizeof(PetscInt));
280: PetscCalloc1(m,&ld);
281: jj = 0;
282: for (i=0; i<m; i++) {
283: for (j=0; j<dlens[i]; j++) {
284: if (gmata->j[jj] < rstart) ld[i]++;
285: if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286: jj++;
287: }
288: }
289: /* send column indices to other processes */
290: for (i=1; i<size; i++) {
291: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292: MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
293: MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
294: }
296: /* send numerical values to other processes */
297: for (i=1; i<size; i++) {
298: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299: MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
300: }
301: gmataa = gmata->a;
302: gmataj = gmata->j;
304: } else {
305: /* receive row lengths */
306: MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
307: /* receive column indices */
308: MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
309: PetscMalloc2(nz,&gmataa,nz,&gmataj);
310: MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
311: /* determine the number of diagonal and off-diagonal nonzeros in each row */
312: PetscMemzero(olens,m*sizeof(PetscInt));
313: PetscCalloc1(m,&ld);
314: jj = 0;
315: for (i=0; i<m; i++) {
316: for (j=0; j<dlens[i]; j++) {
317: if (gmataj[jj] < rstart) ld[i]++;
318: if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319: jj++;
320: }
321: }
322: /* receive numerical values */
323: PetscMemzero(gmataa,nz*sizeof(PetscScalar));
324: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
325: }
326: /* set preallocation */
327: for (i=0; i<m; i++) {
328: dlens[i] -= olens[i];
329: }
330: MatSeqAIJSetPreallocation(mat,0,dlens);
331: MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);
333: for (i=0; i<m; i++) {
334: dlens[i] += olens[i];
335: }
336: cnt = 0;
337: for (i=0; i<m; i++) {
338: row = rstart + i;
339: MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
340: cnt += dlens[i];
341: }
342: if (rank) {
343: PetscFree2(gmataa,gmataj);
344: }
345: PetscFree2(dlens,olens);
346: PetscFree(rowners);
348: ((Mat_MPIAIJ*)(mat->data))->ld = ld;
350: *inmat = mat;
351: } else { /* column indices are already set; only need to move over numerical values from process 0 */
352: Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353: Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354: mat = *inmat;
355: PetscObjectGetNewTag((PetscObject)mat,&tag);
356: if (!rank) {
357: /* send numerical values to other processes */
358: gmata = (Mat_SeqAIJ*) gmat->data;
359: MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
360: gmataa = gmata->a;
361: for (i=1; i<size; i++) {
362: nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363: MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
364: }
365: nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366: } else {
367: /* receive numerical values from process 0*/
368: nz = Ad->nz + Ao->nz;
369: PetscMalloc1(nz,&gmataa); gmataarestore = gmataa;
370: MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
371: }
372: /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373: ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374: ad = Ad->a;
375: ao = Ao->a;
376: if (mat->rmap->n) {
377: i = 0;
378: nz = ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
379: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
380: }
381: for (i=1; i<mat->rmap->n; i++) {
382: nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
383: nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
384: }
385: i--;
386: if (mat->rmap->n) {
387: nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));
388: }
389: if (rank) {
390: PetscFree(gmataarestore);
391: }
392: }
393: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
394: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
395: return(0);
396: }
398: /*
399: Local utility routine that creates a mapping from the global column
400: number to the local number in the off-diagonal part of the local
401: storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
402: a slightly higher hash-table cost; without it, it is not scalable (each process
403: holds an order-N integer array) but access is fast.
404: */
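/*
   Lookup sketch (condensed from the uses in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below): translating a global column index gcol into its local position col within the
   off-diagonal block; gcol and col are placeholder names.

       #if defined(PETSC_USE_CTABLE)
         PetscTableFind(aij->colmap,gcol+1,&col); col--;        col is -1 if gcol is absent
       #else
         col = aij->colmap[gcol] - 1;                           direct index into the order-N array
       #endif
*/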
405: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406: {
407: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
409: PetscInt n = aij->B->cmap->n,i;
412: if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413: #if defined(PETSC_USE_CTABLE)
414: PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
415: for (i=0; i<n; i++) {
416: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
417: }
418: #else
419: PetscCalloc1(mat->cmap->N+1,&aij->colmap);
420: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
421: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422: #endif
423: return(0);
424: }
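/*
   The two macros below insert value at (row,col) into the diagonal (A) respectively off-diagonal (B)
   SeqAIJ block of the local rows: a short binary search narrows the position within the row, an
   existing entry is added to or overwritten, and otherwise a new entry is created, shifting the
   remaining entries of the row and reallocating through MatSeqXAIJReallocateAIJ() when the row is full.
*/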
426: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
427: { \
428: if (col <= lastcol1) low1 = 0; \
429: else high1 = nrow1; \
430: lastcol1 = col;\
431: while (high1-low1 > 5) { \
432: t = (low1+high1)/2; \
433: if (rp1[t] > col) high1 = t; \
434: else low1 = t; \
435: } \
436: for (_i=low1; _i<high1; _i++) { \
437: if (rp1[_i] > col) break; \
438: if (rp1[_i] == col) { \
439: if (addv == ADD_VALUES) ap1[_i] += value; \
440: else ap1[_i] = value; \
441: goto a_noinsert; \
442: } \
443: } \
444: if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445: if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
446: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448: N = nrow1++ - 1; a->nz++; high1++; \
449: /* shift up all the later entries in this row */ \
450: for (ii=N; ii>=_i; ii--) { \
451: rp1[ii+1] = rp1[ii]; \
452: ap1[ii+1] = ap1[ii]; \
453: } \
454: rp1[_i] = col; \
455: ap1[_i] = value; \
456: A->nonzerostate++;\
457: a_noinsert: ; \
458: ailen[row] = nrow1; \
459: }
461: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462: { \
463: if (col <= lastcol2) low2 = 0; \
464: else high2 = nrow2; \
465: lastcol2 = col; \
466: while (high2-low2 > 5) { \
467: t = (low2+high2)/2; \
468: if (rp2[t] > col) high2 = t; \
469: else low2 = t; \
470: } \
471: for (_i=low2; _i<high2; _i++) { \
472: if (rp2[_i] > col) break; \
473: if (rp2[_i] == col) { \
474: if (addv == ADD_VALUES) ap2[_i] += value; \
475: else ap2[_i] = value; \
476: goto b_noinsert; \
477: } \
478: } \
479: if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480: if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
481: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483: N = nrow2++ - 1; b->nz++; high2++; \
484: /* shift up all the later entries in this row */ \
485: for (ii=N; ii>=_i; ii--) { \
486: rp2[ii+1] = rp2[ii]; \
487: ap2[ii+1] = ap2[ii]; \
488: } \
489: rp2[_i] = col; \
490: ap2[_i] = value; \
491: B->nonzerostate++; \
492: b_noinsert: ; \
493: bilen[row] = nrow2; \
494: }
496: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497: {
498: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
499: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
501: PetscInt l,*garray = mat->garray,diag;
504: /* code only works for square matrices A */
506: /* find size of row to the left of the diagonal part */
507: MatGetOwnershipRange(A,&diag,0);
508: row = row - diag;
509: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510: if (garray[b->j[b->i[row]+l]] > diag) break;
511: }
512: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
514: /* diagonal part */
515: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
517: /* right of diagonal part */
518: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
519: return(0);
520: }
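/*
   MatSetValues_MPIAIJ(): entries in locally owned rows go directly into the diagonal (A) or
   off-diagonal (B) block through the macros above; if a column is not yet present in B after a
   previous assembly, the matrix is disassembled so the new entry can be accepted. Entries in rows
   owned by other processes are stashed and communicated during MatAssemblyBegin/End().
*/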
522: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523: {
524: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
525: PetscScalar value;
527: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
528: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529: PetscBool roworiented = aij->roworiented;
531: /* Some Variables required in the macro */
532: Mat A = aij->A;
533: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
534: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535: MatScalar *aa = a->a;
536: PetscBool ignorezeroentries = a->ignorezeroentries;
537: Mat B = aij->B;
538: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
539: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540: MatScalar *ba = b->a;
542: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543: PetscInt nonew;
544: MatScalar *ap1,*ap2;
547: for (i=0; i<m; i++) {
548: if (im[i] < 0) continue;
549: #if defined(PETSC_USE_DEBUG)
550: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551: #endif
552: if (im[i] >= rstart && im[i] < rend) {
553: row = im[i] - rstart;
554: lastcol1 = -1;
555: rp1 = aj + ai[row];
556: ap1 = aa + ai[row];
557: rmax1 = aimax[row];
558: nrow1 = ailen[row];
559: low1 = 0;
560: high1 = nrow1;
561: lastcol2 = -1;
562: rp2 = bj + bi[row];
563: ap2 = ba + bi[row];
564: rmax2 = bimax[row];
565: nrow2 = bilen[row];
566: low2 = 0;
567: high2 = nrow2;
569: for (j=0; j<n; j++) {
570: if (roworiented) value = v[i*n+j];
571: else value = v[i+j*m];
572: if (in[j] >= cstart && in[j] < cend) {
573: col = in[j] - cstart;
574: nonew = a->nonew;
575: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576: MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577: } else if (in[j] < 0) continue;
578: #if defined(PETSC_USE_DEBUG)
579: else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580: #endif
581: else {
582: if (mat->was_assembled) {
583: if (!aij->colmap) {
584: MatCreateColmap_MPIAIJ_Private(mat);
585: }
586: #if defined(PETSC_USE_CTABLE)
587: PetscTableFind(aij->colmap,in[j]+1,&col);
588: col--;
589: #else
590: col = aij->colmap[in[j]] - 1;
591: #endif
592: if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593: MatDisAssemble_MPIAIJ(mat);
594: col = in[j];
595: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596: B = aij->B;
597: b = (Mat_SeqAIJ*)B->data;
598: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599: rp2 = bj + bi[row];
600: ap2 = ba + bi[row];
601: rmax2 = bimax[row];
602: nrow2 = bilen[row];
603: low2 = 0;
604: high2 = nrow2;
605: bm = aij->B->rmap->n;
606: ba = b->a;
607: } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608: } else col = in[j];
609: nonew = b->nonew;
610: MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611: }
612: }
613: } else {
614: if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615: if (!aij->donotstash) {
616: mat->assembled = PETSC_FALSE;
617: if (roworiented) {
618: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
619: } else {
620: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
621: }
622: }
623: }
624: }
625: return(0);
626: }
628: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629: {
630: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
632: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
636: for (i=0; i<m; i++) {
637: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639: if (idxm[i] >= rstart && idxm[i] < rend) {
640: row = idxm[i] - rstart;
641: for (j=0; j<n; j++) {
642: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644: if (idxn[j] >= cstart && idxn[j] < cend) {
645: col = idxn[j] - cstart;
646: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
647: } else {
648: if (!aij->colmap) {
649: MatCreateColmap_MPIAIJ_Private(mat);
650: }
651: #if defined(PETSC_USE_CTABLE)
652: PetscTableFind(aij->colmap,idxn[j]+1,&col);
653: col--;
654: #else
655: col = aij->colmap[idxn[j]] - 1;
656: #endif
657: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658: else {
659: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
660: }
661: }
662: }
663: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664: }
665: return(0);
666: }
668: extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
670: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671: {
672: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
674: PetscInt nstash,reallocs;
677: if (aij->donotstash || mat->nooffprocentries) return(0);
679: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
680: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
681: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
682: return(0);
683: }
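/*
   MatAssemblyEnd_MPIAIJ(): receives the stashed off-process entries and inserts them row by row,
   assembles the diagonal block, disassembles if any process had to, sets up the scatter used by
   matrix-vector products on the first final assembly, and finally assembles the off-diagonal block.
*/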
685: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686: {
687: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
688: Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data;
690: PetscMPIInt n;
691: PetscInt i,j,rstart,ncols,flg;
692: PetscInt *row,*col;
693: PetscBool other_disassembled;
694: PetscScalar *val;
696: /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
699: if (!aij->donotstash && !mat->nooffprocentries) {
700: while (1) {
701: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
702: if (!flg) break;
704: for (i=0; i<n; ) {
705: /* Now identify the consecutive vals belonging to the same row */
706: for (j=i,rstart=row[j]; j<n; j++) {
707: if (row[j] != rstart) break;
708: }
709: if (j < n) ncols = j-i;
710: else ncols = n-i;
711: /* Now assemble all these values with a single function call */
712: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
714: i = j;
715: }
716: }
717: MatStashScatterEnd_Private(&mat->stash);
718: }
719: MatAssemblyBegin(aij->A,mode);
720: MatAssemblyEnd(aij->A,mode);
722: /* determine if any processor has disassembled, if so we must
723: also disassemble ourselves, in order that we may reassemble. */
724: /*
725: if nonzero structure of submatrix B cannot change then we know that
726: no processor disassembled thus we can skip this stuff
727: */
728: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729: MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
730: if (mat->was_assembled && !other_disassembled) {
731: MatDisAssemble_MPIAIJ(mat);
732: }
733: }
734: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735: MatSetUpMultiply_MPIAIJ(mat);
736: }
737: MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
738: MatAssemblyBegin(aij->B,mode);
739: MatAssemblyEnd(aij->B,mode);
741: PetscFree2(aij->rowvalues,aij->rowindices);
743: aij->rowvalues = 0;
745: VecDestroy(&aij->diag);
746: if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
748: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750: PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751: MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
752: }
753: return(0);
754: }
756: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757: {
758: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
762: MatZeroEntries(l->A);
763: MatZeroEntries(l->B);
764: return(0);
765: }
767: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768: {
769: Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
770: PetscInt *lrows;
771: PetscInt r, len;
775: /* get locally owned rows */
776: MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);
777: /* fix right hand side if needed */
778: if (x && b) {
779: const PetscScalar *xx;
780: PetscScalar *bb;
782: VecGetArrayRead(x, &xx);
783: VecGetArray(b, &bb);
784: for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785: VecRestoreArrayRead(x, &xx);
786: VecRestoreArray(b, &bb);
787: }
788: /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
789: MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
790: if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791: PetscBool cong;
792: PetscLayoutCompare(A->rmap,A->cmap,&cong);
793: if (cong) A->congruentlayouts = 1;
794: else A->congruentlayouts = 0;
795: }
796: if ((diag != 0.0) && A->congruentlayouts) {
797: MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
798: } else if (diag != 0.0) {
799: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
800: if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801: for (r = 0; r < len; ++r) {
802: const PetscInt row = lrows[r] + A->rmap->rstart;
803: MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
804: }
805: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
806: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
807: } else {
808: MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
809: }
810: PetscFree(lrows);
812: /* only change matrix nonzero state if pattern was allowed to be changed */
813: if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814: PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815: MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
816: }
817: return(0);
818: }
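/*
   MatZeroRowsColumns_MPIAIJ(): a PetscSF maps the (possibly off-process) requested rows to their
   owners, the diagonal block is handled by MatZeroRowsColumns() on the local A, and a scattered
   mask vector marks which ghost columns of the off-diagonal block must be zeroed, adjusting the
   right-hand side b from x where requested.
*/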
820: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821: {
822: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
823: PetscErrorCode ierr;
824: PetscMPIInt n = A->rmap->n;
825: PetscInt i,j,r,m,p = 0,len = 0;
826: PetscInt *lrows,*owners = A->rmap->range;
827: PetscSFNode *rrows;
828: PetscSF sf;
829: const PetscScalar *xx;
830: PetscScalar *bb,*mask;
831: Vec xmask,lmask;
832: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
833: const PetscInt *aj, *ii,*ridx;
834: PetscScalar *aa;
837: /* Create SF where leaves are input rows and roots are owned rows */
838: PetscMalloc1(n, &lrows);
839: for (r = 0; r < n; ++r) lrows[r] = -1;
840: PetscMalloc1(N, &rrows);
841: for (r = 0; r < N; ++r) {
842: const PetscInt idx = rows[r];
843: if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844: if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845: PetscLayoutFindOwner(A->rmap,idx,&p);
846: }
847: rrows[r].rank = p;
848: rrows[r].index = rows[r] - owners[p];
849: }
850: PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
851: PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
852: /* Collect flags for rows to be zeroed */
853: PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
854: PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
855: PetscSFDestroy(&sf);
856: /* Compress and put in row numbers */
857: for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858: /* zero diagonal part of matrix */
859: MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
860: /* handle off diagonal part of matrix */
861: MatCreateVecs(A,&xmask,NULL);
862: VecDuplicate(l->lvec,&lmask);
863: VecGetArray(xmask,&bb);
864: for (i=0; i<len; i++) bb[lrows[i]] = 1;
865: VecRestoreArray(xmask,&bb);
866: VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
867: VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
868: VecDestroy(&xmask);
869: if (x) {
870: VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
871: VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
872: VecGetArrayRead(l->lvec,&xx);
873: VecGetArray(b,&bb);
874: }
875: VecGetArray(lmask,&mask);
876: /* remove zeroed rows of off diagonal matrix */
877: ii = aij->i;
878: for (i=0; i<len; i++) {
879: PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));
880: }
881: /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882: if (aij->compressedrow.use) {
883: m = aij->compressedrow.nrows;
884: ii = aij->compressedrow.i;
885: ridx = aij->compressedrow.rindex;
886: for (i=0; i<m; i++) {
887: n = ii[i+1] - ii[i];
888: aj = aij->j + ii[i];
889: aa = aij->a + ii[i];
891: for (j=0; j<n; j++) {
892: if (PetscAbsScalar(mask[*aj])) {
893: if (b) bb[*ridx] -= *aa*xx[*aj];
894: *aa = 0.0;
895: }
896: aa++;
897: aj++;
898: }
899: ridx++;
900: }
901: } else { /* do not use compressed row format */
902: m = l->B->rmap->n;
903: for (i=0; i<m; i++) {
904: n = ii[i+1] - ii[i];
905: aj = aij->j + ii[i];
906: aa = aij->a + ii[i];
907: for (j=0; j<n; j++) {
908: if (PetscAbsScalar(mask[*aj])) {
909: if (b) bb[i] -= *aa*xx[*aj];
910: *aa = 0.0;
911: }
912: aa++;
913: aj++;
914: }
915: }
916: }
917: if (x) {
918: VecRestoreArray(b,&bb);
919: VecRestoreArrayRead(l->lvec,&xx);
920: }
921: VecRestoreArray(lmask,&mask);
922: VecDestroy(&lmask);
923: PetscFree(lrows);
925: /* only change matrix nonzero state if pattern was allowed to be changed */
926: if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927: PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928: MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
929: }
930: return(0);
931: }
933: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934: {
935: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
937: PetscInt nt;
938: VecScatter Mvctx = a->Mvctx;
941: VecGetLocalSize(xx,&nt);
942: if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
944: VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
945: (*a->A->ops->mult)(a->A,xx,yy);
946: VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
947: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
948: return(0);
949: }
951: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952: {
953: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
957: MatMultDiagonalBlock(a->A,bb,xx);
958: return(0);
959: }
961: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
962: {
963: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
965: VecScatter Mvctx = a->Mvctx;
968: if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
969: VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
970: (*a->A->ops->multadd)(a->A,xx,yy,zz);
971: VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
972: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
973: return(0);
974: }
976: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
977: {
978: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
980: PetscBool merged;
983: VecScatterGetMerged(a->Mvctx,&merged);
984: /* do nondiagonal part */
985: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
986: if (!merged) {
987: /* send it on its way */
988: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
989: /* do local part */
990: (*a->A->ops->multtranspose)(a->A,xx,yy);
991: /* receive remote parts: note this assumes the values are not actually */
992: /* added in yy until the next line, */
993: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
994: } else {
995: /* do local part */
996: (*a->A->ops->multtranspose)(a->A,xx,yy);
997: /* send it on its way */
998: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
999: /* values actually were received in the Begin() but we need to call this nop */
1000: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1001: }
1002: return(0);
1003: }
1005: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1006: {
1007: MPI_Comm comm;
1008: Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1009: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1010: IS Me,Notme;
1012: PetscInt M,N,first,last,*notme,i;
1013: PetscMPIInt size;
1016: /* Easy test: symmetric diagonal block */
1017: Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1018: MatIsTranspose(Adia,Bdia,tol,f);
1019: if (!*f) return(0);
1020: PetscObjectGetComm((PetscObject)Amat,&comm);
1021: MPI_Comm_size(comm,&size);
1022: if (size == 1) return(0);
1024: /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1025: MatGetSize(Amat,&M,&N);
1026: MatGetOwnershipRange(Amat,&first,&last);
1027: PetscMalloc1(N-last+first,&notme);
1028: for (i=0; i<first; i++) notme[i] = i;
1029: for (i=last; i<M; i++) notme[i-last+first] = i;
1030: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1031: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1032: MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1033: Aoff = Aoffs[0];
1034: MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1035: Boff = Boffs[0];
1036: MatIsTranspose(Aoff,Boff,tol,f);
1037: MatDestroyMatrices(1,&Aoffs);
1038: MatDestroyMatrices(1,&Boffs);
1039: ISDestroy(&Me);
1040: ISDestroy(&Notme);
1041: PetscFree(notme);
1042: return(0);
1043: }
1045: PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
1046: {
1050: MatIsTranspose_MPIAIJ(A,A,tol,f);
1051: return(0);
1052: }
1054: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055: {
1056: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1060: /* do nondiagonal part */
1061: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1062: /* send it on its way */
1063: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1064: /* do local part */
1065: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1066: /* receive remote parts */
1067: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1068: return(0);
1069: }
1071: /*
1072: This only works correctly for square matrices where the subblock A->A is the
1073: diagonal block
1074: */
1075: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1076: {
1078: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1081: if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1082: if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1083: MatGetDiagonal(a->A,v);
1084: return(0);
1085: }
1087: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1088: {
1089: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1093: MatScale(a->A,aa);
1094: MatScale(a->B,aa);
1095: return(0);
1096: }
1098: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1099: {
1100: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1104: #if defined(PETSC_USE_LOG)
1105: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1106: #endif
1107: MatStashDestroy_Private(&mat->stash);
1108: VecDestroy(&aij->diag);
1109: MatDestroy(&aij->A);
1110: MatDestroy(&aij->B);
1111: #if defined(PETSC_USE_CTABLE)
1112: PetscTableDestroy(&aij->colmap);
1113: #else
1114: PetscFree(aij->colmap);
1115: #endif
1116: PetscFree(aij->garray);
1117: VecDestroy(&aij->lvec);
1118: VecScatterDestroy(&aij->Mvctx);
1119: if (aij->Mvctx_mpi1) {VecScatterDestroy(&aij->Mvctx_mpi1);}
1120: PetscFree2(aij->rowvalues,aij->rowindices);
1121: PetscFree(aij->ld);
1122: PetscFree(mat->data);
1124: PetscObjectChangeTypeName((PetscObject)mat,0);
1125: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1126: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1127: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1128: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1129: PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);
1130: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1131: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1132: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1133: #if defined(PETSC_HAVE_ELEMENTAL)
1134: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);
1135: #endif
1136: #if defined(PETSC_HAVE_HYPRE)
1137: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);
1138: PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);
1139: #endif
1140: return(0);
1141: }
1143: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1144: {
1145: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1146: Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1147: Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1149: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1150: int fd;
1151: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
1152: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1153: PetscScalar *column_values;
1154: PetscInt message_count,flowcontrolcount;
1155: FILE *file;
1158: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1159: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
1160: nz = A->nz + B->nz;
1161: PetscViewerBinaryGetDescriptor(viewer,&fd);
1162: if (!rank) {
1163: header[0] = MAT_FILE_CLASSID;
1164: header[1] = mat->rmap->N;
1165: header[2] = mat->cmap->N;
1167: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1168: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
1169: /* get largest number of rows any processor has */
1170: rlen = mat->rmap->n;
1171: range = mat->rmap->range;
1172: for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1173: } else {
1174: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1175: rlen = mat->rmap->n;
1176: }
1178: /* load up the local row counts */
1179: PetscMalloc1(rlen+1,&row_lengths);
1180: for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1182: /* store the row lengths to the file */
1183: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1184: if (!rank) {
1185: PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);
1186: for (i=1; i<size; i++) {
1187: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1188: rlen = range[i+1] - range[i];
1189: MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1190: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
1191: }
1192: PetscViewerFlowControlEndMaster(viewer,&message_count);
1193: } else {
1194: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1195: MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1196: PetscViewerFlowControlEndWorker(viewer,&message_count);
1197: }
1198: PetscFree(row_lengths);
1200: /* load up the local column indices */
1201: nzmax = nz; /* the 0th process needs as much space as the largest process needs */
1202: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));
1203: PetscMalloc1(nzmax+1,&column_indices);
1204: cnt = 0;
1205: for (i=0; i<mat->rmap->n; i++) {
1206: for (j=B->i[i]; j<B->i[i+1]; j++) {
1207: if ((col = garray[B->j[j]]) > cstart) break;
1208: column_indices[cnt++] = col;
1209: }
1210: for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1211: for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1212: }
1213: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1215: /* store the column indices to the file */
1216: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1217: if (!rank) {
1218: MPI_Status status;
1219: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
1220: for (i=1; i<size; i++) {
1221: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1222: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1223: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1224: MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));
1225: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
1226: }
1227: PetscViewerFlowControlEndMaster(viewer,&message_count);
1228: } else {
1229: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1230: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1231: MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1232: PetscViewerFlowControlEndWorker(viewer,&message_count);
1233: }
1234: PetscFree(column_indices);
1236: /* load up the local column values */
1237: PetscMalloc1(nzmax+1,&column_values);
1238: cnt = 0;
1239: for (i=0; i<mat->rmap->n; i++) {
1240: for (j=B->i[i]; j<B->i[i+1]; j++) {
1241: if (garray[B->j[j]] > cstart) break;
1242: column_values[cnt++] = B->a[j];
1243: }
1244: for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1245: for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1246: }
1247: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1249: /* store the column values to the file */
1250: PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
1251: if (!rank) {
1252: MPI_Status status;
1253: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
1254: for (i=1; i<size; i++) {
1255: PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
1256: MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);
1257: if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1258: MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));
1259: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
1260: }
1261: PetscViewerFlowControlEndMaster(viewer,&message_count);
1262: } else {
1263: PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
1264: MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));
1265: MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
1266: PetscViewerFlowControlEndWorker(viewer,&message_count);
1267: }
1268: PetscFree(column_values);
1270: PetscViewerBinaryGetInfoPointer(viewer,&file);
1271: if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1272: return(0);
1273: }
1275: #include <petscdraw.h>
1276: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277: {
1278: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1279: PetscErrorCode ierr;
1280: PetscMPIInt rank = aij->rank,size = aij->size;
1281: PetscBool isdraw,iascii,isbinary;
1282: PetscViewer sviewer;
1283: PetscViewerFormat format;
1286: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1287: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1288: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1289: if (iascii) {
1290: PetscViewerGetFormat(viewer,&format);
1291: if (format == PETSC_VIEWER_LOAD_BALANCE) {
1292: PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1293: PetscMalloc1(size,&nz);
1294: MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
1295: for (i=0; i<(PetscInt)size; i++) {
1296: nmax = PetscMax(nmax,nz[i]);
1297: nmin = PetscMin(nmin,nz[i]);
1298: navg += nz[i];
1299: }
1300: PetscFree(nz);
1301: navg = navg/size;
1302: PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);
1303: return(0);
1304: }
1305: PetscViewerGetFormat(viewer,&format);
1306: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1307: MatInfo info;
1308: PetscBool inodes;
1310: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1311: MatGetInfo(mat,MAT_LOCAL,&info);
1312: MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);
1313: PetscViewerASCIIPushSynchronized(viewer);
1314: if (!inodes) {
1315: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1316: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);
1317: } else {
1318: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1319: rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);
1320: }
1321: MatGetInfo(aij->A,MAT_LOCAL,&info);
1322: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1323: MatGetInfo(aij->B,MAT_LOCAL,&info);
1324: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
1325: PetscViewerFlush(viewer);
1326: PetscViewerASCIIPopSynchronized(viewer);
1327: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1328: VecScatterView(aij->Mvctx,viewer);
1329: return(0);
1330: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1331: PetscInt inodecount,inodelimit,*inodes;
1332: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1333: if (inodes) {
1334: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
1335: } else {
1336: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1337: }
1338: return(0);
1339: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1340: return(0);
1341: }
1342: } else if (isbinary) {
1343: if (size == 1) {
1344: PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1345: MatView(aij->A,viewer);
1346: } else {
1347: MatView_MPIAIJ_Binary(mat,viewer);
1348: }
1349: return(0);
1350: } else if (isdraw) {
1351: PetscDraw draw;
1352: PetscBool isnull;
1353: PetscViewerDrawGetDraw(viewer,0,&draw);
1354: PetscDrawIsNull(draw,&isnull);
1355: if (isnull) return(0);
1356: }
1358: {
1359: /* assemble the entire matrix onto first processor. */
1360: Mat A;
1361: Mat_SeqAIJ *Aloc;
1362: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1363: MatScalar *a;
1365: MatCreate(PetscObjectComm((PetscObject)mat),&A);
1366: if (!rank) {
1367: MatSetSizes(A,M,N,M,N);
1368: } else {
1369: MatSetSizes(A,0,0,M,N);
1370: }
1371: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1372: MatSetType(A,MATMPIAIJ);
1373: MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);
1374: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
1375: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
1377: /* copy over the A part */
1378: Aloc = (Mat_SeqAIJ*)aij->A->data;
1379: m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1380: row = mat->rmap->rstart;
1381: for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1382: for (i=0; i<m; i++) {
1383: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
1384: row++;
1385: a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1386: }
1387: aj = Aloc->j;
1388: for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1390: /* copy over the B part */
1391: Aloc = (Mat_SeqAIJ*)aij->B->data;
1392: m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1393: row = mat->rmap->rstart;
1394: PetscMalloc1(ai[m]+1,&cols);
1395: ct = cols;
1396: for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1397: for (i=0; i<m; i++) {
1398: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
1399: row++;
1400: a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1401: }
1402: PetscFree(ct);
1403: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1404: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1405: /*
1406: Everyone has to call to draw the matrix since the graphics waits are
1407: synchronized across all processors that share the PetscDraw object
1408: */
1409: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1410: if (!rank) {
1411: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);
1412: MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);
1413: }
1414: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1415: PetscViewerFlush(viewer);
1416: MatDestroy(&A);
1417: }
1418: return(0);
1419: }
1421: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1422: {
1424: PetscBool iascii,isdraw,issocket,isbinary;
1427: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1428: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1429: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1430: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1431: if (iascii || isdraw || isbinary || issocket) {
1432: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1433: }
1434: return(0);
1435: }
1437: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1438: {
1439: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1441: Vec bb1 = 0;
1442: PetscBool hasop;
1445: if (flag == SOR_APPLY_UPPER) {
1446: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1447: return(0);
1448: }
1450: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1451: VecDuplicate(bb,&bb1);
1452: }
1454: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1455: if (flag & SOR_ZERO_INITIAL_GUESS) {
1456: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1457: its--;
1458: }
1460: while (its--) {
1461: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1462: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1464: /* update rhs: bb1 = bb - B*x */
1465: VecScale(mat->lvec,-1.0);
1466: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1468: /* local sweep */
1469: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1470: }
1471: } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1472: if (flag & SOR_ZERO_INITIAL_GUESS) {
1473: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1474: its--;
1475: }
1476: while (its--) {
1477: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1478: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1480: /* update rhs: bb1 = bb - B*x */
1481: VecScale(mat->lvec,-1.0);
1482: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1484: /* local sweep */
1485: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1486: }
1487: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1488: if (flag & SOR_ZERO_INITIAL_GUESS) {
1489: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1490: its--;
1491: }
1492: while (its--) {
1493: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1494: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1496: /* update rhs: bb1 = bb - B*x */
1497: VecScale(mat->lvec,-1.0);
1498: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1500: /* local sweep */
1501: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1502: }
1503: } else if (flag & SOR_EISENSTAT) {
1504: Vec xx1;
1506: VecDuplicate(bb,&xx1);
1507: (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);
1509: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1510: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1511: if (!mat->diag) {
1512: MatCreateVecs(matin,&mat->diag,NULL);
1513: MatGetDiagonal(matin,mat->diag);
1514: }
1515: MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1516: if (hasop) {
1517: MatMultDiagonalBlock(matin,xx,bb1);
1518: } else {
1519: VecPointwiseMult(bb1,mat->diag,xx);
1520: }
1521: VecAYPX(bb1,(omega-2.0)/omega,bb);
1523: MatMultAdd(mat->B,mat->lvec,bb1,bb1);
1525: /* local sweep */
1526: (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1527: VecAXPY(xx,1.0,xx1);
1528: VecDestroy(&xx1);
1529: } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1531: VecDestroy(&bb1);
1533: matin->factorerrortype = mat->A->factorerrortype;
1534: return(0);
1535: }
1537: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1538: {
1539: Mat aA,aB,Aperm;
1540: const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1541: PetscScalar *aa,*ba;
1542: PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1543: PetscSF rowsf,sf;
1544: IS parcolp = NULL;
1545: PetscBool done;
1549: MatGetLocalSize(A,&m,&n);
1550: ISGetIndices(rowp,&rwant);
1551: ISGetIndices(colp,&cwant);
1552: PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);
1554: /* Invert row permutation to find out where my rows should go */
1555: PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1556: PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1557: PetscSFSetFromOptions(rowsf);
1558: for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1559: PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1560: PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);
1562: /* Invert column permutation to find out where my columns should go */
1563: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1564: PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1565: PetscSFSetFromOptions(sf);
1566: for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1567: PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1568: PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);
1569: PetscSFDestroy(&sf);
1571: ISRestoreIndices(rowp,&rwant);
1572: ISRestoreIndices(colp,&cwant);
1573: MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);
1575: /* Find out where my gcols should go */
1576: MatGetSize(aB,NULL,&ng);
1577: PetscMalloc1(ng,&gcdest);
1578: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1579: PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1580: PetscSFSetFromOptions(sf);
1581: PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);
1582: PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);
1583: PetscSFDestroy(&sf);
1585: PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1586: MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1587: MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1588: for (i=0; i<m; i++) {
1589: PetscInt row = rdest[i],rowner;
1590: PetscLayoutFindOwner(A->rmap,row,&rowner);
1591: for (j=ai[i]; j<ai[i+1]; j++) {
1592: PetscInt cowner,col = cdest[aj[j]];
1593: PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1594: if (rowner == cowner) dnnz[i]++;
1595: else onnz[i]++;
1596: }
1597: for (j=bi[i]; j<bi[i+1]; j++) {
1598: PetscInt cowner,col = gcdest[bj[j]];
1599: PetscLayoutFindOwner(A->cmap,col,&cowner);
1600: if (rowner == cowner) dnnz[i]++;
1601: else onnz[i]++;
1602: }
1603: }
1604: PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);
1605: PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);
1606: PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);
1607: PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);
1608: PetscSFDestroy(&rowsf);
1610: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1611: MatSeqAIJGetArray(aA,&aa);
1612: MatSeqAIJGetArray(aB,&ba);
1613: for (i=0; i<m; i++) {
1614: PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1615: PetscInt j0,rowlen;
1616: rowlen = ai[i+1] - ai[i];
1617: for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the repurposed acols/bcols buffers, so insert values in batches of at most m */
1618: for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1619: MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);
1620: }
1621: rowlen = bi[i+1] - bi[i];
1622: for (j0=j=0; j<rowlen; j0=j) {
1623: for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1624: MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);
1625: }
1626: }
1627: MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1628: MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1629: MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1630: MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1631: MatSeqAIJRestoreArray(aA,&aa);
1632: MatSeqAIJRestoreArray(aB,&ba);
1633: PetscFree4(dnnz,onnz,tdnnz,tonnz);
1634: PetscFree3(work,rdest,cdest);
1635: PetscFree(gcdest);
1636: if (parcolp) {ISDestroy(&colp);}
1637: *B = Aperm;
1638: return(0);
1639: }
1641: PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1642: {
1643: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1647: MatGetSize(aij->B,NULL,nghosts);
1648: if (ghosts) *ghosts = aij->garray;
1649: return(0);
1650: }
1652: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1653: {
1654: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1655: Mat A = mat->A,B = mat->B;
1657: PetscReal isend[5],irecv[5];
1660: info->block_size = 1.0;
1661: MatGetInfo(A,MAT_LOCAL,info);
1663: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1664: isend[3] = info->memory; isend[4] = info->mallocs;
1666: MatGetInfo(B,MAT_LOCAL,info);
1668: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1669: isend[3] += info->memory; isend[4] += info->mallocs;
1670: if (flag == MAT_LOCAL) {
1671: info->nz_used = isend[0];
1672: info->nz_allocated = isend[1];
1673: info->nz_unneeded = isend[2];
1674: info->memory = isend[3];
1675: info->mallocs = isend[4];
1676: } else if (flag == MAT_GLOBAL_MAX) {
1677: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));
1679: info->nz_used = irecv[0];
1680: info->nz_allocated = irecv[1];
1681: info->nz_unneeded = irecv[2];
1682: info->memory = irecv[3];
1683: info->mallocs = irecv[4];
1684: } else if (flag == MAT_GLOBAL_SUM) {
1685: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));
1687: info->nz_used = irecv[0];
1688: info->nz_allocated = irecv[1];
1689: info->nz_unneeded = irecv[2];
1690: info->memory = irecv[3];
1691: info->mallocs = irecv[4];
1692: }
1693: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1694: info->fill_ratio_needed = 0;
1695: info->factor_mallocs = 0;
1696: return(0);
1697: }
1699: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1700: {
1701: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1705: switch (op) {
1706: case MAT_NEW_NONZERO_LOCATIONS:
1707: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1708: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1709: case MAT_KEEP_NONZERO_PATTERN:
1710: case MAT_NEW_NONZERO_LOCATION_ERR:
1711: case MAT_USE_INODES:
1712: case MAT_IGNORE_ZERO_ENTRIES:
1713: MatCheckPreallocated(A,1);
1714: MatSetOption(a->A,op,flg);
1715: MatSetOption(a->B,op,flg);
1716: break;
1717: case MAT_ROW_ORIENTED:
1718: MatCheckPreallocated(A,1);
1719: a->roworiented = flg;
1721: MatSetOption(a->A,op,flg);
1722: MatSetOption(a->B,op,flg);
1723: break;
1724: case MAT_NEW_DIAGONALS:
1725: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1726: break;
1727: case MAT_IGNORE_OFF_PROC_ENTRIES:
1728: a->donotstash = flg;
1729: break;
1730: case MAT_SPD:
1731: A->spd_set = PETSC_TRUE;
1732: A->spd = flg;
1733: if (flg) {
1734: A->symmetric = PETSC_TRUE;
1735: A->structurally_symmetric = PETSC_TRUE;
1736: A->symmetric_set = PETSC_TRUE;
1737: A->structurally_symmetric_set = PETSC_TRUE;
1738: }
1739: break;
1740: case MAT_SYMMETRIC:
1741: MatCheckPreallocated(A,1);
1742: MatSetOption(a->A,op,flg);
1743: break;
1744: case MAT_STRUCTURALLY_SYMMETRIC:
1745: MatCheckPreallocated(A,1);
1746: MatSetOption(a->A,op,flg);
1747: break;
1748: case MAT_HERMITIAN:
1749: MatCheckPreallocated(A,1);
1750: MatSetOption(a->A,op,flg);
1751: break;
1752: case MAT_SYMMETRY_ETERNAL:
1753: MatCheckPreallocated(A,1);
1754: MatSetOption(a->A,op,flg);
1755: break;
1756: case MAT_SUBMAT_SINGLEIS:
1757: A->submat_singleis = flg;
1758: break;
1759: case MAT_STRUCTURE_ONLY:
1760: /* The option is handled directly by MatSetOption() */
1761: break;
1762: default:
1763: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1764: }
1765: return(0);
1766: }
1768: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1769: {
1770: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1771: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1773: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1774: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1775: PetscInt *cmap,*idx_p;
1778: if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1779: mat->getrowactive = PETSC_TRUE;
1781: if (!mat->rowvalues && (idx || v)) {
1782: /*
1783: allocate enough space to hold information from the longest row.
1784: */
1785: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1786: PetscInt max = 1,tmp;
1787: for (i=0; i<matin->rmap->n; i++) {
1788: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1789: if (max < tmp) max = tmp;
1790: }
1791: PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1792: }
1794: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1795: lrow = row - rstart;
1797: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1798: if (!v) {pvA = 0; pvB = 0;}
1799: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1800: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1801: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1802: nztot = nzA + nzB;
1804: cmap = mat->garray;
1805: if (v || idx) {
1806: if (nztot) {
1807: /* Sort by increasing column numbers, assuming A and B already sorted */
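/* imark is the number of off-diagonal (B) columns whose global index precedes this process's
   diagonal block; the merged row is emitted as those B entries, then the A entries, then the
   remaining B entries */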
1808: PetscInt imark = -1;
1809: if (v) {
1810: *v = v_p = mat->rowvalues;
1811: for (i=0; i<nzB; i++) {
1812: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1813: else break;
1814: }
1815: imark = i;
1816: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1817: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1818: }
1819: if (idx) {
1820: *idx = idx_p = mat->rowindices;
1821: if (imark > -1) {
1822: for (i=0; i<imark; i++) {
1823: idx_p[i] = cmap[cworkB[i]];
1824: }
1825: } else {
1826: for (i=0; i<nzB; i++) {
1827: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1828: else break;
1829: }
1830: imark = i;
1831: }
1832: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1833: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1834: }
1835: } else {
1836: if (idx) *idx = 0;
1837: if (v) *v = 0;
1838: }
1839: }
1840: *nz = nztot;
1841: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1842: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1843: return(0);
1844: }
1846: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1847: {
1848: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1851: if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1852: aij->getrowactive = PETSC_FALSE;
1853: return(0);
1854: }
1856: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1857: {
1858: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1859: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1861: PetscInt i,j,cstart = mat->cmap->rstart;
1862: PetscReal sum = 0.0;
1863: MatScalar *v;
1866: if (aij->size == 1) {
1867: MatNorm(aij->A,type,norm);
1868: } else {
1869: if (type == NORM_FROBENIUS) {
1870: v = amat->a;
1871: for (i=0; i<amat->nz; i++) {
1872: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1873: }
1874: v = bmat->a;
1875: for (i=0; i<bmat->nz; i++) {
1876: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877: }
1878: MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1879: *norm = PetscSqrtReal(*norm);
1880: PetscLogFlops(2*amat->nz+2*bmat->nz);
1881: } else if (type == NORM_1) { /* max column norm */
1882: PetscReal *tmp,*tmp2;
1883: PetscInt *jj,*garray = aij->garray;
1884: PetscCalloc1(mat->cmap->N+1,&tmp);
1885: PetscMalloc1(mat->cmap->N+1,&tmp2);
1886: *norm = 0.0;
1887: v = amat->a; jj = amat->j;
1888: for (j=0; j<amat->nz; j++) {
1889: tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1890: }
1891: v = bmat->a; jj = bmat->j;
1892: for (j=0; j<bmat->nz; j++) {
1893: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1894: }
1895: MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1896: for (j=0; j<mat->cmap->N; j++) {
1897: if (tmp2[j] > *norm) *norm = tmp2[j];
1898: }
1899: PetscFree(tmp);
1900: PetscFree(tmp2);
1901: PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
1902: } else if (type == NORM_INFINITY) { /* max row norm */
1903: PetscReal ntemp = 0.0;
1904: for (j=0; j<aij->A->rmap->n; j++) {
1905: v = amat->a + amat->i[j];
1906: sum = 0.0;
1907: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1908: sum += PetscAbsScalar(*v); v++;
1909: }
1910: v = bmat->a + bmat->i[j];
1911: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1912: sum += PetscAbsScalar(*v); v++;
1913: }
1914: if (sum > ntemp) ntemp = sum;
1915: }
1916: MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
1917: PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
1918: } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1919: }
1920: return(0);
1921: }
1923: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1924: {
1925: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1926: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1928: PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1929: PetscInt cstart = A->cmap->rstart,ncol;
1930: Mat B;
1931: MatScalar *array;
1934: if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1936: ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1937: ai = Aloc->i; aj = Aloc->j;
1938: bi = Bloc->i; bj = Bloc->j;
1939: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1940: PetscInt *d_nnz,*g_nnz,*o_nnz;
1941: PetscSFNode *oloc;
1942: PETSC_UNUSED PetscSF sf;
1944: PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
1945: /* compute d_nnz for preallocation */
1946: PetscMemzero(d_nnz,na*sizeof(PetscInt));
1947: for (i=0; i<ai[ma]; i++) {
1948: d_nnz[aj[i]]++;
1949: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1950: }
1951: /* compute local off-diagonal contributions */
1952: PetscMemzero(g_nnz,nb*sizeof(PetscInt));
1953: for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1954: /* map those to global */
1955: PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1956: PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
1957: PetscSFSetFromOptions(sf);
1958: PetscMemzero(o_nnz,na*sizeof(PetscInt));
1959: PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1960: PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1961: PetscSFDestroy(&sf);
1963: MatCreate(PetscObjectComm((PetscObject)A),&B);
1964: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1965: MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1966: MatSetType(B,((PetscObject)A)->type_name);
1967: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
1968: PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
1969: } else {
1970: B = *matout;
1971: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
1972: for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1973: }
1975: /* copy over the A part */
1976: array = Aloc->a;
1977: row = A->rmap->rstart;
1978: for (i=0; i<ma; i++) {
1979: ncol = ai[i+1]-ai[i];
1980: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1981: row++;
1982: array += ncol; aj += ncol;
1983: }
1984: aj = Aloc->j;
1985: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1987: /* copy over the B part */
1988: PetscCalloc1(bi[mb],&cols);
1989: array = Bloc->a;
1990: row = A->rmap->rstart;
1991: for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1992: cols_tmp = cols;
1993: for (i=0; i<mb; i++) {
1994: ncol = bi[i+1]-bi[i];
1995: MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);
1996: row++;
1997: array += ncol; cols_tmp += ncol;
1998: }
1999: PetscFree(cols);
2001: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2002: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2003: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2004: *matout = B;
2005: } else {
2006: MatHeaderMerge(A,&B);
2007: }
2008: return(0);
2009: }
2011: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2012: {
2013: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2014: Mat a = aij->A,b = aij->B;
2016: PetscInt s1,s2,s3;
2019: MatGetLocalSize(mat,&s2,&s3);
2020: if (rr) {
2021: VecGetLocalSize(rr,&s1);
2022: if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2023: /* Overlap communication with computation. */
2024: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2025: }
2026: if (ll) {
2027: VecGetLocalSize(ll,&s1);
2028: if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2029: (*b->ops->diagonalscale)(b,ll,0);
2030: }
2031: /* scale the diagonal block */
2032: (*a->ops->diagonalscale)(a,ll,rr);
2034: if (rr) {
2035: /* Do a scatter end and then right scale the off-diagonal block */
2036: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
2037: (*b->ops->diagonalscale)(b,0,aij->lvec);
2038: }
2039: return(0);
2040: }
2042: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2043: {
2044: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2048: MatSetUnfactored(a->A);
2049: return(0);
2050: }
2052: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2053: {
2054: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2055: Mat a,b,c,d;
2056: PetscBool flg;
2060: a = matA->A; b = matA->B;
2061: c = matB->A; d = matB->B;
2063: MatEqual(a,c,&flg);
2064: if (flg) {
2065: MatEqual(b,d,&flg);
2066: }
2067: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2068: return(0);
2069: }
2071: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2072: {
2074: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2075: Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2078: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2079: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2080: /* because of the column compression in the off-processor part of the matrix a->B,
2081: the number of columns in a->B and b->B may differ, hence we cannot call
2082: MatCopy() directly on the two parts. If need be, a copy more efficient than
2083: MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2084: and then copying the submatrices */
2085: MatCopy_Basic(A,B,str);
2086: } else {
2087: MatCopy(a->A,b->A,str);
2088: MatCopy(a->B,b->B,str);
2089: }
2090: PetscObjectStateIncrease((PetscObject)B);
2091: return(0);
2092: }
2094: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2095: {
2099: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
2100: return(0);
2101: }
2103: /*
2104: Computes the number of nonzeros per row needed for preallocation when X and Y
2105: have different nonzero structure.
2106: */
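/*
   A small worked example (hypothetical data, not from the PETSc test suite): if row i of X has
   global columns {1,4,7} after applying xltog and row i of Y has global columns {2,4,9} after
   applying yltog, the merge below counts the union {1,2,4,7,9}, giving nnz[i] = 5; the shared
   column 4 is counted only once.
*/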
2107: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2108: {
2109: PetscInt i,j,k,nzx,nzy;
2112: /* Set the number of nonzeros in the new matrix */
2113: for (i=0; i<m; i++) {
2114: const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2115: nzx = xi[i+1] - xi[i];
2116: nzy = yi[i+1] - yi[i];
2117: nnz[i] = 0;
2118: for (j=0,k=0; j<nzx; j++) { /* Point in X */
2119: for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2120: if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */
2121: nnz[i]++;
2122: }
2123: for (; k<nzy; k++) nnz[i]++;
2124: }
2125: return(0);
2126: }
2128: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2129: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2130: {
2132: PetscInt m = Y->rmap->N;
2133: Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2134: Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2137: MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2138: return(0);
2139: }
2141: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2142: {
2144: Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2145: PetscBLASInt bnz,one=1;
2146: Mat_SeqAIJ *x,*y;
2149: if (str == SAME_NONZERO_PATTERN) {
2150: PetscScalar alpha = a;
2151: x = (Mat_SeqAIJ*)xx->A->data;
2152: PetscBLASIntCast(x->nz,&bnz);
2153: y = (Mat_SeqAIJ*)yy->A->data;
2154: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2155: x = (Mat_SeqAIJ*)xx->B->data;
2156: y = (Mat_SeqAIJ*)yy->B->data;
2157: PetscBLASIntCast(x->nz,&bnz);
2158: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2159: PetscObjectStateIncrease((PetscObject)Y);
2160: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2161: MatAXPY_Basic(Y,a,X,str);
2162: } else {
2163: Mat B;
2164: PetscInt *nnz_d,*nnz_o;
2165: PetscMalloc1(yy->A->rmap->N,&nnz_d);
2166: PetscMalloc1(yy->B->rmap->N,&nnz_o);
2167: MatCreate(PetscObjectComm((PetscObject)Y),&B);
2168: PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2169: MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);
2170: MatSetBlockSizesFromMats(B,Y,Y);
2171: MatSetType(B,MATMPIAIJ);
2172: MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2173: MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2174: MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2175: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2176: MatHeaderReplace(Y,&B);
2177: PetscFree(nnz_d);
2178: PetscFree(nnz_o);
2179: }
2180: return(0);
2181: }
2183: extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2185: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2186: {
2187: #if defined(PETSC_USE_COMPLEX)
2189: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2192: MatConjugate_SeqAIJ(aij->A);
2193: MatConjugate_SeqAIJ(aij->B);
2194: #else
2196: #endif
2197: return(0);
2198: }
2200: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2201: {
2202: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2206: MatRealPart(a->A);
2207: MatRealPart(a->B);
2208: return(0);
2209: }
2211: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2212: {
2213: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2217: MatImaginaryPart(a->A);
2218: MatImaginaryPart(a->B);
2219: return(0);
2220: }
2222: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2223: {
2224: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2226: PetscInt i,*idxb = 0;
2227: PetscScalar *va,*vb;
2228: Vec vtmp;
2231: MatGetRowMaxAbs(a->A,v,idx);
2232: VecGetArray(v,&va);
2233: if (idx) {
2234: for (i=0; i<A->rmap->n; i++) {
2235: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2236: }
2237: }
2239: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2240: if (idx) {
2241: PetscMalloc1(A->rmap->n,&idxb);
2242: }
2243: MatGetRowMaxAbs(a->B,vtmp,idxb);
2244: VecGetArray(vtmp,&vb);
2246: for (i=0; i<A->rmap->n; i++) {
2247: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2248: va[i] = vb[i];
2249: if (idx) idx[i] = a->garray[idxb[i]];
2250: }
2251: }
2253: VecRestoreArray(v,&va);
2254: VecRestoreArray(vtmp,&vb);
2255: PetscFree(idxb);
2256: VecDestroy(&vtmp);
2257: return(0);
2258: }
2260: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2261: {
2262: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2264: PetscInt i,*idxb = 0;
2265: PetscScalar *va,*vb;
2266: Vec vtmp;
2269: MatGetRowMinAbs(a->A,v,idx);
2270: VecGetArray(v,&va);
2271: if (idx) {
2272: for (i=0; i<A->rmap->n; i++) {
2273: if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2274: }
2275: }
2277: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
2278: if (idx) {
2279: PetscMalloc1(A->rmap->n,&idxb);
2280: }
2281: MatGetRowMinAbs(a->B,vtmp,idxb);
2282: VecGetArray(vtmp,&vb);
2284: for (i=0; i<A->rmap->n; i++) {
2285: if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2286: va[i] = vb[i];
2287: if (idx) idx[i] = a->garray[idxb[i]];
2288: }
2289: }
2291: VecRestoreArray(v,&va);
2292: VecRestoreArray(vtmp,&vb);
2293: PetscFree(idxb);
2294: VecDestroy(&vtmp);
2295: return(0);
2296: }
2298: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299: {
2300: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2301: PetscInt n = A->rmap->n;
2302: PetscInt cstart = A->cmap->rstart;
2303: PetscInt *cmap = mat->garray;
2304: PetscInt *diagIdx, *offdiagIdx;
2305: Vec diagV, offdiagV;
2306: PetscScalar *a, *diagA, *offdiagA;
2307: PetscInt r;
2311: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2312: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2313: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2314: MatGetRowMin(mat->A, diagV, diagIdx);
2315: MatGetRowMin(mat->B, offdiagV, offdiagIdx);
2316: VecGetArray(v, &a);
2317: VecGetArray(diagV, &diagA);
2318: VecGetArray(offdiagV, &offdiagA);
2319: for (r = 0; r < n; ++r) {
2320: if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2321: a[r] = diagA[r];
2322: idx[r] = cstart + diagIdx[r];
2323: } else {
2324: a[r] = offdiagA[r];
2325: idx[r] = cmap[offdiagIdx[r]];
2326: }
2327: }
2328: VecRestoreArray(v, &a);
2329: VecRestoreArray(diagV, &diagA);
2330: VecRestoreArray(offdiagV, &offdiagA);
2331: VecDestroy(&diagV);
2332: VecDestroy(&offdiagV);
2333: PetscFree2(diagIdx, offdiagIdx);
2334: return(0);
2335: }
2337: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338: {
2339: Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2340: PetscInt n = A->rmap->n;
2341: PetscInt cstart = A->cmap->rstart;
2342: PetscInt *cmap = mat->garray;
2343: PetscInt *diagIdx, *offdiagIdx;
2344: Vec diagV, offdiagV;
2345: PetscScalar *a, *diagA, *offdiagA;
2346: PetscInt r;
2350: PetscMalloc2(n,&diagIdx,n,&offdiagIdx);
2351: VecCreateSeq(PETSC_COMM_SELF, n, &diagV);
2352: VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);
2353: MatGetRowMax(mat->A, diagV, diagIdx);
2354: MatGetRowMax(mat->B, offdiagV, offdiagIdx);
2355: VecGetArray(v, &a);
2356: VecGetArray(diagV, &diagA);
2357: VecGetArray(offdiagV, &offdiagA);
2358: for (r = 0; r < n; ++r) {
2359: if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2360: a[r] = diagA[r];
2361: idx[r] = cstart + diagIdx[r];
2362: } else {
2363: a[r] = offdiagA[r];
2364: idx[r] = cmap[offdiagIdx[r]];
2365: }
2366: }
2367: VecRestoreArray(v, &a);
2368: VecRestoreArray(diagV, &diagA);
2369: VecRestoreArray(offdiagV, &offdiagA);
2370: VecDestroy(&diagV);
2371: VecDestroy(&offdiagV);
2372: PetscFree2(diagIdx, offdiagIdx);
2373: return(0);
2374: }
2376: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2377: {
2379: Mat *dummy;
2382: MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2383: *newmat = *dummy;
2384: PetscFree(dummy);
2385: return(0);
2386: }
2388: PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2389: {
2390: Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2394: MatInvertBlockDiagonal(a->A,values);
2395: A->factorerrortype = a->A->factorerrortype;
2396: return(0);
2397: }
2399: static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2400: {
2402: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
2405: MatSetRandom(aij->A,rctx);
2406: MatSetRandom(aij->B,rctx);
2407: MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
2408: MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
2409: return(0);
2410: }
2412: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2413: {
2415: if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2416: else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2417: return(0);
2418: }
2420: /*@
2421: MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2423: Collective on Mat
2425: Input Parameters:
2426: + A - the matrix
2427: - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2429: Level: advanced
2431: @*/
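/*
   A minimal usage sketch (not part of the original source; assumes A is an assembled MATMPIAIJ
   matrix and nis, is, ov are the usual MatIncreaseOverlap() arguments; error checking omitted):

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
     MatIncreaseOverlap(A,nis,is,ov);        the overlap is now computed with the scalable algorithm

   The same choice can be made at runtime with -mat_increase_overlap_scalable, handled by
   MatSetFromOptions_MPIAIJ() below.
*/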
2432: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2433: {
2434: PetscErrorCode ierr;
2437: PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2438: return(0);
2439: }
2441: PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2442: {
2443: PetscErrorCode ierr;
2444: PetscBool sc = PETSC_FALSE,flg;
2447: PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");
2448: PetscObjectOptionsBegin((PetscObject)A);
2449: if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2450: PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);
2451: if (flg) {
2452: MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);
2453: }
2454: PetscOptionsEnd();
2455: return(0);
2456: }
2458: PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2459: {
2461: Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
2462: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;
2465: if (!Y->preallocated) {
2466: MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);
2467: } else if (!aij->nz) {
2468: PetscInt nonew = aij->nonew;
2469: MatSeqAIJSetPreallocation(maij->A,1,NULL);
2470: aij->nonew = nonew;
2471: }
2472: MatShift_Basic(Y,a);
2473: return(0);
2474: }
2476: PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
2477: {
2478: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2482: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2483: MatMissingDiagonal(a->A,missing,d);
2484: if (d) {
2485: PetscInt rstart;
2486: MatGetOwnershipRange(A,&rstart,NULL);
2487: *d += rstart;
2489: }
2490: return(0);
2491: }
2494: /* -------------------------------------------------------------------*/
2495: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2496: MatGetRow_MPIAIJ,
2497: MatRestoreRow_MPIAIJ,
2498: MatMult_MPIAIJ,
2499: /* 4*/ MatMultAdd_MPIAIJ,
2500: MatMultTranspose_MPIAIJ,
2501: MatMultTransposeAdd_MPIAIJ,
2502: 0,
2503: 0,
2504: 0,
2505: /*10*/ 0,
2506: 0,
2507: 0,
2508: MatSOR_MPIAIJ,
2509: MatTranspose_MPIAIJ,
2510: /*15*/ MatGetInfo_MPIAIJ,
2511: MatEqual_MPIAIJ,
2512: MatGetDiagonal_MPIAIJ,
2513: MatDiagonalScale_MPIAIJ,
2514: MatNorm_MPIAIJ,
2515: /*20*/ MatAssemblyBegin_MPIAIJ,
2516: MatAssemblyEnd_MPIAIJ,
2517: MatSetOption_MPIAIJ,
2518: MatZeroEntries_MPIAIJ,
2519: /*24*/ MatZeroRows_MPIAIJ,
2520: 0,
2521: 0,
2522: 0,
2523: 0,
2524: /*29*/ MatSetUp_MPIAIJ,
2525: 0,
2526: 0,
2527: MatGetDiagonalBlock_MPIAIJ,
2528: 0,
2529: /*34*/ MatDuplicate_MPIAIJ,
2530: 0,
2531: 0,
2532: 0,
2533: 0,
2534: /*39*/ MatAXPY_MPIAIJ,
2535: MatCreateSubMatrices_MPIAIJ,
2536: MatIncreaseOverlap_MPIAIJ,
2537: MatGetValues_MPIAIJ,
2538: MatCopy_MPIAIJ,
2539: /*44*/ MatGetRowMax_MPIAIJ,
2540: MatScale_MPIAIJ,
2541: MatShift_MPIAIJ,
2542: MatDiagonalSet_MPIAIJ,
2543: MatZeroRowsColumns_MPIAIJ,
2544: /*49*/ MatSetRandom_MPIAIJ,
2545: 0,
2546: 0,
2547: 0,
2548: 0,
2549: /*54*/ MatFDColoringCreate_MPIXAIJ,
2550: 0,
2551: MatSetUnfactored_MPIAIJ,
2552: MatPermute_MPIAIJ,
2553: 0,
2554: /*59*/ MatCreateSubMatrix_MPIAIJ,
2555: MatDestroy_MPIAIJ,
2556: MatView_MPIAIJ,
2557: 0,
2558: MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2559: /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2560: MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2561: 0,
2562: 0,
2563: 0,
2564: /*69*/ MatGetRowMaxAbs_MPIAIJ,
2565: MatGetRowMinAbs_MPIAIJ,
2566: 0,
2567: 0,
2568: 0,
2569: 0,
2570: /*75*/ MatFDColoringApply_AIJ,
2571: MatSetFromOptions_MPIAIJ,
2572: 0,
2573: 0,
2574: MatFindZeroDiagonals_MPIAIJ,
2575: /*80*/ 0,
2576: 0,
2577: 0,
2578: /*83*/ MatLoad_MPIAIJ,
2579: MatIsSymmetric_MPIAIJ,
2580: 0,
2581: 0,
2582: 0,
2583: 0,
2584: /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2585: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2586: MatMatMultNumeric_MPIAIJ_MPIAIJ,
2587: MatPtAP_MPIAIJ_MPIAIJ,
2588: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2589: /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2590: 0,
2591: 0,
2592: 0,
2593: 0,
2594: /*99*/ 0,
2595: 0,
2596: 0,
2597: MatConjugate_MPIAIJ,
2598: 0,
2599: /*104*/MatSetValuesRow_MPIAIJ,
2600: MatRealPart_MPIAIJ,
2601: MatImaginaryPart_MPIAIJ,
2602: 0,
2603: 0,
2604: /*109*/0,
2605: 0,
2606: MatGetRowMin_MPIAIJ,
2607: 0,
2608: MatMissingDiagonal_MPIAIJ,
2609: /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2610: 0,
2611: MatGetGhosts_MPIAIJ,
2612: 0,
2613: 0,
2614: /*119*/0,
2615: 0,
2616: 0,
2617: 0,
2618: MatGetMultiProcBlock_MPIAIJ,
2619: /*124*/MatFindNonzeroRows_MPIAIJ,
2620: MatGetColumnNorms_MPIAIJ,
2621: MatInvertBlockDiagonal_MPIAIJ,
2622: 0,
2623: MatCreateSubMatricesMPI_MPIAIJ,
2624: /*129*/0,
2625: MatTransposeMatMult_MPIAIJ_MPIAIJ,
2626: MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2627: MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2628: 0,
2629: /*134*/0,
2630: 0,
2631: MatRARt_MPIAIJ_MPIAIJ,
2632: 0,
2633: 0,
2634: /*139*/MatSetBlockSizes_MPIAIJ,
2635: 0,
2636: 0,
2637: MatFDColoringSetUp_MPIXAIJ,
2638: MatFindOffBlockDiagonalEntries_MPIAIJ,
2639: /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2640: };
2642: /* ----------------------------------------------------------------------------------------*/
2644: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2645: {
2646: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2650: MatStoreValues(aij->A);
2651: MatStoreValues(aij->B);
2652: return(0);
2653: }
2655: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2656: {
2657: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2661: MatRetrieveValues(aij->A);
2662: MatRetrieveValues(aij->B);
2663: return(0);
2664: }
2666: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2667: {
2668: Mat_MPIAIJ *b;
2672: PetscLayoutSetUp(B->rmap);
2673: PetscLayoutSetUp(B->cmap);
2674: b = (Mat_MPIAIJ*)B->data;
2676: #if defined(PETSC_USE_CTABLE)
2677: PetscTableDestroy(&b->colmap);
2678: #else
2679: PetscFree(b->colmap);
2680: #endif
2681: PetscFree(b->garray);
2682: VecDestroy(&b->lvec);
2683: VecScatterDestroy(&b->Mvctx);
2685: /* Because B will have been resized, we simply destroy it and create a new one each time */
2686: MatDestroy(&b->B);
2687: MatCreate(PETSC_COMM_SELF,&b->B);
2688: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
2689: MatSetBlockSizesFromMats(b->B,B,B);
2690: MatSetType(b->B,MATSEQAIJ);
2691: PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);
2693: if (!B->preallocated) {
2694: MatCreate(PETSC_COMM_SELF,&b->A);
2695: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2696: MatSetBlockSizesFromMats(b->A,B,B);
2697: MatSetType(b->A,MATSEQAIJ);
2698: PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2699: }
2701: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
2702: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
2703: B->preallocated = PETSC_TRUE;
2704: B->was_assembled = PETSC_FALSE;
2705: B->assembled = PETSC_FALSE;
2706: return(0);
2707: }
2709: PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2710: {
2711: Mat_MPIAIJ *b;
2716: PetscLayoutSetUp(B->rmap);
2717: PetscLayoutSetUp(B->cmap);
2718: b = (Mat_MPIAIJ*)B->data;
2720: #if defined(PETSC_USE_CTABLE)
2721: PetscTableDestroy(&b->colmap);
2722: #else
2723: PetscFree(b->colmap);
2724: #endif
2725: PetscFree(b->garray);
2726: VecDestroy(&b->lvec);
2727: VecScatterDestroy(&b->Mvctx);
2729: MatResetPreallocation(b->A);
2730: MatResetPreallocation(b->B);
2731: B->preallocated = PETSC_TRUE;
2732: B->was_assembled = PETSC_FALSE;
2733: B->assembled = PETSC_FALSE;
2734: return(0);
2735: }
2737: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2738: {
2739: Mat mat;
2740: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2744: *newmat = 0;
2745: MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2746: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2747: MatSetBlockSizesFromMats(mat,matin,matin);
2748: MatSetType(mat,((PetscObject)matin)->type_name);
2749: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2750: a = (Mat_MPIAIJ*)mat->data;
2752: mat->factortype = matin->factortype;
2753: mat->assembled = PETSC_TRUE;
2754: mat->insertmode = NOT_SET_VALUES;
2755: mat->preallocated = PETSC_TRUE;
2757: a->size = oldmat->size;
2758: a->rank = oldmat->rank;
2759: a->donotstash = oldmat->donotstash;
2760: a->roworiented = oldmat->roworiented;
2761: a->rowindices = 0;
2762: a->rowvalues = 0;
2763: a->getrowactive = PETSC_FALSE;
2765: PetscLayoutReference(matin->rmap,&mat->rmap);
2766: PetscLayoutReference(matin->cmap,&mat->cmap);
2768: if (oldmat->colmap) {
2769: #if defined(PETSC_USE_CTABLE)
2770: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2771: #else
2772: PetscMalloc1(mat->cmap->N,&a->colmap);
2773: PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
2774: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));
2775: #endif
2776: } else a->colmap = 0;
2777: if (oldmat->garray) {
2778: PetscInt len;
2779: len = oldmat->B->cmap->n;
2780: PetscMalloc1(len+1,&a->garray);
2781: PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2782: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
2783: } else a->garray = 0;
2785: VecDuplicate(oldmat->lvec,&a->lvec);
2786: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2787: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2788: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2790: if (oldmat->Mvctx_mpi1) {
2791: VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);
2792: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);
2793: }
2795: MatDuplicate(oldmat->A,cpvalues,&a->A);
2796: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2797: MatDuplicate(oldmat->B,cpvalues,&a->B);
2798: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2799: PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2800: *newmat = mat;
2801: return(0);
2802: }
2804: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2805: {
2806: PetscScalar *vals,*svals;
2807: MPI_Comm comm;
2809: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
2810: PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0;
2811: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
2812: PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2813: PetscInt cend,cstart,n,*rowners;
2814: int fd;
2815: PetscInt bs = newMat->rmap->bs;
2818: /* force binary viewer to load .info file if it has not yet done so */
2819: PetscViewerSetUp(viewer);
2820: PetscObjectGetComm((PetscObject)viewer,&comm);
2821: MPI_Comm_size(comm,&size);
2822: MPI_Comm_rank(comm,&rank);
2823: PetscViewerBinaryGetDescriptor(viewer,&fd);
2824: if (!rank) {
2825: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
2826: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2827: if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2828: }
2830: PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");
2831: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);
2832: PetscOptionsEnd();
2833: if (bs < 0) bs = 1;
2835: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2836: M = header[1]; N = header[2];
2838: /* If global sizes are set, check if they are consistent with that given in the file */
2839: if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2840: if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2842: /* determine ownership of all (block) rows */
2843: if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2844: if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
2845: else m = newMat->rmap->n; /* Set by user */
2847: PetscMalloc1(size+1,&rowners);
2848: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2850: /* First process needs enough room for process with most rows */
2851: if (!rank) {
2852: mmax = rowners[1];
2853: for (i=2; i<=size; i++) {
2854: mmax = PetscMax(mmax, rowners[i]);
2855: }
2856: } else mmax = -1; /* unused, but compilers complain */
2858: rowners[0] = 0;
2859: for (i=2; i<=size; i++) {
2860: rowners[i] += rowners[i-1];
2861: }
2862: rstart = rowners[rank];
2863: rend = rowners[rank+1];
2865: /* distribute row lengths to all processors */
2866: PetscMalloc2(m,&ourlens,m,&offlens);
2867: if (!rank) {
2868: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
2869: PetscMalloc1(mmax,&rowlengths);
2870: PetscCalloc1(size,&procsnz);
2871: for (j=0; j<m; j++) {
2872: procsnz[0] += ourlens[j];
2873: }
2874: for (i=1; i<size; i++) {
2875: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
2876: /* calculate the number of nonzeros on each processor */
2877: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2878: procsnz[i] += rowlengths[j];
2879: }
2880: MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
2881: }
2882: PetscFree(rowlengths);
2883: } else {
2884: MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);
2885: }
2887: if (!rank) {
2888: /* determine max buffer needed and allocate it */
2889: maxnz = 0;
2890: for (i=0; i<size; i++) {
2891: maxnz = PetscMax(maxnz,procsnz[i]);
2892: }
2893: PetscMalloc1(maxnz,&cols);
2895: /* read in my part of the matrix column indices */
2896: nz = procsnz[0];
2897: PetscMalloc1(nz,&mycols);
2898: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2900: /* read in everyone else's and ship off */
2901: for (i=1; i<size; i++) {
2902: nz = procsnz[i];
2903: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2904: MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);
2905: }
2906: PetscFree(cols);
2907: } else {
2908: /* determine buffer space needed for message */
2909: nz = 0;
2910: for (i=0; i<m; i++) {
2911: nz += ourlens[i];
2912: }
2913: PetscMalloc1(nz,&mycols);
2915: /* receive message of column indices */
2916: MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);
2917: }
2919: /* determine column ownership if matrix is not square */
2920: if (N != M) {
2921: if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2922: else n = newMat->cmap->n;
2923: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
2924: cstart = cend - n;
2925: } else {
2926: cstart = rstart;
2927: cend = rend;
2928: n = cend - cstart;
2929: }
2931: /* loop over local rows, determining number of off diagonal entries */
2932: PetscMemzero(offlens,m*sizeof(PetscInt));
2933: jj = 0;
2934: for (i=0; i<m; i++) {
2935: for (j=0; j<ourlens[i]; j++) {
2936: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2937: jj++;
2938: }
2939: }
2941: for (i=0; i<m; i++) {
2942: ourlens[i] -= offlens[i];
2943: }
2944: MatSetSizes(newMat,m,n,M,N);
2946: if (bs > 1) {MatSetBlockSize(newMat,bs);}
2948: MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);
2950: for (i=0; i<m; i++) {
2951: ourlens[i] += offlens[i];
2952: }
2954: if (!rank) {
2955: PetscMalloc1(maxnz+1,&vals);
2957: /* read in my part of the matrix numerical values */
2958: nz = procsnz[0];
2959: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2961: /* insert into matrix */
2962: jj = rstart;
2963: smycols = mycols;
2964: svals = vals;
2965: for (i=0; i<m; i++) {
2966: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2967: smycols += ourlens[i];
2968: svals += ourlens[i];
2969: jj++;
2970: }
2972: /* read in other processors and ship out */
2973: for (i=1; i<size; i++) {
2974: nz = procsnz[i];
2975: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2976: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);
2977: }
2978: PetscFree(procsnz);
2979: } else {
2980: /* receive numeric values */
2981: PetscMalloc1(nz+1,&vals);
2983: /* receive message of values */
2984: MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);
2986: /* insert into matrix */
2987: jj = rstart;
2988: smycols = mycols;
2989: svals = vals;
2990: for (i=0; i<m; i++) {
2991: MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2992: smycols += ourlens[i];
2993: svals += ourlens[i];
2994: jj++;
2995: }
2996: }
2997: PetscFree2(ourlens,offlens);
2998: PetscFree(vals);
2999: PetscFree(mycols);
3000: PetscFree(rowners);
3001: MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);
3002: MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);
3003: return(0);
3004: }
3006: /* Not scalable because of ISAllGather() unless getting all columns. */
3007: PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3008: {
3010: IS iscol_local;
3011: PetscBool isstride;
3012: PetscMPIInt lisstride=0,gisstride;
3015: /* check if we are grabbing all columns*/
3016: PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);
3018: if (isstride) {
3019: PetscInt start,len,mstart,mlen;
3020: ISStrideGetInfo(iscol,&start,NULL);
3021: ISGetLocalSize(iscol,&len);
3022: MatGetOwnershipRangeColumn(mat,&mstart,&mlen);
3023: if (mstart == start && mlen-mstart == len) lisstride = 1;
3024: }
3026: MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));
3027: if (gisstride) {
3028: PetscInt N;
3029: MatGetSize(mat,NULL,&N);
3030: ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);
3031: ISSetIdentity(iscol_local);
3032: PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");
3033: } else {
3034: PetscInt cbs;
3035: ISGetBlockSize(iscol,&cbs);
3036: ISAllGather(iscol,&iscol_local);
3037: ISSetBlockSize(iscol_local,cbs);
3038: }
3040: *isseq = iscol_local;
3041: return(0);
3042: }
3044: /*
3045: Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and a sequential
3046: column index set of global size (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3048: Input Parameters:
3049: mat - matrix
3050: isrow - parallel row index set; its local indices are a subset of local rows of mat,
3051: i.e., mat->rstart <= isrow[i] < mat->rend
3052: iscol - parallel column index set; its local indices are a subset of local columns of mat,
3053: i.e., mat->cstart <= iscol[i] < mat->cend
3054: Output Parameter:
3055: isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3056: iscol_o - sequential column index set for retrieving mat->B
3057: garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3058: */
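/*
   For instance (hypothetical sizes): if iscol selects 100 columns globally and exactly three
   columns of this process's off-diagonal block mat->B are among them, then iscol_o has local
   size 3 and garray[0..2] gives, for each of those columns, its position (between 0 and 99)
   within iscol.
*/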
3059: PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3060: {
3062: Vec x,cmap;
3063: const PetscInt *is_idx;
3064: PetscScalar *xarray,*cmaparray;
3065: PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count;
3066: Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3067: Mat B=a->B;
3068: Vec lvec=a->lvec,lcmap;
3069: PetscInt i,cstart,cend,Bn=B->cmap->N;
3070: MPI_Comm comm;
3071: VecScatter Mvctx=a->Mvctx;
3074: PetscObjectGetComm((PetscObject)mat,&comm);
3075: ISGetLocalSize(iscol,&ncols);
3077: /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3078: MatCreateVecs(mat,&x,NULL);
3079: VecSet(x,-1.0);
3080: VecDuplicate(x,&cmap);
3081: VecSet(cmap,-1.0);
3083: /* Get start indices */
3084: MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);
3085: isstart -= ncols;
3086: MatGetOwnershipRangeColumn(mat,&cstart,&cend);
3088: ISGetIndices(iscol,&is_idx);
3089: VecGetArray(x,&xarray);
3090: VecGetArray(cmap,&cmaparray);
3091: PetscMalloc1(ncols,&idx);
3092: for (i=0; i<ncols; i++) {
3093: xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
3094: cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
3095: idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */
3096: }
3097: VecRestoreArray(x,&xarray);
3098: VecRestoreArray(cmap,&cmaparray);
3099: ISRestoreIndices(iscol,&is_idx);
3101: /* Get iscol_d */
3102: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);
3103: ISGetBlockSize(iscol,&i);
3104: ISSetBlockSize(*iscol_d,i);
3106: /* Get isrow_d */
3107: ISGetLocalSize(isrow,&m);
3108: rstart = mat->rmap->rstart;
3109: PetscMalloc1(m,&idx);
3110: ISGetIndices(isrow,&is_idx);
3111: for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3112: ISRestoreIndices(isrow,&is_idx);
3114: ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);
3115: ISGetBlockSize(isrow,&i);
3116: ISSetBlockSize(*isrow_d,i);
3118: /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3119: VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
3120: VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
3122: VecDuplicate(lvec,&lcmap);
3124: VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);
3125: VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);
3127: /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3128: /* off-process column indices */
3129: count = 0;
3130: PetscMalloc1(Bn,&idx);
3131: PetscMalloc1(Bn,&cmap1);
3133: VecGetArray(lvec,&xarray);
3134: VecGetArray(lcmap,&cmaparray);
3135: for (i=0; i<Bn; i++) {
3136: if (PetscRealPart(xarray[i]) > -1.0) {
3137: idx[count] = i; /* local column index in off-diagonal part B */
3138: cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3139: count++;
3140: }
3141: }
3142: VecRestoreArray(lvec,&xarray);
3143: VecRestoreArray(lcmap,&cmaparray);
3145: ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);
3146: /* cannot ensure iscol_o has same blocksize as iscol! */
3148: PetscFree(idx);
3149: *garray = cmap1;
3151: VecDestroy(&x);
3152: VecDestroy(&cmap);
3153: VecDestroy(&lcmap);
3154: return(0);
3155: }
3157: /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3158: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3159: {
3161: Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
3162: Mat M = NULL;
3163: MPI_Comm comm;
3164: IS iscol_d,isrow_d,iscol_o;
3165: Mat Asub = NULL,Bsub = NULL;
3166: PetscInt n;
3169: PetscObjectGetComm((PetscObject)mat,&comm);
3171: if (call == MAT_REUSE_MATRIX) {
3172: /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3173: PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);
3174: if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in does not carry isrow_d, so it was not created by this routine; cannot reuse");
3176: PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);
3177: if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in does not carry iscol_d, so it was not created by this routine; cannot reuse");
3179: PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);
3180: if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in does not carry iscol_o, so it was not created by this routine; cannot reuse");
3182: /* Update diagonal and off-diagonal portions of submat */
3183: asub = (Mat_MPIAIJ*)(*submat)->data;
3184: MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);
3185: ISGetLocalSize(iscol_o,&n);
3186: if (n) {
3187: MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);
3188: }
3189: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3190: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3192: } else { /* call == MAT_INITIAL_MATRIX) */
3193: const PetscInt *garray;
3194: PetscInt BsubN;
3196: /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3197: ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);
3199: /* Create local submatrices Asub and Bsub */
3200: MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);
3201: MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);
3203: /* Create submatrix M */
3204: MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);
3206: /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3207: asub = (Mat_MPIAIJ*)M->data;
3209: ISGetLocalSize(iscol_o,&BsubN);
3210: n = asub->B->cmap->N;
3211: if (BsubN > n) {
3212: /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3213: const PetscInt *idx;
3214: PetscInt i,j,*idx_new,*subgarray = asub->garray;
3215: PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);
3217: PetscMalloc1(n,&idx_new);
3218: j = 0;
3219: ISGetIndices(iscol_o,&idx);
3220: for (i=0; i<n; i++) {
3221: if (j >= BsubN) break;
3222: while (subgarray[i] > garray[j]) j++;
3224: if (subgarray[i] == garray[j]) {
3225: idx_new[i] = idx[j++];
3226: } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3227: }
3228: ISRestoreIndices(iscol_o,&idx);
3230: ISDestroy(&iscol_o);
3231: ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);
3233: } else if (BsubN < n) {
3234: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3235: }
3237: PetscFree(garray);
3238: *submat = M;
3240: /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3241: PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);
3242: ISDestroy(&isrow_d);
3244: PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);
3245: ISDestroy(&iscol_d);
3247: PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);
3248: ISDestroy(&iscol_o);
3249: }
3250: return(0);
3251: }
3253: PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3254: {
3256: IS iscol_local=NULL,isrow_d;
3257: PetscInt csize;
3258: PetscInt n,i,j,start,end;
3259: PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3260: MPI_Comm comm;
3263: /* If isrow has same processor distribution as mat,
3264: call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3265: if (call == MAT_REUSE_MATRIX) {
3266: PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);
3267: if (isrow_d) {
3268: sameRowDist = PETSC_TRUE;
3269: tsameDist[1] = PETSC_TRUE; /* sameColDist */
3270: } else {
3271: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);
3272: if (iscol_local) {
3273: sameRowDist = PETSC_TRUE;
3274: tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3275: }
3276: }
3277: } else {
3278: /* Check if isrow has same processor distribution as mat */
3279: sameDist[0] = PETSC_FALSE;
3280: ISGetLocalSize(isrow,&n);
3281: if (!n) {
3282: sameDist[0] = PETSC_TRUE;
3283: } else {
3284: ISGetMinMax(isrow,&i,&j);
3285: MatGetOwnershipRange(mat,&start,&end);
3286: if (i >= start && j < end) {
3287: sameDist[0] = PETSC_TRUE;
3288: }
3289: }
3291: /* Check if iscol has same processor distribution as mat */
3292: sameDist[1] = PETSC_FALSE;
3293: ISGetLocalSize(iscol,&n);
3294: if (!n) {
3295: sameDist[1] = PETSC_TRUE;
3296: } else {
3297: ISGetMinMax(iscol,&i,&j);
3298: MatGetOwnershipRangeColumn(mat,&start,&end);
3299: if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3300: }
3302: PetscObjectGetComm((PetscObject)mat,&comm);
3303: MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);
3304: sameRowDist = tsameDist[0];
3305: }
3307: if (sameRowDist) {
3308: if (tsameDist[1]) { /* sameRowDist & sameColDist */
3309: /* isrow and iscol have same processor distribution as mat */
3310: MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);
3311: return(0);
3312: } else { /* sameRowDist */
3313: /* isrow has same processor distribution as mat */
3314: if (call == MAT_INITIAL_MATRIX) {
3315: PetscBool sorted;
3316: ISGetSeqIS_Private(mat,iscol,&iscol_local);
3317: ISGetLocalSize(iscol_local,&n); /* local size of iscol_local = global columns of newmat */
3318: ISGetSize(iscol,&i);
3319: if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3321: ISSorted(iscol_local,&sorted);
3322: if (sorted) {
3323: /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3324: MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);
3325: return(0);
3326: }
3327: } else { /* call == MAT_REUSE_MATRIX */
3328: IS iscol_sub;
3329: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3330: if (iscol_sub) {
3331: MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);
3332: return(0);
3333: }
3334: }
3335: }
3336: }
3338: /* General case: iscol -> iscol_local which has global size of iscol */
3339: if (call == MAT_REUSE_MATRIX) {
3340: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3341: if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3342: } else {
3343: if (!iscol_local) {
3344: ISGetSeqIS_Private(mat,iscol,&iscol_local);
3345: }
3346: }
3348: ISGetLocalSize(iscol,&csize);
3349: MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);
3351: if (call == MAT_INITIAL_MATRIX) {
3352: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3353: ISDestroy(&iscol_local);
3354: }
3355: return(0);
3356: }
3358: /*@C
3359: MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3360: and "off-diagonal" parts of the matrix in CSR format.
3362: Collective on MPI_Comm
3364: Input Parameters:
3365: + comm - MPI communicator
3366: . A - "diagonal" portion of matrix
3367: . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3368: - garray - global index of B columns
3370: Output Parameter:
3371: . mat - the matrix, with input A as its local diagonal matrix
3372: Level: advanced
3374: Notes:
3375: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3376: A becomes part of the output mat, and B is destroyed by this routine; the user must not use A or B after this call.
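Example usage (a minimal sketch; the names Adiag, Boff, and garr are illustrative, and it is assumed the caller
has already built an m x n SeqAIJ Adiag, an m x nB SeqAIJ Boff with local (compressed) column indices, and an
array garr[] giving the global column for each column of Boff):

$ Mat      Adiag,Boff,C;
$ PetscInt *garr;    /* global column indices of the columns of Boff */
$ /* ... build Adiag, Boff, and garr ... */
$ MatCreateMPIAIJWithSeqAIJ(comm,Adiag,Boff,garr,&C);
$ /* Adiag is now owned by C; Boff has been destroyed and must not be referenced */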
3378: .seealso: MatCreateMPIAIJWithSplitArrays()
3379: @*/
3380: PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3381: {
3383: Mat_MPIAIJ *maij;
3384: Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew;
3385: PetscInt *oi=b->i,*oj=b->j,i,nz,col;
3386: PetscScalar *oa=b->a;
3387: Mat Bnew;
3388: PetscInt m,n,N;
3391: MatCreate(comm,mat);
3392: MatGetSize(A,&m,&n);
3393: if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3394: if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3395: /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3396: /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3398: /* Get global columns of mat */
3399: MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);
3401: MatSetSizes(*mat,m,n,PETSC_DECIDE,N);
3402: MatSetType(*mat,MATMPIAIJ);
3403: MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);
3404: maij = (Mat_MPIAIJ*)(*mat)->data;
3406: (*mat)->preallocated = PETSC_TRUE;
3408: PetscLayoutSetUp((*mat)->rmap);
3409: PetscLayoutSetUp((*mat)->cmap);
3411: /* Set A as diagonal portion of *mat */
3412: maij->A = A;
3414: nz = oi[m];
3415: for (i=0; i<nz; i++) {
3416: col = oj[i];
3417: oj[i] = garray[col];
3418: }
3420: /* Set Bnew as off-diagonal portion of *mat */
3421: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);
3422: bnew = (Mat_SeqAIJ*)Bnew->data;
3423: bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3424: maij->B = Bnew;
3426: if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3428: b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3429: b->free_a = PETSC_FALSE;
3430: b->free_ij = PETSC_FALSE;
3431: MatDestroy(&B);
3433: bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3434: bnew->free_a = PETSC_TRUE;
3435: bnew->free_ij = PETSC_TRUE;
3437: /* condense columns of maij->B */
3438: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
3439: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3440: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3441: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
3442: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3443: return(0);
3444: }
3446: extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3448: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3449: {
3451: PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs;
3452: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3453: Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3454: Mat M,Msub,B=a->B;
3455: MatScalar *aa;
3456: Mat_SeqAIJ *aij;
3457: PetscInt *garray = a->garray,*colsub,Ncols;
3458: PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3459: IS iscol_sub,iscmap;
3460: const PetscInt *is_idx,*cmap;
3461: PetscBool allcolumns=PETSC_FALSE;
3462: MPI_Comm comm;
3465: PetscObjectGetComm((PetscObject)mat,&comm);
3467: if (call == MAT_REUSE_MATRIX) {
3468: PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3469: if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3470: ISGetLocalSize(iscol_sub,&count);
3472: PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);
3473: if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3475: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);
3476: if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3478: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);
3480: } else { /* call == MAT_INITIAL_MATRIX) */
3481: PetscBool flg;
3483: ISGetLocalSize(iscol,&n);
3484: ISGetSize(iscol,&Ncols);
3486: /* (1) iscol -> nonscalable iscol_local */
3487: /* Check for special case: each processor gets entire matrix columns */
3488: ISIdentity(iscol_local,&flg);
3489: if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3490: if (allcolumns) {
3491: iscol_sub = iscol_local;
3492: PetscObjectReference((PetscObject)iscol_local);
3493: ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);
3495: } else {
3496: /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3497: PetscInt *idx,*cmap1,k;
3498: PetscMalloc1(Ncols,&idx);
3499: PetscMalloc1(Ncols,&cmap1);
3500: ISGetIndices(iscol_local,&is_idx);
3501: count = 0;
3502: k = 0;
3503: for (i=0; i<Ncols; i++) {
3504: j = is_idx[i];
3505: if (j >= cstart && j < cend) {
3506: /* diagonal part of mat */
3507: idx[count] = j;
3508: cmap1[count++] = i; /* column index in submat */
3509: } else if (Bn) {
3510: /* off-diagonal part of mat */
3511: if (j == garray[k]) {
3512: idx[count] = j;
3513: cmap1[count++] = i; /* column index in submat */
3514: } else if (j > garray[k]) {
3515: while (j > garray[k] && k < Bn-1) k++;
3516: if (j == garray[k]) {
3517: idx[count] = j;
3518: cmap1[count++] = i; /* column index in submat */
3519: }
3520: }
3521: }
3522: }
3523: ISRestoreIndices(iscol_local,&is_idx);
3525: ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);
3526: ISGetBlockSize(iscol,&cbs);
3527: ISSetBlockSize(iscol_sub,cbs);
3529: ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);
3530: }
3532: /* (3) Create sequential Msub */
3533: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);
3534: }
3536: ISGetLocalSize(iscol_sub,&count);
3537: aij = (Mat_SeqAIJ*)(Msub)->data;
3538: ii = aij->i;
3539: ISGetIndices(iscmap,&cmap);
3541: /*
3542: m - number of local rows
3543: Ncols - number of columns (same on all processors)
3544: rstart - first row in new global matrix generated
3545: */
3546: MatGetSize(Msub,&m,NULL);
3548: if (call == MAT_INITIAL_MATRIX) {
3549: /* (4) Create parallel newmat */
3550: PetscMPIInt rank,size;
3551: PetscInt csize;
3553: MPI_Comm_size(comm,&size);
3554: MPI_Comm_rank(comm,&rank);
3556: /*
3557: Determine the number of non-zeros in the diagonal and off-diagonal
3558: portions of the matrix in order to do correct preallocation
3559: */
3561: /* first get start and end of "diagonal" columns */
3562: ISGetLocalSize(iscol,&csize);
3563: if (csize == PETSC_DECIDE) {
3564: ISGetSize(isrow,&mglobal);
3565: if (mglobal == Ncols) { /* square matrix */
3566: nlocal = m;
3567: } else {
3568: nlocal = Ncols/size + ((Ncols % size) > rank);
3569: }
3570: } else {
3571: nlocal = csize;
3572: }
3573: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3574: rstart = rend - nlocal;
3575: if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3577: /* next, compute all the lengths */
3578: jj = aij->j;
3579: PetscMalloc1(2*m+1,&dlens);
3580: olens = dlens + m;
3581: for (i=0; i<m; i++) {
3582: jend = ii[i+1] - ii[i];
3583: olen = 0;
3584: dlen = 0;
3585: for (j=0; j<jend; j++) {
3586: if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3587: else dlen++;
3588: jj++;
3589: }
3590: olens[i] = olen;
3591: dlens[i] = dlen;
3592: }
3594: ISGetBlockSize(isrow,&bs);
3595: ISGetBlockSize(iscol,&cbs);
3597: MatCreate(comm,&M);
3598: MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);
3599: MatSetBlockSizes(M,bs,cbs);
3600: MatSetType(M,((PetscObject)mat)->type_name);
3601: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3602: PetscFree(dlens);
3604: } else { /* call == MAT_REUSE_MATRIX */
3605: M = *newmat;
3606: MatGetLocalSize(M,&i,NULL);
3607: if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3608: MatZeroEntries(M);
3609: /*
3610: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3611: rather than the slower MatSetValues().
3612: */
3613: M->was_assembled = PETSC_TRUE;
3614: M->assembled = PETSC_FALSE;
3615: }
3617: /* (5) Set values of Msub to *newmat */
3618: PetscMalloc1(count,&colsub);
3619: MatGetOwnershipRange(M,&rstart,NULL);
3621: jj = aij->j;
3622: aa = aij->a;
3623: for (i=0; i<m; i++) {
3624: row = rstart + i;
3625: nz = ii[i+1] - ii[i];
3626: for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3627: MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);
3628: jj += nz; aa += nz;
3629: }
3630: ISRestoreIndices(iscmap,&cmap);
3632: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3633: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3635: PetscFree(colsub);
3637: /* save Msub, iscol_sub and iscmap used in processor for next request */
3638: if (call == MAT_INITIAL_MATRIX) {
3639: *newmat = M;
3640: PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);
3641: MatDestroy(&Msub);
3643: PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);
3644: ISDestroy(&iscol_sub);
3646: PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);
3647: ISDestroy(&iscmap);
3649: if (iscol_local) {
3650: PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);
3651: ISDestroy(&iscol_local);
3652: }
3653: }
3654: return(0);
3655: }
3657: /*
3658: Not great since it makes two copies of the submatrix: first a SeqAIJ locally on each process,
3659: and then the end result by concatenating the local matrices.
3660: Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3662: Note: This requires a sequential iscol with all indices.
3663: */
3664: PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3665: {
3667: PetscMPIInt rank,size;
3668: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3669: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3670: Mat M,Mreuse;
3671: MatScalar *aa,*vwork;
3672: MPI_Comm comm;
3673: Mat_SeqAIJ *aij;
3674: PetscBool colflag,allcolumns=PETSC_FALSE;
3677: PetscObjectGetComm((PetscObject)mat,&comm);
3678: MPI_Comm_rank(comm,&rank);
3679: MPI_Comm_size(comm,&size);
3681: /* Check for special case: each processor gets entire matrix columns */
3682: ISIdentity(iscol,&colflag);
3683: ISGetLocalSize(iscol,&n);
3684: if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3686: if (call == MAT_REUSE_MATRIX) {
3687: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3688: if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3689: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);
3690: } else {
3691: MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);
3692: }
3694: /*
3695: m - number of local rows
3696: n - number of columns (same on all processors)
3697: rstart - first row in new global matrix generated
3698: */
3699: MatGetSize(Mreuse,&m,&n);
3700: MatGetBlockSizes(Mreuse,&bs,&cbs);
3701: if (call == MAT_INITIAL_MATRIX) {
3702: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3703: ii = aij->i;
3704: jj = aij->j;
3706: /*
3707: Determine the number of non-zeros in the diagonal and off-diagonal
3708: portions of the matrix in order to do correct preallocation
3709: */
3711: /* first get start and end of "diagonal" columns */
3712: if (csize == PETSC_DECIDE) {
3713: ISGetSize(isrow,&mglobal);
3714: if (mglobal == n) { /* square matrix */
3715: nlocal = m;
3716: } else {
3717: nlocal = n/size + ((n % size) > rank);
3718: }
3719: } else {
3720: nlocal = csize;
3721: }
3722: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3723: rstart = rend - nlocal;
3724: if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3726: /* next, compute all the lengths */
3727: PetscMalloc1(2*m+1,&dlens);
3728: olens = dlens + m;
3729: for (i=0; i<m; i++) {
3730: jend = ii[i+1] - ii[i];
3731: olen = 0;
3732: dlen = 0;
3733: for (j=0; j<jend; j++) {
3734: if (*jj < rstart || *jj >= rend) olen++;
3735: else dlen++;
3736: jj++;
3737: }
3738: olens[i] = olen;
3739: dlens[i] = dlen;
3740: }
3741: MatCreate(comm,&M);
3742: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3743: MatSetBlockSizes(M,bs,cbs);
3744: MatSetType(M,((PetscObject)mat)->type_name);
3745: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3746: PetscFree(dlens);
3747: } else {
3748: PetscInt ml,nl;
3750: M = *newmat;
3751: MatGetLocalSize(M,&ml,&nl);
3752: if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3753: MatZeroEntries(M);
3754: /*
3755: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3756: rather than the slower MatSetValues().
3757: */
3758: M->was_assembled = PETSC_TRUE;
3759: M->assembled = PETSC_FALSE;
3760: }
3761: MatGetOwnershipRange(M,&rstart,&rend);
3762: aij = (Mat_SeqAIJ*)(Mreuse)->data;
3763: ii = aij->i;
3764: jj = aij->j;
3765: aa = aij->a;
3766: for (i=0; i<m; i++) {
3767: row = rstart + i;
3768: nz = ii[i+1] - ii[i];
3769: cwork = jj; jj += nz;
3770: vwork = aa; aa += nz;
3771: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3772: }
3774: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3775: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3776: *newmat = M;
3778: /* save submatrix used in processor for next request */
3779: if (call == MAT_INITIAL_MATRIX) {
3780: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3781: MatDestroy(&Mreuse);
3782: }
3783: return(0);
3784: }
3786: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3787: {
3788: PetscInt m,cstart, cend,j,nnz,i,d;
3789: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3790: const PetscInt *JJ;
3791: PetscScalar *values;
3793: PetscBool nooffprocentries;
3796: if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3798: PetscLayoutSetUp(B->rmap);
3799: PetscLayoutSetUp(B->cmap);
3800: m = B->rmap->n;
3801: cstart = B->cmap->rstart;
3802: cend = B->cmap->rend;
3803: rstart = B->rmap->rstart;
3805: PetscMalloc2(m,&d_nnz,m,&o_nnz);
3807: #if defined(PETSC_USE_DEBUG)
3808: for (i=0; i<m; i++) {
3809: nnz = Ii[i+1]- Ii[i];
3810: JJ = J + Ii[i];
3811: if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3812: if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3813: if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3814: }
3815: #endif
3817: for (i=0; i<m; i++) {
3818: nnz = Ii[i+1]- Ii[i];
3819: JJ = J + Ii[i];
3820: nnz_max = PetscMax(nnz_max,nnz);
3821: d = 0;
3822: for (j=0; j<nnz; j++) {
3823: if (cstart <= JJ[j] && JJ[j] < cend) d++;
3824: }
3825: d_nnz[i] = d;
3826: o_nnz[i] = nnz - d;
3827: }
3828: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3829: PetscFree2(d_nnz,o_nnz);
3831: if (v) values = (PetscScalar*)v;
3832: else {
3833: PetscCalloc1(nnz_max+1,&values);
3834: }
3836: for (i=0; i<m; i++) {
3837: ii = i + rstart;
3838: nnz = Ii[i+1]- Ii[i];
3839: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
3840: }
3841: nooffprocentries = B->nooffprocentries;
3842: B->nooffprocentries = PETSC_TRUE;
3843: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3844: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3845: B->nooffprocentries = nooffprocentries;
3847: if (!v) {
3848: PetscFree(values);
3849: }
3850: MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3851: return(0);
3852: }
3854: /*@
3855: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3856: (the default parallel PETSc format).
3858: Collective on MPI_Comm
3860: Input Parameters:
3861: + B - the matrix
3862: . i - the indices into j for the start of each local row (starts with zero)
3863: . j - the column indices for each local row (starts with zero)
3864: - v - optional values in the matrix
3866: Level: developer
3868: Notes:
3869: The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3870: thus you CANNOT change the matrix entries by changing the values of v[] after you have
3871: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873: The i and j indices are 0 based, and the i array holds the offsets into the local j (and v) arrays.
3875: The format used for the sparse matrix input is equivalent to a
3876: row-major ordering, i.e. for the following matrix, the expected input data is
3877: as shown:
3879: $ 1 0 0
3880: $ 2 0 3 P0
3881: $ -------
3882: $ 4 5 6 P1
3883: $
3884: $ Process0 [P0]: rows_owned=[0,1]
3885: $ i = {0,1,3} [size = nrow+1 = 2+1]
3886: $ j = {0,0,2} [size = 3]
3887: $ v = {1,2,3} [size = 3]
3888: $
3889: $ Process1 [P1]: rows_owned=[2]
3890: $ i = {0,3} [size = nrow+1 = 1+1]
3891: $ j = {0,1,2} [size = 3]
3892: $ v = {4,5,6} [size = 3]
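A minimal call sequence for Process0 of the example above (a sketch; it assumes B has already been created
on the parallel communicator with MatCreate(), MatSetSizes(B,2,PETSC_DECIDE,3,3) and MatSetType(B,MATMPIAIJ)):

$ PetscInt    i[] = {0,1,3};   /* row offsets for the 2 local rows */
$ PetscInt    j[] = {0,0,2};   /* global column indices */
$ PetscScalar v[] = {1,2,3};   /* values; v may also be passed as NULL */
$ MatMPIAIJSetPreallocationCSR(B,i,j,v);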
3894: .keywords: matrix, aij, compressed row, sparse, parallel
3896: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3897: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3898: @*/
3899: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3900: {
3904: PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3905: return(0);
3906: }
3908: /*@C
3909: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3910: (the default parallel PETSc format). For good matrix assembly performance
3911: the user should preallocate the matrix storage by setting the parameters
3912: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3913: performance can be increased by more than a factor of 50.
3915: Collective on MPI_Comm
3917: Input Parameters:
3918: + B - the matrix
3919: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3920: (same value is used for all local rows)
3921: . d_nnz - array containing the number of nonzeros in the various rows of the
3922: DIAGONAL portion of the local submatrix (possibly different for each row)
3923: or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3924: The size of this array is equal to the number of local rows, i.e 'm'.
3925: For matrices that will be factored, you must leave room for (and set)
3926: the diagonal entry even if it is zero.
3927: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3928: submatrix (same value is used for all local rows).
3929: - o_nnz - array containing the number of nonzeros in the various rows of the
3930: OFF-DIAGONAL portion of the local submatrix (possibly different for
3931: each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3932: structure. The size of this array is equal to the number
3933: of local rows, i.e 'm'.
3935: If the *_nnz parameter is given then the *_nz parameter is ignored
3937: The AIJ format (also called the Yale sparse matrix format or
3938: compressed row storage (CSR)) is fully compatible with standard Fortran 77
3939: storage. The stored row and column indices begin with zero.
3940: See Users-Manual: ch_mat for details.
3942: The parallel matrix is partitioned such that the first m0 rows belong to
3943: process 0, the next m1 rows belong to process 1, the next m2 rows belong
3944: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3946: The DIAGONAL portion of the local submatrix of a processor can be defined
3947: as the submatrix which is obtained by extracting the part corresponding to
3948: the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3949: first row that belongs to the processor, r2 is the last row belonging to
3950: this processor, and c1-c2 is the range of indices of the local part of a
3951: vector suitable for applying the matrix to. This is an m x n matrix. In the
3952: common case of a square matrix, the row and column ranges are the same and
3953: the DIAGONAL part is also square. The remaining portion of the local
3954: submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3956: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3958: You can call MatGetInfo() to get information on how effective the preallocation was;
3959: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3960: You can also run with the option -info and look for messages with the string
3961: malloc in them to see if additional memory allocation was needed.
3963: Example usage:
3965: Consider the following 8x8 matrix with 34 non-zero values, that is
3966: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3967: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3968: as follows:
3970: .vb
3971: 1 2 0 | 0 3 0 | 0 4
3972: Proc0 0 5 6 | 7 0 0 | 8 0
3973: 9 0 10 | 11 0 0 | 12 0
3974: -------------------------------------
3975: 13 0 14 | 15 16 17 | 0 0
3976: Proc1 0 18 0 | 19 20 21 | 0 0
3977: 0 0 0 | 22 23 0 | 24 0
3978: -------------------------------------
3979: Proc2 25 26 27 | 0 0 28 | 29 0
3980: 30 0 0 | 31 32 33 | 0 34
3981: .ve
3983: This can be represented as a collection of submatrices as:
3985: .vb
3986: A B C
3987: D E F
3988: G H I
3989: .ve
3991: Where the submatrices A,B,C are owned by proc0, D,E,F are
3992: owned by proc1, G,H,I are owned by proc2.
3994: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3995: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3996: The 'M','N' parameters are 8,8, and have the same values on all procs.
3998: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3999: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4000: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4001: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4002: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4003: matrix, and [DF] as another SeqAIJ matrix.
4005: When d_nz, o_nz parameters are specified, d_nz storage elements are
4006: allocated for every row of the local diagonal submatrix, and o_nz
4007: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4008: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4009: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4010: In this case, the values of d_nz,o_nz are:
4011: .vb
4012: proc0 : dnz = 2, o_nz = 2
4013: proc1 : dnz = 3, o_nz = 2
4014: proc2 : dnz = 1, o_nz = 4
4015: .ve
4016: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4017: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4018: for proc2, i.e. we are using 12+15+10=37 storage locations to store
4019: 34 values.
4021: When d_nnz, o_nnz parameters are specified, the storage is specified
4022: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4023: In the above case the values for d_nnz,o_nnz are:
4024: .vb
4025: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4026: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4027: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4028: .ve
4029: Here the space allocated is the sum of all the above values, i.e. 34, and
4030: hence the preallocation is perfect.
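A minimal call sequence for proc0 of this example (a sketch; it assumes B was created with MatCreate(),
MatSetSizes(B,3,3,8,8) and MatSetType(B,MATMPIAIJ)):

$ PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$ MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);   /* per-row counts; d_nz and o_nz are then ignored */
$ /* or, with the single-value form: MatMPIAIJSetPreallocation(B,2,NULL,2,NULL); */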
4032: Level: intermediate
4034: .keywords: matrix, aij, compressed row, sparse, parallel
4036: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4037: MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4038: @*/
4039: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4040: {
4046: PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4047: return(0);
4048: }
4050: /*@
4051: MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4052: in standard CSR format.
4054: Collective on MPI_Comm
4056: Input Parameters:
4057: + comm - MPI communicator
4058: . m - number of local rows (Cannot be PETSC_DECIDE)
4059: . n - This value should be the same as the local size used in creating the
4060: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4061: calculated if N is given) For square matrices n is almost always m.
4062: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4063: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4064: . i - row indices
4065: . j - column indices
4066: - a - matrix values
4068: Output Parameter:
4069: . mat - the matrix
4071: Level: intermediate
4073: Notes:
4074: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4075: thus you CANNOT change the matrix entries by changing the values of a[] after you have
4076: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4078: The i and j indices are 0 based, and the i array holds the offsets into the local j (and a) arrays.
4080: The format used for the sparse matrix input is equivalent to a
4081: row-major ordering, i.e. for the following matrix, the expected input data is
4082: as shown:
4084: $ 1 0 0
4085: $ 2 0 3 P0
4086: $ -------
4087: $ 4 5 6 P1
4088: $
4089: $ Process0 [P0]: rows_owned=[0,1]
4090: $ i = {0,1,3} [size = nrow+1 = 2+1]
4091: $ j = {0,0,2} [size = 3]
4092: $ v = {1,2,3} [size = 3]
4093: $
4094: $ Process1 [P1]: rows_owned=[2]
4095: $ i = {0,3} [size = nrow+1 = 1+1]
4096: $ j = {0,1,2} [size = 3]
4097: $ v = {4,5,6} [size = 3]
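A minimal call sequence for Process0 of the example above (a sketch; comm is assumed to be a communicator
containing both processes):

$ Mat         A;
$ PetscInt    i[] = {0,1,3},j[] = {0,0,2};
$ PetscScalar v[] = {1,2,3};
$ MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);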
4099: .keywords: matrix, aij, compressed row, sparse, parallel
4101: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4102: MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4103: @*/
4104: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4105: {
4109: if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4110: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4111: MatCreate(comm,mat);
4112: MatSetSizes(*mat,m,n,M,N);
4113: /* MatSetBlockSizes(M,bs,cbs); */
4114: MatSetType(*mat,MATMPIAIJ);
4115: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4116: return(0);
4117: }
4119: /*@C
4120: MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4121: (the default parallel PETSc format). For good matrix assembly performance
4122: the user should preallocate the matrix storage by setting the parameters
4123: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4124: performance can be increased by more than a factor of 50.
4126: Collective on MPI_Comm
4128: Input Parameters:
4129: + comm - MPI communicator
4130: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4131: This value should be the same as the local size used in creating the
4132: y vector for the matrix-vector product y = Ax.
4133: . n - This value should be the same as the local size used in creating the
4134: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4135: calculated if N is given) For square matrices n is almost always m.
4136: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4137: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4138: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4139: (same value is used for all local rows)
4140: . d_nnz - array containing the number of nonzeros in the various rows of the
4141: DIAGONAL portion of the local submatrix (possibly different for each row)
4142: or NULL, if d_nz is used to specify the nonzero structure.
4143: The size of this array is equal to the number of local rows, i.e 'm'.
4144: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4145: submatrix (same value is used for all local rows).
4146: - o_nnz - array containing the number of nonzeros in the various rows of the
4147: OFF-DIAGONAL portion of the local submatrix (possibly different for
4148: each row) or NULL, if o_nz is used to specify the nonzero
4149: structure. The size of this array is equal to the number
4150: of local rows, i.e 'm'.
4152: Output Parameter:
4153: . A - the matrix
4155: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4156: MatXXXXSetPreallocation() paradigm instead of this routine directly.
4157: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4159: Notes:
4160: If the *_nnz parameter is given then the *_nz parameter is ignored
4162: m,n,M,N parameters specify the size of the matrix, and its partitioning across
4163: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4164: storage requirements for this matrix.
4166: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4167: processor then it must be used on all processors that share the object for
4168: that argument.
4170: The user MUST specify either the local or global matrix dimensions
4171: (possibly both).
4173: The parallel matrix is partitioned across processors such that the
4174: first m0 rows belong to process 0, the next m1 rows belong to
4175: process 1, the next m2 rows belong to process 2 etc.. where
4176: m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4177: values corresponding to an [m x N] submatrix.
4179: The columns are logically partitioned with the n0 columns belonging
4180: to 0th partition, the next n1 columns belonging to the next
4181: partition etc.. where n0,n1,n2... are the input parameter 'n'.
4183: The DIAGONAL portion of the local submatrix on any given processor
4184: is the submatrix corresponding to the rows and columns m,n
4185: owned by the given processor, i.e. the diagonal matrix on
4186: process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4187: etc. The remaining portion of the local submatrix [m x (N-n)]
4188: constitutes the OFF-DIAGONAL portion. The example below better
4189: illustrates this concept.
4191: For a square global matrix we define each processor's diagonal portion
4192: to be its local rows and the corresponding columns (a square submatrix);
4193: each processor's off-diagonal portion encompasses the remainder of the
4194: local matrix (a rectangular submatrix).
4196: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4198: When calling this routine with a single process communicator, a matrix of
4199: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4200: type of communicator, use the construction mechanism
4201: .vb
4202: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4203: .ve
4210: By default, this format uses inodes (identical nodes) when possible.
4211: We search for consecutive rows with the same nonzero structure, thereby
4212: reusing matrix information to achieve increased efficiency.
4214: Options Database Keys:
4215: + -mat_no_inode - Do not use inodes
4216: - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4220: Example usage:
4222: Consider the following 8x8 matrix with 34 non-zero values, that is
4223: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4224: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4225: as follows
4227: .vb
4228: 1 2 0 | 0 3 0 | 0 4
4229: Proc0 0 5 6 | 7 0 0 | 8 0
4230: 9 0 10 | 11 0 0 | 12 0
4231: -------------------------------------
4232: 13 0 14 | 15 16 17 | 0 0
4233: Proc1 0 18 0 | 19 20 21 | 0 0
4234: 0 0 0 | 22 23 0 | 24 0
4235: -------------------------------------
4236: Proc2 25 26 27 | 0 0 28 | 29 0
4237: 30 0 0 | 31 32 33 | 0 34
4238: .ve
4240: This can be represented as a collection of submatrices as
4242: .vb
4243: A B C
4244: D E F
4245: G H I
4246: .ve
4248: Where the submatrices A,B,C are owned by proc0, D,E,F are
4249: owned by proc1, G,H,I are owned by proc2.
4251: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4252: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4253: The 'M','N' parameters are 8,8, and have the same values on all procs.
4255: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4256: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4257: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4258: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4259: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4260: matrix, and [DF] as another SeqAIJ matrix.
4262: When d_nz, o_nz parameters are specified, d_nz storage elements are
4263: allocated for every row of the local diagonal submatrix, and o_nz
4264: storage locations are allocated for every row of the OFF-DIAGONAL submat.
4265: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4266: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4267: In this case, the values of d_nz,o_nz are
4268: .vb
4269: proc0 : dnz = 2, o_nz = 2
4270: proc1 : dnz = 3, o_nz = 2
4271: proc2 : dnz = 1, o_nz = 4
4272: .ve
4273: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4274: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4275: for proc2, i.e. we are using 12+15+10=37 storage locations to store
4276: 34 values.
4278: When d_nnz, o_nnz parameters are specified, the storage is specified
4279: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4280: In the above case the values for d_nnz,o_nnz are
4281: .vb
4282: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4283: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4284: proc2: d_nnz = [1,1] and o_nnz = [4,4]
4285: .ve
4286: Here the space allocated is the sum of all the above values, i.e. 34, and
4287: hence the preallocation is perfect.
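A minimal call sequence for proc0 of this example (a sketch; the nnz arrays may instead be passed as NULL
together with nonzero d_nz/o_nz values):

$ Mat      A;
$ PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
$ MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);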
4289: Level: intermediate
4291: .keywords: matrix, aij, compressed row, sparse, parallel
4293: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4294: MATMPIAIJ, MatCreateMPIAIJWithArrays()
4295: @*/
4296: PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4297: {
4299: PetscMPIInt size;
4302: MatCreate(comm,A);
4303: MatSetSizes(*A,m,n,M,N);
4304: MPI_Comm_size(comm,&size);
4305: if (size > 1) {
4306: MatSetType(*A,MATMPIAIJ);
4307: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4308: } else {
4309: MatSetType(*A,MATSEQAIJ);
4310: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4311: }
4312: return(0);
4313: }
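/*
   MatMPIAIJGetSeqAIJ - gives access to the two SeqAIJ blocks that make up a MATMPIAIJ matrix:
   Ad is the "diagonal" block, Ao is the "off-diagonal" block, and colmap maps the (compressed)
   local column numbers of Ao to global column numbers. Any of the output arguments may be NULL.
   The input matrix must be of type MATMPIAIJ.
*/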
4315: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4316: {
4317: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4318: PetscBool flg;
4320:
4322: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
4323: if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4324: if (Ad) *Ad = a->A;
4325: if (Ao) *Ao = a->B;
4326: if (colmap) *colmap = a->garray;
4327: return(0);
4328: }
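/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the sequential matrices inmat (one per process, each
   with the full global number of columns) on top of each other to form a parallel AIJ matrix. n is the
   local number of columns of the result (or PETSC_DECIDE); with MAT_INITIAL_MATRIX the nonzero structure
   is determined and preallocated, with MAT_REUSE_MATRIX only the values are re-inserted.
*/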
4330: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4331: {
4333: PetscInt m,N,i,rstart,nnz,Ii;
4334: PetscInt *indx;
4335: PetscScalar *values;
4338: MatGetSize(inmat,&m,&N);
4339: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4340: PetscInt *dnz,*onz,sum,bs,cbs;
4342: if (n == PETSC_DECIDE) {
4343: PetscSplitOwnership(comm,&n,&N);
4344: }
4345: /* Check sum(n) = N */
4346: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
4347: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4349: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4350: rstart -= m;
4352: MatPreallocateInitialize(comm,m,n,dnz,onz);
4353: for (i=0; i<m; i++) {
4354: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4355: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4356: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4357: }
4359: MatCreate(comm,outmat);
4360: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4361: MatGetBlockSizes(inmat,&bs,&cbs);
4362: MatSetBlockSizes(*outmat,bs,cbs);
4363: MatSetType(*outmat,MATAIJ);
4364: MatSeqAIJSetPreallocation(*outmat,0,dnz);
4365: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4366: MatPreallocateFinalize(dnz,onz);
4367: }
4369: /* numeric phase */
4370: MatGetOwnershipRange(*outmat,&rstart,NULL);
4371: for (i=0; i<m; i++) {
4372: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4373: Ii = i + rstart;
4374: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4375: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4376: }
4377: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4378: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4379: return(0);
4380: }
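/*
   MatFileSplit - copies the locally owned rows of A into a sequential matrix that keeps the full set of
   global columns and appends that matrix to a per-process binary file named <outfile>.<rank>.
*/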
4382: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4383: {
4384: PetscErrorCode ierr;
4385: PetscMPIInt rank;
4386: PetscInt m,N,i,rstart,nnz;
4387: size_t len;
4388: const PetscInt *indx;
4389: PetscViewer out;
4390: char *name;
4391: Mat B;
4392: const PetscScalar *values;
4395: MatGetLocalSize(A,&m,0);
4396: MatGetSize(A,0,&N);
4397: /* Should this be the type of the diagonal block of A? */
4398: MatCreate(PETSC_COMM_SELF,&B);
4399: MatSetSizes(B,m,N,m,N);
4400: MatSetBlockSizesFromMats(B,A,A);
4401: MatSetType(B,MATSEQAIJ);
4402: MatSeqAIJSetPreallocation(B,0,NULL);
4403: MatGetOwnershipRange(A,&rstart,0);
4404: for (i=0; i<m; i++) {
4405: MatGetRow(A,i+rstart,&nnz,&indx,&values);
4406: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4407: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4408: }
4409: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4410: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
4412: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4413: PetscStrlen(outfile,&len);
4414: PetscMalloc1(len+5,&name);
4415: sprintf(name,"%s.%d",outfile,rank);
4416: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4417: PetscFree(name);
4418: MatView(B,out);
4419: PetscViewerDestroy(&out);
4420: MatDestroy(&B);
4421: return(0);
4422: }
4424: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4425: {
4426: PetscErrorCode ierr;
4427: Mat_Merge_SeqsToMPI *merge;
4428: PetscContainer container;
4431: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);
4432: if (container) {
4433: PetscContainerGetPointer(container,(void**)&merge);
4434: PetscFree(merge->id_r);
4435: PetscFree(merge->len_s);
4436: PetscFree(merge->len_r);
4437: PetscFree(merge->bi);
4438: PetscFree(merge->bj);
4439: PetscFree(merge->buf_ri[0]);
4440: PetscFree(merge->buf_ri);
4441: PetscFree(merge->buf_rj[0]);
4442: PetscFree(merge->buf_rj);
4443: PetscFree(merge->coi);
4444: PetscFree(merge->coj);
4445: PetscFree(merge->owners_co);
4446: PetscLayoutDestroy(&merge->rowmap);
4447: PetscFree(merge);
4448: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
4449: }
4450: MatDestroy_MPIAIJ(A);
4451: return(0);
4452: }
4454: #include <../src/mat/utils/freespace.h>
4455: #include <petscbt.h>
4457: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4458: {
4459: PetscErrorCode ierr;
4460: MPI_Comm comm;
4461: Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4462: PetscMPIInt size,rank,taga,*len_s;
4463: PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4464: PetscInt proc,m;
4465: PetscInt **buf_ri,**buf_rj;
4466: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4467: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4468: MPI_Request *s_waits,*r_waits;
4469: MPI_Status *status;
4470: MatScalar *aa=a->a;
4471: MatScalar **abuf_r,*ba_i;
4472: Mat_Merge_SeqsToMPI *merge;
4473: PetscContainer container;
4476: PetscObjectGetComm((PetscObject)mpimat,&comm);
4477: PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);
4479: MPI_Comm_size(comm,&size);
4480: MPI_Comm_rank(comm,&rank);
4482: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4483: PetscContainerGetPointer(container,(void**)&merge);
4485: bi = merge->bi;
4486: bj = merge->bj;
4487: buf_ri = merge->buf_ri;
4488: buf_rj = merge->buf_rj;
4490: PetscMalloc1(size,&status);
4491: owners = merge->rowmap->range;
4492: len_s = merge->len_s;
4494: /* send and recv matrix values */
4495: /*-----------------------------*/
4496: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4497: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
4499: PetscMalloc1(merge->nsend+1,&s_waits);
4500: for (proc=0,k=0; proc<size; proc++) {
4501: if (!len_s[proc]) continue;
4502: i = owners[proc];
4503: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4504: k++;
4505: }
4507: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
4508: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
4509: PetscFree(status);
4511: PetscFree(s_waits);
4512: PetscFree(r_waits);
4514: /* insert mat values of mpimat */
4515: /*----------------------------*/
4516: PetscMalloc1(N,&ba_i);
4517: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4519: for (k=0; k<merge->nrecv; k++) {
4520: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4521: nrows = *(buf_ri_k[k]);
4522: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
4523: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4524: }
4526: /* set values of ba */
4527: m = merge->rowmap->n;
4528: for (i=0; i<m; i++) {
4529: arow = owners[rank] + i;
4530: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4531: bnzi = bi[i+1] - bi[i];
4532: PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));
4534: /* add local non-zero vals of this proc's seqmat into ba */
4535: anzi = ai[arow+1] - ai[arow];
4536: aj = a->j + ai[arow];
4537: aa = a->a + ai[arow];
4538: nextaj = 0;
4539: for (j=0; nextaj<anzi; j++) {
4540: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4541: ba_i[j] += aa[nextaj++];
4542: }
4543: }
4545: /* add received vals into ba */
4546: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4547: /* i-th row */
4548: if (i == *nextrow[k]) {
4549: anzi = *(nextai[k]+1) - *nextai[k];
4550: aj = buf_rj[k] + *(nextai[k]);
4551: aa = abuf_r[k] + *(nextai[k]);
4552: nextaj = 0;
4553: for (j=0; nextaj<anzi; j++) {
4554: if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4555: ba_i[j] += aa[nextaj++];
4556: }
4557: }
4558: nextrow[k]++; nextai[k]++;
4559: }
4560: }
4561: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4562: }
4563: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4564: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
4566: PetscFree(abuf_r[0]);
4567: PetscFree(abuf_r);
4568: PetscFree(ba_i);
4569: PetscFree3(buf_ri_k,nextrow,nextai);
4570: PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4571: return(0);
4572: }
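/*
   MatCreateMPIAIJSumSeqAIJSymbolic - symbolic phase of MatCreateMPIAIJSumSeqAIJ(): sets up the row
   ownership, sends the ij-structure of the non-owned rows of seqmat to their owning processes, merges
   the received rows with the locally owned ones through a sorted linked list, and creates and
   preallocates the parallel matrix. The merge data is attached to the result in the "MatMergeSeqsToMPI"
   container so that MatCreateMPIAIJSumSeqAIJNumeric() can fill in the numerical values.
*/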
4574: PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4575: {
4576: PetscErrorCode ierr;
4577: Mat B_mpi;
4578: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4579: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4580: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4581: PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4582: PetscInt len,proc,*dnz,*onz,bs,cbs;
4583: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4584: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4585: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4586: MPI_Status *status;
4587: PetscFreeSpaceList free_space=NULL,current_space=NULL;
4588: PetscBT lnkbt;
4589: Mat_Merge_SeqsToMPI *merge;
4590: PetscContainer container;
4593: PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);
4595: /* make sure it is a PETSc comm */
4596: PetscCommDuplicate(comm,&comm,NULL);
4597: MPI_Comm_size(comm,&size);
4598: MPI_Comm_rank(comm,&rank);
4600: PetscNew(&merge);
4601: PetscMalloc1(size,&status);
4603: /* determine row ownership */
4604: /*---------------------------------------------------------*/
4605: PetscLayoutCreate(comm,&merge->rowmap);
4606: PetscLayoutSetLocalSize(merge->rowmap,m);
4607: PetscLayoutSetSize(merge->rowmap,M);
4608: PetscLayoutSetBlockSize(merge->rowmap,1);
4609: PetscLayoutSetUp(merge->rowmap);
4610: PetscMalloc1(size,&len_si);
4611: PetscMalloc1(size,&merge->len_s);
4613: m = merge->rowmap->n;
4614: owners = merge->rowmap->range;
4616: /* determine the number of messages to send, their lengths */
4617: /*---------------------------------------------------------*/
4618: len_s = merge->len_s;
4620: len = 0; /* length of buf_si[] */
4621: merge->nsend = 0;
4622: for (proc=0; proc<size; proc++) {
4623: len_si[proc] = 0;
4624: if (proc == rank) {
4625: len_s[proc] = 0;
4626: } else {
4627: len_si[proc] = owners[proc+1] - owners[proc] + 1;
4628: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4629: }
4630: if (len_s[proc]) {
4631: merge->nsend++;
4632: nrows = 0;
4633: for (i=owners[proc]; i<owners[proc+1]; i++) {
4634: if (ai[i+1] > ai[i]) nrows++;
4635: }
4636: len_si[proc] = 2*(nrows+1);
4637: len += len_si[proc];
4638: }
4639: }
4641: /* determine the number and length of messages to receive for ij-structure */
4642: /*-------------------------------------------------------------------------*/
4643: PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4644: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
4646: /* post the Irecv of j-structure */
4647: /*-------------------------------*/
4648: PetscCommGetNewTag(comm,&tagj);
4649: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
4651: /* post the Isend of j-structure */
4652: /*--------------------------------*/
4653: PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);
4655: for (proc=0, k=0; proc<size; proc++) {
4656: if (!len_s[proc]) continue;
4657: i = owners[proc];
4658: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4659: k++;
4660: }
4662: /* receives and sends of j-structure are complete */
4663: /*------------------------------------------------*/
4664: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
4665: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
4667: /* send and recv i-structure */
4668: /*---------------------------*/
4669: PetscCommGetNewTag(comm,&tagi);
4670: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
4672: PetscMalloc1(len+1,&buf_s);
4673: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4674: for (proc=0,k=0; proc<size; proc++) {
4675: if (!len_s[proc]) continue;
4676: /* form outgoing message for i-structure:
4677: buf_si[0]: nrows to be sent
4678: [1:nrows]: row index (global)
4679: [nrows+1:2*nrows+1]: i-structure index
4680: */
4681: /*-------------------------------------------*/
4682: nrows = len_si[proc]/2 - 1;
4683: buf_si_i = buf_si + nrows+1;
4684: buf_si[0] = nrows;
4685: buf_si_i[0] = 0;
4686: nrows = 0;
4687: for (i=owners[proc]; i<owners[proc+1]; i++) {
4688: anzi = ai[i+1] - ai[i];
4689: if (anzi) {
4690: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4691: buf_si[nrows+1] = i-owners[proc]; /* local row index */
4692: nrows++;
4693: }
4694: }
4695: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4696: k++;
4697: buf_si += len_si[proc];
4698: }
4700: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
4701: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
4703: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
4704: for (i=0; i<merge->nrecv; i++) {
4705: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4706: }
4708: PetscFree(len_si);
4709: PetscFree(len_ri);
4710: PetscFree(rj_waits);
4711: PetscFree2(si_waits,sj_waits);
4712: PetscFree(ri_waits);
4713: PetscFree(buf_s);
4714: PetscFree(status);
4716: /* compute a local seq matrix in each processor */
4717: /*----------------------------------------------*/
4718: /* allocate bi array and free space for accumulating nonzero column info */
4719: PetscMalloc1(m+1,&bi);
4720: bi[0] = 0;
4722: /* create and initialize a linked list */
4723: nlnk = N+1;
4724: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
4726: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4727: len = ai[owners[rank+1]] - ai[owners[rank]];
4728: PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);
4730: current_space = free_space;
4732: /* determine symbolic info for each local row */
4733: PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);
4735: for (k=0; k<merge->nrecv; k++) {
4736: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4737: nrows = *buf_ri_k[k];
4738: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4739: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4740: }
4742: MatPreallocateInitialize(comm,m,n,dnz,onz);
4743: len = 0;
4744: for (i=0; i<m; i++) {
4745: bnzi = 0;
4746: /* add local non-zero cols of this proc's seqmat into lnk */
4747: arow = owners[rank] + i;
4748: anzi = ai[arow+1] - ai[arow];
4749: aj = a->j + ai[arow];
4750: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4751: bnzi += nlnk;
4752: /* add received col data into lnk */
4753: for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4754: if (i == *nextrow[k]) { /* i-th row */
4755: anzi = *(nextai[k]+1) - *nextai[k];
4756: aj = buf_rj[k] + *nextai[k];
4757: PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);
4758: bnzi += nlnk;
4759: nextrow[k]++; nextai[k]++;
4760: }
4761: }
4762: if (len < bnzi) len = bnzi; /* =max(bnzi) */
4764: /* if free space is not available, make more free space */
4765: if (current_space->local_remaining<bnzi) {
4766: PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);
4767: nspacedouble++;
4768: }
4769: /* copy data into free space, then initialize lnk */
4770: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4771: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
4773: current_space->array += bnzi;
4774: current_space->local_used += bnzi;
4775: current_space->local_remaining -= bnzi;
4777: bi[i+1] = bi[i] + bnzi;
4778: }
4780: PetscFree3(buf_ri_k,nextrow,nextai);
4782: PetscMalloc1(bi[m]+1,&bj);
4783: PetscFreeSpaceContiguous(&free_space,bj);
4784: PetscLLDestroy(lnk,lnkbt);
4786: /* create symbolic parallel matrix B_mpi */
4787: /*---------------------------------------*/
4788: MatGetBlockSizes(seqmat,&bs,&cbs);
4789: MatCreate(comm,&B_mpi);
4790: if (n==PETSC_DECIDE) {
4791: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4792: } else {
4793: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4794: }
4795: MatSetBlockSizes(B_mpi,bs,cbs);
4796: MatSetType(B_mpi,MATMPIAIJ);
4797: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4798: MatPreallocateFinalize(dnz,onz);
4799: MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
4801: /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4802: B_mpi->assembled = PETSC_FALSE;
4803: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4804: merge->bi = bi;
4805: merge->bj = bj;
4806: merge->buf_ri = buf_ri;
4807: merge->buf_rj = buf_rj;
4808: merge->coi = NULL;
4809: merge->coj = NULL;
4810: merge->owners_co = NULL;
4812: PetscCommDestroy(&comm);
4814: /* attach the supporting struct to B_mpi for reuse */
4815: PetscContainerCreate(PETSC_COMM_SELF,&container);
4816: PetscContainerSetPointer(container,merge);
4817: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4818: PetscContainerDestroy(&container);
4819: *mpimat = B_mpi;
4821: PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4822: return(0);
4823: }
4825: /*@C
4826: MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4827: matrices from each processor
4829: Collective on MPI_Comm
4831: Input Parameters:
4832: + comm - the communicators the parallel matrix will live on
4833: . seqmat - the input sequential matrices
4834: . m - number of local rows (or PETSC_DECIDE)
4835: . n - number of local columns (or PETSC_DECIDE)
4836: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838: Output Parameter:
4839: . mpimat - the parallel matrix generated
4841: Level: advanced
4843: Notes:
4844: The dimensions of the sequential matrix in each processor MUST be the same.
4845: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4846: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
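   Example usage (a minimal sketch; seqmat is assumed to hold this process' contribution and to have the
   same global dimensions on every process):

$ Mat C;
$ MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
$ /* later, after the values of seqmat (but not its nonzero pattern) have changed */
$ MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);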
4847: @*/
4848: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4849: {
4851: PetscMPIInt size;
4854: MPI_Comm_size(comm,&size);
4855: if (size == 1) {
4856: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4857: if (scall == MAT_INITIAL_MATRIX) {
4858: MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4859: } else {
4860: MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4861: }
4862: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4863: return(0);
4864: }
4865: PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4866: if (scall == MAT_INITIAL_MATRIX) {
4867: MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4868: }
4869: MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4870: PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4871: return(0);
4872: }
4874: /*@
4875: MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4876: mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4877: with MatGetSize().
4879: Not Collective
4881: Input Parameters:
4882: + A - the matrix
4883: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885: Output Parameter:
4886: . A_loc - the local sequential matrix generated
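   Example usage (a minimal sketch; A is assumed to be a MATMPIAIJ matrix):

$ Mat A_loc;
$ MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
$ /* ... use A_loc ... */
$ MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh the values after A has changed */
$ MatDestroy(&A_loc);                                /* destroy it when no longer needed */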
4888: Level: developer
4890: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4892: @*/
4893: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4894: {
4896: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
4897: Mat_SeqAIJ *mat,*a,*b;
4898: PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4899: MatScalar *aa,*ba,*cam;
4900: PetscScalar *ca;
4901: PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4902: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
4903: PetscBool match;
4904: MPI_Comm comm;
4905: PetscMPIInt size;
4908: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
4909: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4910: PetscObjectGetComm((PetscObject)A,&comm);
4911: MPI_Comm_size(comm,&size);
4912: if (size == 1 && scall == MAT_REUSE_MATRIX) return(0);
4914: PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4915: a = (Mat_SeqAIJ*)(mpimat->A)->data;
4916: b = (Mat_SeqAIJ*)(mpimat->B)->data;
4917: ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4918: aa = a->a; ba = b->a;
4919: if (scall == MAT_INITIAL_MATRIX) {
4920: if (size == 1) {
4921: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);
4922: return(0);
4923: }
4925: PetscMalloc1(1+am,&ci);
4926: ci[0] = 0;
4927: for (i=0; i<am; i++) {
4928: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4929: }
4930: PetscMalloc1(1+ci[am],&cj);
4931: PetscMalloc1(1+ci[am],&ca);
4932: k = 0;
4933: for (i=0; i<am; i++) {
4934: ncols_o = bi[i+1] - bi[i];
4935: ncols_d = ai[i+1] - ai[i];
4936: /* off-diagonal portion of A */
4937: for (jo=0; jo<ncols_o; jo++) {
4938: col = cmap[*bj];
4939: if (col >= cstart) break;
4940: cj[k] = col; bj++;
4941: ca[k++] = *ba++;
4942: }
4943: /* diagonal portion of A */
4944: for (j=0; j<ncols_d; j++) {
4945: cj[k] = cstart + *aj++;
4946: ca[k++] = *aa++;
4947: }
4948: /* off-diagonal portion of A */
4949: for (j=jo; j<ncols_o; j++) {
4950: cj[k] = cmap[*bj++];
4951: ca[k++] = *ba++;
4952: }
4953: }
4954: /* put together the new matrix */
4955: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4956: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4957: /* Since these are PETSc arrays, change flags to free them as necessary. */
4958: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4959: mat->free_a = PETSC_TRUE;
4960: mat->free_ij = PETSC_TRUE;
4961: mat->nonew = 0;
4962: } else if (scall == MAT_REUSE_MATRIX) {
4963: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4964: ci = mat->i; cj = mat->j; cam = mat->a;
4965: for (i=0; i<am; i++) {
4966: /* off-diagonal portion of A */
4967: ncols_o = bi[i+1] - bi[i];
4968: for (jo=0; jo<ncols_o; jo++) {
4969: col = cmap[*bj];
4970: if (col >= cstart) break;
4971: *cam++ = *ba++; bj++;
4972: }
4973: /* diagonal portion of A */
4974: ncols_d = ai[i+1] - ai[i];
4975: for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4976: /* off-diagonal portion of A */
4977: for (j=jo; j<ncols_o; j++) {
4978: *cam++ = *ba++; bj++;
4979: }
4980: }
4981: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4982: PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4983: return(0);
4984: }
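/*
   Example (a hedged usage sketch, not part of the PETSc source): merge the diagonal and
   off-diagonal blocks of a MATMPIAIJ matrix A into a single sequential matrix holding the
   local rows and all global columns.

      Mat A_loc;
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... use A_loc (mlocal by N SeqAIJ) ...
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   (refresh the values after A changes)
      MatDestroy(&A_loc);
*/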
4986: /*@C
4987: MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4989: Not Collective
4991: Input Parameters:
4992: + A - the matrix
4993: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4994: - row, col - index sets of rows and columns to extract (or NULL)
4996: Output Parameter:
4997: . A_loc - the local sequential matrix generated
4999: Level: developer
5001: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5003: @*/
5004: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5005: {
5006: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5008: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5009: IS isrowa,iscola;
5010: Mat *aloc;
5011: PetscBool match;
5014: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5015: if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5016: PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5017: if (!row) {
5018: start = A->rmap->rstart; end = A->rmap->rend;
5019: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5020: } else {
5021: isrowa = *row;
5022: }
5023: if (!col) {
5024: start = A->cmap->rstart;
5025: cmap = a->garray;
5026: nzA = a->A->cmap->n;
5027: nzB = a->B->cmap->n;
5028: PetscMalloc1(nzA+nzB, &idx);
5029: ncols = 0;
5030: for (i=0; i<nzB; i++) {
5031: if (cmap[i] < start) idx[ncols++] = cmap[i];
5032: else break;
5033: }
5034: imark = i;
5035: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5036: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5037: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5038: } else {
5039: iscola = *col;
5040: }
5041: if (scall != MAT_INITIAL_MATRIX) {
5042: PetscMalloc1(1,&aloc);
5043: aloc[0] = *A_loc;
5044: }
5045: MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5046: *A_loc = aloc[0];
5047: PetscFree(aloc);
5048: if (!row) {
5049: ISDestroy(&isrowa);
5050: }
5051: if (!col) {
5052: ISDestroy(&iscola);
5053: }
5054: PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5055: return(0);
5056: }
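/*
   Example (a hedged usage sketch, not part of the PETSc source): extract the local rows of A
   restricted to the columns that actually carry nonzeros; passing NULL for row and col lets
   the routine build the index sets itself.

      Mat A_loc;
      MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
      ... A_loc has the local rows and only the locally nonzero columns ...
      MatDestroy(&A_loc);
*/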
5058: /*@C
5059: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5061: Collective on Mat
5063: Input Parameters:
5064: + A,B - the matrices in mpiaij format
5065: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066: - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5068: Output Parameters:
5069: + rowb, colb - index sets of rows and columns of B to extract
5070: - B_seq - the sequential matrix generated
5072: Level: developer
5074: @*/
5075: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5076: {
5077: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5079: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5080: IS isrowb,iscolb;
5081: Mat *bseq=NULL;
5084: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5085: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5086: }
5087: PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);
5089: if (scall == MAT_INITIAL_MATRIX) {
5090: start = A->cmap->rstart;
5091: cmap = a->garray;
5092: nzA = a->A->cmap->n;
5093: nzB = a->B->cmap->n;
5094: PetscMalloc1(nzA+nzB, &idx);
5095: ncols = 0;
5096: for (i=0; i<nzB; i++) { /* row < local row index */
5097: if (cmap[i] < start) idx[ncols++] = cmap[i];
5098: else break;
5099: }
5100: imark = i;
5101: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5102: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5103: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5104: ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5105: } else {
5106: if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5107: isrowb = *rowb; iscolb = *colb;
5108: PetscMalloc1(1,&bseq);
5109: bseq[0] = *B_seq;
5110: }
5111: MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5112: *B_seq = bseq[0];
5113: PetscFree(bseq);
5114: if (!rowb) {
5115: ISDestroy(&isrowb);
5116: } else {
5117: *rowb = isrowb;
5118: }
5119: if (!colb) {
5120: ISDestroy(&iscolb);
5121: } else {
5122: *colb = iscolb;
5123: }
5124: PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5125: return(0);
5126: }
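/*
   Example (a hedged usage sketch, not part of the PETSc source): fetch the rows of B that
   match the nonzero columns of the local part of A, keeping the index sets so that a later
   call can reuse the same communication pattern.

      Mat B_seq;
      IS  rowb = NULL,colb = NULL;
      MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
      ... after the values of B change, refresh B_seq in place ...
      MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
      ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&B_seq);
*/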
5128: /*
5129: MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5130: of the OFF-DIAGONAL portion of local A
5132: Collective on Mat
5134: Input Parameters:
5135: + A,B - the matrices in mpiaij format
5136: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5138: Output Parameters:
5139: + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5140: . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5141: . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5142: - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5144: Level: developer
5146: */
5147: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5148: {
5149: VecScatter_MPI_General *gen_to,*gen_from;
5150: PetscErrorCode ierr;
5151: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5152: Mat_SeqAIJ *b_oth;
5153: VecScatter ctx;
5154: MPI_Comm comm;
5155: PetscMPIInt *rprocs,*sprocs,tag,rank;
5156: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5157: PetscInt *rvalues,*svalues,*cols,sbs,rbs;
5158: PetscScalar *b_otha,*bufa,*bufA,*vals;
5159: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5160: MPI_Request *rwaits = NULL,*swaits = NULL;
5161: MPI_Status *sstatus,rstatus;
5162: PetscMPIInt jj,size;
5163: VecScatterType type;
5164: PetscBool mpi1;
5167: PetscObjectGetComm((PetscObject)A,&comm);
5168: MPI_Comm_size(comm,&size);
5170: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5171: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5172: }
5173: PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5174: MPI_Comm_rank(comm,&rank);
5176: if (size == 1) {
5177: startsj_s = NULL;
5178: bufa_ptr = NULL;
5179: *B_oth = NULL;
5180: return(0);
5181: }
5183: ctx = a->Mvctx;
5184: VecScatterGetType(ctx,&type);
5185: PetscStrcmp(type,"mpi1",&mpi1);
5186: if (!mpi1) {
5187: /* a->Mvctx is not of type MPI1; Mat-Mat ops are only implemented for MPI1 scatters,
5188: thus create a->Mvctx_mpi1 */
5189: if (!a->Mvctx_mpi1) {
5190: a->Mvctx_mpi1_flg = PETSC_TRUE;
5191: MatSetUpMultiply_MPIAIJ(A);
5192: }
5193: ctx = a->Mvctx_mpi1;
5194: }
5195: tag = ((PetscObject)ctx)->tag;
5197: gen_to = (VecScatter_MPI_General*)ctx->todata;
5198: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5199: nrecvs = gen_from->n;
5200: nsends = gen_to->n;
5202: PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);
5203: srow = gen_to->indices; /* local row index to be sent */
5204: sstarts = gen_to->starts;
5205: sprocs = gen_to->procs;
5206: sstatus = gen_to->sstatus;
5207: sbs = gen_to->bs;
5208: rstarts = gen_from->starts;
5209: rprocs = gen_from->procs;
5210: rbs = gen_from->bs;
5212: if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5213: if (scall == MAT_INITIAL_MATRIX) {
5214: /* i-array */
5215: /*---------*/
5216: /* post receives */
5217: PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);
5218: for (i=0; i<nrecvs; i++) {
5219: rowlen = rvalues + rstarts[i]*rbs;
5220: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5221: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5222: }
5224: /* pack the outgoing message */
5225: PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);
5227: sstartsj[0] = 0;
5228: rstartsj[0] = 0;
5229: len = 0; /* total length of j or a array to be sent */
5230: k = 0;
5231: PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);
5232: for (i=0; i<nsends; i++) {
5233: rowlen = svalues + sstarts[i]*sbs;
5234: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5235: for (j=0; j<nrows; j++) {
5236: row = srow[k] + B->rmap->range[rank]; /* global row idx */
5237: for (l=0; l<sbs; l++) {
5238: MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */
5240: rowlen[j*sbs+l] = ncols;
5242: len += ncols;
5243: MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5244: }
5245: k++;
5246: }
5247: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
5249: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5250: }
5251: /* recvs and sends of i-array are completed */
5252: i = nrecvs;
5253: while (i--) {
5254: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5255: }
5256: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5257: PetscFree(svalues);
5259: /* allocate buffers for sending j and a arrays */
5260: PetscMalloc1(len+1,&bufj);
5261: PetscMalloc1(len+1,&bufa);
5263: /* create i-array of B_oth */
5264: PetscMalloc1(aBn+2,&b_othi);
5266: b_othi[0] = 0;
5267: len = 0; /* total length of j or a array to be received */
5268: k = 0;
5269: for (i=0; i<nrecvs; i++) {
5270: rowlen = rvalues + rstarts[i]*rbs;
5271: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5272: for (j=0; j<nrows; j++) {
5273: b_othi[k+1] = b_othi[k] + rowlen[j];
5274: PetscIntSumError(rowlen[j],len,&len);
5275: k++;
5276: }
5277: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5278: }
5279: PetscFree(rvalues);
5281: /* allocate space for j and a arrays of B_oth */
5282: PetscMalloc1(b_othi[aBn]+1,&b_othj);
5283: PetscMalloc1(b_othi[aBn]+1,&b_otha);
5285: /* j-array */
5286: /*---------*/
5287: /* post receives of j-array */
5288: for (i=0; i<nrecvs; i++) {
5289: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5290: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5291: }
5293: /* pack the outgoing message j-array */
5294: k = 0;
5295: for (i=0; i<nsends; i++) {
5296: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5297: bufJ = bufj+sstartsj[i];
5298: for (j=0; j<nrows; j++) {
5299: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5300: for (ll=0; ll<sbs; ll++) {
5301: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5302: for (l=0; l<ncols; l++) {
5303: *bufJ++ = cols[l];
5304: }
5305: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5306: }
5307: }
5308: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5309: }
5311: /* recvs and sends of j-array are completed */
5312: i = nrecvs;
5313: while (i--) {
5314: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5315: }
5316: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5317: } else if (scall == MAT_REUSE_MATRIX) {
5318: sstartsj = *startsj_s;
5319: rstartsj = *startsj_r;
5320: bufa = *bufa_ptr;
5321: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5322: b_otha = b_oth->a;
5323: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5325: /* a-array */
5326: /*---------*/
5327: /* post receives of a-array */
5328: for (i=0; i<nrecvs; i++) {
5329: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5330: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5331: }
5333: /* pack the outgoing message a-array */
5334: k = 0;
5335: for (i=0; i<nsends; i++) {
5336: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337: bufA = bufa+sstartsj[i];
5338: for (j=0; j<nrows; j++) {
5339: row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5340: for (ll=0; ll<sbs; ll++) {
5341: MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5342: for (l=0; l<ncols; l++) {
5343: *bufA++ = vals[l];
5344: }
5345: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5346: }
5347: }
5348: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5349: }
5350: /* recvs and sends of a-array are completed */
5351: i = nrecvs;
5352: while (i--) {
5353: MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);
5354: }
5355: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
5356: PetscFree2(rwaits,swaits);
5358: if (scall == MAT_INITIAL_MATRIX) {
5359: /* put together the new matrix */
5360: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);
5362: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5363: /* Since these are PETSc arrays, change flags to free them as necessary. */
5364: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5365: b_oth->free_a = PETSC_TRUE;
5366: b_oth->free_ij = PETSC_TRUE;
5367: b_oth->nonew = 0;
5369: PetscFree(bufj);
5370: if (!startsj_s || !bufa_ptr) {
5371: PetscFree2(sstartsj,rstartsj);
5372: PetscFree(bufa_ptr);
5373: } else {
5374: *startsj_s = sstartsj;
5375: *startsj_r = rstartsj;
5376: *bufa_ptr = bufa;
5377: }
5378: }
5379: PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5380: return(0);
5381: }
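/*
   Calling pattern (a hedged sketch, not part of the PETSc source): this internal routine is
   typically called once with MAT_INITIAL_MATRIX, which returns the communication buffers,
   and then with MAT_REUSE_MATRIX to refresh only the numerical values; the cleanup shown
   mirrors how the buffers are released elsewhere in PETSc and is an assumption here.

      Mat       B_oth = NULL;
      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      ... after the values of B change ...
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      MatDestroy(&B_oth); PetscFree2(startsj_s,startsj_r); PetscFree(bufa);
*/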
5383: /*@C
5384: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5386: Not Collective
5388: Input Parameter:
5389: . A - The matrix in mpiaij format
5391: Output Parameters:
5392: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5393: . colmap - A map from global column index to local index into lvec
5394: - multScatter - A scatter from the argument of a matrix-vector product to lvec
5396: Level: developer
5398: @*/
5399: #if defined(PETSC_USE_CTABLE)
5400: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5401: #else
5402: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5403: #endif
5404: {
5405: Mat_MPIAIJ *a;
5412: a = (Mat_MPIAIJ*) A->data;
5413: if (lvec) *lvec = a->lvec;
5414: if (colmap) *colmap = a->colmap;
5415: if (multScatter) *multScatter = a->Mvctx;
5416: return(0);
5417: }
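/*
   Example (a hedged usage sketch, not part of the PETSc source): inspect the scatter objects
   a MATMPIAIJ matrix uses for MatMult(); the returned objects are owned by the matrix and
   must not be destroyed by the caller. colmap is skipped here since its type depends on
   whether PETSC_USE_CTABLE is defined.

      Vec        lvec;
      VecScatter Mvctx;
      MatGetCommunicationStructs(A,&lvec,NULL,&Mvctx);
*/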
5419: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5420: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5421: #if defined(PETSC_HAVE_MKL_SPARSE)
5422: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5423: #endif
5424: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5425: #if defined(PETSC_HAVE_ELEMENTAL)
5426: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5427: #endif
5428: #if defined(PETSC_HAVE_HYPRE)
5429: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5430: PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5431: #endif
5432: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5433: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5435: /*
5436: Computes (B'*A')' since computing B*A directly is untenable
5438:               n                     p                        p
5439:        (             )       (             )          (             )
5440:      m (      A      )  *  n (      B      )   =    m (      C      )
5441:        (             )       (             )          (             )
5443: */
5444: PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5445: {
5447: Mat At,Bt,Ct;
5450: MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5451: MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5452: MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);
5453: MatDestroy(&At);
5454: MatDestroy(&Bt);
5455: MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5456: MatDestroy(&Ct);
5457: return(0);
5458: }
5460: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5461: {
5463: PetscInt m=A->rmap->n,n=B->cmap->n;
5464: Mat Cmat;
5467: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5468: MatCreate(PetscObjectComm((PetscObject)A),&Cmat);
5469: MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
5470: MatSetBlockSizesFromMats(Cmat,A,B);
5471: MatSetType(Cmat,MATMPIDENSE);
5472: MatMPIDenseSetPreallocation(Cmat,NULL);
5473: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
5474: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
5476: Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5478: *C = Cmat;
5479: return(0);
5480: }
5482: /* ----------------------------------------------------------------*/
5483: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5484: {
5488: if (scall == MAT_INITIAL_MATRIX) {
5489: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
5490: MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);
5491: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
5492: }
5493: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
5494: MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);
5495: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
5496: return(0);
5497: }
5499: /*MC
5500: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5502: Options Database Keys:
5503: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5505: Level: beginner
5507: .seealso: MatCreateAIJ()
5508: M*/
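/*
   Example (a hedged usage sketch, not part of the PETSc source; M, N and the preallocation
   counts are placeholders): the usual way to obtain a MATMPIAIJ matrix is the generic
   creation sequence plus preallocation.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIAIJ);                       (or -mat_type mpiaij with MatSetFromOptions())
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    (per-row estimates for the diagonal and off-diagonal blocks)
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
      MatDestroy(&A);
*/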
5510: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5511: {
5512: Mat_MPIAIJ *b;
5514: PetscMPIInt size;
5517: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
5519: PetscNewLog(B,&b);
5520: B->data = (void*)b;
5521: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
5522: B->assembled = PETSC_FALSE;
5523: B->insertmode = NOT_SET_VALUES;
5524: b->size = size;
5526: MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);
5528: /* build cache for off array entries formed */
5529: MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);
5531: b->donotstash = PETSC_FALSE;
5532: b->colmap = 0;
5533: b->garray = 0;
5534: b->roworiented = PETSC_TRUE;
5536: /* stuff used for matrix vector multiply */
5537: b->lvec = NULL;
5538: b->Mvctx = NULL;
5540: /* stuff for MatGetRow() */
5541: b->rowindices = 0;
5542: b->rowvalues = 0;
5543: b->getrowactive = PETSC_FALSE;
5545: /* flexible pointer used in CUSP/CUSPARSE classes */
5546: b->spptr = NULL;
5548: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);
5549: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
5550: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
5551: PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
5552: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
5553: PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);
5554: PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
5555: PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
5556: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
5557: #if defined(PETSC_HAVE_MKL_SPARSE)
5558: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);
5559: #endif
5560: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
5561: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
5562: #if defined(PETSC_HAVE_ELEMENTAL)
5563: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);
5564: #endif
5565: #if defined(PETSC_HAVE_HYPRE)
5566: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);
5567: #endif
5568: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);
5569: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);
5570: PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);
5571: PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);
5572: PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);
5573: #if defined(PETSC_HAVE_HYPRE)
5574: PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);
5575: #endif
5576: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
5577: return(0);
5578: }
5580: /*@C
5581: MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5582: and "off-diagonal" part of the matrix in CSR format.
5584: Collective on MPI_Comm
5586: Input Parameters:
5587: + comm - MPI communicator
5588: . m - number of local rows (Cannot be PETSC_DECIDE)
5589: . n - This value should be the same as the local size used in creating the
5590: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5591: calculated if N is given). For square matrices n is almost always m.
5592: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5593: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5594: . i - row indices for "diagonal" portion of matrix
5595: . j - column indices for "diagonal" portion, local to this process (in the range 0 to n-1)
5596: . a - matrix values for "diagonal" portion
5597: . oi - row indices for "off-diagonal" portion of matrix
5598: . oj - column indices for "off-diagonal" portion, given as global column numbers
5599: - oa - matrix values for "off-diagonal" portion
5601: Output Parameter:
5602: . mat - the matrix
5604: Level: advanced
5606: Notes:
5607: The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5608: must free the arrays once the matrix has been destroyed and not before.
5610: The i and j indices are 0 based
5612: See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5614: This sets local rows and cannot be used to set off-processor values.
5616: Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5617: legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5618: not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5619: the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5620: keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5621: communication if it is known that only local entries will be set.
5623: .keywords: matrix, aij, compressed row, sparse, parallel
5625: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5626: MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5627: @*/
5628: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5629: {
5631: Mat_MPIAIJ *maij;
5634: if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5635: if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5636: if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5637: MatCreate(comm,mat);
5638: MatSetSizes(*mat,m,n,M,N);
5639: MatSetType(*mat,MATMPIAIJ);
5640: maij = (Mat_MPIAIJ*) (*mat)->data;
5642: (*mat)->preallocated = PETSC_TRUE;
5644: PetscLayoutSetUp((*mat)->rmap);
5645: PetscLayoutSetUp((*mat)->cmap);
5647: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
5648: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);
5650: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
5651: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
5652: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
5653: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
5655: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
5656: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
5657: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
5658: MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
5659: MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
5660: return(0);
5661: }
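/*
   Example (a hedged sketch, not part of the PETSc source): the split-array layout for a 4x4
   matrix on two processes, each owning 2 rows and 2 columns. The arrays below are what rank 0
   would pass (rank 1 passes its own); the "diagonal" block holds A(0,0)=1 and A(1,1)=2 with
   local column indices, the "off-diagonal" block holds A(0,2)=3 with a global column index.

      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};   PetscScalar a[]  = {1.0,2.0};
      PetscInt    oi[] = {0,1,1},  oj[] = {2};     PetscScalar oa[] = {3.0};
      Mat         A;
      MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);
      ... the arrays must remain valid until after A is destroyed ...
*/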
5663: /*
5664: Special version for direct calls from Fortran
5665: */
5666: #include <petsc/private/fortranimpl.h>
5668: /* Change these macros so they can be used in a void function */
5669: #undef CHKERRQ
5670: #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5671: #undef SETERRQ2
5672: #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5673: #undef SETERRQ3
5674: #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5675: #undef SETERRQ
5676: #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5678: #if defined(PETSC_HAVE_FORTRAN_CAPS)
5679: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5680: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5681: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5682: #else
5683: #endif
5684: PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5685: {
5686: Mat mat = *mmat;
5687: PetscInt m = *mm, n = *mn;
5688: InsertMode addv = *maddv;
5689: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
5690: PetscScalar value;
5693: MatCheckPreallocated(mat,1);
5694: if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5696: #if defined(PETSC_USE_DEBUG)
5697: else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5698: #endif
5699: {
5700: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5701: PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5702: PetscBool roworiented = aij->roworiented;
5704: /* Some Variables required in the macro */
5705: Mat A = aij->A;
5706: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
5707: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5708: MatScalar *aa = a->a;
5709: PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5710: Mat B = aij->B;
5711: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
5712: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5713: MatScalar *ba = b->a;
5715: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5716: PetscInt nonew = a->nonew;
5717: MatScalar *ap1,*ap2;
5720: for (i=0; i<m; i++) {
5721: if (im[i] < 0) continue;
5722: #if defined(PETSC_USE_DEBUG)
5723: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5724: #endif
5725: if (im[i] >= rstart && im[i] < rend) {
5726: row = im[i] - rstart;
5727: lastcol1 = -1;
5728: rp1 = aj + ai[row];
5729: ap1 = aa + ai[row];
5730: rmax1 = aimax[row];
5731: nrow1 = ailen[row];
5732: low1 = 0;
5733: high1 = nrow1;
5734: lastcol2 = -1;
5735: rp2 = bj + bi[row];
5736: ap2 = ba + bi[row];
5737: rmax2 = bimax[row];
5738: nrow2 = bilen[row];
5739: low2 = 0;
5740: high2 = nrow2;
5742: for (j=0; j<n; j++) {
5743: if (roworiented) value = v[i*n+j];
5744: else value = v[i+j*m];
5745: if (in[j] >= cstart && in[j] < cend) {
5746: col = in[j] - cstart;
5747: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5748: MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5749: } else if (in[j] < 0) continue;
5750: #if defined(PETSC_USE_DEBUG)
5751: /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5752: else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5753: #endif
5754: else {
5755: if (mat->was_assembled) {
5756: if (!aij->colmap) {
5757: MatCreateColmap_MPIAIJ_Private(mat);
5758: }
5759: #if defined(PETSC_USE_CTABLE)
5760: PetscTableFind(aij->colmap,in[j]+1,&col);
5761: col--;
5762: #else
5763: col = aij->colmap[in[j]] - 1;
5764: #endif
5765: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5766: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5767: MatDisAssemble_MPIAIJ(mat);
5768: col = in[j];
5769: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5770: B = aij->B;
5771: b = (Mat_SeqAIJ*)B->data;
5772: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5773: rp2 = bj + bi[row];
5774: ap2 = ba + bi[row];
5775: rmax2 = bimax[row];
5776: nrow2 = bilen[row];
5777: low2 = 0;
5778: high2 = nrow2;
5779: bm = aij->B->rmap->n;
5780: ba = b->a;
5781: }
5782: } else col = in[j];
5783: MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5784: }
5785: }
5786: } else if (!aij->donotstash) {
5787: if (roworiented) {
5788: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5789: } else {
5790: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
5791: }
5792: }
5793: }
5794: }
5795: PetscFunctionReturnVoid();
5796: }