Actual source code: mpidense.c
petsc-3.9.0 2018-04-07
2: /*
3: Basic functions for parallel dense matrices.
4: */
7: #include <../src/mat/impls/dense/mpi/mpidense.h>
8: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9: #include <petscblaslapack.h>
11: /*@
13: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14: matrix that stores the locally owned portion of the operator. For a sequential dense matrix it returns the matrix itself.
16: Input Parameter:
17: . A - the Seq or MPI dense matrix
19: Output Parameter:
20: . B - the inner matrix
22: Level: intermediate
24: @*/
25: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26: {
27: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
29: PetscBool flg;
32: PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
33: if (flg) *B = mat->A;
34: else *B = A;
35: return(0);
36: }
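/*
   A minimal usage sketch of MatDenseGetLocalMatrix() (not part of the library source):
   it retrieves the per-process MATSEQDENSE matrix so the locally owned entries can be
   inspected directly. The helper name ExampleInspectLocalDense is an illustrative assumption.
*/
static PetscErrorCode ExampleInspectLocalDense(Mat A)
{
  PetscErrorCode ierr;
  Mat            Alocal;
  PetscInt       mlocal,nlocal;

  /* for MATMPIDENSE this returns the inner sequential matrix; for MATSEQDENSE it returns A itself */
  ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Alocal,&mlocal,&nlocal);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"local block is %D x %D\n",mlocal,nlocal);CHKERRQ(ierr);
  return 0;
}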
38: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39: {
40: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
42: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46: lrow = row - rstart;
47: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
48: return(0);
49: }
51: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52: {
56: if (idx) {PetscFree(*idx);}
57: if (v) {PetscFree(*v);}
58: return(0);
59: }
61: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62: {
63: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
65: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
66: PetscScalar *array;
67: MPI_Comm comm;
68: Mat B;
71: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
73: PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
74: if (!B) {
75: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
76: MatCreate(comm,&B);
77: MatSetSizes(B,m,m,m,m);
78: MatSetType(B,((PetscObject)mdn->A)->type_name);
79: MatDenseGetArray(mdn->A,&array);
80: MatSeqDenseSetPreallocation(B,array+m*rstart);
81: MatDenseRestoreArray(mdn->A,&array);
82: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
83: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
84: PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
85: *a = B;
86: MatDestroy(&B);
87: } else *a = B;
88: return(0);
89: }
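/*
   A small sketch (illustrative helper, not from the sources) of calling MatGetDiagonalBlock()
   on a square MATMPIDENSE matrix. As coded above, the returned sequential matrix shares
   storage with the local block and stays alive through the composed "DiagonalBlock" object,
   so the caller does not destroy it.
*/
static PetscErrorCode ExampleDiagonalBlock(Mat A)
{
  PetscErrorCode ierr;
  Mat            Adiag;
  PetscInt       m,n;

  ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);   /* A must be square */
  ierr = MatGetSize(Adiag,&m,&n);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block is %D x %D\n",m,n);CHKERRQ(ierr);
  return 0;
}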
91: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92: {
93: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
95: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96: PetscBool roworiented = A->roworiented;
99: for (i=0; i<m; i++) {
100: if (idxm[i] < 0) continue;
101: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102: if (idxm[i] >= rstart && idxm[i] < rend) {
103: row = idxm[i] - rstart;
104: if (roworiented) {
105: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
106: } else {
107: for (j=0; j<n; j++) {
108: if (idxn[j] < 0) continue;
109: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
111: }
112: }
113: } else if (!A->donotstash) {
114: mat->assembled = PETSC_FALSE;
115: if (roworiented) {
116: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
117: } else {
118: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
119: }
120: }
121: }
122: return(0);
123: }
125: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126: {
127: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
129: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
132: for (i=0; i<m; i++) {
133: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135: if (idxm[i] >= rstart && idxm[i] < rend) {
136: row = idxm[i] - rstart;
137: for (j=0; j<n; j++) {
138: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
141: }
142: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143: }
144: return(0);
145: }
147: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148: {
149: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
153: MatDenseGetArray(a->A,array);
154: return(0);
155: }
157: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158: {
159: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
163: MatDenseGetArrayRead(a->A,array);
164: return(0);
165: }
167: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168: {
169: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
173: MatDensePlaceArray(a->A,array);
174: return(0);
175: }
177: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178: {
179: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
183: MatDenseResetArray(a->A);
184: return(0);
185: }
187: static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188: {
189: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
190: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
192: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193: const PetscInt *irow,*icol;
194: PetscScalar *av,*bv,*v = lmat->v;
195: Mat newmat;
196: IS iscol_local;
199: ISAllGather(iscol,&iscol_local);
200: ISGetIndices(isrow,&irow);
201: ISGetIndices(iscol_local,&icol);
202: ISGetLocalSize(isrow,&nrows);
203: ISGetLocalSize(iscol,&ncols);
204: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
206: /* No parallel redistribution currently supported! Should really check each index set
207: to confirm that it is OK. ... Currently supports only a submatrix with the same partitioning as the
208: original matrix! */
210: MatGetLocalSize(A,&nlrows,&nlcols);
211: MatGetOwnershipRange(A,&rstart,&rend);
213: /* Check submatrix call */
214: if (scall == MAT_REUSE_MATRIX) {
215: /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
216: /* Really need to test rows and column sizes! */
217: newmat = *B;
218: } else {
219: /* Create and fill new matrix */
220: MatCreate(PetscObjectComm((PetscObject)A),&newmat);
221: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
222: MatSetType(newmat,((PetscObject)A)->type_name);
223: MatMPIDenseSetPreallocation(newmat,NULL);
224: }
226: /* Now extract the data pointers and do the copy, column at a time */
227: newmatd = (Mat_MPIDense*)newmat->data;
228: bv = ((Mat_SeqDense*)newmatd->A->data)->v;
230: for (i=0; i<Ncols; i++) {
231: av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
232: for (j=0; j<nrows; j++) {
233: *bv++ = av[irow[j] - rstart];
234: }
235: }
237: /* Assemble the matrices so that the correct flags are set */
238: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
239: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
241: /* Free work space */
242: ISRestoreIndices(isrow,&irow);
243: ISRestoreIndices(iscol_local,&icol);
244: ISDestroy(&iscol_local);
245: *B = newmat;
246: return(0);
247: }
249: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
250: {
251: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
255: MatDenseRestoreArray(a->A,array);
256: return(0);
257: }
259: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
260: {
261: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
265: MatDenseRestoreArrayRead(a->A,array);
266: return(0);
267: }
269: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
270: {
271: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
272: MPI_Comm comm;
274: PetscInt nstash,reallocs;
275: InsertMode addv;
278: PetscObjectGetComm((PetscObject)mat,&comm);
279: /* make sure all processors are either in INSERTMODE or ADDMODE */
280: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);
281: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
282: mat->insertmode = addv; /* in case this processor had no cache */
284: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
285: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
286: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
287: return(0);
288: }
290: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
291: {
292: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
294: PetscInt i,*row,*col,flg,j,rstart,ncols;
295: PetscMPIInt n;
296: PetscScalar *val;
299: /* wait on receives */
300: while (1) {
301: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
302: if (!flg) break;
304: for (i=0; i<n;) {
305: /* Now identify the consecutive vals belonging to the same row */
306: for (j=i,rstart=row[j]; j<n; j++) {
307: if (row[j] != rstart) break;
308: }
309: if (j < n) ncols = j-i;
310: else ncols = n-i;
311: /* Now assemble all these values with a single function call */
312: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
313: i = j;
314: }
315: }
316: MatStashScatterEnd_Private(&mat->stash);
318: MatAssemblyBegin(mdn->A,mode);
319: MatAssemblyEnd(mdn->A,mode);
321: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
322: MatSetUpMultiply_MPIDense(mat);
323: }
324: return(0);
325: }
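/*
   A hedged usage sketch tying MatSetValues_MPIDense() and the assembly routines above
   together: values destined for rows owned by another process are stashed by MatSetValues()
   and only moved to their owners during MatAssemblyBegin()/MatAssemblyEnd(). The helper
   name and the choice of adding a single off-process entry are illustrative assumptions.
*/
static PetscErrorCode ExampleStashedInsert(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row,col = 0,M,N;
  PetscScalar    one = 1.0;

  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  row  = (rend < M) ? rend : 0;   /* first row owned by the next process (wraps to row 0 at the end) */
  ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
  /* the stashed entry is communicated to its owning process during assembly */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}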
327: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
328: {
330: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
333: MatZeroEntries(l->A);
334: return(0);
335: }
337: /* the code does not do the diagonal entries correctly unless the
338: matrix is square and the column and row ownerships are identical.
339: This is a BUG. The only way to fix it seems to be to access
340: mdn->A and mdn->B directly and not through the MatZeroRows()
341: routine.
342: */
343: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
344: {
345: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
346: PetscErrorCode ierr;
347: PetscInt i,*owners = A->rmap->range;
348: PetscInt *sizes,j,idx,nsends;
349: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
350: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
351: PetscInt *lens,*lrows,*values;
352: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
353: MPI_Comm comm;
354: MPI_Request *send_waits,*recv_waits;
355: MPI_Status recv_status,*send_status;
356: PetscBool found;
357: const PetscScalar *xx;
358: PetscScalar *bb;
361: PetscObjectGetComm((PetscObject)A,&comm);
362: if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
363: if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
364: /* first count number of contributors to each processor */
365: PetscCalloc1(2*size,&sizes);
366: PetscMalloc1(N+1,&owner); /* see note*/
367: for (i=0; i<N; i++) {
368: idx = rows[i];
369: found = PETSC_FALSE;
370: for (j=0; j<size; j++) {
371: if (idx >= owners[j] && idx < owners[j+1]) {
372: sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
373: }
374: }
375: if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
376: }
377: nsends = 0;
378: for (i=0; i<size; i++) nsends += sizes[2*i+1];
380: /* inform other processors of number of messages and max length*/
381: PetscMaxSum(comm,sizes,&nmax,&nrecvs);
383: /* post receives: */
384: PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);
385: PetscMalloc1(nrecvs+1,&recv_waits);
386: for (i=0; i<nrecvs; i++) {
387: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
388: }
390: /* do sends:
391: 1) starts[i] gives the starting index in svalues for stuff going to
392: the ith processor
393: */
394: PetscMalloc1(N+1,&svalues);
395: PetscMalloc1(nsends+1,&send_waits);
396: PetscMalloc1(size+1,&starts);
398: starts[0] = 0;
399: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
400: for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
402: starts[0] = 0;
403: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
404: count = 0;
405: for (i=0; i<size; i++) {
406: if (sizes[2*i+1]) {
407: MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
408: }
409: }
410: PetscFree(starts);
412: base = owners[rank];
414: /* wait on receives */
415: PetscMalloc2(nrecvs,&lens,nrecvs,&source);
416: count = nrecvs;
417: slen = 0;
418: while (count) {
419: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
420: /* unpack receives into our local space */
421: MPI_Get_count(&recv_status,MPIU_INT,&n);
423: source[imdex] = recv_status.MPI_SOURCE;
424: lens[imdex] = n;
425: slen += n;
426: count--;
427: }
428: PetscFree(recv_waits);
430: /* move the data into the send scatter */
431: PetscMalloc1(slen+1,&lrows);
432: count = 0;
433: for (i=0; i<nrecvs; i++) {
434: values = rvalues + i*nmax;
435: for (j=0; j<lens[i]; j++) {
436: lrows[count++] = values[j] - base;
437: }
438: }
439: PetscFree(rvalues);
440: PetscFree2(lens,source);
441: PetscFree(owner);
442: PetscFree(sizes);
444: /* fix right hand side if needed */
445: if (x && b) {
446: VecGetArrayRead(x,&xx);
447: VecGetArray(b,&bb);
448: for (i=0; i<slen; i++) {
449: bb[lrows[i]] = diag*xx[lrows[i]];
450: }
451: VecRestoreArrayRead(x,&xx);
452: VecRestoreArray(b,&bb);
453: }
455: /* actually zap the local rows */
456: MatZeroRows(l->A,slen,lrows,0.0,0,0);
457: if (diag != 0.0) {
458: Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
459: PetscInt m = ll->lda, i;
461: for (i=0; i<slen; i++) {
462: ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
463: }
464: }
465: PetscFree(lrows);
467: /* wait on sends */
468: if (nsends) {
469: PetscMalloc1(nsends,&send_status);
470: MPI_Waitall(nsends,send_waits,send_status);
471: PetscFree(send_status);
472: }
473: PetscFree(send_waits);
474: PetscFree(svalues);
475: return(0);
476: }
478: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
479: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
480: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
481: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
483: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
484: {
485: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
489: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
490: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
491: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
492: return(0);
493: }
495: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
496: {
497: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
501: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
502: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
503: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
504: return(0);
505: }
507: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
508: {
509: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
511: PetscScalar zero = 0.0;
514: VecSet(yy,zero);
515: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
516: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
517: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
518: return(0);
519: }
521: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
522: {
523: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
527: VecCopy(yy,zz);
528: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
529: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
530: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
531: return(0);
532: }
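/*
   A minimal sketch (illustrative, not part of the file) of driving the multiplication
   kernels above through the public interface. MatCreateVecs() supplies vectors whose
   layouts are compatible with the scatter mdn->Mvctx set up for this matrix.
*/
static PetscErrorCode ExampleMultiply(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);   /* x matches the columns, y the rows */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);           /* forward scatter of x, then local MatMult_SeqDense */
  ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);  /* local product, then reverse-mode scatter-add */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  return 0;
}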
534: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
535: {
536: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
537: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
539: PetscInt len,i,n,m = A->rmap->n,radd;
540: PetscScalar *x,zero = 0.0;
543: VecSet(v,zero);
544: VecGetArray(v,&x);
545: VecGetSize(v,&n);
546: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
547: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
548: radd = A->rmap->rstart*m;
549: for (i=0; i<len; i++) {
550: x[i] = aloc->v[radd + i*m + i];
551: }
552: VecRestoreArray(v,&x);
553: return(0);
554: }
556: PetscErrorCode MatDestroy_MPIDense(Mat mat)
557: {
558: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
562: #if defined(PETSC_USE_LOG)
563: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
564: #endif
565: MatStashDestroy_Private(&mat->stash);
566: MatDestroy(&mdn->A);
567: VecDestroy(&mdn->lvec);
568: VecScatterDestroy(&mdn->Mvctx);
570: PetscFree(mat->data);
571: PetscObjectChangeTypeName((PetscObject)mat,0);
573: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
574: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
575: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
576: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
577: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
578: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
579: #if defined(PETSC_HAVE_ELEMENTAL)
580: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
581: #endif
582: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
583: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);
584: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);
585: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);
586: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);
587: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);
588: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);
589: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
590: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
591: return(0);
592: }
594: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
595: {
596: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
597: PetscErrorCode ierr;
598: PetscViewerFormat format;
599: int fd;
600: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
601: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
602: PetscScalar *work,*v,*vv;
603: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
606: if (mdn->size == 1) {
607: MatView(mdn->A,viewer);
608: } else {
609: PetscViewerBinaryGetDescriptor(viewer,&fd);
610: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
611: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
613: PetscViewerGetFormat(viewer,&format);
614: if (format == PETSC_VIEWER_NATIVE) {
616: if (!rank) {
617: /* store the matrix as a dense matrix */
618: header[0] = MAT_FILE_CLASSID;
619: header[1] = mat->rmap->N;
620: header[2] = N;
621: header[3] = MATRIX_BINARY_FORMAT_DENSE;
622: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
624: /* get largest work array needed for transposing array */
625: mmax = mat->rmap->n;
626: for (i=1; i<size; i++) {
627: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
628: }
629: PetscMalloc1(mmax*N,&work);
631: /* write out local array, by rows */
632: m = mat->rmap->n;
633: v = a->v;
634: for (j=0; j<N; j++) {
635: for (i=0; i<m; i++) {
636: work[j + i*N] = *v++;
637: }
638: }
639: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
640: /* get largest work array to receive messages from other processes, excludes process zero */
641: mmax = 0;
642: for (i=1; i<size; i++) {
643: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
644: }
645: PetscMalloc1(mmax*N,&vv);
646: for (k = 1; k < size; k++) {
647: v = vv;
648: m = mat->rmap->range[k+1] - mat->rmap->range[k];
649: MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));
651: for (j = 0; j < N; j++) {
652: for (i = 0; i < m; i++) {
653: work[j + i*N] = *v++;
654: }
655: }
656: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
657: }
658: PetscFree(work);
659: PetscFree(vv);
660: } else {
661: MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));
662: }
663: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
664: }
665: return(0);
666: }
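/*
   A usage sketch (the file name "dense.bin" is an assumption) matching the error message
   above: storing a parallel dense matrix in binary requires pushing the PETSC_VIEWER_NATIVE
   format before calling MatView().
*/
static PetscErrorCode ExampleSaveDenseBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"dense.bin",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);  /* required for MATMPIDENSE binary output */
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  return 0;
}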
668: extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
669: #include <petscdraw.h>
670: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
671: {
672: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
673: PetscErrorCode ierr;
674: PetscMPIInt rank = mdn->rank;
675: PetscViewerType vtype;
676: PetscBool iascii,isdraw;
677: PetscViewer sviewer;
678: PetscViewerFormat format;
681: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
682: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
683: if (iascii) {
684: PetscViewerGetType(viewer,&vtype);
685: PetscViewerGetFormat(viewer,&format);
686: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
687: MatInfo info;
688: MatGetInfo(mat,MAT_LOCAL,&info);
689: PetscViewerASCIIPushSynchronized(viewer);
690: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
691: PetscViewerFlush(viewer);
692: PetscViewerASCIIPopSynchronized(viewer);
693: VecScatterView(mdn->Mvctx,viewer);
694: return(0);
695: } else if (format == PETSC_VIEWER_ASCII_INFO) {
696: return(0);
697: }
698: } else if (isdraw) {
699: PetscDraw draw;
700: PetscBool isnull;
702: PetscViewerDrawGetDraw(viewer,0,&draw);
703: PetscDrawIsNull(draw,&isnull);
704: if (isnull) return(0);
705: }
707: {
708: /* assemble the entire matrix onto first processor. */
709: Mat A;
710: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
711: PetscInt *cols;
712: PetscScalar *vals;
714: MatCreate(PetscObjectComm((PetscObject)mat),&A);
715: if (!rank) {
716: MatSetSizes(A,M,N,M,N);
717: } else {
718: MatSetSizes(A,0,0,M,N);
719: }
720: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
721: MatSetType(A,MATMPIDENSE);
722: MatMPIDenseSetPreallocation(A,NULL);
723: PetscLogObjectParent((PetscObject)mat,(PetscObject)A);
725: /* Copy the matrix ... This isn't the most efficient means,
726: but it's quick for now */
727: A->insertmode = INSERT_VALUES;
729: row = mat->rmap->rstart;
730: m = mdn->A->rmap->n;
731: for (i=0; i<m; i++) {
732: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
733: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
734: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
735: row++;
736: }
738: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
739: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
740: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
741: if (!rank) {
742: PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
743: MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
744: }
745: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
746: PetscViewerFlush(viewer);
747: MatDestroy(&A);
748: }
749: return(0);
750: }
752: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
753: {
755: PetscBool iascii,isbinary,isdraw,issocket;
758: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
759: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
760: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
761: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
763: if (iascii || issocket || isdraw) {
764: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
765: } else if (isbinary) {
766: MatView_MPIDense_Binary(mat,viewer);
767: }
768: return(0);
769: }
771: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772: {
773: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
774: Mat mdn = mat->A;
776: PetscReal isend[5],irecv[5];
779: info->block_size = 1.0;
781: MatGetInfo(mdn,MAT_LOCAL,info);
783: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
784: isend[3] = info->memory; isend[4] = info->mallocs;
785: if (flag == MAT_LOCAL) {
786: info->nz_used = isend[0];
787: info->nz_allocated = isend[1];
788: info->nz_unneeded = isend[2];
789: info->memory = isend[3];
790: info->mallocs = isend[4];
791: } else if (flag == MAT_GLOBAL_MAX) {
792: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
794: info->nz_used = irecv[0];
795: info->nz_allocated = irecv[1];
796: info->nz_unneeded = irecv[2];
797: info->memory = irecv[3];
798: info->mallocs = irecv[4];
799: } else if (flag == MAT_GLOBAL_SUM) {
800: MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
802: info->nz_used = irecv[0];
803: info->nz_allocated = irecv[1];
804: info->nz_unneeded = irecv[2];
805: info->memory = irecv[3];
806: info->mallocs = irecv[4];
807: }
808: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
809: info->fill_ratio_needed = 0;
810: info->factor_mallocs = 0;
811: return(0);
812: }
814: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
815: {
816: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
820: switch (op) {
821: case MAT_NEW_NONZERO_LOCATIONS:
822: case MAT_NEW_NONZERO_LOCATION_ERR:
823: case MAT_NEW_NONZERO_ALLOCATION_ERR:
824: MatCheckPreallocated(A,1);
825: MatSetOption(a->A,op,flg);
826: break;
827: case MAT_ROW_ORIENTED:
828: MatCheckPreallocated(A,1);
829: a->roworiented = flg;
830: MatSetOption(a->A,op,flg);
831: break;
832: case MAT_NEW_DIAGONALS:
833: case MAT_KEEP_NONZERO_PATTERN:
834: case MAT_USE_HASH_TABLE:
835: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
836: break;
837: case MAT_IGNORE_OFF_PROC_ENTRIES:
838: a->donotstash = flg;
839: break;
840: case MAT_SYMMETRIC:
841: case MAT_STRUCTURALLY_SYMMETRIC:
842: case MAT_HERMITIAN:
843: case MAT_SYMMETRY_ETERNAL:
844: case MAT_IGNORE_LOWER_TRIANGULAR:
845: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
846: break;
847: default:
848: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
849: }
850: return(0);
851: }
854: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
855: {
856: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
857: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
858: const PetscScalar *l,*r;
859: PetscScalar x,*v;
860: PetscErrorCode ierr;
861: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
864: MatGetLocalSize(A,&s2,&s3);
865: if (ll) {
866: VecGetLocalSize(ll,&s2a);
867: if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
868: VecGetArrayRead(ll,&l);
869: for (i=0; i<m; i++) {
870: x = l[i];
871: v = mat->v + i;
872: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
873: }
874: VecRestoreArrayRead(ll,&l);
875: PetscLogFlops(n*m);
876: }
877: if (rr) {
878: VecGetLocalSize(rr,&s3a);
879: if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
880: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
881: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
882: VecGetArrayRead(mdn->lvec,&r);
883: for (i=0; i<n; i++) {
884: x = r[i];
885: v = mat->v + i*m;
886: for (j=0; j<m; j++) (*v++) *= x;
887: }
888: VecRestoreArrayRead(mdn->lvec,&r);
889: PetscLogFlops(n*m);
890: }
891: return(0);
892: }
894: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
895: {
896: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
897: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
899: PetscInt i,j;
900: PetscReal sum = 0.0;
901: PetscScalar *v = mat->v;
904: if (mdn->size == 1) {
905: MatNorm(mdn->A,type,nrm);
906: } else {
907: if (type == NORM_FROBENIUS) {
908: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
909: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
910: }
911: MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
912: *nrm = PetscSqrtReal(*nrm);
913: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
914: } else if (type == NORM_1) {
915: PetscReal *tmp,*tmp2;
916: PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
917: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
918: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
919: *nrm = 0.0;
920: v = mat->v;
921: for (j=0; j<mdn->A->cmap->n; j++) {
922: for (i=0; i<mdn->A->rmap->n; i++) {
923: tmp[j] += PetscAbsScalar(*v); v++;
924: }
925: }
926: MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
927: for (j=0; j<A->cmap->N; j++) {
928: if (tmp2[j] > *nrm) *nrm = tmp2[j];
929: }
930: PetscFree2(tmp,tmp2);
931: PetscLogFlops(A->cmap->n*A->rmap->n);
932: } else if (type == NORM_INFINITY) { /* max row norm */
933: PetscReal ntemp;
934: MatNorm(mdn->A,type,&ntemp);
935: MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
936: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
937: }
938: return(0);
939: }
941: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
942: {
943: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
944: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
945: Mat B;
946: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
948: PetscInt j,i;
949: PetscScalar *v;
952: if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
953: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
954: MatCreate(PetscObjectComm((PetscObject)A),&B);
955: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
956: MatSetType(B,((PetscObject)A)->type_name);
957: MatMPIDenseSetPreallocation(B,NULL);
958: } else {
959: B = *matout;
960: }
962: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
963: PetscMalloc1(m,&rwork);
964: for (i=0; i<m; i++) rwork[i] = rstart + i;
965: for (j=0; j<n; j++) {
966: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
967: v += m;
968: }
969: PetscFree(rwork);
970: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
971: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
972: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
973: *matout = B;
974: } else {
975: MatHeaderMerge(A,&B);
976: }
977: return(0);
978: }
981: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
982: extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
984: PetscErrorCode MatSetUp_MPIDense(Mat A)
985: {
989: MatMPIDenseSetPreallocation(A,0);
990: return(0);
991: }
993: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
994: {
996: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
999: MatAXPY(A->A,alpha,B->A,str);
1000: PetscObjectStateIncrease((PetscObject)Y);
1001: return(0);
1002: }
1004: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1005: {
1006: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
1010: MatConjugate(a->A);
1011: return(0);
1012: }
1014: PetscErrorCode MatRealPart_MPIDense(Mat A)
1015: {
1016: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1020: MatRealPart(a->A);
1021: return(0);
1022: }
1024: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1025: {
1026: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1030: MatImaginaryPart(a->A);
1031: return(0);
1032: }
1034: extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1035: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1036: {
1038: PetscInt i,n;
1039: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1040: PetscReal *work;
1043: MatGetSize(A,NULL,&n);
1044: PetscMalloc1(n,&work);
1045: MatGetColumnNorms_SeqDense(a->A,type,work);
1046: if (type == NORM_2) {
1047: for (i=0; i<n; i++) work[i] *= work[i];
1048: }
1049: if (type == NORM_INFINITY) {
1050: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1051: } else {
1052: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1053: }
1054: PetscFree(work);
1055: if (type == NORM_2) {
1056: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1057: }
1058: return(0);
1059: }
1061: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1062: {
1063: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1065: PetscScalar *a;
1066: PetscInt m,n,i;
1069: MatGetSize(d->A,&m,&n);
1070: MatDenseGetArray(d->A,&a);
1071: for (i=0; i<m*n; i++) {
1072: PetscRandomGetValue(rctx,a+i);
1073: }
1074: MatDenseRestoreArray(d->A,&a);
1075: return(0);
1076: }
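/*
   A short sketch (illustrative helper) of filling a preallocated MATMPIDENSE matrix
   through MatSetRandom(), which dispatches to the routine above.
*/
static PetscErrorCode ExampleFillRandom(Mat A)
{
  PetscErrorCode ierr;
  PetscRandom    rctx;

  ierr = PetscRandomCreate(PetscObjectComm((PetscObject)A),&rctx);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);     /* fills every local entry */
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  return 0;
}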
1078: extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1080: static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
1081: {
1083: *missing = PETSC_FALSE;
1084: return(0);
1085: }
1087: /* -------------------------------------------------------------------*/
1088: static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1089: MatGetRow_MPIDense,
1090: MatRestoreRow_MPIDense,
1091: MatMult_MPIDense,
1092: /* 4*/ MatMultAdd_MPIDense,
1093: MatMultTranspose_MPIDense,
1094: MatMultTransposeAdd_MPIDense,
1095: 0,
1096: 0,
1097: 0,
1098: /* 10*/ 0,
1099: 0,
1100: 0,
1101: 0,
1102: MatTranspose_MPIDense,
1103: /* 15*/ MatGetInfo_MPIDense,
1104: MatEqual_MPIDense,
1105: MatGetDiagonal_MPIDense,
1106: MatDiagonalScale_MPIDense,
1107: MatNorm_MPIDense,
1108: /* 20*/ MatAssemblyBegin_MPIDense,
1109: MatAssemblyEnd_MPIDense,
1110: MatSetOption_MPIDense,
1111: MatZeroEntries_MPIDense,
1112: /* 24*/ MatZeroRows_MPIDense,
1113: 0,
1114: 0,
1115: 0,
1116: 0,
1117: /* 29*/ MatSetUp_MPIDense,
1118: 0,
1119: 0,
1120: MatGetDiagonalBlock_MPIDense,
1121: 0,
1122: /* 34*/ MatDuplicate_MPIDense,
1123: 0,
1124: 0,
1125: 0,
1126: 0,
1127: /* 39*/ MatAXPY_MPIDense,
1128: MatCreateSubMatrices_MPIDense,
1129: 0,
1130: MatGetValues_MPIDense,
1131: 0,
1132: /* 44*/ 0,
1133: MatScale_MPIDense,
1134: MatShift_Basic,
1135: 0,
1136: 0,
1137: /* 49*/ MatSetRandom_MPIDense,
1138: 0,
1139: 0,
1140: 0,
1141: 0,
1142: /* 54*/ 0,
1143: 0,
1144: 0,
1145: 0,
1146: 0,
1147: /* 59*/ MatCreateSubMatrix_MPIDense,
1148: MatDestroy_MPIDense,
1149: MatView_MPIDense,
1150: 0,
1151: 0,
1152: /* 64*/ 0,
1153: 0,
1154: 0,
1155: 0,
1156: 0,
1157: /* 69*/ 0,
1158: 0,
1159: 0,
1160: 0,
1161: 0,
1162: /* 74*/ 0,
1163: 0,
1164: 0,
1165: 0,
1166: 0,
1167: /* 79*/ 0,
1168: 0,
1169: 0,
1170: 0,
1171: /* 83*/ MatLoad_MPIDense,
1172: 0,
1173: 0,
1174: 0,
1175: 0,
1176: 0,
1177: #if defined(PETSC_HAVE_ELEMENTAL)
1178: /* 89*/ MatMatMult_MPIDense_MPIDense,
1179: MatMatMultSymbolic_MPIDense_MPIDense,
1180: #else
1181: /* 89*/ 0,
1182: 0,
1183: #endif
1184: MatMatMultNumeric_MPIDense,
1185: 0,
1186: 0,
1187: /* 94*/ 0,
1188: 0,
1189: 0,
1190: 0,
1191: 0,
1192: /* 99*/ 0,
1193: 0,
1194: 0,
1195: MatConjugate_MPIDense,
1196: 0,
1197: /*104*/ 0,
1198: MatRealPart_MPIDense,
1199: MatImaginaryPart_MPIDense,
1200: 0,
1201: 0,
1202: /*109*/ 0,
1203: 0,
1204: 0,
1205: 0,
1206: MatMissingDiagonal_MPIDense,
1207: /*114*/ 0,
1208: 0,
1209: 0,
1210: 0,
1211: 0,
1212: /*119*/ 0,
1213: 0,
1214: 0,
1215: 0,
1216: 0,
1217: /*124*/ 0,
1218: MatGetColumnNorms_MPIDense,
1219: 0,
1220: 0,
1221: 0,
1222: /*129*/ 0,
1223: MatTransposeMatMult_MPIDense_MPIDense,
1224: MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1225: MatTransposeMatMultNumeric_MPIDense_MPIDense,
1226: 0,
1227: /*134*/ 0,
1228: 0,
1229: 0,
1230: 0,
1231: 0,
1232: /*139*/ 0,
1233: 0,
1234: 0,
1235: 0,
1236: 0,
1237: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1238: };
1240: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1241: {
1242: Mat_MPIDense *a;
1246: mat->preallocated = PETSC_TRUE;
1247: /* Note: For now, when data is specified above, this assumes the user correctly
1248: allocates the local dense storage space. We should add error checking. */
1250: a = (Mat_MPIDense*)mat->data;
1251: PetscLayoutSetUp(mat->rmap);
1252: PetscLayoutSetUp(mat->cmap);
1253: a->nvec = mat->cmap->n;
1255: MatCreate(PETSC_COMM_SELF,&a->A);
1256: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1257: MatSetType(a->A,MATSEQDENSE);
1258: MatSeqDenseSetPreallocation(a->A,data);
1259: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1260: return(0);
1261: }
1263: #if defined(PETSC_HAVE_ELEMENTAL)
1264: PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1265: {
1266: Mat mat_elemental;
1268: PetscScalar *v;
1269: PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1270:
1272: if (reuse == MAT_REUSE_MATRIX) {
1273: mat_elemental = *newmat;
1274: MatZeroEntries(*newmat);
1275: } else {
1276: MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
1277: MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
1278: MatSetType(mat_elemental,MATELEMENTAL);
1279: MatSetUp(mat_elemental);
1280: MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);
1281: }
1283: PetscMalloc2(m,&rows,N,&cols);
1284: for (i=0; i<N; i++) cols[i] = i;
1285: for (i=0; i<m; i++) rows[i] = rstart + i;
1286:
1287: /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1288: MatDenseGetArray(A,&v);
1289: MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
1290: MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
1291: MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
1292: MatDenseRestoreArray(A,&v);
1293: PetscFree2(rows,cols);
1295: if (reuse == MAT_INPLACE_MATRIX) {
1296: MatHeaderReplace(A,&mat_elemental);
1297: } else {
1298: *newmat = mat_elemental;
1299: }
1300: return(0);
1301: }
1302: #endif
1304: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1305: {
1306: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1310: MatDenseGetColumn(mat->A,col,vals);
1311: return(0);
1312: }
1314: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1315: {
1316: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1320: MatDenseRestoreColumn(mat->A,vals);
1321: return(0);
1322: }
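/*
   A sketch (hypothetical helper name, scaling column 0 chosen arbitrarily) of the column
   access pair registered above: MatDenseGetColumn()/MatDenseRestoreColumn() expose the
   local rows of one global column as a raw array.
*/
static PetscErrorCode ExampleScaleFirstColumn(Mat A,PetscScalar alpha)
{
  PetscErrorCode ierr;
  PetscScalar    *col;
  PetscInt       i,mlocal;

  ierr = MatGetLocalSize(A,&mlocal,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetColumn(A,0,&col);CHKERRQ(ierr);   /* local part of global column 0 */
  for (i=0; i<mlocal; i++) col[i] *= alpha;
  ierr = MatDenseRestoreColumn(A,&col);CHKERRQ(ierr);
  return 0;
}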
1324: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1325: {
1327: Mat_MPIDense *mat;
1328: PetscInt m,nloc,N;
1331: MatGetSize(inmat,&m,&N);
1332: MatGetLocalSize(inmat,NULL,&nloc);
1333: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1334: PetscInt sum;
1336: if (n == PETSC_DECIDE) {
1337: PetscSplitOwnership(comm,&n,&N);
1338: }
1339: /* Check sum(n) = N */
1340: MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
1341: if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1343: MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
1344: }
1346: /* numeric phase */
1347: mat = (Mat_MPIDense*)(*outmat)->data;
1348: MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
1349: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
1350: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
1351: return(0);
1352: }
1354: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1355: {
1356: Mat_MPIDense *a;
1360: PetscNewLog(mat,&a);
1361: mat->data = (void*)a;
1362: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1364: mat->insertmode = NOT_SET_VALUES;
1365: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);
1366: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);
1368: /* build cache for off-processor entries formed */
1369: a->donotstash = PETSC_FALSE;
1371: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1373: /* stuff used for matrix vector multiply */
1374: a->lvec = 0;
1375: a->Mvctx = 0;
1376: a->roworiented = PETSC_TRUE;
1378: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1379: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1380: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1381: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1382: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1383: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1384: #if defined(PETSC_HAVE_ELEMENTAL)
1385: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1386: #endif
1387: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1388: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);
1389: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);
1390: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);
1392: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);
1393: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);
1394: PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);
1395: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1396: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1397: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1398: return(0);
1399: }
1401: /*MC
1402: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1404: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1405: and MATMPIDENSE otherwise.
1407: Options Database Keys:
1408: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1410: Level: beginner
1413: .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1414: M*/
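/*
   A small sketch (sizes are assumptions) of selecting this type from the options database,
   as the manual page above describes: -mat_type dense yields MATSEQDENSE or MATMPIDENSE
   depending on the communicator size.
*/
static PetscErrorCode ExampleDenseFromOptions(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);  /* honors -mat_type dense */
  ierr = MatSetUp(*A);CHKERRQ(ierr);           /* preallocates for whichever type was chosen */
  return 0;
}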
1416: /*@C
1417: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1419: Not collective
1421: Input Parameters:
1422: + B - the matrix
1423: - data - optional location of matrix data. Set data=NULL for PETSc
1424: to control all matrix memory allocation.
1426: Notes:
1427: The dense format is fully compatible with standard Fortran 77
1428: storage by columns.
1430: The data input variable is intended primarily for Fortran programmers
1431: who wish to allocate their own matrix memory space. Most users should
1432: set data=NULL.
1434: Level: intermediate
1436: .keywords: matrix,dense, parallel
1438: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1439: @*/
1440: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1441: {
1445: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1446: return(0);
1447: }
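/*
   A minimal sketch (the 100 x 50 global size is an assumption) of the usual call sequence
   for this routine: create the matrix, set its type to MATMPIDENSE, then preallocate with
   data = NULL so PETSc allocates the local storage.
*/
static PetscErrorCode ExamplePreallocateDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,50);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,NULL);CHKERRQ(ierr);  /* NULL: PETSc allocates the local block */
  return 0;
}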
1449: /*@
1450: MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1451: array provided by the user. This is useful to avoid copying an array
1452: into a matrix.
1454: Not Collective
1456: Input Parameters:
1457: + mat - the matrix
1458: - array - the array in column major order
1460: Notes:
1461: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1462: freed when the matrix is destroyed.
1464: Level: developer
1466: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1468: @*/
1469: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
1470: {
1473: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
1474: PetscObjectStateIncrease((PetscObject)mat);
1475: return(0);
1476: }
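/*
   A sketch (the management of user_storage is an assumption) of temporarily backing a dense
   matrix with a user-provided array and then restoring the original storage, as the Notes
   above describe.
*/
static PetscErrorCode ExamplePlaceArray(Mat A,PetscScalar *user_storage)
{
  PetscErrorCode ierr;

  /* user_storage must hold at least (local rows) x (global columns) entries, stored column major */
  ierr = MatDensePlaceArray(A,user_storage);CHKERRQ(ierr);
  /* ... use A; it now reads and writes user_storage ... */
  ierr = MatDenseResetArray(A);CHKERRQ(ierr);   /* return to the original array */
  return 0;
}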
1478: /*@
1479: MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1481: Not Collective
1483: Input Parameters:
1484: . mat - the matrix
1486: Notes:
1487: You can only call this after a call to MatDensePlaceArray()
1489: Level: developer
1491: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1493: @*/
1494: PetscErrorCode MatDenseResetArray(Mat mat)
1495: {
1498: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
1499: PetscObjectStateIncrease((PetscObject)mat);
1500: return(0);
1501: }
1503: /*@C
1504: MatCreateDense - Creates a parallel matrix in dense format.
1506: Collective on MPI_Comm
1508: Input Parameters:
1509: + comm - MPI communicator
1510: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1511: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1512: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1513: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1514: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1515: to control all matrix memory allocation.
1517: Output Parameter:
1518: . A - the matrix
1520: Notes:
1521: The dense format is fully compatible with standard Fortran 77
1522: storage by columns.
1524: The data input variable is intended primarily for Fortran programmers
1525: who wish to allocate their own matrix memory space. Most users should
1526: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1528: The user MUST specify either the local or global matrix dimensions
1529: (possibly both).
1531: Level: intermediate
1533: .keywords: matrix,dense, parallel
1535: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1536: @*/
1537: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1538: {
1540: PetscMPIInt size;
1543: MatCreate(comm,A);
1544: MatSetSizes(*A,m,n,M,N);
1545: MPI_Comm_size(comm,&size);
1546: if (size > 1) {
1547: MatSetType(*A,MATMPIDENSE);
1548: MatMPIDenseSetPreallocation(*A,data);
1549: if (data) { /* user provided data array, so no need to assemble */
1550: MatSetUpMultiply_MPIDense(*A);
1551: (*A)->assembled = PETSC_TRUE;
1552: }
1553: } else {
1554: MatSetType(*A,MATSEQDENSE);
1555: MatSeqDenseSetPreallocation(*A,data);
1556: }
1557: return(0);
1558: }
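/*
   A minimal sketch of MatCreateDense() as documented above; the 100 x 50 global size is an
   arbitrary assumption and data = NULL lets PETSc allocate the storage.
*/
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,A);CHKERRQ(ierr);
  /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as usual ... */
  return 0;
}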
1560: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1561: {
1562: Mat mat;
1563: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1567: *newmat = 0;
1568: MatCreate(PetscObjectComm((PetscObject)A),&mat);
1569: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1570: MatSetType(mat,((PetscObject)A)->type_name);
1571: a = (Mat_MPIDense*)mat->data;
1572: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1574: mat->factortype = A->factortype;
1575: mat->assembled = PETSC_TRUE;
1576: mat->preallocated = PETSC_TRUE;
1578: a->size = oldmat->size;
1579: a->rank = oldmat->rank;
1580: mat->insertmode = NOT_SET_VALUES;
1581: a->nvec = oldmat->nvec;
1582: a->donotstash = oldmat->donotstash;
1584: PetscLayoutReference(A->rmap,&mat->rmap);
1585: PetscLayoutReference(A->cmap,&mat->cmap);
1587: MatSetUpMultiply_MPIDense(mat);
1588: MatDuplicate(oldmat->A,cpvalues,&a->A);
1589: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
1591: *newmat = mat;
1592: return(0);
1593: }
1595: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1596: {
1598: PetscMPIInt rank,size;
1599: const PetscInt *rowners;
1600: PetscInt i,m,n,nz,j,mMax;
1601: PetscScalar *array,*vals,*vals_ptr;
1602: Mat_MPIDense *a = (Mat_MPIDense*)newmat->data;
1605: MPI_Comm_rank(comm,&rank);
1606: MPI_Comm_size(comm,&size);
1608: /* determine ownership of rows and columns */
1609: m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1610: n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1612: MatSetSizes(newmat,m,n,M,N);
1613: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1614: MatMPIDenseSetPreallocation(newmat,NULL);
1615: }
1616: MatDenseGetArray(newmat,&array);
1617: MatGetLocalSize(newmat,&m,NULL);
1618: MatGetOwnershipRanges(newmat,&rowners);
1619: MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);
1620: if (!rank) {
1621: PetscMalloc1(mMax*N,&vals);
1623: /* read in my part of the matrix numerical values */
1624: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1626: /* insert into matrix by row (this is why we cannot read directly into the array) */
1627: vals_ptr = vals;
1628: for (i=0; i<m; i++) {
1629: for (j=0; j<N; j++) {
1630: array[i + j*m] = *vals_ptr++;
1631: }
1632: }
1634: /* read in the other processes' parts and ship them out */
1635: for (i=1; i<size; i++) {
1636: nz = (rowners[i+1] - rowners[i])*N;
1637: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1638: MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);
1639: }
1640: } else {
1641: /* receive numeric values */
1642: PetscMalloc1(m*N,&vals);
1644: /* receive message of values*/
1645: MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);
1647: /* insert into matrix by row (this is why we cannot read directly into the array) */
1648: vals_ptr = vals;
1649: for (i=0; i<m; i++) {
1650: for (j=0; j<N; j++) {
1651: array[i + j*m] = *vals_ptr++;
1652: }
1653: }
1654: }
1655: MatDenseRestoreArray(newmat,&array);
1656: PetscFree(vals);
1657: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1658: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1659: return(0);
1660: }
1662: PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1663: {
1664: Mat_MPIDense *a;
1665: PetscScalar *vals,*svals;
1666: MPI_Comm comm;
1667: MPI_Status status;
1668: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1669: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1670: PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1671: PetscInt i,nz,j,rstart,rend;
1672: int fd;
1676: /* force binary viewer to load .info file if it has not yet done so */
1677: PetscViewerSetUp(viewer);
1678: PetscObjectGetComm((PetscObject)viewer,&comm);
1679: MPI_Comm_size(comm,&size);
1680: MPI_Comm_rank(comm,&rank);
1681: PetscViewerBinaryGetDescriptor(viewer,&fd);
1682: if (!rank) {
1683: PetscBinaryRead(fd,(char*)header,4,PETSC_INT);
1684: if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1685: }
1686: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1687: M = header[1]; N = header[2]; nz = header[3];
1689: /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1690: if (newmat->rmap->N < 0) newmat->rmap->N = M;
1691: if (newmat->cmap->N < 0) newmat->cmap->N = N;
1693: if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1694: if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1696: /*
1697: Handle case where matrix is stored on disk as a dense matrix
1698: */
1699: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1700: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1701: return(0);
1702: }
1704: /* determine ownership of all rows */
1705: if (newmat->rmap->n < 0) {
1706: PetscMPIIntCast(M/size + ((M % size) > rank),&m);
1707: } else {
1708: PetscMPIIntCast(newmat->rmap->n,&m);
1709: }
1710: if (newmat->cmap->n < 0) {
1711: n = PETSC_DECIDE;
1712: } else {
1713: PetscMPIIntCast(newmat->cmap->n,&n);
1714: }
1716: PetscMalloc1(size+2,&rowners);
1717: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1718: rowners[0] = 0;
1719: for (i=2; i<=size; i++) {
1720: rowners[i] += rowners[i-1];
1721: }
1722: rstart = rowners[rank];
1723: rend = rowners[rank+1];
1725: /* distribute row lengths to all processors */
1726: PetscMalloc1(rend-rstart,&ourlens);
1727: if (!rank) {
1728: PetscMalloc1(M,&rowlengths);
1729: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1730: PetscMalloc1(size,&sndcounts);
1731: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1732: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1733: PetscFree(sndcounts);
1734: } else {
1735: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1736: }
1738: if (!rank) {
1739: /* calculate the number of nonzeros on each processor */
1740: PetscMalloc1(size,&procsnz);
1741: PetscMemzero(procsnz,size*sizeof(PetscInt));
1742: for (i=0; i<size; i++) {
1743: for (j=rowners[i]; j< rowners[i+1]; j++) {
1744: procsnz[i] += rowlengths[j];
1745: }
1746: }
1747: PetscFree(rowlengths);
1749: /* determine max buffer needed and allocate it */
1750: maxnz = 0;
1751: for (i=0; i<size; i++) {
1752: maxnz = PetscMax(maxnz,procsnz[i]);
1753: }
1754: PetscMalloc1(maxnz,&cols);
1756: /* read in my part of the matrix column indices */
1757: nz = procsnz[0];
1758: PetscMalloc1(nz,&mycols);
1759: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1761: /* read in everyone else's part and ship it off */
1762: for (i=1; i<size; i++) {
1763: nz = procsnz[i];
1764: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1765: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1766: }
1767: PetscFree(cols);
1768: } else {
1769: /* determine buffer space needed for message */
1770: nz = 0;
1771: for (i=0; i<m; i++) {
1772: nz += ourlens[i];
1773: }
1774: PetscMalloc1(nz+1,&mycols);
1776: /* receive message of column indices*/
1777: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1778: MPI_Get_count(&status,MPIU_INT,&maxnz);
1779: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1780: }
1782: MatSetSizes(newmat,m,n,M,N);
1783: a = (Mat_MPIDense*)newmat->data;
1784: if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1785: MatMPIDenseSetPreallocation(newmat,NULL);
1786: }
1788: if (!rank) {
1789: PetscMalloc1(maxnz,&vals);
1791: /* read in my part of the matrix numerical values */
1792: nz = procsnz[0];
1793: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1795: /* insert into matrix */
1796: jj = rstart;
1797: smycols = mycols;
1798: svals = vals;
1799: for (i=0; i<m; i++) {
1800: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1801: smycols += ourlens[i];
1802: svals += ourlens[i];
1803: jj++;
1804: }
1806: /* read in the other processes' parts and ship them out */
1807: for (i=1; i<size; i++) {
1808: nz = procsnz[i];
1809: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1810: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);
1811: }
1812: PetscFree(procsnz);
1813: } else {
1814: /* receive numeric values */
1815: PetscMalloc1(nz+1,&vals);
1817: /* receive message of values*/
1818: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);
1819: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1820: if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1822: /* insert into matrix */
1823: jj = rstart;
1824: smycols = mycols;
1825: svals = vals;
1826: for (i=0; i<m; i++) {
1827: MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1828: smycols += ourlens[i];
1829: svals += ourlens[i];
1830: jj++;
1831: }
1832: }
1833: PetscFree(ourlens);
1834: PetscFree(vals);
1835: PetscFree(mycols);
1836: PetscFree(rowners);
1838: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
1839: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
1840: return(0);
1841: }
1843: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
1844: {
1845: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1846: Mat a,b;
1847: PetscBool flg;
1851: a = matA->A;
1852: b = matB->A;
1853: MatEqual(a,b,&flg);
1854: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1855: return(0);
1856: }
1858: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1859: {
1860: PetscErrorCode ierr;
1861: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1862: Mat_TransMatMultDense *atb = a->atbdense;
1865: PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);
1866: (atb->destroy)(A);
1867: PetscFree(atb);
1868: return(0);
1869: }
1871: PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1872: {
1873: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1874: Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1875: Mat_TransMatMultDense *atb = c->atbdense;
1877: MPI_Comm comm;
1878: PetscMPIInt rank,size,*recvcounts=atb->recvcounts;
1879: PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1880: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1881: PetscScalar _DOne=1.0,_DZero=0.0;
1882: PetscBLASInt am,an,bn,aN;
1883: const PetscInt *ranges;
1886: PetscObjectGetComm((PetscObject)A,&comm);
1887: MPI_Comm_rank(comm,&rank);
1888: MPI_Comm_size(comm,&size);
1890: /* compute atbarray = aseq^T * bseq */
1891: PetscBLASIntCast(a->A->cmap->n,&an);
1892: PetscBLASIntCast(b->A->cmap->n,&bn);
1893: PetscBLASIntCast(a->A->rmap->n,&am);
1894: PetscBLASIntCast(A->cmap->N,&aN);
1895: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1896:
1897: MatGetOwnershipRanges(C,&ranges);
1898: for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1899:
1900: /* arrange atbarray into sendbuf */
1901: k = 0;
1902: for (proc=0; proc<size; proc++) {
1903: for (j=0; j<cN; j++) {
1904: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1905: }
1906: }
1907: /* sum the contributions in atbarray into the local values of C */
1908: MatDenseGetArray(c->A,&carray);
1909: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
1910: MatDenseRestoreArray(c->A,&carray);
1911: return(0);
1912: }
1914: PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1915: {
1916: PetscErrorCode ierr;
1917: Mat Cdense;
1918: MPI_Comm comm;
1919: PetscMPIInt size;
1920: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
1921: Mat_MPIDense *c;
1922: Mat_TransMatMultDense *atb;
1925: PetscObjectGetComm((PetscObject)A,&comm);
1926: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1927: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1928: }
1930: /* create matrix product Cdense */
1931: MatCreate(comm,&Cdense);
1932: MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
1933: MatSetType(Cdense,MATMPIDENSE);
1934: MatMPIDenseSetPreallocation(Cdense,NULL);
1935: MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);
1936: MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);
1937: *C = Cdense;
1939: /* create data structure for reusing Cdense */
1940: MPI_Comm_size(comm,&size);
1941: PetscNew(&atb);
1942: cM = Cdense->rmap->N;
1943: PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);
1944:
1945: c = (Mat_MPIDense*)Cdense->data;
1946: c->atbdense = atb;
1947: atb->destroy = Cdense->ops->destroy;
1948: Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1949: return(0);
1950: }
1952: PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1953: {
1957: if (scall == MAT_INITIAL_MATRIX) {
1958: MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1959: }
1960: MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1961: return(0);
1962: }
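/*
   A usage sketch (illustrative helper; fill left at the default) of the C = A^T * B product
   implemented above: MAT_INITIAL_MATRIX creates C, MAT_REUSE_MATRIX recomputes into it
   using the data structure stored during the symbolic phase.
*/
static PetscErrorCode ExampleTransposeMatMult(Mat A,Mat B)
{
  PetscErrorCode ierr;
  Mat            C;

  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* ... change the entries of A or B, keeping the same sizes ... */
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  return 0;
}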
1964: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1965: {
1966: PetscErrorCode ierr;
1967: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1968: Mat_MatMultDense *ab = a->abdense;
1971: MatDestroy(&ab->Ce);
1972: MatDestroy(&ab->Ae);
1973: MatDestroy(&ab->Be);
1975: (ab->destroy)(A);
1976: PetscFree(ab);
1977: return(0);
1978: }
1980: #if defined(PETSC_HAVE_ELEMENTAL)
1981: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1982: {
1983: PetscErrorCode ierr;
1984: Mat_MPIDense *c=(Mat_MPIDense*)C->data;
1985: Mat_MatMultDense *ab=c->abdense;
1988: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
1989: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
1990: MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);
1991: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
1992: return(0);
1993: }
1995: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1996: {
1997: PetscErrorCode ierr;
1998: Mat Ae,Be,Ce;
1999: Mat_MPIDense *c;
2000: Mat_MatMultDense *ab;
2003: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2004: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2005: }
2007: /* convert A and B to Elemental matrices Ae and Be */
2008: MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);
2009: MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);
2011: /* Ce = Ae*Be */
2012: MatMatMultSymbolic(Ae,Be,fill,&Ce);
2013: MatMatMultNumeric(Ae,Be,Ce);
2014:
2015: /* convert Ce to C */
2016: MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);
2018: /* create data structure for reusing Cdense */
2019: PetscNew(&ab);
2020: c = (Mat_MPIDense*)(*C)->data;
2021: c->abdense = ab;
2023: ab->Ae = Ae;
2024: ab->Be = Be;
2025: ab->Ce = Ce;
2026: ab->destroy = (*C)->ops->destroy;
2027: (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2028: (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2029: return(0);
2030: }
2032: PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2033: {
2037: if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
2038: PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);
2039: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
2040: PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);
2041: } else {
2042: PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);
2043: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
2044: PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);
2045: }
2046: return(0);
2047: }
2048: #endif
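/*
   A sketch (illustrative helper; fill left at the default) of calling the product above.
   Note that MatMatMult() for two MATMPIDENSE matrices is only available when PETSc is
   configured with Elemental, as the #if guard around the implementation shows.
*/
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode ExampleMatMatMult(Mat A,Mat B)
{
  PetscErrorCode ierr;
  Mat            C;

  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);  /* reuse the symbolic data */
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  return 0;
}
#endif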