Actual source code: axpy.c
petsc-3.12.3 2020-01-03
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: PetscErrorCode ierr,(*f)(Mat,Mat*);
7: Mat A,F;
10: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
11: if (f) {
12: MatTransposeGetMat(T,&A);
13: if (T == X) {
14: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
15: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
16: A = Y;
17: } else {
18: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
19: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
20: }
21: } else {
22: MatHermitianTransposeGetMat(T,&A);
23: if (T == X) {
24: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
25: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
26: A = Y;
27: } else {
28: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
29: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
30: }
31: }
32: MatAXPY(A,a,F,str);
33: MatDestroy(&F);
34: return(0);
35: }
37: /*@
38: MatAXPY - Computes Y = a*X + Y.
40: Logically Collective on Mat
42: Input Parameters:
43: + a - the scalar multiplier
44: . X - the first matrix
45: . Y - the second matrix
46: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
49: Level: intermediate
51: .seealso: MatAYPX()
52: @*/
53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
54: {
56: PetscInt M1,M2,N1,N2;
57: PetscInt m1,m2,n1,n2;
58: MatType t1,t2;
59: PetscBool sametype,transpose;
66: MatGetSize(X,&M1,&N1);
67: MatGetSize(Y,&M2,&N2);
68: MatGetLocalSize(X,&m1,&n1);
69: MatGetLocalSize(Y,&m2,&n2);
70: if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
71: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
73: MatGetType(X,&t1);
74: MatGetType(Y,&t2);
75: PetscStrcmp(t1,t2,&sametype);
76: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
77: if (Y->ops->axpy && sametype) {
78: (*Y->ops->axpy)(Y,a,X,str);
79: } else {
80: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
81: if (transpose) {
82: MatTransposeAXPY_Private(Y,a,X,str,X);
83: } else {
84: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
85: if (transpose) {
86: MatTransposeAXPY_Private(Y,a,X,str,Y);
87: } else {
88: if (str != DIFFERENT_NONZERO_PATTERN) {
89: MatAXPY_Basic(Y,a,X,str);
90: } else {
91: Mat B;
93: MatAXPY_Basic_Preallocate(Y,X,&B);
94: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
95: MatHeaderReplace(Y,&B);
96: }
97: }
98: }
99: }
100: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
101: return(0);
102: }
104: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
105: {
107: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
110: /* look for any available faster alternative to the general preallocator */
111: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
112: if (preall) {
113: (*preall)(Y,X,B);
114: } else { /* Use MatPrellocator, assumes same row-col distribution */
115: Mat preallocator;
116: PetscInt r,rstart,rend;
117: PetscInt m,n,M,N;
119: MatGetRowUpperTriangular(Y);
120: MatGetRowUpperTriangular(X);
121: MatGetSize(Y,&M,&N);
122: MatGetLocalSize(Y,&m,&n);
123: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
124: MatSetType(preallocator,MATPREALLOCATOR);
125: MatSetSizes(preallocator,m,n,M,N);
126: MatSetUp(preallocator);
127: MatGetOwnershipRange(preallocator,&rstart,&rend);
128: for (r = rstart; r < rend; ++r) {
129: PetscInt ncols;
130: const PetscInt *row;
131: const PetscScalar *vals;
133: MatGetRow(Y,r,&ncols,&row,&vals);
134: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
135: MatRestoreRow(Y,r,&ncols,&row,&vals);
136: MatGetRow(X,r,&ncols,&row,&vals);
137: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
138: MatRestoreRow(X,r,&ncols,&row,&vals);
139: }
140: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
141: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
142: MatRestoreRowUpperTriangular(Y);
143: MatRestoreRowUpperTriangular(X);
145: MatCreate(PetscObjectComm((PetscObject)Y),B);
146: MatSetType(*B,((PetscObject)Y)->type_name);
147: MatSetSizes(*B,m,n,M,N);
148: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
149: MatDestroy(&preallocator);
150: }
151: return(0);
152: }
154: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
155: {
156: PetscInt i,start,end,j,ncols,m,n;
157: PetscErrorCode ierr;
158: const PetscInt *row;
159: PetscScalar *val;
160: const PetscScalar *vals;
163: MatGetSize(X,&m,&n);
164: MatGetOwnershipRange(X,&start,&end);
165: MatGetRowUpperTriangular(X);
166: if (a == 1.0) {
167: for (i = start; i < end; i++) {
168: MatGetRow(X,i,&ncols,&row,&vals);
169: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
170: MatRestoreRow(X,i,&ncols,&row,&vals);
171: }
172: } else {
173: PetscInt vs = 100;
174: /* realloc if needed, as this function may be used in parallel */
175: PetscMalloc1(vs,&val);
176: for (i=start; i<end; i++) {
177: MatGetRow(X,i,&ncols,&row,&vals);
178: if (vs < ncols) {
179: vs = PetscMin(2*ncols,n);
180: PetscRealloc(vs*sizeof(*val),&val);
181: }
182: for (j=0; j<ncols; j++) val[j] = a*vals[j];
183: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
184: MatRestoreRow(X,i,&ncols,&row,&vals);
185: }
186: PetscFree(val);
187: }
188: MatRestoreRowUpperTriangular(X);
189: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
190: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
191: return(0);
192: }
194: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
195: {
196: PetscInt i,start,end,j,ncols,m,n;
197: PetscErrorCode ierr;
198: const PetscInt *row;
199: PetscScalar *val;
200: const PetscScalar *vals;
203: MatGetSize(X,&m,&n);
204: MatGetOwnershipRange(X,&start,&end);
205: MatGetRowUpperTriangular(Y);
206: MatGetRowUpperTriangular(X);
207: if (a == 1.0) {
208: for (i = start; i < end; i++) {
209: MatGetRow(Y,i,&ncols,&row,&vals);
210: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
211: MatRestoreRow(Y,i,&ncols,&row,&vals);
213: MatGetRow(X,i,&ncols,&row,&vals);
214: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
215: MatRestoreRow(X,i,&ncols,&row,&vals);
216: }
217: } else {
218: PetscInt vs = 100;
219: /* realloc if needed, as this function may be used in parallel */
220: PetscMalloc1(vs,&val);
221: for (i=start; i<end; i++) {
222: MatGetRow(Y,i,&ncols,&row,&vals);
223: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
224: MatRestoreRow(Y,i,&ncols,&row,&vals);
226: MatGetRow(X,i,&ncols,&row,&vals);
227: if (vs < ncols) {
228: vs = PetscMin(2*ncols,n);
229: PetscRealloc(vs*sizeof(*val),&val);
230: }
231: for (j=0; j<ncols; j++) val[j] = a*vals[j];
232: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
233: MatRestoreRow(X,i,&ncols,&row,&vals);
234: }
235: PetscFree(val);
236: }
237: MatRestoreRowUpperTriangular(Y);
238: MatRestoreRowUpperTriangular(X);
239: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
240: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
241: return(0);
242: }
244: /*@
245: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
247: Neighbor-wise Collective on Mat
249: Input Parameters:
250: + Y - the matrices
251: - a - the PetscScalar
253: Level: intermediate
255: Notes:
256: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
257: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
258: entry).
260: To form Y = Y + diag(V) use MatDiagonalSet()
262: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
263: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
265: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
266: @*/
267: PetscErrorCode MatShift(Mat Y,PetscScalar a)
268: {
273: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
274: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
275: MatCheckPreallocated(Y,1);
276: if (a == 0.0) return(0);
278: if (Y->ops->shift) {
279: (*Y->ops->shift)(Y,a);
280: } else {
281: MatShift_Basic(Y,a);
282: }
284: PetscObjectStateIncrease((PetscObject)Y);
285: return(0);
286: }
288: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
289: {
291: PetscInt i,start,end;
292: PetscScalar *v;
295: MatGetOwnershipRange(Y,&start,&end);
296: VecGetArray(D,&v);
297: for (i=start; i<end; i++) {
298: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
299: }
300: VecRestoreArray(D,&v);
301: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
302: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
303: return(0);
304: }
306: /*@
307: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
308: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
309: INSERT_VALUES.
311: Input Parameters:
312: + Y - the input matrix
313: . D - the diagonal matrix, represented as a vector
314: - i - INSERT_VALUES or ADD_VALUES
316: Neighbor-wise Collective on Mat
318: Notes:
319: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
320: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
321: entry).
323: Level: intermediate
325: .seealso: MatShift(), MatScale(), MatDiagonalScale()
326: @*/
327: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
328: {
330: PetscInt matlocal,veclocal;
335: MatGetLocalSize(Y,&matlocal,NULL);
336: VecGetLocalSize(D,&veclocal);
337: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
338: if (Y->ops->diagonalset) {
339: (*Y->ops->diagonalset)(Y,D,is);
340: } else {
341: MatDiagonalSet_Default(Y,D,is);
342: }
343: PetscObjectStateIncrease((PetscObject)Y);
344: return(0);
345: }
347: /*@
348: MatAYPX - Computes Y = a*Y + X.
350: Logically on Mat
352: Input Parameters:
353: + a - the PetscScalar multiplier
354: . Y - the first matrix
355: . X - the second matrix
356: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
358: Level: intermediate
360: .seealso: MatAXPY()
361: @*/
362: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
363: {
364: PetscScalar one = 1.0;
366: PetscInt mX,mY,nX,nY;
372: MatGetSize(X,&mX,&nX);
373: MatGetSize(X,&mY,&nY);
374: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
375: MatScale(Y,a);
376: MatAXPY(Y,one,X,str);
377: return(0);
378: }
380: /*@
381: MatComputeOperator - Computes the explicit matrix
383: Collective on Mat
385: Input Parameter:
386: + inmat - the matrix
387: - mattype - the matrix type for the explicit operator
389: Output Parameter:
390: . mat - the explict operator
392: Notes:
393: This computation is done by applying the operators to columns of the identity matrix.
394: This routine is costly in general, and is recommended for use only with relatively small systems.
395: Currently, this routine uses a dense matrix format if mattype == NULL.
397: Level: advanced
399: @*/
400: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
401: {
407: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
408: return(0);
409: }
411: /*@
412: MatComputeOperatorTranspose - Computes the explicit matrix representation of
413: a give matrix that can apply MatMultTranspose()
415: Collective on Mat
417: Input Parameter:
418: . inmat - the matrix
420: Output Parameter:
421: . mat - the explict operator transposed
423: Notes:
424: This computation is done by applying the transpose of the operator to columns of the identity matrix.
425: This routine is costly in general, and is recommended for use only with relatively small systems.
426: Currently, this routine uses a dense matrix format if mattype == NULL.
428: Level: advanced
430: @*/
431: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
432: {
433: Mat A;
439: MatCreateTranspose(inmat,&A);
440: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
441: MatDestroy(&A);
442: return(0);
443: }
445: /*@
446: MatChop - Set all values in the matrix less than the tolerance to zero
448: Input Parameters:
449: + A - The matrix
450: - tol - The zero tolerance
452: Output Parameters:
453: . A - The chopped matrix
455: Level: intermediate
457: .seealso: MatCreate(), MatZeroEntries()
458: @*/
459: PetscErrorCode MatChop(Mat A, PetscReal tol)
460: {
461: PetscScalar *newVals;
462: PetscInt *newCols;
463: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
467: MatGetOwnershipRange(A, &rStart, &rEnd);
468: for (r = rStart; r < rEnd; ++r) {
469: PetscInt ncols;
471: MatGetRow(A, r, &ncols, NULL, NULL);
472: colMax = PetscMax(colMax, ncols);
473: MatRestoreRow(A, r, &ncols, NULL, NULL);
474: }
475: numRows = rEnd - rStart;
476: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
477: PetscMalloc2(colMax,&newCols,colMax,&newVals);
478: for (r = rStart; r < rStart+maxRows; ++r) {
479: const PetscScalar *vals;
480: const PetscInt *cols;
481: PetscInt ncols, newcols, c;
483: if (r < rEnd) {
484: MatGetRow(A, r, &ncols, &cols, &vals);
485: for (c = 0; c < ncols; ++c) {
486: newCols[c] = cols[c];
487: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
488: }
489: newcols = ncols;
490: MatRestoreRow(A, r, &ncols, &cols, &vals);
491: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
492: }
493: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
494: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
495: }
496: PetscFree2(newCols,newVals);
497: return(0);
498: }