Actual source code: axpy.c
petsc-3.11.0 2019-03-29
#include <petsc/private/matimpl.h>
/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a - the scalar multiplier
.  X - the first matrix
.  Y - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
@*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt  M1,M2,N1,N2;
  PetscInt  m1,m2,n1,n2;
  PetscBool sametype;

  MatGetSize(X,&M1,&N1);
  MatGetSize(Y,&M2,&N2);
  MatGetLocalSize(X,&m1,&n1);
  MatGetLocalSize(Y,&m2,&n2);
  if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %D x %D, Y %D x %D",M1,N1,M2,N2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %D x %D, Y %D x %D",m1,n1,m2,n2);
  PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
  PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
  if (Y->ops->axpy && sametype) {
    (*Y->ops->axpy)(Y,a,X,str);
  } else {
    if (str != DIFFERENT_NONZERO_PATTERN) {
      MatAXPY_Basic(Y,a,X,str);
    } else {
      Mat B;

      MatAXPY_Basic_Preallocate(Y,X,&B);
      MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
      MatHeaderReplace(Y,&B);
    }
  }
  PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
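
/*
   Example usage (an illustrative sketch, not part of this source file): forming
   Y <- 2*X + Y for two small sequential AIJ matrices.  The sizes and values are
   made up for illustration, and error checking is omitted as in the listing above.

     #include <petscmat.h>

     int main(int argc,char **argv)
     {
       Mat         X,Y;
       PetscInt    i;
       PetscScalar v;

       PetscInitialize(&argc,&argv,NULL,NULL);
       MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&X);
       MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&Y);
       for (i=0; i<4; i++) {
         v = 1.0; MatSetValues(X,1,&i,1,&i,&v,INSERT_VALUES);
         v = 3.0; MatSetValues(Y,1,&i,1,&i,&v,INSERT_VALUES);
       }
       MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);
       MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);

       MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
       MatView(Y,PETSC_VIEWER_STDOUT_SELF);

       MatDestroy(&X);
       MatDestroy(&Y);
       PetscFinalize();
       return 0;
     }

   SAME_NONZERO_PATTERN is appropriate here because both matrices are diagonal;
   use DIFFERENT_NONZERO_PATTERN when nothing is known about the two patterns.
*/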
PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

  /* look for any available faster alternative to the general preallocator */
  PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
  if (preall) {
    (*preall)(Y,X,B);
  } else { /* use MatPreallocator; assumes the same row-column distribution */
    Mat      preallocator;
    PetscInt r,rstart,rend;
    PetscInt m,n,M,N;

    MatGetSize(Y,&M,&N);
    MatGetLocalSize(Y,&m,&n);
    MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
    MatSetType(preallocator,MATPREALLOCATOR);
    MatSetSizes(preallocator,m,n,M,N);
    MatSetUp(preallocator);
    MatGetOwnershipRange(preallocator,&rstart,&rend);
    for (r = rstart; r < rend; ++r) {
      PetscInt          ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      MatGetRow(Y,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(Y,r,&ncols,&row,&vals);
      MatGetRow(X,r,&ncols,&row,&vals);
      MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
      MatRestoreRow(X,r,&ncols,&row,&vals);
    }
    MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);

    MatCreate(PetscObjectComm((PetscObject)Y),B);
    MatSetType(*B,((PetscObject)Y)->type_name);
    MatSetSizes(*B,m,n,M,N);
    MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
    MatDestroy(&preallocator);
  }
  return(0);
}
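
/*
   Note on the fast path above (an illustrative sketch; the type name and routine
   below are hypothetical): a matrix implementation can bypass the general
   MATPREALLOCATOR pass by attaching its own preallocation routine under the key
   "MatAXPYGetPreallocation_C", typically from its creation routine.
   MatAXPY_Basic_Preallocate() then finds and calls it via PetscObjectQueryFunction()
   as shown above.

     static PetscErrorCode MatAXPYGetPreallocation_MyType(Mat Y,Mat X,Mat *B)
     {
       ... build *B preallocated for the union of the nonzero patterns of Y and X,
           using knowledge of the MyType storage format ...
       return(0);
     }

     PetscObjectComposeFunction((PetscObject)A,"MatAXPYGetPreallocation_C",
                                MatAXPYGetPreallocation_MyType);
*/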
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    PetscMalloc1(vs,&val);
    for (i=start; i<end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      if (vs < ncols) {
        vs = PetscMin(2*ncols,n);
        PetscRealloc(vs*sizeof(*val),&val);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar

   Level: intermediate

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
    entry).

   To form Y = Y + diag(V) use MatDiagonalSet()

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the values to be added. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
@*/
PetscErrorCode MatShift(Mat Y,PetscScalar a)
{
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  if (Y->ops->shift) {
    (*Y->ops->shift)(Y,a);
  } else {
    MatShift_Basic(Y,a);
  }

  PetscObjectStateIncrease((PetscObject)Y);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
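
/*
   Example usage (an illustrative sketch, not part of this source file): adding a
   shift sigma*I to an assembled matrix A, for instance before a shifted solve.
   The matrix A and the value of sigma are assumed to exist already; as the Notes
   above say, this is fastest when every diagonal location already holds a
   (possibly zero) value.

     Mat         A;
     PetscScalar sigma = 0.5;

     MatShift(A,sigma);
*/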
PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
{
  PetscInt    i,start,end;
  PetscScalar *v;

  MatGetOwnershipRange(Y,&start,&end);
  VecGetArray(D,&v);
  for (i=start; i<end; i++) {
    MatSetValues(Y,1,&i,1,&i,v+i-start,is);
  }
  VecRestoreArray(D,&v);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. If the InsertMode is INSERT_VALUES
   then Y[i,i] = D[i] instead.

   Neighbor-wise Collective on Mat and Vec

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
    entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
@*/
PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscInt matlocal,veclocal;

  MatGetLocalSize(Y,&matlocal,NULL);
  VecGetLocalSize(D,&veclocal);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of local rows of matrix %D does not match that of the diagonal vector %D",matlocal,veclocal);
  if (Y->ops->diagonalset) {
    (*Y->ops->diagonalset)(Y,D,is);
  } else {
    MatDiagonalSet_Default(Y,D,is);
  }
  PetscObjectStateIncrease((PetscObject)Y);
  return(0);
}
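
/*
   Example usage (an illustrative sketch, not part of this source file): adding a
   diagonal stored in a Vec to an assembled matrix A.  MatCreateVecs() is used so
   that the vector automatically gets the same parallel row layout as A, which is
   what the local-size check above requires.

     Mat A;
     Vec d;

     MatCreateVecs(A,NULL,&d);
     VecSet(d,1.0);
     MatDiagonalSet(A,d,ADD_VALUES);
     VecDestroy(&d);
*/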
/*@
   MatAYPX - Computes Y = a*Y + X.

   Logically Collective on Mat

   Input Parameters:
+  a - the PetscScalar multiplier
.  Y - the first matrix
.  X - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

   Level: intermediate

.keywords: matrix, add

.seealso: MatAXPY()
@*/
PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscScalar one = 1.0;
  PetscInt    mX,mY,nX,nY;

  MatGetSize(X,&mX,&nX);
  MatGetSize(Y,&mY,&nY);
  if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D x %D first, %D x %D second",mX,nX,mY,nY);

  MatScale(Y,a);
  MatAXPY(Y,one,X,str);
  return(0);
}
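
/*
   Example usage (an illustrative sketch, not part of this source file): forming
   Y <- 0.25*Y + X for two assembled matrices of the same size and layout; the
   names and the scalar are hypothetical.

     Mat X,Y;

     MatAYPX(Y,0.25,X,DIFFERENT_NONZERO_PATTERN);
*/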
/*@
   MatComputeExplicitOperator - Computes the explicit matrix

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator

   Notes:
    This computation is done by applying the operator to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise. This routine is costly in general,
    and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  MPI_Comm    comm;
  PetscMPIInt size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);
  MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);
  return(0);
}
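
/*
   Example usage (an illustrative sketch, not part of this source file): assembling
   the explicit matrix behind a matrix-free MATSHELL operator S, typically only for
   debugging small problems, since one multiplication is done per column of the
   identity.  MatComputeExplicitOperatorTranspose() below is used the same way when
   only MatMultTranspose() is available.

     Mat S,E;

     MatComputeExplicitOperator(S,&E);
     MatView(E,PETSC_VIEWER_STDOUT_WORLD);
     MatDestroy(&E);
*/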
/*@
   MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
        a given matrix that can apply MatMultTranspose()

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator transposed

   Notes:
    This computation is done by applying the transpose of the operator to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise. This routine is costly in general,
    and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec         in,out;
  PetscInt    i,m,n,M,N,*rows,start,end;
  MPI_Comm    comm;
  PetscScalar *array,zero = 0.0,one = 1.0;
  PetscMPIInt size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);

  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;

  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }

  for (i=0; i<N; i++) {
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);

    MatMultTranspose(inmat,in,out);

    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatChop - Set all values in the matrix with absolute value less than the tolerance to zero

   Input Parameters:
+  A   - The matrix
-  tol - The zero tolerance

   Output Parameter:
.  A - The chopped matrix

   Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
@*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar *newVals;
  PetscInt    *newCols;
  PetscInt    rStart, rEnd, numRows, maxRows, r, colMax = 0;

  MatGetOwnershipRange(A, &rStart, &rEnd);
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    MatGetRow(A, r, &ncols, NULL, NULL);
    colMax = PetscMax(colMax, ncols);
    MatRestoreRow(A, r, &ncols, NULL, NULL);
  }
  numRows = rEnd - rStart;
  MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
  PetscMalloc2(colMax,&newCols,colMax,&newVals);
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt          ncols, newcols, c;

    if (r < rEnd) {
      MatGetRow(A, r, &ncols, &cols, &vals);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      MatRestoreRow(A, r, &ncols, &cols, &vals);
      MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
    }
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }
  PetscFree2(newCols,newVals);
  return(0);
}
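
/*
   Example usage (an illustrative sketch, not part of this source file): zeroing
   every entry of an assembled matrix A whose magnitude is below 1e-12.  Note that
   the chopped entries are set to 0.0 but remain in the stored nonzero pattern.

     Mat A;

     MatChop(A,1.0e-12);
*/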