Actual source code: axpy.c
petsc-3.10.2 2018-10-09
2: #include <petsc/private/matimpl.h>
4: /*@
5: MatAXPY - Computes Y = a*X + Y.
7: Logically Collective on Mat
9: Input Parameters:
10: + a - the scalar multiplier
11: . X - the first matrix
12: . Y - the second matrix
13: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14: or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of Y's)
16: Level: intermediate
18: .keywords: matrix, add
20: .seealso: MatAYPX()
21: @*/
22: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23: {
25: PetscInt m1,m2,n1,n2;
26: PetscBool sametype;
32: MatGetSize(X,&m1,&n1);
33: MatGetSize(Y,&m2,&n2);
34: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
36: PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy && sametype) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
45: if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
47: }
48: #endif
49: return(0);
50: }
/*
   MatAXPY_Basic - Generic fallback for MatAXPY(): computes Y = a*X + Y for any
   matrix type by walking the locally owned rows of X and adding the (scaled)
   entries into Y with ADD_VALUES. The str argument is accepted for interface
   symmetry but is not used by this generic path.
*/
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt i,start,end,j,ncols,m,n;
  PetscErrorCode ierr; /* NOTE(review): never assigned here -- the ierr=.../CHKERRQ error checking appears to have been stripped from this copy */
  const PetscInt *row;
  PetscScalar *val;
  const PetscScalar *vals;
  MatGetSize(X,&m,&n); /* only n (global columns) is used below, as an upper bound on any row's length */
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    /* No scaling needed: add X's rows into Y directly. */
    for (i = start; i < end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    /* Scale each row of X into a scratch buffer before adding it to Y;
       n+1 guards against a zero-size allocation when the matrix has no columns. */
    PetscMalloc1(n+1,&val);
    for (i=start; i<end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  /* Collective final assembly makes the added values visible. */
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*
   MatAXPY_BasicWithPreallocation - Fills the already-created (and preallocated)
   matrix B with Y + a*X by adding the rows of Y and the scaled rows of X into B.
   The str argument is accepted for interface symmetry but is not used here.
*/
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt i,start,end,j,ncols,m,n;
  PetscErrorCode ierr; /* NOTE(review): never assigned here -- the ierr=.../CHKERRQ error checking appears to have been stripped from this copy */
  const PetscInt *row;
  PetscScalar *val;
  const PetscScalar *vals;
  MatGetSize(X,&m,&n); /* only n is used below, as an upper bound on any row's length */
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      /* B += row of Y */
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);
      /* B += row of X (no scaling needed when a == 1) */
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscMalloc1(n+1,&val); /* n+1 guards against a zero-size allocation */
    for (i=start; i<end; i++) {
      /* B += row of Y */
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);
      /* B += a * (row of X), scaled through the scratch buffer */
      MatGetRow(X,i,&ncols,&row,&vals);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  /* Collective final assembly of the result matrix. */
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  return(0);
}
128: /*@
129: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
131: Neighbor-wise Collective on Mat
133: Input Parameters:
134: + Y - the matrices
135: - a - the PetscScalar
137: Level: intermediate
139: Notes:
140: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
141: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
142: entry).
144: To form Y = Y + diag(V) use MatDiagonalSet()
146: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
147: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
149: .keywords: matrix, add, shift
151: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
152: @*/
PetscErrorCode MatShift(Mat Y,PetscScalar a)
{
  /* Shifting modifies existing diagonal entries; refuse unassembled or factored matrices. */
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);
  /* Prefer the type-specific shift; otherwise use the generic fallback (defined elsewhere). */
  if (Y->ops->shift) {
    (*Y->ops->shift)(Y,a);
  } else {
    MatShift_Basic(Y,a);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
  /* The host copy of Y changed; invalidate any device (GPU) copy. */
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  return(0);
}
177: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
178: {
180: PetscInt i,start,end;
181: PetscScalar *v;
184: MatGetOwnershipRange(Y,&start,&end);
185: VecGetArray(D,&v);
186: for (i=start; i<end; i++) {
187: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
188: }
189: VecRestoreArray(D,&v);
190: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
191: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
192: return(0);
193: }
195: /*@
196: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
197: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
198: INSERT_VALUES.
200: Input Parameters:
201: + Y - the input matrix
202: . D - the diagonal matrix, represented as a vector
203: - is - INSERT_VALUES or ADD_VALUES
205: Neighbor-wise Collective on Mat and Vec
207: Notes:
208: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
209: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
210: entry).
212: Level: intermediate
214: .keywords: matrix, add, shift, diagonal
216: .seealso: MatShift(), MatScale(), MatDiagonalScale()
217: @*/
PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscInt matlocal,veclocal;
  /* D must conform to Y's row distribution: same local length on every process. */
  MatGetLocalSize(Y,&matlocal,NULL);
  VecGetLocalSize(D,&veclocal);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  /* Prefer the type-specific implementation; otherwise use the generic row-by-row default. */
  if (Y->ops->diagonalset) {
    (*Y->ops->diagonalset)(Y,D,is);
  } else {
    MatDiagonalSet_Default(Y,D,is);
  }
  return(0);
}
237: /*@
238: MatAYPX - Computes Y = a*Y + X.
240: Logically Collective on Mat
242: Input Parameters:
243: + a - the PetscScalar multiplier
244: . Y - the first matrix
245: . X - the second matrix
246: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
248: Level: intermediate
250: .keywords: matrix, add
252: .seealso: MatAXPY()
253: @*/
254: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
255: {
256: PetscScalar one = 1.0;
258: PetscInt mX,mY,nX,nY;
264: MatGetSize(X,&mX,&nX);
265: MatGetSize(X,&mY,&nY);
266: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
268: MatScale(Y,a);
269: MatAXPY(Y,one,X,str);
270: return(0);
271: }
273: /*@
274: MatComputeExplicitOperator - Computes the explicit matrix
276: Collective on Mat
278: Input Parameter:
279: . inmat - the matrix
281: Output Parameter:
282: . mat - the explicit operator
284: Notes:
285: This computation is done by applying the operators to columns of the
286: identity matrix.
288: Currently, this routine uses a dense matrix format when 1 processor
289: is used and a sparse format otherwise. This routine is costly in general,
290: and is recommended for use only with relatively small systems.
292: Level: advanced
294: .keywords: Mat, compute, explicit, operator
295: @*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  Vec in,out;
  PetscInt i,m,n,M,N,*rows,start,end;
  MPI_Comm comm;
  PetscScalar *array,zero = 0.0,one = 1.0;
  PetscMPIInt size;
  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);
  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  /* Work vectors compatible with inmat: 'in' lives in the column space, 'out' in the row space. */
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  /* Global indices of the locally owned rows of the result. */
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;
  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    /* Crude worst-case preallocation: n diagonal-block and N-n off-diagonal-block nonzeros per row. */
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }
  /* Column i of the explicit operator is inmat applied to the i-th unit vector. */
  for (i=0; i<N; i++) {
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);
    MatMult(inmat,in,out);
    /* Store the result column into the locally owned rows of the output matrix. */
    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
352: /*@
353: MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
354: a give matrix that can apply MatMultTranspose()
356: Collective on Mat
358: Input Parameter:
359: . inmat - the matrix
361: Output Parameter:
362: . mat - the explicit operator transposed
364: Notes:
365: This computation is done by applying the transpose of the operator to columns of the
366: identity matrix.
368: Currently, this routine uses a dense matrix format when 1 processor
369: is used and a sparse format otherwise. This routine is costly in general,
370: and is recommended for use only with relatively small systems.
372: Level: advanced
374: .keywords: Mat, compute, explicit, operator
375: @*/
PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
{
  Vec in,out;
  PetscInt i,m,n,M,N,*rows,start,end;
  MPI_Comm comm;
  PetscScalar *array,zero = 0.0,one = 1.0;
  PetscMPIInt size;
  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);
  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  /* NOTE(review): 'in'/'out' are sized for MatMult (in: column space, out: row space),
     yet MatMultTranspose below maps the row space to the column space; this appears
     dimensionally consistent only for square matrices -- confirm intended use. */
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  /* Global indices of the locally owned rows of the result. */
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;
  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    /* Crude worst-case preallocation: n diagonal-block and N-n off-diagonal-block nonzeros per row. */
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }
  /* Column i of the result is inmat^T applied to the i-th unit vector. */
  for (i=0; i<N; i++) {
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);
    MatMultTranspose(inmat,in,out);
    /* Store the result column into the locally owned rows of the output matrix. */
    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
432: /*@
433: MatChop - Set all values in the matrix less than the tolerance to zero
435: Input Parameters:
436: + A - The matrix
437: - tol - The zero tolerance
439: Output Parameters:
440: . A - The chopped matrix
442: Level: intermediate
444: .seealso: MatCreate(), MatZeroEntries()
445: @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar *newVals;
  PetscInt *newCols;
  PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
  MatGetOwnershipRange(A, &rStart, &rEnd);
  /* The longest local row determines the scratch-buffer size. */
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;
    MatGetRow(A, r, &ncols, NULL, NULL);
    colMax = PetscMax(colMax, ncols);
    MatRestoreRow(A, r, &ncols, NULL, NULL);
  }
  numRows = rEnd - rStart;
  /* All processes iterate maxRows times so the collective assembly calls inside
     the loop below match up even when local row counts differ. */
  MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
  PetscMalloc2(colMax,&newCols,colMax,&newVals);
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt *cols;
    PetscInt ncols, newcols, c;
    if (r < rEnd) {
      MatGetRow(A, r, &ncols, &cols, &vals);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        /* Zero out entries below the tolerance; the nonzero pattern is preserved. */
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      MatRestoreRow(A, r, &ncols, &cols, &vals);
      MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
    }
    /* Intentionally inside the loop: assembly is collective, and processes that
       have run out of local rows must still participate in every round. */
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }
  PetscFree2(newCols,newVals);
  return(0);
}