Actual source code: axpy.c
petsc-3.8.3 2017-12-09
#include <petsc/private/matimpl.h>
/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  a - the scalar multiplier
.  X - the first matrix
.  Y - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (the nonzeros of X are a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt m1,m2,n1,n2;

  MatGetSize(X,&m1,&n1);
  MatGetSize(Y,&m2,&n2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);

  PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
  if (Y->ops->axpy) {
    (*Y->ops->axpy)(Y,a,X,str);
  } else {
    MatAXPY_Basic(Y,a,X,str);
  }
  PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
#if defined(PETSC_HAVE_CUSP)
  if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUSP_CPU;
  }
#elif defined(PETSC_HAVE_VIENNACL)
  if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
  }
#elif defined(PETSC_HAVE_VECCUDA)
  if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUDA_CPU;
  }
#endif
  return(0);
}
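
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): a minimal MatAXPY() call.  It assumes X and Y are
   already created, assembled, and of the same global size.
*/
static PetscErrorCode ExampleAXPYUsage(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y <- 2.0*X + Y.  DIFFERENT_NONZERO_PATTERN is the safe default when the
     relation between the nonzero patterns of X and Y is unknown;
     SAME_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN enable faster code paths. */
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}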
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscMalloc1(n+1,&val);
    for (i=start; i<end; i++) {
      MatGetRow(X,i,&ncols,&row,&vals);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;
  const PetscScalar *vals;

  MatGetSize(X,&m,&n);
  MatGetOwnershipRange(X,&start,&end);
  if (a == 1.0) {
    for (i = start; i < end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
  } else {
    PetscMalloc1(n+1,&val);
    for (i=start; i<end; i++) {
      MatGetRow(Y,i,&ncols,&row,&vals);
      MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
      MatRestoreRow(Y,i,&ncols,&row,&vals);

      MatGetRow(X,i,&ncols,&row,&vals);
      for (j=0; j<ncols; j++) {
        val[j] = a*vals[j];
      }
      MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
      MatRestoreRow(X,i,&ncols,&row,&vals);
    }
    PetscFree(val);
  }
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar

   Level: intermediate

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry); a usage sketch follows this routine.

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
   preallocated with 1 nonzero per row for the values to be added. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet()
 @*/
PetscErrorCode MatShift(Mat Y,PetscScalar a)
{
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  if (Y->ops->shift) {
    (*Y->ops->shift)(Y,a);
  } else {
    MatShift_Basic(Y,a);
  }

#if defined(PETSC_HAVE_CUSP)
  if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUSP_CPU;
  }
#elif defined(PETSC_HAVE_VIENNACL)
  if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
  }
#elif defined(PETSC_HAVE_VECCUDA)
  if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUDA_CPU;
  }
#endif
  return(0);
}
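
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): building a small AIJ-style matrix whose diagonal
   entries are explicitly set to zero before calling MatShift(), as suggested
   by the Notes above.  The communicator and sizes are arbitrary.
*/
static PetscErrorCode ExampleShiftUsage(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Mat            A;
  PetscInt       i,rstart,rend;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSetUp(A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* Insert explicit zeros on the diagonal so later shifts do not have to
     allocate new nonzero locations. */
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValue(A,i,i,0.0,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* A <- A + 3.0*I */
  ierr = MatShift(A,3.0);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}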
PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
{
  PetscInt    i,start,end;
  PetscScalar *v;

  MatGetOwnershipRange(Y,&start,&end);
  VecGetArray(D,&v);
  for (i=start; i<end; i++) {
    MatSetValues(Y,1,&i,1,&i,v+i-start,is);
  }
  VecRestoreArray(D,&v);
  MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
  return(0);
}
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. If the InsertMode is INSERT_VALUES
   then Y[i,i] = D[i] instead.

   Neighbor-wise Collective on Mat and Vec

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift()
 @*/
PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscInt matlocal,veclocal;

  MatGetLocalSize(Y,&matlocal,NULL);
  VecGetLocalSize(D,&veclocal);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  if (Y->ops->diagonalset) {
    (*Y->ops->diagonalset)(Y,D,is);
  } else {
    MatDiagonalSet_Default(Y,D,is);
  }
  return(0);
}
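
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): a minimal MatDiagonalSet() call.  It assumes Y is
   assembled; the vector D is created with MatCreateVecs() so its local layout
   matches the rows of Y.
*/
static PetscErrorCode ExampleDiagonalSetUsage(Mat Y)
{
  PetscErrorCode ierr;
  Vec            D;

  PetscFunctionBegin;
  ierr = MatCreateVecs(Y,NULL,&D);CHKERRQ(ierr);
  ierr = VecSet(D,1.0);CHKERRQ(ierr);
  /* Add 1.0 to every diagonal entry of Y; with INSERT_VALUES the diagonal
     would instead be overwritten by D. */
  ierr = MatDiagonalSet(Y,D,ADD_VALUES);CHKERRQ(ierr);
  ierr = VecDestroy(&D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}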
/*@
   MatAYPX - Computes Y = a*Y + X.

   Logically Collective on Mat

   Input Parameters:
+  a - the PetscScalar multiplier
.  Y - the first matrix
.  X - the second matrix
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN

   Level: intermediate

.keywords: matrix, add

.seealso: MatAXPY()
 @*/
PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscScalar one = 1.0;
  PetscInt    mX,mY,nX,nY;

  MatGetSize(X,&mX,&nX);
  MatGetSize(Y,&mY,&nY);
  if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,nX,mY,nY);

  MatScale(Y,a);
  MatAXPY(Y,one,X,str);
  return(0);
}
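
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): MatAYPX() versus MatAXPY().  Assumes X and Y are
   assembled and conforming.
*/
static PetscErrorCode ExampleAYPXUsage(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y <- 0.5*Y + X; note that the scalar multiplies Y here, not X as in MatAXPY(). */
  ierr = MatAYPX(Y,0.5,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}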
/*@
   MatComputeExplicitOperator - Computes the explicit matrix representation of an operator

   Collective on Mat

   Input Parameter:
.  inmat - the matrix

   Output Parameter:
.  mat - the explicit operator

   Notes:
   This computation is done by applying the operator to the columns of the
   identity matrix.

   Currently, this routine uses a dense matrix format when 1 processor
   is used and a sparse format otherwise. This routine is costly in general,
   and is recommended for use only with relatively small systems.

   Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  Vec         in,out;
  PetscInt    i,m,n,M,N,*rows,start,end;
  MPI_Comm    comm;
  PetscScalar *array,zero = 0.0,one = 1.0;
  PetscMPIInt size;

  PetscObjectGetComm((PetscObject)inmat,&comm);
  MPI_Comm_size(comm,&size);

  MatGetLocalSize(inmat,&m,&n);
  MatGetSize(inmat,&M,&N);
  MatCreateVecs(inmat,&in,&out);
  VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
  VecGetOwnershipRange(out,&start,&end);
  PetscMalloc1(m,&rows);
  for (i=0; i<m; i++) rows[i] = start + i;

  MatCreate(comm,mat);
  MatSetSizes(*mat,m,n,M,N);
  if (size == 1) {
    MatSetType(*mat,MATSEQDENSE);
    MatSeqDenseSetPreallocation(*mat,NULL);
  } else {
    MatSetType(*mat,MATMPIAIJ);
    MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
  }

  for (i=0; i<N; i++) {
    VecSet(in,zero);
    VecSetValues(in,1,&i,&one,INSERT_VALUES);
    VecAssemblyBegin(in);
    VecAssemblyEnd(in);

    MatMult(inmat,in,out);

    VecGetArray(out,&array);
    MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
    VecRestoreArray(out,&array);
  }
  PetscFree(rows);
  VecDestroy(&out);
  VecDestroy(&in);
  MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
  return(0);
}
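
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): forming and viewing the explicit matrix for an
   operator whose action may only be defined through MatMult(), e.g. a
   MATSHELL.  Assumes the operator lives on PETSC_COMM_WORLD; the caller owns
   and must destroy the resulting matrix.
*/
static PetscErrorCode ExampleExplicitOperatorUsage(Mat op)
{
  PetscErrorCode ierr;
  Mat            expl;

  PetscFunctionBegin;
  ierr = MatComputeExplicitOperator(op,&expl);CHKERRQ(ierr);
  ierr = MatView(expl,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&expl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}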
/*@
   MatChop - Set all values in the matrix with an absolute value less than the tolerance to zero

   Input Parameters:
+  A - The matrix
-  tol - The zero tolerance

   Output Parameter:
.  A - The chopped matrix

   Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
 @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar *newVals;
  PetscInt    *newCols;
  PetscInt    rStart, rEnd, numRows, maxRows, r, colMax = 0;

  MatGetOwnershipRange(A, &rStart, &rEnd);
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    MatGetRow(A, r, &ncols, NULL, NULL);
    colMax = PetscMax(colMax, ncols);
    MatRestoreRow(A, r, &ncols, NULL, NULL);
  }
  numRows = rEnd - rStart;
  MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
  PetscMalloc2(colMax,&newCols,colMax,&newVals);
  /* Every process iterates maxRows times so that the collective
     MatAssemblyBegin/End calls below match across processes even when the
     local row counts differ. */
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;

    if (r < rEnd) {
      MatGetRow(A, r, &ncols, &cols, &vals);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      MatRestoreRow(A, r, &ncols, &cols, &vals);
      MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
    }
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  }
  PetscFree2(newCols,newVals);
  return(0);
}
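
/*
   Illustrative usage sketch (editor-added, hypothetical helper name, not part
   of the PETSc sources): dropping small entries from an assembled matrix.
   Note that MatChop() only zeroes the stored values; it does not remove the
   corresponding nonzero locations from the matrix structure.
*/
static PetscErrorCode ExampleChopUsage(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Zero every stored entry of A whose absolute value is below 1e-10. */
  ierr = MatChop(A,1.0e-10);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}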