1: /*
2: Defines basic operations for the MATSEQBAIJMKL matrix class.
3: Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4: wherever possible. If used MKL verion is older than 11.3 PETSc default
5: code for sparse matrix operations is used.
6: */
8: #include <../src/mat/impls/baij/seq/baij.h> 9: #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> 12: /* MKL include files. */
13: #include <mkl_spblas.h> /* Sparse BLAS */
15: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
16: typedef struct {
17: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
18: sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
19: struct matrix_descr descr;
20: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
21: PetscInt *ai1;
22: PetscInt *aj1;
23: #endif
24: } Mat_SeqBAIJMKL;
26: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
27: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);
29: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat) 30: {
31: /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
32: /* so we will ignore 'MatType type'. */
34: Mat B = *newmat;
35: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
38: if (reuse == MAT_INITIAL_MATRIX) {
39: MatDuplicate(A,MAT_COPY_VALUES,&B);
40: }
42: /* Reset the original function pointers. */
43: B->ops->duplicate = MatDuplicate_SeqBAIJ;
44: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ;
45: B->ops->destroy = MatDestroy_SeqBAIJ;
46: B->ops->multtranspose = MatMultTranspose_SeqBAIJ;
47: B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
48: B->ops->scale = MatScale_SeqBAIJ;
49: B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ;
50: B->ops->axpy = MatAXPY_SeqBAIJ;
51: 52: switch (A->rmap->bs) {
53: case 1:
54: B->ops->mult = MatMult_SeqBAIJ_1;
55: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
56: break;
57: case 2:
58: B->ops->mult = MatMult_SeqBAIJ_2;
59: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
60: break;
61: case 3:
62: B->ops->mult = MatMult_SeqBAIJ_3;
63: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
64: break;
65: case 4:
66: B->ops->mult = MatMult_SeqBAIJ_4;
67: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
68: break;
69: case 5:
70: B->ops->mult = MatMult_SeqBAIJ_5;
71: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
72: break;
73: case 6:
74: B->ops->mult = MatMult_SeqBAIJ_6;
75: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
76: break;
77: case 7:
78: B->ops->mult = MatMult_SeqBAIJ_7;
79: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
80: break;
81: case 11:
82: B->ops->mult = MatMult_SeqBAIJ_11;
83: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
84: break;
85: case 15:
86: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
87: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
88: break;
89: default: 90: B->ops->mult = MatMult_SeqBAIJ_N;
91: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
92: break;
93: }
94: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);
96: /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
97: * simply involves destroying the MKL sparse matrix handle and then freeing
98: * the spptr pointer. */
99: if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;
101: if (baijmkl->sparse_optimized) {
102: sparse_status_t stat;
103: stat = mkl_sparse_destroy(baijmkl->bsrA);
104: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
105: }
106: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
107: PetscFree(baijmkl->ai1);
108: PetscFree(baijmkl->aj1);
109: #endif
110: PetscFree(B->spptr);
112: /* Change the type of B to MATSEQBAIJ. */
113: PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);
115: *newmat = B;
116: return(0);
117: }
119: PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)120: {
122: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123: 125: if (baijmkl) {
126: /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127: if (baijmkl->sparse_optimized) {
128: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
129: stat = mkl_sparse_destroy(baijmkl->bsrA);
130: if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
131: }
132: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
133: PetscFree(baijmkl->ai1);
134: PetscFree(baijmkl->aj1);
135: #endif
136: PetscFree(A->spptr);
137: }
139: /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
140: * to destroy everything that remains. */
141: PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
142: MatDestroy_SeqBAIJ(A);
143: return(0);
144: }
146: PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)147: {
149: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
150: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
151: PetscInt mbs, nbs, nz, bs, i;
152: MatScalar *aa;
153: PetscInt *aj,*ai;
154: sparse_status_t stat;
157: if (baijmkl->sparse_optimized) {
158: /* Matrix has been previously assembled and optimized. Must destroy old
159: * matrix handle before running the optimization step again. */
160: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
161: PetscFree(baijmkl->ai1);
162: PetscFree(baijmkl->aj1);
163: #endif
164: stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
165: }
166: baijmkl->sparse_optimized = PETSC_FALSE;
168: /* Now perform the SpMV2 setup and matrix optimization. */
169: baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
170: baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
171: baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
172: mbs = a->mbs;
173: nbs = a->nbs;
174: nz = a->nz;
175: bs = A->rmap->bs;
176: aa = a->a;
177: 178: if ((nz!=0) & !(A->structure_only)) {
179: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
180: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
181: #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
182: aj = a->j;
183: ai = a->i;
184: stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
185: #else
186: PetscMalloc1(mbs+1,&ai);
187: PetscMalloc1(nz,&aj);
188: for (i=0;i<mbs+1;i++)
189: ai[i] = a->i[i]+1;
190: for (i=0;i<nz;i++)
191: aj[i] = a->j[i]+1;
192: aa = a->a;
193: stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
194: baijmkl->ai1 = ai;
195: baijmkl->aj1 = aj;
196: #endif
197: stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
198: stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
199: stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
200: baijmkl->sparse_optimized = PETSC_TRUE;
201: }
202: return(0);
203: }
205: PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)206: {
208: Mat_SeqBAIJMKL *baijmkl;
209: Mat_SeqBAIJMKL *baijmkl_dest;
212: MatDuplicate_SeqBAIJ(A,op,M);
213: baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
214: PetscNewLog((*M),&baijmkl_dest);
215: (*M)->spptr = (void*)baijmkl_dest;
216: PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
217: baijmkl_dest->sparse_optimized = PETSC_FALSE;
218: MatSeqBAIJMKL_create_mkl_handle(A);
219: return(0);
220: }
222: PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)223: {
224: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
225: Mat_SeqBAIJMKL *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
226: const PetscScalar *x;
227: PetscScalar *y;
228: PetscErrorCode ierr;
229: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
232: /* If there are no nonzero entries, zero yy and return immediately. */
233: if(!a->nz) {
234: PetscInt i;
235: PetscInt m=a->mbs*A->rmap->bs;
236: VecGetArray(yy,&y);
237: for (i=0; i<m; i++) {
238: y[i] = 0.0;
239: }
240: VecRestoreArray(yy,&y);
241: return(0);
242: }
244: VecGetArrayRead(xx,&x);
245: VecGetArray(yy,&y);
247: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
248: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
249: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
250: if (!baijmkl->sparse_optimized) {
251: MatSeqBAIJMKL_create_mkl_handle(A);
252: }
254: /* Call MKL SpMV2 executor routine to do the MatMult. */
255: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
256: 257: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
258: VecRestoreArrayRead(xx,&x);
259: VecRestoreArray(yy,&y);
260: return(0);
261: }
263: PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)264: {
265: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
266: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
267: const PetscScalar *x;
268: PetscScalar *y;
269: PetscErrorCode ierr;
270: sparse_status_t stat;
273: /* If there are no nonzero entries, zero yy and return immediately. */
274: if(!a->nz) {
275: PetscInt i;
276: PetscInt n=a->nbs;
277: VecGetArray(yy,&y);
278: for (i=0; i<n; i++) {
279: y[i] = 0.0;
280: }
281: VecRestoreArray(yy,&y);
282: return(0);
283: }
285: VecGetArrayRead(xx,&x);
286: VecGetArray(yy,&y);
288: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
289: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
290: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
291: if (!baijmkl->sparse_optimized) {
292: MatSeqBAIJMKL_create_mkl_handle(A);
293: }
295: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
296: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
297: 298: PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
299: VecRestoreArrayRead(xx,&x);
300: VecRestoreArray(yy,&y);
301: return(0);
302: }
304: PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)305: {
306: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
307: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
308: const PetscScalar *x;
309: PetscScalar *y,*z;
310: PetscErrorCode ierr;
311: PetscInt m=a->mbs*A->rmap->bs;
312: PetscInt i;
314: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
317: /* If there are no nonzero entries, set zz = yy and return immediately. */
318: if(!a->nz) {
319: PetscInt i;
320: VecGetArrayPair(yy,zz,&y,&z);
321: for (i=0; i<m; i++) {
322: z[i] = y[i];
323: }
324: VecRestoreArrayPair(yy,zz,&y,&z);
325: return(0);
326: }
328: VecGetArrayRead(xx,&x);
329: VecGetArrayPair(yy,zz,&y,&z);
331: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
332: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
333: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
334: if (!baijmkl->sparse_optimized) {
335: MatSeqBAIJMKL_create_mkl_handle(A);
336: }
338: /* Call MKL sparse BLAS routine to do the MatMult. */
339: if (zz == yy) {
340: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
341: * with alpha and beta both set to 1.0. */
342: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
343: } else {
344: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
345: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
346: stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
347: for (i=0; i<m; i++) {
348: z[i] += y[i];
349: }
350: }
352: PetscLogFlops(2.0*a->nz);
353: VecRestoreArrayRead(xx,&x);
354: VecRestoreArrayPair(yy,zz,&y,&z);
355: return(0);
356: }
358: PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)359: {
360: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
361: Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
362: const PetscScalar *x;
363: PetscScalar *y,*z;
364: PetscErrorCode ierr;
365: PetscInt n=a->nbs*A->rmap->bs;
366: PetscInt i;
367: /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
368: sparse_status_t stat = SPARSE_STATUS_SUCCESS;
371: /* If there are no nonzero entries, set zz = yy and return immediately. */
372: if(!a->nz) {
373: PetscInt i;
374: VecGetArrayPair(yy,zz,&y,&z);
375: for (i=0; i<n; i++) {
376: z[i] = y[i];
377: }
378: VecRestoreArrayPair(yy,zz,&y,&z);
379: return(0);
380: }
382: VecGetArrayRead(xx,&x);
383: VecGetArrayPair(yy,zz,&y,&z);
385: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
386: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
387: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
388: if (!baijmkl->sparse_optimized) {
389: MatSeqBAIJMKL_create_mkl_handle(A);
390: }
392: /* Call MKL sparse BLAS routine to do the MatMult. */
393: if (zz == yy) {
394: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
395: * with alpha and beta both set to 1.0. */
396: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
397: } else {
398: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
399: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
400: stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
401: for (i=0; i<n; i++) {
402: z[i] += y[i];
403: }
404: }
406: PetscLogFlops(2.0*a->nz);
407: VecRestoreArrayRead(xx,&x);
408: VecRestoreArrayPair(yy,zz,&y,&z);
409: return(0);
410: }
412: PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)413: {
417: MatScale_SeqBAIJ(inA,alpha);
418: MatSeqBAIJMKL_create_mkl_handle(inA);
419: return(0);
420: }
422: PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)423: {
427: MatDiagonalScale_SeqBAIJ(A,ll,rr);
428: MatSeqBAIJMKL_create_mkl_handle(A);
429: return(0);
430: }
432: PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)433: {
437: MatAXPY_SeqBAIJ(Y,a,X,str);
438: if (str == SAME_NONZERO_PATTERN) {
439: /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
440: MatSeqBAIJMKL_create_mkl_handle(Y);
441: }
442: return(0);
443: }
444: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
445: * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ()
446: * routine, but can also be used to convert an assembled SeqBAIJ matrix
447: * into a SeqBAIJMKL one. */
448: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)449: {
451: Mat B = *newmat;
452: Mat_SeqBAIJMKL *baijmkl;
453: PetscBool sametype;
456: if (reuse == MAT_INITIAL_MATRIX) {
457: MatDuplicate(A,MAT_COPY_VALUES,&B);
458: }
460: PetscObjectTypeCompare((PetscObject)A,type,&sametype);
461: if (sametype) return(0);
463: PetscNewLog(B,&baijmkl);
464: B->spptr = (void*)baijmkl;
466: /* Set function pointers for methods that we inherit from BAIJ but override.
467: * We also parse some command line options below, since those determine some of the methods we point to. */
468: B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
469: 470: baijmkl->sparse_optimized = PETSC_FALSE;
472: PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
473: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);
475: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
476: *newmat = B;
477: return(0);
478: }
480: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)481: {
482: PetscErrorCode ierr;
485: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
486: MatAssemblyEnd_SeqBAIJ(A, mode);
487: MatSeqBAIJMKL_create_mkl_handle(A);
488: A->ops->destroy = MatDestroy_SeqBAIJMKL;
489: A->ops->mult = MatMult_SeqBAIJMKL_SpMV2;
490: A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2;
491: A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2;
492: A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
493: A->ops->scale = MatScale_SeqBAIJMKL;
494: A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL;
495: A->ops->axpy = MatAXPY_SeqBAIJMKL;
496: A->ops->duplicate = MatDuplicate_SeqBAIJMKL;
497: return(0);
498: }
499: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
501: /*@C
502: MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
503: This type inherits from BAIJ and is largely identical, but uses sparse BLAS
504: routines from Intel MKL whenever possible.
505: MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd506: operations are currently supported.
507: If the installed version of MKL supports the "SpMV2" sparse
508: inspector-executor routines, then those are used by default.
509: Default PETSc kernels are used otherwise.
511: Input Parameters:
512: + comm - MPI communicator, set to PETSC_COMM_SELF513: . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
514: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
515: . m - number of rows
516: . n - number of columns
517: . nz - number of nonzero blocks per block row (same for all rows)
518: - nnz - array containing the number of nonzero blocks in the various block rows
519: (possibly different for each block row) or NULL
522: Output Parameter:
523: . A - the matrix
525: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
526: MatXXXXSetPreallocation() paradgm instead of this routine directly.
527: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
529: Options Database Keys:
530: . -mat_no_unroll - uses code that does not unroll the loops in the
531: block calculations (much slower)
532: . -mat_block_size - size of the blocks to use
534: Level: intermediate
536: Notes:
537: The number of rows and columns must be divisible by blocksize.
539: If the nnz parameter is given then the nz parameter is ignored
541: A nonzero block is any block that as 1 or more nonzeros in it
543: The block AIJ format is fully compatible with standard Fortran 77
544: storage. That is, the stored row and column indices can begin at
545: either one (as in Fortran) or zero. See the users' manual for details.
547: Specify the preallocated storage with either nz or nnz (not both).
548: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
549: allocation. See Users-Manual: ch_mat for details.
550: matrices.
552: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
554: @*/
555: PetscErrorCodeMatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)556: {
560: MatCreate(comm,A);
561: MatSetSizes(*A,m,n,m,n);
562: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
563: MatSetType(*A,MATSEQBAIJMKL);
564: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
565: #else
566: PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
567: MatSetType(*A,MATSEQBAIJ);
568: MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
569: #endif
570: return(0);
571: }
573: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)574: {
578: MatSetType(A,MATSEQBAIJ);
579: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
580: MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
581: #else
582: PetscInfo(A,"MKL baij routines are not supported for used version of MKL. Using PETSc default routines. \n Please use version of MKL 11.3 and higher. \n");
583: #endif
584: return(0);
585: }