Actual source code: baijmkl.c

petsc-3.9.0 2018-04-07
  1: /*
  2:   Defines basic operations for the MATSEQBAIJMKL matrix class.
  3:   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 
  4:   wherever possible. If the MKL version in use is older than 11.3, the
  5:   default PETSc code for sparse matrix operations is used.
  6: */
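/*
  Typical usage (a minimal sketch, not taken from this file; the type string
  "seqbaijmkl" is assumed to be the registered name for MATSEQBAIJMKL, and
  m, n, bs, nz below are placeholders):

      Mat A;
      MatCreate(PETSC_COMM_SELF,&A);
      MatSetSizes(A,m,n,m,n);
      MatSetType(A,MATSEQBAIJMKL);              // or MatSetFromOptions() with -mat_type seqbaijmkl
      MatSeqBAIJSetPreallocation(A,bs,nz,NULL); // bs x bs blocks, nz nonzero blocks per block row

  MatCreateSeqBAIJMKL() at the bottom of this file wraps this sequence.
*/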

  8:  #include <../src/mat/impls/baij/seq/baij.h>
  9:  #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h>


 12: /* MKL include files. */
 13: #include <mkl_spblas.h>  /* Sparse BLAS */

 15: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
 16: typedef struct {
 17:   PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 18:   sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 19:   struct matrix_descr descr;
 20: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
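  /* One-based copies of the BSR block row pointer and block column index arrays;
     these are kept only when the installed MKL cannot accept zero-based block
     indices (see MatSeqBAIJMKL_create_mkl_handle() below). */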
 21:   PetscInt *ai1;
 22:   PetscInt *aj1;
 23: #endif
 24: } Mat_SeqBAIJMKL;

 26: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
 27: extern PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat,MatAssemblyType);

 29: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 30: {
 31:   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
 32:   /* so we will ignore 'MatType type'. */
 34:   Mat            B        = *newmat;
 35:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;

 38:   if (reuse == MAT_INITIAL_MATRIX) {
 39:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 40:   }

 42:   /* Reset the original function pointers. */
 43:   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
 44:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
 45:   B->ops->destroy          = MatDestroy_SeqBAIJ;
 46:   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
 47:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
 48:   B->ops->scale            = MatScale_SeqBAIJ;
 49:   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
 50:   B->ops->axpy             = MatAXPY_SeqBAIJ;
 51: 
 52:   switch (A->rmap->bs) {
 53:     case 1:
 54:       B->ops->mult    = MatMult_SeqBAIJ_1;
 55:       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
 56:       break;
 57:     case 2:
 58:       B->ops->mult    = MatMult_SeqBAIJ_2;
 59:       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
 60:       break;
 61:     case 3:
 62:       B->ops->mult    = MatMult_SeqBAIJ_3;
 63:       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
 64:       break;
 65:     case 4:
 66:       B->ops->mult    = MatMult_SeqBAIJ_4;
 67:       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
 68:       break;
 69:     case 5:
 70:       B->ops->mult    = MatMult_SeqBAIJ_5;
 71:       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
 72:       break;
 73:     case 6:
 74:       B->ops->mult    = MatMult_SeqBAIJ_6;
 75:       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
 76:       break;
 77:     case 7:
 78:       B->ops->mult    = MatMult_SeqBAIJ_7;
 79:       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
 80:       break;
 81:     case 11:
 82:       B->ops->mult    = MatMult_SeqBAIJ_11;
 83:       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
 84:       break;
 85:     case 15:
 86:       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
 87:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 88:       break;
 89:     default:
 90:       B->ops->mult    = MatMult_SeqBAIJ_N;
 91:       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
 92:       break;
 93:   }
 94:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",NULL);

 96:   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 
 97:    * simply involves destroying the MKL sparse matrix handle and then freeing 
 98:    * the spptr pointer. */
 99:   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL*)B->spptr;

101:   if (baijmkl->sparse_optimized) {
102:     sparse_status_t stat;
103:     stat = mkl_sparse_destroy(baijmkl->bsrA);
104:     if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
105:   }
106: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
107:    PetscFree(baijmkl->ai1);
108:    PetscFree(baijmkl->aj1);
109: #endif  
110:   PetscFree(B->spptr);

112:   /* Change the type of B to MATSEQBAIJ. */
113:   PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ);

115:   *newmat = B;
116:   return(0);
117: }

119: PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
120: {
122:   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
123: 
125:   if (baijmkl) {
126:     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
127:     if (baijmkl->sparse_optimized) {
128:       sparse_status_t stat = SPARSE_STATUS_SUCCESS;
129:       stat = mkl_sparse_destroy(baijmkl->bsrA);
130:       if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
131:     }
132: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
133:    PetscFree(baijmkl->ai1);
134:    PetscFree(baijmkl->aj1);
135: #endif    
136:     PetscFree(A->spptr);
137:   }

139:   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
140:    * to destroy everything that remains. */
141:   PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ);
142:   MatDestroy_SeqBAIJ(A);
143:   return(0);
144: }

146: PETSC_INTERN PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
147: {
149:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
150:   Mat_SeqBAIJMKL  *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
151:   PetscInt        mbs, nbs, nz, bs, i;
152:   MatScalar       *aa;
153:   PetscInt        *aj,*ai;
154:   sparse_status_t stat;

157:   if (baijmkl->sparse_optimized) {
158:     /* Matrix has been previously assembled and optimized. Must destroy old
159:      * matrix handle before running the optimization step again. */
160: #ifndef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
161:     PetscFree(baijmkl->ai1);
162:     PetscFree(baijmkl->aj1);
163: #endif     
164:     stat = mkl_sparse_destroy(baijmkl->bsrA);CHKERRMKL(stat);
165:   }
166:   baijmkl->sparse_optimized = PETSC_FALSE;

168:   /* Now perform the SpMV2 setup and matrix optimization. */
169:   baijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
170:   baijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
171:   baijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
172:   mbs = a->mbs;
173:   nbs = a->nbs;
174:   nz  = a->nz;
175:   bs  = A->rmap->bs;
176:   aa  = a->a;
177: 
 178:   if ((nz != 0) && !(A->structure_only)) {
179:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
180:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
181: #ifdef PETSC_MKL_SUPPORTS_BAIJ_ZERO_BASED
182:     aj   = a->j;
183:     ai   = a->i;
184:     stat = mkl_sparse_x_create_bsr(&(baijmkl->bsrA),SPARSE_INDEX_BASE_ZERO,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
185: #else
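    /* This MKL build has no zero-based BSR interface, so create one-based copies
     * of the block row pointer and block column index arrays for MKL and stash
     * them in ai1/aj1 so they can be freed later. */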
186:     PetscMalloc1(mbs+1,&ai);
187:     PetscMalloc1(nz,&aj);
188:     for (i=0;i<mbs+1;i++)
189:       ai[i] = a->i[i]+1;
190:     for (i=0;i<nz;i++)
191:       aj[i] = a->j[i]+1;
192:     aa   = a->a;
193:     stat = mkl_sparse_x_create_bsr(&baijmkl->bsrA,SPARSE_INDEX_BASE_ONE,SPARSE_LAYOUT_COLUMN_MAJOR,mbs,nbs,bs,ai,ai+1,aj,aa);CHKERRMKL(stat);
194:     baijmkl->ai1 = ai;
195:     baijmkl->aj1 = aj;
196: #endif
197:     stat = mkl_sparse_set_mv_hint(baijmkl->bsrA,SPARSE_OPERATION_NON_TRANSPOSE,baijmkl->descr,1000);CHKERRMKL(stat);
198:     stat = mkl_sparse_set_memory_hint(baijmkl->bsrA,SPARSE_MEMORY_AGGRESSIVE);CHKERRMKL(stat);
199:     stat = mkl_sparse_optimize(baijmkl->bsrA);CHKERRMKL(stat);
200:     baijmkl->sparse_optimized = PETSC_TRUE;
201:   }
202:   return(0);
203: }

205: PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
206: {
208:   Mat_SeqBAIJMKL *baijmkl;
209:   Mat_SeqBAIJMKL *baijmkl_dest;

212:   MatDuplicate_SeqBAIJ(A,op,M);
213:   baijmkl = (Mat_SeqBAIJMKL*) A->spptr;
214:   PetscNewLog((*M),&baijmkl_dest);
215:   (*M)->spptr = (void*)baijmkl_dest;
216:   PetscMemcpy(baijmkl_dest,baijmkl,sizeof(Mat_SeqBAIJMKL));
217:   baijmkl_dest->sparse_optimized = PETSC_FALSE;
218:   MatSeqBAIJMKL_create_mkl_handle(A);
219:   return(0);
220: }

222: PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
223: {
224:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
225:   Mat_SeqBAIJMKL     *baijmkl=(Mat_SeqBAIJMKL*)A->spptr;
226:   const PetscScalar  *x;
227:   PetscScalar        *y;
228:   PetscErrorCode     ierr;
229:   sparse_status_t    stat = SPARSE_STATUS_SUCCESS;

232:   /* If there are no nonzero entries, zero yy and return immediately. */
233:   if(!a->nz) {
234:     PetscInt i;
235:     PetscInt m=a->mbs*A->rmap->bs;
236:     VecGetArray(yy,&y);
237:     for (i=0; i<m; i++) {
238:       y[i] = 0.0;
239:     }
240:     VecRestoreArray(yy,&y);
241:     return(0);
242:   }

244:   VecGetArrayRead(xx,&x);
245:   VecGetArray(yy,&y);

247:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
248:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
249:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
250:   if (!baijmkl->sparse_optimized) {
251:     MatSeqBAIJMKL_create_mkl_handle(A);
252:   }

254:   /* Call MKL SpMV2 executor routine to do the MatMult. */
255:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
256: 
257:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
258:   VecRestoreArrayRead(xx,&x);
259:   VecRestoreArray(yy,&y);
260:   return(0);
261: }

263: PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
264: {
265:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
266:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
267:   const PetscScalar *x;
268:   PetscScalar       *y;
269:   PetscErrorCode    ierr;
270:   sparse_status_t   stat;

273:   /* If there are no nonzero entries, zero yy and return immediately. */
274:   if(!a->nz) {
275:     PetscInt i;
 276:     PetscInt n=a->nbs*A->rmap->bs;
277:     VecGetArray(yy,&y);
278:     for (i=0; i<n; i++) {
279:       y[i] = 0.0;
280:     }
281:     VecRestoreArray(yy,&y);
282:     return(0);
283:   }

285:   VecGetArrayRead(xx,&x);
286:   VecGetArray(yy,&y);

288:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
289:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
290:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
291:   if (!baijmkl->sparse_optimized) {
292:     MatSeqBAIJMKL_create_mkl_handle(A);
293:   }

295:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
296:   stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,y);CHKERRMKL(stat);
297: 
298:   PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
299:   VecRestoreArrayRead(xx,&x);
300:   VecRestoreArray(yy,&y);
301:   return(0);
302: }

304: PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
305: {
306:   Mat_SeqBAIJ        *a       = (Mat_SeqBAIJ*)A->data;
307:   Mat_SeqBAIJMKL     *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
308:   const PetscScalar  *x;
309:   PetscScalar        *y,*z;
310:   PetscErrorCode     ierr;
311:   PetscInt           m=a->mbs*A->rmap->bs;
312:   PetscInt           i;

314:   sparse_status_t stat = SPARSE_STATUS_SUCCESS;

317:   /* If there are no nonzero entries, set zz = yy and return immediately. */
318:   if(!a->nz) {
319:     PetscInt i;
320:     VecGetArrayPair(yy,zz,&y,&z);
321:     for (i=0; i<m; i++) {
322:       z[i] = y[i];
323:     }
324:     VecRestoreArrayPair(yy,zz,&y,&z);
325:     return(0);
326:   }

328:   VecGetArrayRead(xx,&x);
329:   VecGetArrayPair(yy,zz,&y,&z);

331:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
332:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
333:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
334:   if (!baijmkl->sparse_optimized) {
335:     MatSeqBAIJMKL_create_mkl_handle(A);
336:   }

338:   /* Call MKL sparse BLAS routine to do the MatMult. */
339:   if (zz == yy) {
340:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
341:      * with alpha and beta both set to 1.0. */
342:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
343:   } else {
344:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
345:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
346:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
347:     for (i=0; i<m; i++) {
348:       z[i] += y[i];
349:     }
350:   }

352:   PetscLogFlops(2.0*a->nz);
353:   VecRestoreArrayRead(xx,&x);
354:   VecRestoreArrayPair(yy,zz,&y,&z);
355:   return(0);
356: }

358: PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
359: {
360:   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ*)A->data;
361:   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL*)A->spptr;
362:   const PetscScalar *x;
363:   PetscScalar       *y,*z;
364:   PetscErrorCode    ierr;
365:   PetscInt          n=a->nbs*A->rmap->bs;
366:   PetscInt          i;
367:   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
368:   sparse_status_t   stat = SPARSE_STATUS_SUCCESS;

371:   /* If there are no nonzero entries, set zz = yy and return immediately. */
372:   if(!a->nz) {
373:     PetscInt i;
374:     VecGetArrayPair(yy,zz,&y,&z);
375:     for (i=0; i<n; i++) {
376:       z[i] = y[i];
377:     }
378:     VecRestoreArrayPair(yy,zz,&y,&z);
379:     return(0);
380:   }

382:   VecGetArrayRead(xx,&x);
383:   VecGetArrayPair(yy,zz,&y,&z);

385:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 
386:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 
387:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
388:   if (!baijmkl->sparse_optimized) {
389:     MatSeqBAIJMKL_create_mkl_handle(A);
390:   }

392:   /* Call MKL sparse BLAS routine to do the MatMult. */
393:   if (zz == yy) {
394:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 
395:      * with alpha and beta both set to 1.0. */
396:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,1.0,z);CHKERRMKL(stat);
397:   } else {
398:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 
399:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
400:     stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,baijmkl->bsrA,baijmkl->descr,x,0.0,z);CHKERRMKL(stat);
401:     for (i=0; i<n; i++) {
402:       z[i] += y[i];
403:     }
404:   }

406:   PetscLogFlops(2.0*a->nz);
407:   VecRestoreArrayRead(xx,&x);
408:   VecRestoreArrayPair(yy,zz,&y,&z);
409:   return(0);
410: }

412: PetscErrorCode MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)
413: {

417:   MatScale_SeqBAIJ(inA,alpha);
418:   MatSeqBAIJMKL_create_mkl_handle(inA);
419:   return(0);
420: }

422: PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)
423: {

427:   MatDiagonalScale_SeqBAIJ(A,ll,rr);
428:   MatSeqBAIJMKL_create_mkl_handle(A);
429:   return(0);
430: }

432: PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)
433: {

437:   MatAXPY_SeqBAIJ(Y,a,X,str);
438:   if (str == SAME_NONZERO_PATTERN) {
439:     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
440:     MatSeqBAIJMKL_create_mkl_handle(Y);
441:   }
442:   return(0);
443: }
444: /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
 445:  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqBAIJMKL()
446:  * routine, but can also be used to convert an assembled SeqBAIJ matrix
447:  * into a SeqBAIJMKL one. */
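/* A minimal conversion sketch (illustrative only; assumes A is an already
 * assembled SEQBAIJ matrix and that MatConvert() can reach this converter):
 *
 *     MatConvert(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
 */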
448: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
449: {
451:   Mat            B = *newmat;
452:   Mat_SeqBAIJMKL *baijmkl;
453:   PetscBool      sametype;

456:   if (reuse == MAT_INITIAL_MATRIX) {
457:     MatDuplicate(A,MAT_COPY_VALUES,&B);
458:   }

460:   PetscObjectTypeCompare((PetscObject)A,type,&sametype);
461:   if (sametype) return(0);

463:   PetscNewLog(B,&baijmkl);
464:   B->spptr = (void*)baijmkl;

 466:   /* Set function pointers for methods that we inherit from BAIJ but override here;
 467:    * the remaining overrides are installed in MatAssemblyEnd_SeqBAIJMKL(). */
468:   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJMKL;
469: 
470:   baijmkl->sparse_optimized = PETSC_FALSE;

472:   PetscObjectComposeFunction((PetscObject)B,"MatScale_SeqBAIJMKL_C",MatScale_SeqBAIJMKL);
473:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaijmkl_seqbaij_C",MatConvert_SeqBAIJMKL_SeqBAIJ);

475:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJMKL);
476:   *newmat = B;
477:   return(0);
478: }

480: PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
481: {
482:   PetscErrorCode  ierr;

485:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
486:   MatAssemblyEnd_SeqBAIJ(A, mode);
487:   MatSeqBAIJMKL_create_mkl_handle(A);
488:   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
489:   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
490:   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
491:   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
492:   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
493:   A->ops->scale            = MatScale_SeqBAIJMKL;
494:   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
495:   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
496:   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
497:   return(0);
498: }
499: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

501: /*@C
502:    MatCreateSeqBAIJMKL - Creates a sparse matrix of type SEQBAIJMKL.
503:    This type inherits from BAIJ and is largely identical, but uses sparse BLAS 
504:    routines from Intel MKL whenever possible.
505:    MatMult, MatMultAdd, MatMultTranspose, and MatMultTransposeAdd 
506:    operations are currently supported.
507:    If the installed version of MKL supports the "SpMV2" sparse 
508:    inspector-executor routines, then those are used by default. 
509:    Default PETSc kernels are used otherwise. 

511:    Input Parameters:
512: +  comm - MPI communicator, set to PETSC_COMM_SELF
 513: .  bs - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
514:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
515: .  m - number of rows
516: .  n - number of columns
517: .  nz - number of nonzero blocks  per block row (same for all rows)
518: -  nnz - array containing the number of nonzero blocks in the various block rows
519:          (possibly different for each block row) or NULL


522:    Output Parameter:
523: .  A - the matrix

525:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
 526:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
527:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

529:    Options Database Keys:
 530: +   -mat_no_unroll - uses code that does not unroll the loops in the
 531:                      block calculations (much slower)
 532: -    -mat_block_size - size of the blocks to use

534:    Level: intermediate

536:    Notes:
537:    The number of rows and columns must be divisible by blocksize.

539:    If the nnz parameter is given then the nz parameter is ignored

 541:    A nonzero block is any block that has 1 or more nonzeros in it

543:    The block AIJ format is fully compatible with standard Fortran 77
544:    storage.  That is, the stored row and column indices can begin at
545:    either one (as in Fortran) or zero.  See the users' manual for details.

547:    Specify the preallocated storage with either nz or nnz (not both).
548:    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
 549:    allocation.  See Users-Manual: ch_mat for details.

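   Example (a minimal sketch; the block size, dimensions, and per-row block
   count below are illustrative only):
.vb
   Mat A;
   MatCreateSeqBAIJMKL(PETSC_COMM_SELF,2,100,100,5,NULL,&A);
   ... insert block entries with MatSetValuesBlocked(), then assemble ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
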
552: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()

554: @*/
555: PetscErrorCode  MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
556: {

560:   MatCreate(comm,A);
561:   MatSetSizes(*A,m,n,m,n);
562: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE    
563:   MatSetType(*A,MATSEQBAIJMKL);
564:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
565: #else
 566:   PetscInfo(A,"MKL BAIJ routines are not supported by the installed MKL version; using PETSc default routines.\nPlease use MKL version 11.3 or higher.\n");
567:   MatSetType(*A,MATSEQBAIJ);
568:   MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);
569: #endif
570:   return(0);
571: }

573: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
574: {

578:   MatSetType(A,MATSEQBAIJ);
579: #ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE  
580:   MatConvert_SeqBAIJ_SeqBAIJMKL(A,MATSEQBAIJMKL,MAT_INPLACE_MATRIX,&A);
581: #else
 582:   PetscInfo(A,"MKL BAIJ routines are not supported by the installed MKL version; using PETSc default routines.\nPlease use MKL version 11.3 or higher.\n");
583: #endif
584:   return(0);
585: }