Actual source code: aijmkl.c

petsc-3.9.0 2018-04-07
/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
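
/* A minimal usage sketch (not part of this file; all names below are standard
 * PETSc API, with error checking omitted as in the rest of this listing):
 * the AIJMKL type is normally selected at runtime, e.g.
 *
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,n,n,n,n);
 *   MatSetType(A,MATSEQAIJMKL);     // or pass -mat_type seqaijmkl
 *
 * after which MatMult() and related operations dispatch to the MKL-backed
 * kernels defined in this file. */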

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */

typedef struct {
  PetscBool no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;     /* PETSc object state when the MKL handle was last built; used to detect a stale handle. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t csrA;       /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
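
/* Note on the 'state' field: every MKL-backed kernel below guards against a
 * stale handle with the idiom
 *
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
 *     MatSeqAIJMKL_create_mkl_handle(A);
 *   }
 *
 * so the handle is (re)built whenever the PETSc matrix has changed since the
 * last inspection. */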

extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL matrix to its base
   * PETSc type, so we ignore 'MatType type'. */
  Mat            B       = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL  *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
#endif

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }

  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
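
/* A typical call (sketch): converting back to plain AIJ goes through the
 * generic MatConvert() interface, which dispatches to the routine above:
 *
 *   MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);
 */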

PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL  *aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here; it appears that MatSetType()
   * is intended only for use when *building* a matrix, which is also how the
   * SuperLU matrix class handles this. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ       *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL    *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt         m,n;
  MatScalar        *aa;
  PetscInt         *aj,*ai;
  sparse_status_t  stat;
  PetscErrorCode   ierr;

  if (aijmkl->no_SpMV2) return(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  m = A->rmap->n;
  n = A->cmap->n;
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
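  /* For example (illustration only), in this CSR layout the 3x3 matrix
   *   [ 1 0 2 ]
   *   [ 0 0 3 ]
   *   [ 4 5 6 ]
   * is stored as aa = {1,2,3,4,5,6}, aj = {0,2,2,0,1,2}, ai = {0,2,3,6}. */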
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}

/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows, ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; the MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * the arrays will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type        = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode        = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag        = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  stat = mkl_sparse_optimize(aijmkl->csrA);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*) A->spptr;

  /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL  *aijmkl;
  Mat_SeqAIJMKL  *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*) A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*) (*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(*M);  /* Build the handle for the duplicate, not the original. */
  }
  return(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode  ierr;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*) A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
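
/* Sketch: eager inspection can be requested at runtime via the option parsed
 * in MatConvert_SeqAIJ_SeqAIJMKL() below, e.g.
 *
 *   ./app -mat_type seqaijmkl -mat_aijmkl_eager_inspection
 *
 * so that mkl_sparse_optimize() runs during assembly here rather than at the
 * first MatMult()-like operation. */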

PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMult; mkl_xcsrmv() computes
   * y = alpha*op(A)*x + beta*y, here with op(A) = A, alpha = 1.0, and beta = 0.0. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m=A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultTranspose; with transa = 't',
   * mkl_xcsrmv() computes y = alpha*A^T*x + beta*y, here with alpha = 1.0 and beta = 0.0. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n=A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char              transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates
     * y = alpha*A*x + beta*y, with beta = 1.0. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta = 0 to compute z = A*x,
     * then add y to z; the MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; the MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m=A->rmap->n;
  PetscInt          n=A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char              transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar       alpha = 1.0;
  PetscScalar       beta;
  char              matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj   = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa   = a->a;  /* Nonzero elements stored row-by-row. */
  ai   = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call the MKL sparse BLAS routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which with transa = 't' calculates
     * y = alpha*A^T*x + beta*y, with beta = 1.0. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta = 0 to compute z = A^T*x,
     * then add y to z; the MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl=(Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n=A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call the MKL SpMV2 executor routine to do the MatMultTransposeAdd. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A^T*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; the MKL sparse BLAS does not have a MatMultTransposeAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */

#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* If A is not known to be symmetric, fall back to the native SeqAIJ PtAP routine. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif

#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  /* If A is not known to be symmetric, fall back to the native SeqAIJ PtAP routine. */
  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif

/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL  *aijmkl;
  PetscBool      set;
  PetscBool      sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*) aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate        = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy          = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
    B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#ifndef PETSC_USE_COMPLEX
    B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
    B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif
#endif
    B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif
  } else {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}

/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored.

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode  MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
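
/* Example (a minimal sketch, with error checking omitted as in the rest of
 * this listing): create a 10x10 SEQAIJMKL matrix with at most 3 nonzeros per
 * row, to be filled with MatSetValues() and assembled as usual:
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,10,10,3,NULL,&A);
 *   ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
 *   MatDestroy(&A);
 */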

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}