/*
  Defines basic operations for the MATSEQAIJMKL matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but uses
  sparse BLAS operations from the Intel Math Kernel Library (MKL)
  wherever possible.
*/
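
/* Usage sketch (illustrative only; 'n' and 'nz' below are placeholder values):
 * an application selects this class at runtime with '-mat_type seqaijmkl', or
 * programmatically as below, and then uses it exactly like a MATSEQAIJ matrix:
 *
 *   Mat A;
 *   MatCreate(PETSC_COMM_SELF,&A);
 *   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
 *   MatSetType(A,MATSEQAIJMKL);
 *   MatSeqAIJSetPreallocation(A,nz,NULL);
 *   // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
 *   MatMult(A,x,y);  // dispatches to an MKL sparse BLAS kernel
 */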
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>

/* MKL include files. */
#include <mkl_spblas.h>  /* Sparse BLAS */
typedef struct {
  PetscBool           no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool           eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState    state;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  sparse_matrix_t     csrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;
#endif
} Mat_SeqAIJMKL;
extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATAIJMKL to its base PETSc type,
   * so we will ignore 'MatType type'. */
  Mat B = *newmat;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
#endif
  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy          = MatDestroy_SeqAIJ;
  B->ops->mult             = MatMult_SeqAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJ;
  B->ops->multadd          = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
  B->ops->matmult          = MatMatMult_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptap             = MatPtAP_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmult = MatTransposeMatMult_SeqAIJ_SeqAIJ;
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",NULL);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",NULL);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",NULL);
  }
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL*)B->spptr;

  if (aijmkl->sparse_optimized) {
    sparse_status_t stat;
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscFree(B->spptr);
  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ);

  *newmat = B;
  return(0);
}
PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
{
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an
   * spptr pointer. */
  if (aijmkl) {
    /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    if (aijmkl->sparse_optimized) {
      sparse_status_t stat = SPARSE_STATUS_SUCCESS;
      stat = mkl_sparse_destroy(aijmkl->csrA);
      if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
    }
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
    PetscFree(A->spptr);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ);
  /* Note that we do not call MatSetType() here; it appears to be intended only
   * for use while *building* a matrix, and this mirrors how the SuperLU matrix
   * class handles destruction. */
  MatDestroy_SeqAIJ(A);
  return(0);
}
/* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
 * handle, creates a new one, and then calls mkl_sparse_optimize().
 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
 * an unoptimized matrix handle here. */
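/* For reference, callers elsewhere in this file pair this routine with a
 * matrix-state check; this is a sketch of the lazy-update pattern used in the
 * SpMV2 routines below:
 *
 *   PetscObjectStateGet((PetscObject)A,&state);
 *   if (!aijmkl->sparse_optimized || aijmkl->state != state) {
 *     MatSeqAIJMKL_create_mkl_handle(A);
 *   }
 */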
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  return(0);
#else
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL   *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  PetscInt        m,n;
  MatScalar       *aa;
  PetscInt        *aj,*ai;
  sparse_status_t stat;
  PetscErrorCode  ierr;

  if (aijmkl->no_SpMV2) return(0);

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    stat = mkl_sparse_destroy(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_destroy");
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m  = A->rmap->n;
  n  = A->cmap->n;
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */
  if ((a->nz != 0) && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse inspector-executor routines don't like being passed an empty matrix. */
    stat = mkl_sparse_x_create_csr(&aijmkl->csrA,SPARSE_INDEX_BASE_ZERO,m,n,ai,ai+1,aj,aa);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to create matrix handle");
    stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
    stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
    stat = mkl_sparse_optimize(aijmkl->csrA);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
    aijmkl->sparse_optimized = PETSC_TRUE;
    PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  }

  return(0);
#endif
}
/* MatSeqAIJMKL_create_from_mkl_handle() creates a sequential AIJMKL matrix from an MKL sparse matrix handle.
 * We need this to implement MatMatMult() using the MKL inspector-executor routines, which return an (unoptimized)
 * matrix handle.
 * Note: This routine simply destroys and replaces the original matrix if MAT_REUSE_MATRIX has been specified, as
 * there is no good alternative. */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,MatReuse reuse,Mat *mat)
{
  PetscErrorCode      ierr;
  sparse_status_t     stat;
  sparse_index_base_t indexing;
  PetscInt            nrows,ncols;
  PetscInt            *aj,*ai,*dummy;
  MatScalar           *aa;
  Mat                 A;
  Mat_SeqAIJMKL       *aijmkl;

  /* Note: We must pass in &dummy below, since MKL cannot accept NULL for this output array that we don't actually want. */
  stat = mkl_sparse_x_export_csr(csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  if (reuse == MAT_REUSE_MATRIX) {
    MatDestroy(mat);
  }
  MatCreate(comm,&A);
  MatSetType(A,MATSEQAIJ);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,nrows,ncols);
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  MatSeqAIJSetPreallocationCSR(A,ai,aj,aa);

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl->csrA = csrA;

  /* The code below duplicates much of what is in MatSeqAIJMKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  stat = mkl_sparse_set_mv_hint(aijmkl->csrA,SPARSE_OPERATION_NON_TRANSPOSE,aijmkl->descr,1000);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set mv_hint");
  stat = mkl_sparse_set_memory_hint(aijmkl->csrA,SPARSE_MEMORY_AGGRESSIVE);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to set memory_hint");
  stat = mkl_sparse_optimize(aijmkl->csrA);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_optimize");
  aijmkl->sparse_optimized = PETSC_TRUE;
  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));

  *mat = A;
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
/* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
 * MatMatMultNumeric(). */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows,ncols;
  PetscInt            nz;
  PetscInt            *ai,*aj,*dummy;
  PetscScalar         *aa;
  PetscErrorCode      ierr;
  Mat_SeqAIJMKL       *aijmkl;
  sparse_status_t     stat;
  sparse_index_base_t indexing;

  aijmkl = (Mat_SeqAIJMKL*)A->spptr;

  /* Note: We must pass in &dummy below, since MKL cannot accept NULL for this output array that we don't actually want. */
  stat = mkl_sparse_x_export_csr(aijmkl->csrA,&indexing,&nrows,&ncols,&ai,&dummy,&aj,&aa);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete mkl_sparse_x_export_csr()");

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i=0; i<nrows; i++) {
    nz = ai[i+1] - ai[i];
    MatSetValues_SeqAIJ(A, 1, &i, nz, aj+ai[i], aa+ai[i], INSERT_VALUES);
  }

  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

  PetscObjectStateGet((PetscObject)A,&(aijmkl->state));
  /* We mark our matrix as having a valid, optimized MKL handle.
   * TODO: It is valid, but I am not sure if it is optimized. Need to ask MKL developers. */
  aijmkl->sparse_optimized = PETSC_TRUE;

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqAIJMKL *aijmkl;
  Mat_SeqAIJMKL *aijmkl_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  aijmkl      = (Mat_SeqAIJMKL*)A->spptr;
  aijmkl_dest = (Mat_SeqAIJMKL*)(*M)->spptr;
  PetscMemcpy(aijmkl_dest,aijmkl,sizeof(Mat_SeqAIJMKL));
  aijmkl_dest->sparse_optimized = PETSC_FALSE;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(*M);  /* Build the MKL handle for the new duplicate; A's handle is already current. */
  }
  return(0);
}
PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL  *aijmkl;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
   * extra information and some different methods, call the AssemblyEnd
   * routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication. */
  a->inode.use = PETSC_FALSE;  /* Must disable: otherwise the MKL routines won't get used. */
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
   * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
  aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  if (aijmkl->eager_inspection) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  return(0);
}
PetscErrorCode MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMult_SeqAIJ. */
  char transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt m = A->rmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<m; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMult. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  PetscScalar       alpha = 1.0;
  PetscScalar       beta  = 0.0;
  const PetscInt    *aj,*ai;
  char              matdescra[6];

  /* Variables not in MatMultTranspose_SeqAIJ. */
  char transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */
  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,y);

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y;
  PetscErrorCode    ierr;
  sparse_status_t   stat;
  PetscObjectState  state;

  /* If there are no nonzero entries, zero yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    PetscInt n = A->cmap->n;
    VecGetArray(yy,&y);
    for (i=0; i<n; i++) {
      y[i] = 0.0;
    }
    VecRestoreArray(yy,&y);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArray(yy,&y);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
  stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,y);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");

  PetscLogFlops(2.0*a->nz - a->nonzerorowcnt);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  char        transa = 'n';  /* Used to indicate to MKL that we are not computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          i;

  /* Variables not in MatMultAdd_SeqAIJ. */
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<m; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<m; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  const MatScalar   *aa;
  PetscErrorCode    ierr;
  PetscInt          m = A->rmap->n;
  PetscInt          n = A->cmap->n;
  const PetscInt    *aj,*ai;
  PetscInt          i;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  char        transa = 't';  /* Used to indicate to MKL that we are computing the transpose product. */
  PetscScalar alpha = 1.0;
  PetscScalar beta;
  char        matdescra[6];

  matdescra[0] = 'g';  /* Indicates to MKL that we are using a general CSR matrix. */
  matdescra[3] = 'c';  /* Indicates to MKL that we use C-style (0-based) indexing. */

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
    beta = 1.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
  } else {
    /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0 to compute z = A^T*x, then add y to z.
     * MKL sparse BLAS does not have a MatMultAdd equivalent. */
    beta = 0.0;
    mkl_xcsrmv(&transa,&m,&n,&alpha,matdescra,aa,aj,ai,ai+1,x,&beta,z);
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL*)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y,*z;
  PetscErrorCode    ierr;
  PetscInt          n = A->cmap->n;
  PetscInt          i;
  PetscObjectState  state;

  /* Variables not in MatMultTransposeAdd_SeqAIJ. */
  sparse_status_t stat = SPARSE_STATUS_SUCCESS;

  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscInt i;
    VecGetArrayPair(yy,zz,&y,&z);
    for (i=0; i<n; i++) {
      z[i] = y[i];
    }
    VecRestoreArrayPair(yy,zz,&y,&z);
    return(0);
  }

  VecGetArrayRead(xx,&x);
  VecGetArrayPair(yy,zz,&y,&z);

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscObjectStateGet((PetscObject)A,&state);
  if (!aijmkl->sparse_optimized || aijmkl->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }

  /* Call MKL sparse BLAS routine to do the MatMultTranspose. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,1.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    stat = mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE,1.0,aijmkl->csrA,aijmkl->descr,x,0.0,z);
    if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: error in mkl_sparse_x_mv");
    for (i=0; i<n; i++) {
      z[i] += y[i];
    }
  }

  PetscLogFlops(2.0*a->nz);
  VecRestoreArrayRead(xx,&x);
  VecRestoreArrayPair(yy,zz,&y,&z);
  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
/* Note that this code currently doesn't actually get used when MatMatMult() is called with MAT_REUSE_MATRIX, because
 * the MatMatMult() interface code calls MatMatMultNumeric() in this case.
 * For releases of MKL prior to version 18, update 2:
 * MKL has no notion of separately callable symbolic vs. numeric phases of sparse matrix-matrix multiply, so in the
 * MAT_REUSE_MATRIX case, the SeqAIJ routines end up being used. Even though this means that the (hopefully more
 * optimized) MKL routines do not get used, this probably is best because the MKL routines would waste time re-computing
 * the symbolic portion, whereas the native PETSc SeqAIJ routines will avoid this. */
PetscErrorCode MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_NON_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,Mat C)
{
  Mat_SeqAIJMKL       *a, *b, *c;
  sparse_matrix_t     csrA, csrB, csrC;
  PetscErrorCode      ierr;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrA,
                         SPARSE_OPERATION_NON_TRANSPOSE,descr_type_gen,csrB,
                         SPARSE_STAGE_FINALIZE_MULT,&csrC);

  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete numerical stage of sparse matrix-matrix multiply");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_SP2M */
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
PetscErrorCode MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL    *a, *b;
  sparse_matrix_t  csrA, csrB, csrC;
  PetscErrorCode   ierr;
  sparse_status_t  stat = SPARSE_STATUS_SUCCESS;
  PetscObjectState state;

  a = (Mat_SeqAIJMKL*)A->spptr;
  b = (Mat_SeqAIJMKL*)B->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)B,&state);
  if (!b->sparse_optimized || b->state != state) {
    MatSeqAIJMKL_create_mkl_handle(B);
  }
  csrA = a->csrA;
  csrB = b->csrA;

  stat = mkl_sparse_spmm(SPARSE_OPERATION_TRANSPOSE,csrA,csrB,&csrC);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete sparse matrix-matrix multiply");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);

  return(0);
}
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,Mat C)
{
  Mat_SeqAIJMKL       *a, *p, *c;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  c = (Mat_SeqAIJMKL*)C->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  csrC = c->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FINALIZE_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to finalize mkl_sparse_sypr");

  /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
  MatSeqAIJMKL_update_from_mkl_handle(C);

  return(0);
}
#endif
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
PetscErrorCode MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  Mat_SeqAIJMKL       *a, *p;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  sparse_status_t     stat = SPARSE_STATUS_SUCCESS;
  struct matrix_descr descr_type_gen;
  PetscObjectState    state;
  PetscErrorCode      ierr;

  MatIsSymmetricKnown(A,&set,&flag);
  if (!set || !flag) {
    MatPtAP_SeqAIJ_SeqAIJ(A,P,scall,fill,C);
    return(0);
  }

  if (scall == MAT_REUSE_MATRIX) {
    MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2(A,P,*C);
    return(0);
  }

  a = (Mat_SeqAIJMKL*)A->spptr;
  p = (Mat_SeqAIJMKL*)P->spptr;
  PetscObjectStateGet((PetscObject)A,&state);
  if (!a->sparse_optimized || a->state != state) {
    MatSeqAIJMKL_create_mkl_handle(A);
  }
  PetscObjectStateGet((PetscObject)P,&state);
  if (!p->sparse_optimized || p->state != state) {
    MatSeqAIJMKL_create_mkl_handle(P);
  }
  csrA = a->csrA;
  csrP = p->csrA;
  descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

  /* Note that the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  stat = mkl_sparse_sypr(SPARSE_OPERATION_TRANSPOSE,csrP,csrA,descr_type_gen,&csrC,SPARSE_STAGE_FULL_MULT);
  if (stat != SPARSE_STATUS_SUCCESS) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Intel MKL error: unable to complete full mkl_sparse_sypr");

  MatSeqAIJMKL_create_from_mkl_handle(PETSC_COMM_SELF,csrC,scall,C);
  MatSetOption(*C,MAT_SYMMETRIC,PETSC_TRUE);

  return(0);
}
#endif
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one. */
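/* For example, an assembled MATSEQAIJ matrix A can be converted in place with
 *
 *   MatConvert(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
 *
 * which is the in-place path exercised by MatCreate_SeqAIJMKL() below. */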
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat           B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool     set;
  PetscBool     sametype;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscObjectTypeCompare((PetscObject)A,type,&sametype);
  if (sametype) return(0);

  PetscNewLog(B,&aijmkl);
  B->spptr = (void*)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  aijmkl->no_SpMV2 = PETSC_FALSE;  /* Default to using the SpMV2 routines if our MKL supports them. */
#else
  aijmkl->no_SpMV2 = PETSC_TRUE;
#endif
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"AIJMKL Options","Mat");
  PetscOptionsBool("-mat_aijmkl_no_spmv2","NoSPMV2","None",(PetscBool)aijmkl->no_SpMV2,(PetscBool*)&aijmkl->no_SpMV2,&set);
  PetscOptionsBool("-mat_aijmkl_eager_inspection","Eager Inspection","None",(PetscBool)aijmkl->eager_inspection,(PetscBool*)&aijmkl->eager_inspection,&set);
  PetscOptionsEnd();
#ifndef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
  if (!aijmkl->no_SpMV2) {
    PetscInfo(B,"User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n");
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
    B->ops->matmult          = MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    B->ops->matmultnumeric   = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#ifndef PETSC_USE_COMPLEX
    B->ops->ptap             = MatPtAP_SeqAIJMKL_SeqAIJMKL_SpMV2;
    B->ops->ptapnumeric      = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif
#endif
    B->ops->transposematmult = MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2;
#endif
  } else {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }

  PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaijmkl_seqaij_C",MatConvert_SeqAIJMKL_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqdense_seqaijmkl_C",MatMatMult_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_seqdense_seqaijmkl_C",MatMatMultSymbolic_SeqDense_SeqAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqdense_seqaijmkl_C",MatMatMultNumeric_SeqDense_SeqAIJ);
  if (!aijmkl->no_SpMV2) {
#ifdef PETSC_HAVE_MKL_SPARSE_OPTIMIZE
    PetscObjectComposeFunction((PetscObject)B,"MatMatMult_seqaijmkl_seqaijmkl_C",MatMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#ifdef PETSC_HAVE_MKL_SPARSE_SP2M
    PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_seqaijmkl_seqaijmkl_C",MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
    PetscObjectComposeFunction((PetscObject)B,"MatTransposeMatMult_seqaijmkl_seqaijmkl_C",MatTransposeMatMult_SeqAIJMKL_SeqAIJMKL_SpMV2);
#endif
  }

  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJMKL);
  *newmat = B;
  return(0);
}
/*@C
   MatCreateSeqAIJMKL - Creates a sparse matrix of type SEQAIJMKL.
   This type inherits from AIJ and is largely identical, but uses sparse BLAS
   routines from Intel MKL whenever possible.
   If the installed version of MKL supports the "SpMV2" sparse
   inspector-executor routines, then those are used by default.
   MatMult, MatMultAdd, MatMultTranspose, MatMultTransposeAdd, MatMatMult, MatTransposeMatMult, and MatPtAP (for
   symmetric A) operations are currently supported.
   Note that MKL version 18, update 2 or later is required for MatPtAP/MatPtAPNumeric and MatMatMultNumeric.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
-  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, performing this step the first time the matrix is applied

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, MKL, sparse, parallel

.seealso: MatCreate(), MatCreateMPIAIJMKL(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQAIJMKL);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,nnz);
  return(0);
}
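
/* Usage sketch for MatCreateSeqAIJMKL() (illustrative only; the sizes and
 * nonzero count below are placeholder values):
 *
 *   Mat A;
 *   MatCreateSeqAIJMKL(PETSC_COMM_SELF,100,100,3,NULL,&A);  // ~3 nonzeros per row
 *   // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as with any AIJ matrix ...
 */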
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqAIJMKL(A,MATSEQAIJMKL,MAT_INPLACE_MATRIX,&A);
  return(0);
}