Actual source code: matmatmult.c

petsc-3.13.0 2020-03-29
Report Typos and Errors

  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5: */

  7:  #include <../src/mat/impls/aij/seq/aij.h>
  8:  #include <../src/mat/utils/freespace.h>
  9:  #include <petscbt.h>
 10:  #include <petsc/private/isimpl.h>
 11:  #include <../src/mat/impls/dense/seq/dense.h>

 13: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
 14: {

 18:   if (C->ops->matmultnumeric) {
 19:     (*C->ops->matmultnumeric)(A,B,C);
 20:   } else {
 21:     MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);
 22:   }
 23:   return(0);
 24: }

 26: /* Modified from MatCreateSeqAIJWithArrays() */
 27: PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat mat)
 28: {
 30:   PetscInt       ii;
 31:   Mat_SeqAIJ     *aij;

 34:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
 35:   MatSetSizes(mat,m,n,m,n);

 37:   MatSetType(mat,MATSEQAIJ);
 38:   MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,0);
 39:   aij  = (Mat_SeqAIJ*)(mat)->data;
 40:   PetscMalloc1(m,&aij->imax);
 41:   PetscMalloc1(m,&aij->ilen);

 43:   aij->i            = i;
 44:   aij->j            = j;
 45:   aij->a            = a;
 46:   aij->singlemalloc = PETSC_FALSE;
 47:   aij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
 48:   aij->free_a       = PETSC_FALSE;
 49:   aij->free_ij      = PETSC_FALSE;

 51:   for (ii=0; ii<m; ii++) {
 52:     aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
 53:   }

 55:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
 56:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
 57:   return(0);
 58: }

 60: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
 61: {
 62:   PetscErrorCode      ierr;
 63:   Mat_Product         *product = C->product;
 64:   MatProductAlgorithm alg;
 65:   PetscBool           flg;

 68:   if (product) {
 69:     alg = product->alg;
 70:   } else {
 71:     alg = "sorted";
 72:   }

 74:   /* sorted */
 75:   PetscStrcmp(alg,"sorted",&flg);
 76:   if (flg) {
 77:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);
 78:     return(0);
 79:   }

 81:   /* scalable */
 82:   PetscStrcmp(alg,"scalable",&flg);
 83:   if (flg) {
 84:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);
 85:     return(0);
 86:   }

 88:   /* scalable_fast */
 89:   PetscStrcmp(alg,"scalable_fast",&flg);
 90:   if (flg) {
 91:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);
 92:     return(0);
 93:   }

 95:   /* heap */
 96:   PetscStrcmp(alg,"heap",&flg);
 97:   if (flg) {
 98:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);
 99:     return(0);
100:   }

102:   /* btheap */
103:   PetscStrcmp(alg,"btheap",&flg);
104:   if (flg) {
105:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);
106:     return(0);
107:   }

109:   /* llcondensed */
110:   PetscStrcmp(alg,"llcondensed",&flg);
111:   if (flg) {
112:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);
113:     return(0);
114:   }

116:   /* rowmerge */
117:   PetscStrcmp(alg,"rowmerge",&flg);
118:   if (flg) {
119:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);
120:     return(0);
121:   }

123: #if defined(PETSC_HAVE_HYPRE)
124:   PetscStrcmp(alg,"hypre",&flg);
125:   if (flg) {
126:     MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);
127:     return(0);
128:   }
129: #endif

131:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
132:   return(0);
133: }

135: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
136: {
137:   PetscErrorCode     ierr;
138:   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
139:   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
140:   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
141:   PetscReal          afill;
142:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
143:   PetscTable         ta;
144:   PetscBT            lnkbt;
145:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

148:   /* Get ci and cj */
149:   /*---------------*/
150:   /* Allocate ci array, arrays for fill computation and */
151:   /* free space for accumulating nonzero column info */
152:   PetscMalloc1(am+2,&ci);
153:   ci[0] = 0;

155:   /* create and initialize a linked list */
156:   PetscTableCreate(bn,bn,&ta);
157:   MatRowMergeMax_SeqAIJ(b,bm,ta);
158:   PetscTableGetCount(ta,&Crmax);
159:   PetscTableDestroy(&ta);

161:   PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);

163:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
164:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);

166:   current_space = free_space;

168:   /* Determine ci and cj */
169:   for (i=0; i<am; i++) {
170:     anzi = ai[i+1] - ai[i];
171:     aj   = a->j + ai[i];
172:     for (j=0; j<anzi; j++) {
173:       brow = aj[j];
174:       bnzj = bi[brow+1] - bi[brow];
175:       bj   = b->j + bi[brow];
176:       /* add non-zero cols of B into the sorted linked list lnk */
177:       PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);
178:     }
179:     cnzi = lnk[0];

181:     /* If free space is not available, make more free space */
182:     /* Double the amount of total space in the list */
183:     if (current_space->local_remaining<cnzi) {
184:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
185:       ndouble++;
186:     }

188:     /* Copy data into free space, then initialize lnk */
189:     PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);

191:     current_space->array           += cnzi;
192:     current_space->local_used      += cnzi;
193:     current_space->local_remaining -= cnzi;

195:     ci[i+1] = ci[i] + cnzi;
196:   }

198:   /* Column indices are in the list of free space */
199:   /* Allocate space for cj, initialize cj, and */
200:   /* destroy list of free space and other temporary array(s) */
201:   PetscMalloc1(ci[am]+1,&cj);
202:   PetscFreeSpaceContiguous(&free_space,cj);
203:   PetscLLCondensedDestroy(lnk,lnkbt);

205:   /* put together the new symbolic matrix */
206:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
207:   MatSetBlockSizesFromMats(C,A,B);
208:   MatSetType(C,((PetscObject)A)->type_name);

210:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
211:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
212:   c                         = (Mat_SeqAIJ*)(C->data);
213:   c->free_a                 = PETSC_FALSE;
214:   c->free_ij                = PETSC_TRUE;
215:   c->nonew                  = 0;

217:   /* fast, needs non-scalable O(bn) array 'abdense' */
218:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

220:   /* set MatInfo */
221:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
222:   if (afill < 1.0) afill = 1.0;
223:   c->maxnz                     = ci[am];
224:   c->nz                        = ci[am];
225:   C->info.mallocs           = ndouble;
226:   C->info.fill_ratio_given  = fill;
227:   C->info.fill_ratio_needed = afill;

229: #if defined(PETSC_USE_INFO)
230:   if (ci[am]) {
231:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
232:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
233:   } else {
234:     PetscInfo(C,"Empty matrix product\n");
235:   }
236: #endif
237:   return(0);
238: }

240: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
241: {
243:   PetscLogDouble flops=0.0;
244:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
245:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
246:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
247:   PetscInt       *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
248:   PetscInt       am   =A->rmap->n,cm=C->rmap->n;
249:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
250:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
251:   PetscScalar    *ab_dense;

254:   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
255:     PetscMalloc1(ci[cm]+1,&ca);
256:     c->a      = ca;
257:     c->free_a = PETSC_TRUE;
258:   } else {
259:     ca        = c->a;
260:   }
261:   if (!c->matmult_abdense) {
262:     PetscCalloc1(B->cmap->N,&ab_dense);
263:     c->matmult_abdense = ab_dense;
264:   } else {
265:     ab_dense = c->matmult_abdense;
266:   }

268:   /* clean old values in C */
269:   PetscArrayzero(ca,ci[cm]);
270:   /* Traverse A row-wise. */
271:   /* Build the ith row in C by summing over nonzero columns in A, */
272:   /* the rows of B corresponding to nonzeros of A. */
273:   for (i=0; i<am; i++) {
274:     anzi = ai[i+1] - ai[i];
275:     for (j=0; j<anzi; j++) {
276:       brow = aj[j];
277:       bnzi = bi[brow+1] - bi[brow];
278:       bjj  = bj + bi[brow];
279:       baj  = ba + bi[brow];
280:       /* perform dense axpy */
281:       valtmp = aa[j];
282:       for (k=0; k<bnzi; k++) {
283:         ab_dense[bjj[k]] += valtmp*baj[k];
284:       }
285:       flops += 2*bnzi;
286:     }
287:     aj += anzi; aa += anzi;

289:     cnzi = ci[i+1] - ci[i];
290:     for (k=0; k<cnzi; k++) {
291:       ca[k]          += ab_dense[cj[k]];
292:       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
293:     }
294:     flops += cnzi;
295:     cj    += cnzi; ca += cnzi;
296:   }
297:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
298:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
299:   PetscLogFlops(flops);
300:   return(0);
301: }

303: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
304: {
306:   PetscLogDouble flops=0.0;
307:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
308:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
309:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
310:   PetscInt       *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
311:   PetscInt       am   = A->rmap->N,cm=C->rmap->N;
312:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
313:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
314:   PetscInt       nextb;

317:   if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
318:     PetscMalloc1(ci[cm]+1,&ca);
319:     c->a      = ca;
320:     c->free_a = PETSC_TRUE;
321:   }

323:   /* clean old values in C */
324:   PetscArrayzero(ca,ci[cm]);
325:   /* Traverse A row-wise. */
326:   /* Build the ith row in C by summing over nonzero columns in A, */
327:   /* the rows of B corresponding to nonzeros of A. */
328:   for (i=0; i<am; i++) {
329:     anzi = ai[i+1] - ai[i];
330:     cnzi = ci[i+1] - ci[i];
331:     for (j=0; j<anzi; j++) {
332:       brow = aj[j];
333:       bnzi = bi[brow+1] - bi[brow];
334:       bjj  = bj + bi[brow];
335:       baj  = ba + bi[brow];
336:       /* perform sparse axpy */
337:       valtmp = aa[j];
338:       nextb  = 0;
339:       for (k=0; nextb<bnzi; k++) {
340:         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
341:           ca[k] += valtmp*baj[nextb++];
342:         }
343:       }
344:       flops += 2*bnzi;
345:     }
346:     aj += anzi; aa += anzi;
347:     cj += cnzi; ca += cnzi;
348:   }

350:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
351:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
352:   PetscLogFlops(flops);
353:   return(0);
354: }

356: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
357: {
358:   PetscErrorCode     ierr;
359:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
360:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
361:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
362:   MatScalar          *ca;
363:   PetscReal          afill;
364:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
365:   PetscTable         ta;
366:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

369:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
370:   /*-----------------------------------------------------------------------------------------*/
371:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
372:   PetscMalloc1(am+2,&ci);
373:   ci[0] = 0;

375:   /* create and initialize a linked list */
376:   PetscTableCreate(bn,bn,&ta);
377:   MatRowMergeMax_SeqAIJ(b,bm,ta);
378:   PetscTableGetCount(ta,&Crmax);
379:   PetscTableDestroy(&ta);

381:   PetscLLCondensedCreate_fast(Crmax,&lnk);

383:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
384:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
385:   current_space = free_space;

387:   /* Determine ci and cj */
388:   for (i=0; i<am; i++) {
389:     anzi = ai[i+1] - ai[i];
390:     aj   = a->j + ai[i];
391:     for (j=0; j<anzi; j++) {
392:       brow = aj[j];
393:       bnzj = bi[brow+1] - bi[brow];
394:       bj   = b->j + bi[brow];
395:       /* add non-zero cols of B into the sorted linked list lnk */
396:       PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);
397:     }
398:     cnzi = lnk[1];

400:     /* If free space is not available, make more free space */
401:     /* Double the amount of total space in the list */
402:     if (current_space->local_remaining<cnzi) {
403:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
404:       ndouble++;
405:     }

407:     /* Copy data into free space, then initialize lnk */
408:     PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);

410:     current_space->array           += cnzi;
411:     current_space->local_used      += cnzi;
412:     current_space->local_remaining -= cnzi;

414:     ci[i+1] = ci[i] + cnzi;
415:   }

417:   /* Column indices are in the list of free space */
418:   /* Allocate space for cj, initialize cj, and */
419:   /* destroy list of free space and other temporary array(s) */
420:   PetscMalloc1(ci[am]+1,&cj);
421:   PetscFreeSpaceContiguous(&free_space,cj);
422:   PetscLLCondensedDestroy_fast(lnk);

424:   /* Allocate space for ca */
425:   PetscCalloc1(ci[am]+1,&ca);

427:   /* put together the new symbolic matrix */
428:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);
429:   MatSetBlockSizesFromMats(C,A,B);
430:   MatSetType(C,((PetscObject)A)->type_name);

432:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
433:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
434:   c          = (Mat_SeqAIJ*)(C->data);
435:   c->free_a  = PETSC_TRUE;
436:   c->free_ij = PETSC_TRUE;
437:   c->nonew   = 0;

439:   /* slower, less memory */
440:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;

442:   /* set MatInfo */
443:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
444:   if (afill < 1.0) afill = 1.0;
445:   c->maxnz                     = ci[am];
446:   c->nz                        = ci[am];
447:   C->info.mallocs           = ndouble;
448:   C->info.fill_ratio_given  = fill;
449:   C->info.fill_ratio_needed = afill;

451: #if defined(PETSC_USE_INFO)
452:   if (ci[am]) {
453:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
454:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
455:   } else {
456:     PetscInfo(C,"Empty matrix product\n");
457:   }
458: #endif
459:   return(0);
460: }

462: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
463: {
464:   PetscErrorCode     ierr;
465:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
466:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
467:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
468:   MatScalar          *ca;
469:   PetscReal          afill;
470:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
471:   PetscTable         ta;
472:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

475:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
476:   /*---------------------------------------------------------------------------------------------*/
477:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
478:   PetscMalloc1(am+2,&ci);
479:   ci[0] = 0;

481:   /* create and initialize a linked list */
482:   PetscTableCreate(bn,bn,&ta);
483:   MatRowMergeMax_SeqAIJ(b,bm,ta);
484:   PetscTableGetCount(ta,&Crmax);
485:   PetscTableDestroy(&ta);
486:   PetscLLCondensedCreate_Scalable(Crmax,&lnk);

488:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
489:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
490:   current_space = free_space;

492:   /* Determine ci and cj */
493:   for (i=0; i<am; i++) {
494:     anzi = ai[i+1] - ai[i];
495:     aj   = a->j + ai[i];
496:     for (j=0; j<anzi; j++) {
497:       brow = aj[j];
498:       bnzj = bi[brow+1] - bi[brow];
499:       bj   = b->j + bi[brow];
500:       /* add non-zero cols of B into the sorted linked list lnk */
501:       PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);
502:     }
503:     cnzi = lnk[0];

505:     /* If free space is not available, make more free space */
506:     /* Double the amount of total space in the list */
507:     if (current_space->local_remaining<cnzi) {
508:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
509:       ndouble++;
510:     }

512:     /* Copy data into free space, then initialize lnk */
513:     PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);

515:     current_space->array           += cnzi;
516:     current_space->local_used      += cnzi;
517:     current_space->local_remaining -= cnzi;

519:     ci[i+1] = ci[i] + cnzi;
520:   }

522:   /* Column indices are in the list of free space */
523:   /* Allocate space for cj, initialize cj, and */
524:   /* destroy list of free space and other temporary array(s) */
525:   PetscMalloc1(ci[am]+1,&cj);
526:   PetscFreeSpaceContiguous(&free_space,cj);
527:   PetscLLCondensedDestroy_Scalable(lnk);

529:   /* Allocate space for ca */
530:   /*-----------------------*/
531:   PetscCalloc1(ci[am]+1,&ca);

533:   /* put together the new symbolic matrix */
534:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,C);
535:   MatSetBlockSizesFromMats(C,A,B);
536:   MatSetType(C,((PetscObject)A)->type_name);

538:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
539:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
540:   c          = (Mat_SeqAIJ*)(C->data);
541:   c->free_a  = PETSC_TRUE;
542:   c->free_ij = PETSC_TRUE;
543:   c->nonew   = 0;

545:   /* slower, less memory */
546:   C->ops->matmultnumeric    = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;

548:   /* set MatInfo */
549:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
550:   if (afill < 1.0) afill = 1.0;
551:   c->maxnz                     = ci[am];
552:   c->nz                        = ci[am];
553:   C->info.mallocs           = ndouble;
554:   C->info.fill_ratio_given  = fill;
555:   C->info.fill_ratio_needed = afill;

557: #if defined(PETSC_USE_INFO)
558:   if (ci[am]) {
559:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
560:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
561:   } else {
562:     PetscInfo(C,"Empty matrix product\n");
563:   }
564: #endif
565:   return(0);
566: }

568: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
569: {
570:   PetscErrorCode     ierr;
571:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
572:   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
573:   PetscInt           *ci,*cj,*bb;
574:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
575:   PetscReal          afill;
576:   PetscInt           i,j,col,ndouble = 0;
577:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
578:   PetscHeap          h;

581:   /* Get ci and cj - by merging sorted rows using a heap */
582:   /*---------------------------------------------------------------------------------------------*/
583:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
584:   PetscMalloc1(am+2,&ci);
585:   ci[0] = 0;

587:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
588:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
589:   current_space = free_space;

591:   PetscHeapCreate(a->rmax,&h);
592:   PetscMalloc1(a->rmax,&bb);

594:   /* Determine ci and cj */
595:   for (i=0; i<am; i++) {
596:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
597:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
598:     ci[i+1] = ci[i];
599:     /* Populate the min heap */
600:     for (j=0; j<anzi; j++) {
601:       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
602:       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
603:         PetscHeapAdd(h,j,bj[bb[j]++]);
604:       }
605:     }
606:     /* Pick off the min element, adding it to free space */
607:     PetscHeapPop(h,&j,&col);
608:     while (j >= 0) {
609:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
610:         PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);
611:         ndouble++;
612:       }
613:       *(current_space->array++) = col;
614:       current_space->local_used++;
615:       current_space->local_remaining--;
616:       ci[i+1]++;

618:       /* stash if anything else remains in this row of B */
619:       if (bb[j] < bi[acol[j]+1]) {PetscHeapStash(h,j,bj[bb[j]++]);}
620:       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
621:         PetscInt j2,col2;
622:         PetscHeapPeek(h,&j2,&col2);
623:         if (col2 != col) break;
624:         PetscHeapPop(h,&j2,&col2);
625:         if (bb[j2] < bi[acol[j2]+1]) {PetscHeapStash(h,j2,bj[bb[j2]++]);}
626:       }
627:       /* Put any stashed elements back into the min heap */
628:       PetscHeapUnstash(h);
629:       PetscHeapPop(h,&j,&col);
630:     }
631:   }
632:   PetscFree(bb);
633:   PetscHeapDestroy(&h);

635:   /* Column indices are in the list of free space */
636:   /* Allocate space for cj, initialize cj, and */
637:   /* destroy list of free space and other temporary array(s) */
638:   PetscMalloc1(ci[am],&cj);
639:   PetscFreeSpaceContiguous(&free_space,cj);

641:   /* put together the new symbolic matrix */
642:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
643:   MatSetBlockSizesFromMats(C,A,B);
644:   MatSetType(C,((PetscObject)A)->type_name);

646:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
647:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
648:   c          = (Mat_SeqAIJ*)(C->data);
649:   c->free_a  = PETSC_TRUE;
650:   c->free_ij = PETSC_TRUE;
651:   c->nonew   = 0;

653:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

655:   /* set MatInfo */
656:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
657:   if (afill < 1.0) afill = 1.0;
658:   c->maxnz                     = ci[am];
659:   c->nz                        = ci[am];
660:   C->info.mallocs           = ndouble;
661:   C->info.fill_ratio_given  = fill;
662:   C->info.fill_ratio_needed = afill;

664: #if defined(PETSC_USE_INFO)
665:   if (ci[am]) {
666:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
667:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
668:   } else {
669:     PetscInfo(C,"Empty matrix product\n");
670:   }
671: #endif
672:   return(0);
673: }

675: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
676: {
677:   PetscErrorCode     ierr;
678:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
679:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
680:   PetscInt           *ci,*cj,*bb;
681:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
682:   PetscReal          afill;
683:   PetscInt           i,j,col,ndouble = 0;
684:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
685:   PetscHeap          h;
686:   PetscBT            bt;

689:   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
690:   /*---------------------------------------------------------------------------------------------*/
691:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
692:   PetscMalloc1(am+2,&ci);
693:   ci[0] = 0;

695:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
696:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);

698:   current_space = free_space;

700:   PetscHeapCreate(a->rmax,&h);
701:   PetscMalloc1(a->rmax,&bb);
702:   PetscBTCreate(bn,&bt);

704:   /* Determine ci and cj */
705:   for (i=0; i<am; i++) {
706:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
707:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
708:     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
709:     ci[i+1] = ci[i];
710:     /* Populate the min heap */
711:     for (j=0; j<anzi; j++) {
712:       PetscInt brow = acol[j];
713:       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
714:         PetscInt bcol = bj[bb[j]];
715:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
716:           PetscHeapAdd(h,j,bcol);
717:           bb[j]++;
718:           break;
719:         }
720:       }
721:     }
722:     /* Pick off the min element, adding it to free space */
723:     PetscHeapPop(h,&j,&col);
724:     while (j >= 0) {
725:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
726:         fptr = NULL;                      /* need PetscBTMemzero */
727:         PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);
728:         ndouble++;
729:       }
730:       *(current_space->array++) = col;
731:       current_space->local_used++;
732:       current_space->local_remaining--;
733:       ci[i+1]++;

735:       /* stash if anything else remains in this row of B */
736:       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
737:         PetscInt bcol = bj[bb[j]];
738:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
739:           PetscHeapAdd(h,j,bcol);
740:           bb[j]++;
741:           break;
742:         }
743:       }
744:       PetscHeapPop(h,&j,&col);
745:     }
746:     if (fptr) {                 /* Clear the bits for this row */
747:       for (; fptr<current_space->array; fptr++) {PetscBTClear(bt,*fptr);}
748:     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
749:       PetscBTMemzero(bn,bt);
750:     }
751:   }
752:   PetscFree(bb);
753:   PetscHeapDestroy(&h);
754:   PetscBTDestroy(&bt);

756:   /* Column indices are in the list of free space */
757:   /* Allocate space for cj, initialize cj, and */
758:   /* destroy list of free space and other temporary array(s) */
759:   PetscMalloc1(ci[am],&cj);
760:   PetscFreeSpaceContiguous(&free_space,cj);

762:   /* put together the new symbolic matrix */
763:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
764:   MatSetBlockSizesFromMats(C,A,B);
765:   MatSetType(C,((PetscObject)A)->type_name);

767:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
768:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
769:   c          = (Mat_SeqAIJ*)(C->data);
770:   c->free_a  = PETSC_TRUE;
771:   c->free_ij = PETSC_TRUE;
772:   c->nonew   = 0;

774:   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

776:   /* set MatInfo */
777:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
778:   if (afill < 1.0) afill = 1.0;
779:   c->maxnz                     = ci[am];
780:   c->nz                        = ci[am];
781:   C->info.mallocs           = ndouble;
782:   C->info.fill_ratio_given  = fill;
783:   C->info.fill_ratio_needed = afill;

785: #if defined(PETSC_USE_INFO)
786:   if (ci[am]) {
787:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
788:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
789:   } else {
790:     PetscInfo(C,"Empty matrix product\n");
791:   }
792: #endif
793:   return(0);
794: }


797: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
798: {
799:   PetscErrorCode     ierr;
800:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
801:   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
802:   PetscInt           *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
803:   PetscInt           c_maxmem,a_maxrownnz=0,a_rownnz;
804:   const PetscInt     workcol[8]={0,1,2,3,4,5,6,7};
805:   const PetscInt     am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
806:   const PetscInt     *brow_ptr[8],*brow_end[8];
807:   PetscInt           window[8];
808:   PetscInt           window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
809:   PetscInt           i,k,ndouble=0,L1_rowsleft,rowsleft;
810:   PetscReal          afill;
811:   PetscInt           *workj_L1,*workj_L2,*workj_L3;
812:   PetscInt           L1_nnz,L2_nnz;

814:   /* Step 1: Get upper bound on memory required for allocation.
815:              Because of the way virtual memory works,
816:              only the memory pages that are actually needed will be physically allocated. */
818:   PetscMalloc1(am+1,&ci);
819:   for (i=0; i<am; i++) {
820:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
821:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
822:     a_rownnz = 0;
823:     for (k=0; k<anzi; ++k) {
824:       a_rownnz += bi[acol[k]+1] - bi[acol[k]];
825:       if (a_rownnz > bn) {
826:         a_rownnz = bn;
827:         break;
828:       }
829:     }
830:     a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
831:   }
832:   /* temporary work areas for merging rows */
833:   PetscMalloc1(a_maxrownnz*8,&workj_L1);
834:   PetscMalloc1(a_maxrownnz*8,&workj_L2);
835:   PetscMalloc1(a_maxrownnz,&workj_L3);

837:   /* This should be enough for almost all matrices. If not, memory is reallocated later. */
838:   c_maxmem = 8*(ai[am]+bi[bm]);
839:   /* Step 2: Populate pattern for C */
840:   PetscMalloc1(c_maxmem,&cj);

842:   ci_nnz       = 0;
843:   ci[0]        = 0;
844:   worki_L1[0]  = 0;
845:   worki_L2[0]  = 0;
846:   for (i=0; i<am; i++) {
847:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
848:     const PetscInt *acol = aj + ai[i];      /* column indices of nonzero entries in this row */
849:     rowsleft             = anzi;
850:     inputcol_L1          = acol;
851:     L2_nnz               = 0;
852:     L2_nrows             = 1;  /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1   */
853:     worki_L2[1]          = 0;
854:     outputi_nnz          = 0;

856:     /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem  -> allocate more memory */
857:     while (ci_nnz+a_maxrownnz > c_maxmem) {
858:       c_maxmem *= 2;
859:       ndouble++;
860:       PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);
861:     }

863:     while (rowsleft) {
864:       L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
865:       L1_nrows    = 0;
866:       L1_nnz      = 0;
867:       inputcol    = inputcol_L1;
868:       inputi      = bi;
869:       inputj      = bj;

871:       /* The following macro is used to specialize for small rows in A.
872:          This helps with compiler unrolling, improving performance substantially.
873:           Input:  inputj   inputi  inputcol  bn
874:           Output: outputj  outputi_nnz                       */
875:        #define MatMatMultSymbolic_RowMergeMacro(ANNZ)                        \
876:          window_min  = bn;                                                   \
877:          outputi_nnz = 0;                                                    \
878:          for (k=0; k<ANNZ; ++k) {                                            \
879:            brow_ptr[k] = inputj + inputi[inputcol[k]];                       \
880:            brow_end[k] = inputj + inputi[inputcol[k]+1];                     \
881:            window[k]   = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn;   \
882:            window_min  = PetscMin(window[k], window_min);                    \
883:          }                                                                   \
884:          while (window_min < bn) {                                           \
885:            outputj[outputi_nnz++] = window_min;                              \
886:            /* advance front and compute new minimum */                       \
887:            old_window_min = window_min;                                      \
888:            window_min = bn;                                                  \
889:            for (k=0; k<ANNZ; ++k) {                                          \
890:              if (window[k] == old_window_min) {                              \
891:                brow_ptr[k]++;                                                \
892:                window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
893:              }                                                               \
894:              window_min = PetscMin(window[k], window_min);                   \
895:            }                                                                 \
896:          }

898:       /************** L E V E L  1 ***************/
899:       /* Merge up to 8 rows of B to L1 work array*/
900:       while (L1_rowsleft) {
901:         outputi_nnz = 0;
902:         if (anzi > 8)  outputj = workj_L1 + L1_nnz;     /* Level 1 rowmerge*/
903:         else           outputj = cj + ci_nnz;           /* Merge directly to C */

905:         switch (L1_rowsleft) {
906:         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
907:                  brow_end[0] = inputj + inputi[inputcol[0]+1];
908:                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
909:                  inputcol    += L1_rowsleft;
910:                  rowsleft    -= L1_rowsleft;
911:                  L1_rowsleft  = 0;
912:                  break;
913:         case 2:  MatMatMultSymbolic_RowMergeMacro(2);
914:                  inputcol    += L1_rowsleft;
915:                  rowsleft    -= L1_rowsleft;
916:                  L1_rowsleft  = 0;
917:                  break;
918:         case 3: MatMatMultSymbolic_RowMergeMacro(3);
919:                  inputcol    += L1_rowsleft;
920:                  rowsleft    -= L1_rowsleft;
921:                  L1_rowsleft  = 0;
922:                  break;
923:         case 4:  MatMatMultSymbolic_RowMergeMacro(4);
924:                  inputcol    += L1_rowsleft;
925:                  rowsleft    -= L1_rowsleft;
926:                  L1_rowsleft  = 0;
927:                  break;
928:         case 5:  MatMatMultSymbolic_RowMergeMacro(5);
929:                  inputcol    += L1_rowsleft;
930:                  rowsleft    -= L1_rowsleft;
931:                  L1_rowsleft  = 0;
932:                  break;
933:         case 6:  MatMatMultSymbolic_RowMergeMacro(6);
934:                  inputcol    += L1_rowsleft;
935:                  rowsleft    -= L1_rowsleft;
936:                  L1_rowsleft  = 0;
937:                  break;
938:         case 7:  MatMatMultSymbolic_RowMergeMacro(7);
939:                  inputcol    += L1_rowsleft;
940:                  rowsleft    -= L1_rowsleft;
941:                  L1_rowsleft  = 0;
942:                  break;
943:         default: MatMatMultSymbolic_RowMergeMacro(8);
944:                  inputcol    += 8;
945:                  rowsleft    -= 8;
946:                  L1_rowsleft -= 8;
947:                  break;
948:         }
949:         inputcol_L1           = inputcol;
950:         L1_nnz               += outputi_nnz;
951:         worki_L1[++L1_nrows]  = L1_nnz;
952:       }

954:       /********************** L E V E L  2 ************************/
955:       /* Merge from L1 work array to either C or to L2 work array */
956:       if (anzi > 8) {
957:         inputi      = worki_L1;
958:         inputj      = workj_L1;
959:         inputcol    = workcol;
960:         outputi_nnz = 0;

962:         if (anzi <= 64) outputj = cj + ci_nnz;        /* Merge from L1 work array to C */
963:         else            outputj = workj_L2 + L2_nnz;  /* Merge from L1 work array to L2 work array */

965:         switch (L1_nrows) {
966:         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
967:                  brow_end[0] = inputj + inputi[inputcol[0]+1];
968:                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
969:                  break;
970:         case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
971:         case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
972:         case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
973:         case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
974:         case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
975:         case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
976:         case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
977:         default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
978:         }
979:         L2_nnz               += outputi_nnz;
980:         worki_L2[++L2_nrows]  = L2_nnz;

982:         /************************ L E V E L  3 **********************/
983:         /* Merge from L2 work array to either C or to L2 work array */
984:         if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
985:           inputi      = worki_L2;
986:           inputj      = workj_L2;
987:           inputcol    = workcol;
988:           outputi_nnz = 0;
989:           if (rowsleft) outputj = workj_L3;
990:           else          outputj = cj + ci_nnz;
991:           switch (L2_nrows) {
992:           case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
993:                    brow_end[0] = inputj + inputi[inputcol[0]+1];
994:                    for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
995:                    break;
996:           case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
997:           case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
998:           case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
999:           case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
1000:           case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
1001:           case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1002:           case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1003:           default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1004:           }
1005:           L2_nrows    = 1;
1006:           L2_nnz      = outputi_nnz;
1007:           worki_L2[1] = outputi_nnz;
1008:           /* Copy to workj_L2 */
1009:           if (rowsleft) {
1010:             for (k=0; k<outputi_nnz; ++k)  workj_L2[k] = outputj[k];
1011:           }
1012:         }
1013:       }
1014:     }  /* while (rowsleft) */
1015: #undef MatMatMultSymbolic_RowMergeMacro

1017:     /* terminate current row */
1018:     ci_nnz += outputi_nnz;
1019:     ci[i+1] = ci_nnz;
1020:   }

1022:   /* Step 3: Create the new symbolic matrix */
1023:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
1024:   MatSetBlockSizesFromMats(C,A,B);
1025:   MatSetType(C,((PetscObject)A)->type_name);

1027:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1028:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1029:   c          = (Mat_SeqAIJ*)(C->data);
1030:   c->free_a  = PETSC_TRUE;
1031:   c->free_ij = PETSC_TRUE;
1032:   c->nonew   = 0;

1034:   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

1036:   /* set MatInfo */
1037:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1038:   if (afill < 1.0) afill = 1.0;
1039:   c->maxnz                     = ci[am];
1040:   c->nz                        = ci[am];
1041:   C->info.mallocs           = ndouble;
1042:   C->info.fill_ratio_given  = fill;
1043:   C->info.fill_ratio_needed = afill;

1045: #if defined(PETSC_USE_INFO)
1046:   if (ci[am]) {
1047:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1048:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1049:   } else {
1050:     PetscInfo(C,"Empty matrix product\n");
1051:   }
1052: #endif

1054:   /* Step 4: Free temporary work areas */
1055:   PetscFree(workj_L1);
1056:   PetscFree(workj_L2);
1057:   PetscFree(workj_L3);
1058:   return(0);
1059: }

1061: /* concatenate unique entries and then sort */
1062: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1063: {
1064:   PetscErrorCode     ierr;
1065:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1066:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
1067:   PetscInt           *ci,*cj;
1068:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1069:   PetscReal          afill;
1070:   PetscInt           i,j,ndouble = 0;
1071:   PetscSegBuffer     seg,segrow;
1072:   char               *seen;

1075:   PetscMalloc1(am+1,&ci);
1076:   ci[0] = 0;

1078:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1079:   PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);
1080:   PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);
1081:   PetscCalloc1(bn,&seen);

1083:   /* Determine ci and cj */
1084:   for (i=0; i<am; i++) {
1085:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1086:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1087:     PetscInt packlen = 0,*PETSC_RESTRICT crow;
1088:     /* Pack segrow */
1089:     for (j=0; j<anzi; j++) {
1090:       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1091:       for (k=bjstart; k<bjend; k++) {
1092:         PetscInt bcol = bj[k];
1093:         if (!seen[bcol]) { /* new entry */
1094:           PetscInt *PETSC_RESTRICT slot;
1095:           PetscSegBufferGetInts(segrow,1,&slot);
1096:           *slot = bcol;
1097:           seen[bcol] = 1;
1098:           packlen++;
1099:         }
1100:       }
1101:     }
1102:     PetscSegBufferGetInts(seg,packlen,&crow);
1103:     PetscSegBufferExtractTo(segrow,crow);
1104:     PetscSortInt(packlen,crow);
1105:     ci[i+1] = ci[i] + packlen;
1106:     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1107:   }
1108:   PetscSegBufferDestroy(&segrow);
1109:   PetscFree(seen);

1111:   /* Column indices are in the segmented buffer */
1112:   PetscSegBufferExtractAlloc(seg,&cj);
1113:   PetscSegBufferDestroy(&seg);

1115:   /* put together the new symbolic matrix */
1116:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);
1117:   MatSetBlockSizesFromMats(C,A,B);
1118:   MatSetType(C,((PetscObject)A)->type_name);

1120:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1121:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1122:   c          = (Mat_SeqAIJ*)(C->data);
1123:   c->free_a  = PETSC_TRUE;
1124:   c->free_ij = PETSC_TRUE;
1125:   c->nonew   = 0;

1127:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

1129:   /* set MatInfo */
1130:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1131:   if (afill < 1.0) afill = 1.0;
1132:   c->maxnz                     = ci[am];
1133:   c->nz                        = ci[am];
1134:   C->info.mallocs           = ndouble;
1135:   C->info.fill_ratio_given  = fill;
1136:   C->info.fill_ratio_needed = afill;

1138: #if defined(PETSC_USE_INFO)
1139:   if (ci[am]) {
1140:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1141:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1142:   } else {
1143:     PetscInfo(C,"Empty matrix product\n");
1144:   }
1145: #endif
1146:   return(0);
1147: }

1149: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
1150: {
1151:   PetscErrorCode      ierr;
1152:   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)A->data;
1153:   Mat_MatMatTransMult *abt=a->abt;

1156:   (abt->destroy)(A);
1157:   MatTransposeColoringDestroy(&abt->matcoloring);
1158:   MatDestroy(&abt->Bt_den);
1159:   MatDestroy(&abt->ABt_den);
1160:   PetscFree(abt);
1161:   return(0);
1162: }

1164: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1165: {
1166:   PetscErrorCode      ierr;
1167:   Mat                 Bt;
1168:   PetscInt            *bti,*btj;
1169:   Mat_MatMatTransMult *abt;
1170:   Mat_SeqAIJ          *c;
1171:   Mat_Product         *product = C->product;
1172:   MatProductAlgorithm alg = product->alg;

1175:   /* create symbolic Bt */
1176:   MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1177:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);
1178:   MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1179:   MatSetType(Bt,((PetscObject)A)->type_name);

1181:   /* get symbolic C=A*Bt */
1182:   MatProductSetAlgorithm(C,"sorted"); /* set algorithm for C = A*Bt */
1183:   MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);
1184:   MatProductSetAlgorithm(C,alg); /* resume original algorithm for ABt product */

1186:   /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
1187:   PetscNew(&abt);
1188:   c      = (Mat_SeqAIJ*)C->data;
1189:   c->abt = abt;

1191:   abt->usecoloring = PETSC_FALSE;
1192:   abt->destroy     = C->ops->destroy;
1193:   C->ops->destroy  = MatDestroy_SeqAIJ_MatMatMultTrans;
1194:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;

1196:   abt->usecoloring = PETSC_FALSE;
1197:   PetscStrcmp(product->alg,"color",&abt->usecoloring);
1198:   if (abt->usecoloring) {
1199:     /* Create MatTransposeColoring from symbolic C=A*B^T */
1200:     MatTransposeColoring matcoloring;
1201:     MatColoring          coloring;
1202:     ISColoring           iscoloring;
1203:     Mat                  Bt_dense,C_dense;

1205:     /* inode causes memory problem */
1206:     MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);

1208:     MatColoringCreate(C,&coloring);
1209:     MatColoringSetDistance(coloring,2);
1210:     MatColoringSetType(coloring,MATCOLORINGSL);
1211:     MatColoringSetFromOptions(coloring);
1212:     MatColoringApply(coloring,&iscoloring);
1213:     MatColoringDestroy(&coloring);
1214:     MatTransposeColoringCreate(C,iscoloring,&matcoloring);

1216:     abt->matcoloring = matcoloring;

1218:     ISColoringDestroy(&iscoloring);

1220:     /* Create Bt_dense and C_dense = A*Bt_dense */
1221:     MatCreate(PETSC_COMM_SELF,&Bt_dense);
1222:     MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);
1223:     MatSetType(Bt_dense,MATSEQDENSE);
1224:     MatSeqDenseSetPreallocation(Bt_dense,NULL);

1226:     Bt_dense->assembled = PETSC_TRUE;
1227:     abt->Bt_den         = Bt_dense;

1229:     MatCreate(PETSC_COMM_SELF,&C_dense);
1230:     MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);
1231:     MatSetType(C_dense,MATSEQDENSE);
1232:     MatSeqDenseSetPreallocation(C_dense,NULL);

1234:     Bt_dense->assembled = PETSC_TRUE;
1235:     abt->ABt_den  = C_dense;

1237: #if defined(PETSC_USE_INFO)
1238:     {
1239:       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
1240:       PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
1241:     }
1242: #endif
1243:   }
1244:   /* clean up */
1245:   MatDestroy(&Bt);
1246:   MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1247:   return(0);
1248: }

1250: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1251: {
1252:   PetscErrorCode      ierr;
1253:   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1254:   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
1255:   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
1256:   PetscLogDouble      flops=0.0;
1257:   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
1258:   Mat_MatMatTransMult *abt = c->abt;

1261:   /* clear old values in C */
1262:   if (!c->a) {
1263:     PetscCalloc1(ci[cm]+1,&ca);
1264:     c->a      = ca;
1265:     c->free_a = PETSC_TRUE;
1266:   } else {
1267:     ca =  c->a;
1268:     PetscArrayzero(ca,ci[cm]+1);
1269:   }

1271:   if (abt->usecoloring) {
1272:     MatTransposeColoring matcoloring = abt->matcoloring;
1273:     Mat                  Bt_dense,C_dense = abt->ABt_den;

1275:     /* Get Bt_dense by Apply MatTransposeColoring to B */
1276:     Bt_dense = abt->Bt_den;
1277:     MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);

1279:     /* C_dense = A*Bt_dense */
1280:     MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);

1282:     /* Recover C from C_dense */
1283:     MatTransColoringApplyDenToSp(matcoloring,C_dense,C);
1284:     return(0);
1285:   }

1287:   for (i=0; i<cm; i++) {
1288:     anzi = ai[i+1] - ai[i];
1289:     acol = aj + ai[i];
1290:     aval = aa + ai[i];
1291:     cnzi = ci[i+1] - ci[i];
1292:     ccol = cj + ci[i];
1293:     cval = ca + ci[i];
1294:     for (j=0; j<cnzi; j++) {
1295:       brow = ccol[j];
1296:       bnzj = bi[brow+1] - bi[brow];
1297:       bcol = bj + bi[brow];
1298:       bval = ba + bi[brow];

1300:       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1301:       nexta = 0; nextb = 0;
1302:       while (nexta<anzi && nextb<bnzj) {
1303:         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1304:         if (nexta == anzi) break;
1305:         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1306:         if (nextb == bnzj) break;
1307:         if (acol[nexta] == bcol[nextb]) {
1308:           cval[j] += aval[nexta]*bval[nextb];
1309:           nexta++; nextb++;
1310:           flops += 2;
1311:         }
1312:       }
1313:     }
1314:   }
1315:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1316:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1317:   PetscLogFlops(flops);
1318:   return(0);
1319: }

1321: PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(Mat A)
1322: {
1323:   PetscErrorCode      ierr;
1324:   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1325:   Mat_MatTransMatMult *atb = a->atb;

1328:   if (atb) {
1329:     MatDestroy(&atb->At);
1330:     (*atb->destroy)(A);
1331:   }
1332:   PetscFree(atb);
1333:   return(0);
1334: }

1336: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1337: {
1338:   PetscErrorCode      ierr;
1339:   Mat                 At;
1340:   PetscInt            *ati,*atj;
1341:   Mat_Product         *product = C->product;
1342:   MatProductAlgorithm alg;
1343:   PetscBool           flg;

1346:   if (product) {
1347:     alg = product->alg;
1348:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"!product, not supported yet");

1350:   /* outerproduct */
1351:   PetscStrcmp(alg,"outerproduct",&flg);
1352:   if (flg) {
1353:     /* create symbolic At */
1354:     MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1355:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);
1356:     MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1357:     MatSetType(At,((PetscObject)A)->type_name);

1359:     /* get symbolic C=At*B */
1360:     product->alg = "sorted";
1361:     MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);

1363:     /* clean up */
1364:     MatDestroy(&At);
1365:     MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);

1367:     C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1368:     return(0);
1369:   }

1371:   /* matmatmult */
1372:   PetscStrcmp(alg,"at*b",&flg);
1373:   if (flg) {
1374:     Mat_MatTransMatMult *atb;
1375:     Mat_SeqAIJ          *c;

1377:     PetscNew(&atb);
1378:     MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);
1379:     product->alg = "sorted";
1380:     MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);

1382:     c               = (Mat_SeqAIJ*)C->data;
1383:     c->atb          = atb;
1384:     atb->At         = At;
1385:     atb->destroy    = C->ops->destroy;
1386:     atb->updateAt   = PETSC_FALSE; /* because At is computed here */
1387:     C->ops->destroy = MatDestroy_SeqAIJ_MatTransMatMult;

1389:     C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1390:     return(0);
1391:   }

1393:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1394:   return(0);
1395: }

1397: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1398: {
1400:   Mat_SeqAIJ     *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1401:   PetscInt       am   =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1402:   PetscInt       cm   =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1403:   PetscLogDouble flops=0.0;
1404:   MatScalar      *aa  =a->a,*ba,*ca,*caj;

1407:   if (!c->a) {
1408:     PetscCalloc1(ci[cm]+1,&ca);

1410:     c->a      = ca;
1411:     c->free_a = PETSC_TRUE;
1412:   } else {
1413:     ca   = c->a;
1414:     PetscArrayzero(ca,ci[cm]);
1415:   }

1417:   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1418:   for (i=0; i<am; i++) {
1419:     bj   = b->j + bi[i];
1420:     ba   = b->a + bi[i];
1421:     bnzi = bi[i+1] - bi[i];
1422:     anzi = ai[i+1] - ai[i];
1423:     for (j=0; j<anzi; j++) {
1424:       nextb = 0;
1425:       crow  = *aj++;
1426:       cjj   = cj + ci[crow];
1427:       caj   = ca + ci[crow];
1428:       /* perform sparse axpy operation.  Note cjj includes bj. */
1429:       for (k=0; nextb<bnzi; k++) {
1430:         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
1431:           caj[k] += (*aa)*(*(ba+nextb));
1432:           nextb++;
1433:         }
1434:       }
1435:       flops += 2*bnzi;
1436:       aa++;
1437:     }
1438:   }

1440:   /* Assemble the final matrix and clean up */
1441:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1442:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1443:   PetscLogFlops(flops);
1444:   return(0);
1445: }

1447: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
1448: {

1452:   MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);

1454:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1455:   return(0);
1456: }

1458: PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1459: {
1460:   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
1461:   Mat_SeqDense      *bd = (Mat_SeqDense*)B->data;
1462:   PetscErrorCode    ierr;
1463:   PetscScalar       *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4,aatmp;
1464:   const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1465:   const PetscInt    *aj;
1466:   PetscInt          cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
1467:   PetscInt          am4=4*am,bm4=4*bm,col,i,j,n,ajtmp;

1470:   if (!cm || !cn) return(0);
1471:   MatSeqAIJGetArrayRead(A,&av);
1472:   MatDenseGetArray(C,&c);
1473:   MatDenseGetArrayRead(B,&b);
1474:   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1475:   c1 = c; c2 = c1 + am; c3 = c2 + am; c4 = c3 + am;
1476:   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
1477:     for (i=0; i<am; i++) {        /* over rows of C in those columns */
1478:       r1 = r2 = r3 = r4 = 0.0;
1479:       n  = a->i[i+1] - a->i[i];
1480:       aj = a->j + a->i[i];
1481:       aa = av + a->i[i];
1482:       for (j=0; j<n; j++) {
1483:         aatmp = aa[j]; ajtmp = aj[j];
1484:         r1 += aatmp*b1[ajtmp];
1485:         r2 += aatmp*b2[ajtmp];
1486:         r3 += aatmp*b3[ajtmp];
1487:         r4 += aatmp*b4[ajtmp];
1488:       }
1489:       c1[i] += r1;
1490:       c2[i] += r2;
1491:       c3[i] += r3;
1492:       c4[i] += r4;
1493:     }
1494:     b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1495:     c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1496:   }
1497:   for (; col<cn; col++) {   /* over extra columns of C */
1498:     for (i=0; i<am; i++) {  /* over rows of C in those columns */
1499:       r1 = 0.0;
1500:       n  = a->i[i+1] - a->i[i];
1501:       aj = a->j + a->i[i];
1502:       aa = av + a->i[i];
1503:       for (j=0; j<n; j++) {
1504:         r1 += aa[j]*b1[aj[j]];
1505:       }
1506:       c1[i] += r1;
1507:     }
1508:     b1 += bm;
1509:     c1 += am;
1510:   }
1511:   PetscLogFlops(cn*(2.0*a->nz));
1512:   MatDenseRestoreArray(C,&c);
1513:   MatDenseRestoreArrayRead(B,&b);
1514:   MatSeqAIJRestoreArrayRead(A,&av);
1515:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1516:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1517:   return(0);
1518: }

1520: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1521: {

1525:   if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n);
1526:   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
1527:   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);

1529:   MatZeroEntries(C);
1530:   MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C);
1531:   return(0);
1532: }

1534: /* ------------------------------------------------------- */
1535: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
1536: {
1538:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
1539:   C->ops->productsymbolic = MatProductSymbolic_AB;
1540:   /* dense mat may not call MatProductSymbolic(), thus set C->ops->productnumeric here */
1541:   C->ops->productnumeric  = MatProductNumeric_AB;
1542:   return(0);
1543: }

1545: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
1546: {
1548:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqAIJ_SeqDense;
1549:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
1550:   C->ops->productnumeric           = MatProductNumeric_AtB;
1551:   return(0);
1552: }

1554: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
1555: {
1557:   Mat_Product    *product = C->product;

1560:   switch (product->type) {
1561:   case MATPRODUCT_AB:
1562:     MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);
1563:     break;
1564:   case MATPRODUCT_AtB:
1565:     MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);
1566:     break;
1567:   case MATPRODUCT_PtAP:
1568:     MatProductSetFromOptions_SeqDense(C);
1569:     break;
1570:   default:
1571:     /* Use MatProduct_Basic() if there is no specific implementation */
1572:     C->ops->productsymbolic = MatProductSymbolic_Basic;
1573:   }
1574:   return(0);
1575: }
1576: /* ------------------------------------------------------- */
1577: static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
1578: {
1580:   Mat_Product    *product = C->product;
1581:   Mat            A = product->A;
1582:   PetscBool      baij;

1585:   PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);
1586:   if (!baij) { /* A is seqsbaij */
1587:     PetscBool sbaij;
1588:     PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);
1589:     if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");

1591:     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
1592:   } else { /* A is seqbaij */
1593:     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
1594:   }

1596:   C->ops->productsymbolic = MatProductSymbolic_AB;
1597:   C->ops->productnumeric  = MatProductNumeric_AB;
1598:   return(0);
1599: }

1601: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
1602: {
1604:   Mat_Product    *product = C->product;

1607:   if (product->type == MATPRODUCT_AB) {
1608:     MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);
1609:   } else SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported");
1610:   return(0);
1611: }
1612: /* ------------------------------------------------------- */
1613: static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
1614: {
1616:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
1617:   C->ops->productsymbolic = MatProductSymbolic_AB;
1618:   C->ops->productnumeric  = MatProductNumeric_AB;
1619:   return(0);
1620: }

1622: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
1623: {
1625:   Mat_Product    *product = C->product;

1628:   if (product->type == MATPRODUCT_AB) {
1629:     MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);
1630:   } else SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported");
1631:   return(0);
1632: }
1633: /* ------------------------------------------------------- */

1635: PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1636: {
1638:   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
1639:   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
1640:   PetscInt       *bi      = b->i,*bj=b->j;
1641:   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1642:   MatScalar      *btval,*btval_den,*ba=b->a;
1643:   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;

1646:   btval_den=btdense->v;
1647:   PetscArrayzero(btval_den,m*n);
1648:   for (k=0; k<ncolors; k++) {
1649:     ncolumns = coloring->ncolumns[k];
1650:     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1651:       col   = *(columns + colorforcol[k] + l);
1652:       btcol = bj + bi[col];
1653:       btval = ba + bi[col];
1654:       anz   = bi[col+1] - bi[col];
1655:       for (j=0; j<anz; j++) {
1656:         brow            = btcol[j];
1657:         btval_den[brow] = btval[j];
1658:       }
1659:     }
1660:     btval_den += m;
1661:   }
1662:   return(0);
1663: }

1665: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1666: {
1667:   PetscErrorCode    ierr;
1668:   Mat_SeqAIJ        *csp = (Mat_SeqAIJ*)Csp->data;
1669:   const PetscScalar *ca_den,*ca_den_ptr;
1670:   PetscScalar       *ca=csp->a;
1671:   PetscInt          k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1672:   PetscInt          brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1673:   PetscInt          nrows,*row,*idx;
1674:   PetscInt          *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;

1677:   MatDenseGetArrayRead(Cden,&ca_den);

1679:   if (brows > 0) {
1680:     PetscInt *lstart,row_end,row_start;
1681:     lstart = matcoloring->lstart;
1682:     PetscArrayzero(lstart,ncolors);

1684:     row_end = brows;
1685:     if (row_end > m) row_end = m;
1686:     for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1687:       ca_den_ptr = ca_den;
1688:       for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1689:         nrows = matcoloring->nrows[k];
1690:         row   = rows  + colorforrow[k];
1691:         idx   = den2sp + colorforrow[k];
1692:         for (l=lstart[k]; l<nrows; l++) {
1693:           if (row[l] >= row_end) {
1694:             lstart[k] = l;
1695:             break;
1696:           } else {
1697:             ca[idx[l]] = ca_den_ptr[row[l]];
1698:           }
1699:         }
1700:         ca_den_ptr += m;
1701:       }
1702:       row_end += brows;
1703:       if (row_end > m) row_end = m;
1704:     }
1705:   } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1706:     ca_den_ptr = ca_den;
1707:     for (k=0; k<ncolors; k++) {
1708:       nrows = matcoloring->nrows[k];
1709:       row   = rows  + colorforrow[k];
1710:       idx   = den2sp + colorforrow[k];
1711:       for (l=0; l<nrows; l++) {
1712:         ca[idx[l]] = ca_den_ptr[row[l]];
1713:       }
1714:       ca_den_ptr += m;
1715:     }
1716:   }

1718:   MatDenseRestoreArrayRead(Cden,&ca_den);
1719: #if defined(PETSC_USE_INFO)
1720:   if (matcoloring->brows > 0) {
1721:     PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);
1722:   } else {
1723:     PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");
1724:   }
1725: #endif
1726:   return(0);
1727: }

1729: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1730: {
1732:   PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
1733:   const PetscInt *is,*ci,*cj,*row_idx;
1734:   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1735:   IS             *isa;
1736:   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1737:   PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1738:   PetscInt       *colorforcol,*columns,*columns_i,brows;
1739:   PetscBool      flg;

1742:   ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);

1744:   /* bs >1 is not being tested yet! */
1745:   Nbs       = mat->cmap->N/bs;
1746:   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1747:   c->N      = Nbs;
1748:   c->m      = c->M;
1749:   c->rstart = 0;
1750:   c->brows  = 100;

1752:   c->ncolors = nis;
1753:   PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);
1754:   PetscMalloc1(csp->nz+1,&rows);
1755:   PetscMalloc1(csp->nz+1,&den2sp);

1757:   brows = c->brows;
1758:   PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);
1759:   if (flg) c->brows = brows;
1760:   if (brows > 0) {
1761:     PetscMalloc1(nis+1,&c->lstart);
1762:   }

1764:   colorforrow[0] = 0;
1765:   rows_i         = rows;
1766:   den2sp_i       = den2sp;

1768:   PetscMalloc1(nis+1,&colorforcol);
1769:   PetscMalloc1(Nbs+1,&columns);

1771:   colorforcol[0] = 0;
1772:   columns_i      = columns;

1774:   /* get column-wise storage of mat */
1775:   MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);

1777:   cm   = c->m;
1778:   PetscMalloc1(cm+1,&rowhit);
1779:   PetscMalloc1(cm+1,&idxhit);
1780:   for (i=0; i<nis; i++) { /* loop over color */
1781:     ISGetLocalSize(isa[i],&n);
1782:     ISGetIndices(isa[i],&is);

1784:     c->ncolumns[i] = n;
1785:     if (n) {
1786:       PetscArraycpy(columns_i,is,n);
1787:     }
1788:     colorforcol[i+1] = colorforcol[i] + n;
1789:     columns_i       += n;

1791:     /* fast, crude version requires O(N*N) work */
1792:     PetscArrayzero(rowhit,cm);

1794:     for (j=0; j<n; j++) { /* loop over columns*/
1795:       col     = is[j];
1796:       row_idx = cj + ci[col];
1797:       m       = ci[col+1] - ci[col];
1798:       for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1799:         idxhit[*row_idx]   = spidx[ci[col] + k];
1800:         rowhit[*row_idx++] = col + 1;
1801:       }
1802:     }
1803:     /* count the number of hits */
1804:     nrows = 0;
1805:     for (j=0; j<cm; j++) {
1806:       if (rowhit[j]) nrows++;
1807:     }
1808:     c->nrows[i]      = nrows;
1809:     colorforrow[i+1] = colorforrow[i] + nrows;

1811:     nrows = 0;
1812:     for (j=0; j<cm; j++) { /* loop over rows */
1813:       if (rowhit[j]) {
1814:         rows_i[nrows]   = j;
1815:         den2sp_i[nrows] = idxhit[j];
1816:         nrows++;
1817:       }
1818:     }
1819:     den2sp_i += nrows;

1821:     ISRestoreIndices(isa[i],&is);
1822:     rows_i += nrows;
1823:   }
1824:   MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1825:   PetscFree(rowhit);
1826:   ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);
1827:   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);

1829:   c->colorforrow = colorforrow;
1830:   c->rows        = rows;
1831:   c->den2sp      = den2sp;
1832:   c->colorforcol = colorforcol;
1833:   c->columns     = columns;

1835:   PetscFree(idxhit);
1836:   return(0);
1837: }

1839: /* --------------------------------------------------------------- */
1840: static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1841: {
1843:   Mat_Product    *product = C->product;
1844:   Mat            A=product->A,B=product->B;

1847:   if (C->ops->mattransposemultnumeric) {
1848:     /* Alg: "outerproduct" */
1849:     (C->ops->mattransposemultnumeric)(A,B,C);
1850:   } else {
1851:     /* Alg: "matmatmult" -- C = At*B */
1852:     Mat_SeqAIJ          *c = (Mat_SeqAIJ*)C->data;
1853:     Mat_MatTransMatMult *atb = c->atb;
1854:     Mat                 At = atb->At;

1856:     if (atb->updateAt) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
1857:       MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);
1858:     }
1859:     MatMatMultNumeric_SeqAIJ_SeqAIJ(At,B,C);
1860:     atb->updateAt = PETSC_TRUE;
1861:   }
1862:   return(0);
1863: }

1865: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1866: {
1868:   Mat_Product    *product = C->product;
1869:   Mat            A=product->A,B=product->B;
1870:   PetscReal      fill=product->fill;

1873:   MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);

1875:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
1876:   return(0);
1877: }

1879: /* --------------------------------------------------------------- */
1880: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
1881: {
1883:   Mat_Product    *product = C->product;
1884:   PetscInt       alg = 0; /* default algorithm */
1885:   PetscBool      flg = PETSC_FALSE;
1886: #if !defined(PETSC_HAVE_HYPRE)
1887:   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
1888:   PetscInt       nalg = 7;
1889: #else
1890:   const char     *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
1891:   PetscInt       nalg = 8;
1892: #endif

1895:   /* Set default algorithm */
1896:   PetscStrcmp(C->product->alg,"default",&flg);
1897:   if (flg) {
1898:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1899:   }

1901:   /* Get runtime option */
1902:   if (product->api_user) {
1903:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
1904:     PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);
1905:     PetscOptionsEnd();
1906:   } else {
1907:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
1908:     PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);
1909:     PetscOptionsEnd();
1910:   }
1911:   if (flg) {
1912:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1913:   }

1915:   C->ops->productsymbolic = MatProductSymbolic_AB;
1916:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
1917:   return(0);
1918: }

1920: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
1921: {
1923:   Mat_Product    *product = C->product;
1924:   PetscInt       alg = 0; /* default algorithm */
1925:   PetscBool      flg = PETSC_FALSE;
1926:   const char     *algTypes[2] = {"at*b","outerproduct"};
1927:   PetscInt       nalg = 2;

1930:   /* Set default algorithm */
1931:   PetscStrcmp(product->alg,"default",&flg);
1932:   if (flg) {
1933:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1934:   }

1936:   /* Get runtime option */
1937:   if (product->api_user) {
1938:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");
1939:     PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
1940:     PetscOptionsEnd();
1941:   } else {
1942:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");
1943:     PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);
1944:     PetscOptionsEnd();
1945:   }
1946:   if (flg) {
1947:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1948:   }

1950:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
1951:   return(0);
1952: }

1954: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
1955: {
1957:   Mat_Product    *product = C->product;
1958:   PetscInt       alg = 0; /* default algorithm */
1959:   PetscBool      flg = PETSC_FALSE;
1960:   const char     *algTypes[2] = {"default","color"};
1961:   PetscInt       nalg = 2;

1964:   /* Set default algorithm */
1965:   PetscStrcmp(C->product->alg,"default",&flg);
1966:   if (!flg) {
1967:     alg = 1;
1968:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1969:   }

1971:   /* Get runtime option */
1972:   if (product->api_user) {
1973:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
1974:     PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
1975:     PetscOptionsEnd();
1976:   } else {
1977:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
1978:     PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
1979:     PetscOptionsEnd();
1980:   }
1981:   if (flg) {
1982:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1983:   }

1985:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1986:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1987:   return(0);
1988: }

1990: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
1991: {
1993:   Mat_Product    *product = C->product;
1994:   PetscBool      flg = PETSC_FALSE;
1995:   PetscInt       alg = 0; /* default algorithm -- alg=1 should be default!!! */
1996: #if !defined(PETSC_HAVE_HYPRE)
1997:   const char      *algTypes[2] = {"scalable","rap"};
1998:   PetscInt        nalg = 2;
1999: #else
2000:   const char      *algTypes[3] = {"scalable","rap","hypre"};
2001:   PetscInt        nalg = 3;
2002: #endif

2005:   /* Set default algorithm */
2006:   PetscStrcmp(product->alg,"default",&flg);
2007:   if (flg) {
2008:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2009:   }

2011:   /* Get runtime option */
2012:   if (product->api_user) {
2013:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
2014:     PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2015:     PetscOptionsEnd();
2016:   } else {
2017:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2018:     PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2019:     PetscOptionsEnd();
2020:   }
2021:   if (flg) {
2022:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2023:   }

2025:   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
2026:   return(0);
2027: }

2029: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
2030: {
2032:   Mat_Product    *product = C->product;
2033:   PetscBool      flg = PETSC_FALSE;
2034:   PetscInt       alg = 0; /* default algorithm */
2035:   const char     *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
2036:   PetscInt        nalg = 3;

2039:   /* Set default algorithm */
2040:   PetscStrcmp(product->alg,"default",&flg);
2041:   if (flg) {
2042:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2043:   }

2045:   /* Get runtime option */
2046:   if (product->api_user) {
2047:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");
2048:     PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);
2049:     PetscOptionsEnd();
2050:   } else {
2051:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");
2052:     PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);
2053:     PetscOptionsEnd();
2054:   }
2055:   if (flg) {
2056:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2057:   }

2059:   C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
2060:   return(0);
2061: }

2063: /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
2064: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
2065: {
2067:   Mat_Product    *product = C->product;
2068:   PetscInt       alg = 0; /* default algorithm */
2069:   PetscBool      flg = PETSC_FALSE;
2070:   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
2071:   PetscInt       nalg = 7;

2074:   /* Set default algorithm */
2075:   PetscStrcmp(product->alg,"default",&flg);
2076:   if (flg) {
2077:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2078:   }

2080:   /* Get runtime option */
2081:   if (product->api_user) {
2082:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");
2083:     PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2084:     PetscOptionsEnd();
2085:   } else {
2086:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");
2087:     PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);
2088:     PetscOptionsEnd();
2089:   }
2090:   if (flg) {
2091:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2092:   }

2094:   C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
2095:   C->ops->productsymbolic    = MatProductSymbolic_ABC;
2096:   return(0);
2097: }

2099: PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
2100: {
2102:   Mat_Product    *product = C->product;

2105:   switch (product->type) {
2106:   case MATPRODUCT_AB:
2107:     MatProductSetFromOptions_SeqAIJ_AB(C);
2108:     break;
2109:   case MATPRODUCT_AtB:
2110:     MatProductSetFromOptions_SeqAIJ_AtB(C);
2111:     break;
2112:   case MATPRODUCT_ABt:
2113:     MatProductSetFromOptions_SeqAIJ_ABt(C);
2114:     break;
2115:   case MATPRODUCT_PtAP:
2116:     MatProductSetFromOptions_SeqAIJ_PtAP(C);
2117:     break;
2118:   case MATPRODUCT_RARt:
2119:     MatProductSetFromOptions_SeqAIJ_RARt(C);
2120:     break;
2121:   case MATPRODUCT_ABC:
2122:     MatProductSetFromOptions_SeqAIJ_ABC(C);
2123:     break;
2124:   default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type is not supported");
2125:   }
2126:   return(0);
2127: }