Actual source code: sell.c

petsc-3.9.0 2018-04-07
Report Typos and Errors

  2: /*
  3:   Defines the basic matrix operations for the SELL matrix storage format.
  4: */
  5:  #include <../src/mat/impls/sell/seq/sell.h>
  6:  #include <petscblaslapack.h>
  7:  #include <petsc/private/kernels/blocktranspose.h>
  8: #if defined(PETSC_HAVE_IMMINTRIN_H)
  9:   #include <immintrin.h>

 11:   #if !defined(_MM_SCALE_8)
 12:   #define _MM_SCALE_8    8
 13:   #endif

 15:   #if defined(__AVX512F__)
 16:   /* these do not work
 17:    vec_idx  = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
 18:    vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
 19:   */
 20:     #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
 21:     /* if the mask bit is set, copy from acolidx, otherwise from vec_idx */ \
 22:     vec_idx  = _mm256_loadu_si256((__m256i const*)acolidx); \
 23:     vec_vals = _mm512_loadu_pd(aval); \
 24:     vec_x    = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
 25:     vec_y    = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
 26:   #elif defined(__AVX2__)
 27:     #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
 28:     vec_vals = _mm256_loadu_pd(aval); \
 29:     vec_idx  = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
 30:     vec_x    = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
 31:     vec_y    = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
 32:   #endif
 33: #endif  /* PETSC_HAVE_IMMINTRIN_H */

 35: /*@C
 36:  MatSeqSELLSetPreallocation - For good matrix assembly performance
 37:  the user should preallocate the matrix storage by setting the parameter nz
 38:  (or the array nnz).  By setting these parameters accurately, performance
 39:  during matrix assembly can be increased significantly.

 41:  Collective on MPI_Comm

 43:  Input Parameters:
 44:  +  B - The matrix
 45:  .  nz - number of nonzeros per row (same for all rows)
 46:  -  nnz - array containing the number of nonzeros in the various rows
 47:  (possibly different for each row) or NULL

 49:  Notes:
 50:  If nnz is given then nz is ignored.

 52:  Specify the preallocated storage with either nz or nnz (not both).
 53:  Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
 54:  allocation.  For large problems you MUST preallocate memory or you
 55:  will get TERRIBLE performance, see the users' manual chapter on matrices.

 57:  You can call MatGetInfo() to get information on how effective the preallocation was;
 58:  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
 59:  You can also run with the option -info and look for messages with the string
 60:  malloc in them to see if additional memory allocation was needed.

 62:  Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
 63:  entries or columns indices.

 65:  The maximum number of nonzeos in any row should be as accuate as possible.
 66:  If it is underesitmated, you will get bad performance due to reallocation
 67:  (MatSeqXSELLReallocateSELL).

 69:  Level: intermediate

 71:  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()

 73:  @*/
 74: PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
 75: {

 81:   PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));
 82:   return(0);
 83: }

 85: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
 86: {
 87:   Mat_SeqSELL    *b;
 88:   PetscInt       i,j,totalslices;
 89:   PetscBool      skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;

 93:   if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
 94:   if (maxallocrow == MAT_SKIP_ALLOCATION) {
 95:     skipallocation = PETSC_TRUE;
 96:     maxallocrow    = 0;
 97:   }

 99:   PetscLayoutSetUp(B->rmap);
100:   PetscLayoutSetUp(B->cmap);

102:   /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
103:   if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
104:   if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
105:   if (rlen) {
106:     for (i=0; i<B->rmap->n; i++) {
107:       if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
108:       if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
109:     }
110:   }

112:   B->preallocated = PETSC_TRUE;

114:   b = (Mat_SeqSELL*)B->data;

116:   totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
117:   b->totalslices = totalslices;
118:   if (!skipallocation) {
119:     if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);

121:     if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
122:       PetscMalloc1(totalslices+1,&b->sliidx);
123:       PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));
124:     }
125:     if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
126:       if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
127:       else if (maxallocrow < 0) maxallocrow = 1;
128:       for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
129:     } else {
130:       maxallocrow = 0;
131:       b->sliidx[0] = 0;
132:       for (i=1; i<totalslices; i++) {
133:         b->sliidx[i] = 0;
134:         for (j=0;j<8;j++) {
135:           b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
136:         }
137:         maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
138:         b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
139:       }
140:       /* last slice */
141:       b->sliidx[totalslices] = 0;
142:       for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
143:       maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
144:       b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
145:     }

147:     /* allocate space for val, colidx, rlen */
148:     /* FIXME: should B's old memory be unlogged? */
149:     MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);
150:     /* FIXME: assuming an element of the bit array takes 8 bits */
151:     PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);
152:     PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));
153:     /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
154:     PetscCalloc1(8*totalslices,&b->rlen);
155:     PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));

157:     b->singlemalloc = PETSC_TRUE;
158:     b->free_val     = PETSC_TRUE;
159:     b->free_colidx  = PETSC_TRUE;
160:   } else {
161:     b->free_val    = PETSC_FALSE;
162:     b->free_colidx = PETSC_FALSE;
163:   }

165:   b->nz               = 0;
166:   b->maxallocrow      = maxallocrow;
167:   b->rlenmax          = maxallocrow;
168:   b->maxallocmat      = b->sliidx[totalslices];
169:   B->info.nz_unneeded = (double)b->maxallocmat;
170:   if (realalloc) {
171:     MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
172:   }
173:   return(0);
174: }

176: PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
177: {
178:   Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
179:   PetscInt    shift;

182:   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
183:   if (nz) *nz = a->rlen[row];
184:   shift = a->sliidx[row>>3]+(row&0x07);
185:   if (!a->getrowcols) {

188:     PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);
189:   }
190:   if (idx) {
191:     PetscInt j;
192:     for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
193:     *idx = a->getrowcols;
194:   }
195:   if (v) {
196:     PetscInt j;
197:     for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
198:     *v = a->getrowvals;
199:   }
200:   return(0);
201: }

203: PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
204: {
206:   return(0);
207: }

209: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
210: {
211:   Mat            B;
212:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
213:   PetscInt       i;

217:   if (reuse == MAT_REUSE_MATRIX) {
218:     B    = *newmat;
219:     MatZeroEntries(B);
220:   } else {
221:     MatCreate(PetscObjectComm((PetscObject)A),&B);
222:     MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
223:     MatSetType(B,MATSEQAIJ);
224:     MatSeqAIJSetPreallocation(B,0,a->rlen);
225:   }

227:   for (i=0; i<A->rmap->n; i++) {
228:     PetscInt    nz,*cols;
229:     PetscScalar *vals;

231:     MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);
232:     MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);
233:     MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);
234:   }

236:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
237:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
238:   B->rmap->bs = A->rmap->bs;

240:   if (reuse == MAT_INPLACE_MATRIX) {
241:     MatHeaderReplace(A,&B);
242:   } else {
243:     *newmat = B;
244:   }
245:   return(0);
246: }

248:  #include <../src/mat/impls/aij/seq/aij.h>

250: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
251: {
252:   Mat               B;
253:   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
254:   PetscInt          *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
255:   const PetscInt    *cols;
256:   const PetscScalar *vals;
257:   PetscErrorCode    ierr;

260:   if (A->rmap->bs > 1) {
261:     MatConvert_Basic(A,newtype,reuse,newmat);
262:     return(0);
263:   }

265:   if (reuse == MAT_REUSE_MATRIX) {
266:     B = *newmat;
267:   } else {
268:     /* Can we just use ilen? */
269:     PetscMalloc1(m,&rowlengths);
270:     for (i=0; i<m; i++) {
271:       rowlengths[i] = ai[i+1] - ai[i];
272:     }

274:     MatCreate(PetscObjectComm((PetscObject)A),&B);
275:     MatSetSizes(B,m,n,m,n);
276:     MatSetType(B,MATSEQSELL);
277:     MatSeqSELLSetPreallocation(B,0,rowlengths);
278:     PetscFree(rowlengths);
279:   }

281:   for (row=0; row<m; row++) {
282:     MatGetRow(A,row,&ncols,&cols,&vals);
283:     MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);
284:     MatRestoreRow(A,row,&ncols,&cols,&vals);
285:   }
286:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
287:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
288:   B->rmap->bs = A->rmap->bs;

290:   if (reuse == MAT_INPLACE_MATRIX) {
291:     MatHeaderReplace(A,&B);
292:   } else {
293:     *newmat = B;
294:   }
295:   return(0);
296: }

298: PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
299: {
300:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
301:   PetscScalar       *y;
302:   const PetscScalar *x;
303:   const MatScalar   *aval=a->val;
304:   PetscInt          totalslices=a->totalslices;
305:   const PetscInt    *acolidx=a->colidx;
306:   PetscInt          i,j;
307:   PetscErrorCode    ierr;
308: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
309:   __m512d           vec_x,vec_y,vec_vals;
310:   __m256i           vec_idx;
311:   __mmask8          mask;
312:   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
313:   __m256i           vec_idx2,vec_idx3,vec_idx4;
314: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
315:   __m128i           vec_idx;
316:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
317:   MatScalar         yval;
318:   PetscInt          r,rows_left,row,nnz_in_row;
319: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
320:   __m128d           vec_x_tmp;
321:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
322:   MatScalar         yval;
323:   PetscInt          r,rows_left,row,nnz_in_row;
324: #else
325:   PetscScalar       sum[8];
326: #endif

328: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
329: #pragma disjoint(*x,*y,*aval)
330: #endif

333:   VecGetArrayRead(xx,&x);
334:   VecGetArray(yy,&y);
335: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
336:   for (i=0; i<totalslices; i++) { /* loop over slices */
337:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
338:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);

340:     vec_y  = _mm512_setzero_pd();
341:     vec_y2 = _mm512_setzero_pd();
342:     vec_y3 = _mm512_setzero_pd();
343:     vec_y4 = _mm512_setzero_pd();

345:     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
346:     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
347:     case 3:
348:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
349:       acolidx += 8; aval += 8;
350:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
351:       acolidx += 8; aval += 8;
352:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
353:       acolidx += 8; aval += 8;
354:       j += 3;
355:       break;
356:     case 2:
357:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
358:       acolidx += 8; aval += 8;
359:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
360:       acolidx += 8; aval += 8;
361:       j += 2;
362:       break;
363:     case 1:
364:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
365:       acolidx += 8; aval += 8;
366:       j += 1;
367:       break;
368:     }
369:     #pragma novector
370:     for (; j<(a->sliidx[i+1]>>3); j+=4) {
371:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
372:       acolidx += 8; aval += 8;
373:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
374:       acolidx += 8; aval += 8;
375:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
376:       acolidx += 8; aval += 8;
377:       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
378:       acolidx += 8; aval += 8;
379:     }

381:     vec_y = _mm512_add_pd(vec_y,vec_y2);
382:     vec_y = _mm512_add_pd(vec_y,vec_y3);
383:     vec_y = _mm512_add_pd(vec_y,vec_y4);
384:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
385:       mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
386:       _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
387:     } else {
388:       _mm512_storeu_pd(&y[8*i],vec_y);
389:     }
390:   }
391: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
392:   for (i=0; i<totalslices; i++) { /* loop over full slices */
393:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
394:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);

396:     /* last slice may have padding rows. Don't use vectorization. */
397:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
398:       rows_left = A->rmap->n - 8*i;
399:       for (r=0; r<rows_left; ++r) {
400:         yval = (MatScalar)0;
401:         row = 8*i + r;
402:         nnz_in_row = a->rlen[row];
403:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
404:         y[row] = yval;
405:       }
406:       break;
407:     }

409:     vec_y  = _mm256_setzero_pd();
410:     vec_y2 = _mm256_setzero_pd();

412:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
413:     #pragma novector
414:     #pragma unroll(2)
415:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
416:       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
417:       aval += 4; acolidx += 4;
418:       AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
419:       aval += 4; acolidx += 4;
420:     }

422:     _mm256_storeu_pd(y+i*8,vec_y);
423:     _mm256_storeu_pd(y+i*8+4,vec_y2);
424:   }
425: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
426:   for (i=0; i<totalslices; i++) { /* loop over full slices */
427:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
428:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);

430:     vec_y  = _mm256_setzero_pd();
431:     vec_y2 = _mm256_setzero_pd();

433:     /* last slice may have padding rows. Don't use vectorization. */
434:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
435:       rows_left = A->rmap->n - 8*i;
436:       for (r=0; r<rows_left; ++r) {
437:         yval = (MatScalar)0;
438:         row = 8*i + r;
439:         nnz_in_row = a->rlen[row];
440:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
441:         y[row] = yval;
442:       }
443:       break;
444:     }

446:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
447:     #pragma novector
448:     #pragma unroll(2)
449:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
450:       vec_vals  = _mm256_loadu_pd(aval);
451:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
454:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
455:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
456:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
457:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
458:       aval     += 4;

460:       vec_vals  = _mm256_loadu_pd(aval);
461:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
464:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
465:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
466:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
467:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
468:       aval     += 4;
469:     }

471:     _mm256_storeu_pd(y + i*8,     vec_y);
472:     _mm256_storeu_pd(y + i*8 + 4, vec_y2);
473:   }
474: #else
475:   for (i=0; i<totalslices; i++) { /* loop over slices */
476:     for (j=0; j<8; j++) sum[j] = 0.0;
477:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
478:       sum[0] += aval[j] * x[acolidx[j]];
479:       sum[1] += aval[j+1] * x[acolidx[j+1]];
480:       sum[2] += aval[j+2] * x[acolidx[j+2]];
481:       sum[3] += aval[j+3] * x[acolidx[j+3]];
482:       sum[4] += aval[j+4] * x[acolidx[j+4]];
483:       sum[5] += aval[j+5] * x[acolidx[j+5]];
484:       sum[6] += aval[j+6] * x[acolidx[j+6]];
485:       sum[7] += aval[j+7] * x[acolidx[j+7]];
486:     }
487:     if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
488:       for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
489:     } else {
490:       for(j=0; j<8; j++) y[8*i+j] = sum[j];
491:     }
492:   }
493: #endif

495:   PetscLogFlops(2.0*a->nz-a->nonzerorowcnt); /* theoretical minimal FLOPs */
496:   VecRestoreArrayRead(xx,&x);
497:   VecRestoreArray(yy,&y);
498:   return(0);
499: }

501: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
502: PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
503: {
504:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
505:   PetscScalar       *y,*z;
506:   const PetscScalar *x;
507:   const MatScalar   *aval=a->val;
508:   PetscInt          totalslices=a->totalslices;
509:   const PetscInt    *acolidx=a->colidx;
510:   PetscInt          i,j;
511:   PetscErrorCode    ierr;
512: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
513:   __m512d           vec_x,vec_y,vec_vals;
514:   __m256i           vec_idx;
515:   __mmask8          mask;
516:   __m512d           vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
517:   __m256i           vec_idx2,vec_idx3,vec_idx4;
518: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
519:   __m128d           vec_x_tmp;
520:   __m256d           vec_x,vec_y,vec_y2,vec_vals;
521:   MatScalar         yval;
522:   PetscInt          r,row,nnz_in_row;
523: #else
524:   PetscScalar       sum[8];
525: #endif

527: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
528: #pragma disjoint(*x,*y,*aval)
529: #endif

532:   VecGetArrayRead(xx,&x);
533:   VecGetArrayPair(yy,zz,&y,&z);
534: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
535:   for (i=0; i<totalslices; i++) { /* loop over slices */
536:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
537:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);

539:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
540:       mask   = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
541:       vec_y  = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
542:     } else {
543:       vec_y  = _mm512_loadu_pd(&y[8*i]);
544:     }
545:     vec_y2 = _mm512_setzero_pd();
546:     vec_y3 = _mm512_setzero_pd();
547:     vec_y4 = _mm512_setzero_pd();

549:     j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
550:     switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
551:     case 3:
552:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
553:       acolidx += 8; aval += 8;
554:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
555:       acolidx += 8; aval += 8;
556:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
557:       acolidx += 8; aval += 8;
558:       j += 3;
559:       break;
560:     case 2:
561:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
562:       acolidx += 8; aval += 8;
563:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
564:       acolidx += 8; aval += 8;
565:       j += 2;
566:       break;
567:     case 1:
568:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
569:       acolidx += 8; aval += 8;
570:       j += 1;
571:       break;
572:     }
573:     #pragma novector
574:     for (; j<(a->sliidx[i+1]>>3); j+=4) {
575:       AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
576:       acolidx += 8; aval += 8;
577:       AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
578:       acolidx += 8; aval += 8;
579:       AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
580:       acolidx += 8; aval += 8;
581:       AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
582:       acolidx += 8; aval += 8;
583:     }

585:     vec_y = _mm512_add_pd(vec_y,vec_y2);
586:     vec_y = _mm512_add_pd(vec_y,vec_y3);
587:     vec_y = _mm512_add_pd(vec_y,vec_y4);
588:     if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
589:       _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
590:     } else {
591:       _mm512_storeu_pd(&z[8*i],vec_y);
592:     }
593:   }
594: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
595:   for (i=0; i<totalslices; i++) { /* loop over full slices */
596:     PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
597:     PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);

599:     /* last slice may have padding rows. Don't use vectorization. */
600:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
601:       for (r=0; r<(A->rmap->n & 0x07); ++r) {
602:         row        = 8*i + r;
603:         yval       = (MatScalar)0.0;
604:         nnz_in_row = a->rlen[row];
605:         for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
606:         z[row] = y[row] + yval;
607:       }
608:       break;
609:     }

611:     vec_y  = _mm256_loadu_pd(y+8*i);
612:     vec_y2 = _mm256_loadu_pd(y+8*i+4);

614:     /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
615:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
616:       vec_vals  = _mm256_loadu_pd(aval);
617:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
620:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
621:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
622:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
623:       vec_y     = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
624:       aval     += 4;

626:       vec_vals  = _mm256_loadu_pd(aval);
627:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
630:       vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
631:       vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
632:       vec_x     = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
633:       vec_y2    = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
634:       aval     += 4;
635:     }

637:     _mm256_storeu_pd(z+i*8,vec_y);
638:     _mm256_storeu_pd(z+i*8+4,vec_y2);
639:   }
640: #else
641:   for (i=0; i<totalslices; i++) { /* loop over slices */
642:     for (j=0; j<8; j++) sum[j] = 0.0;
643:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
644:       sum[0] += aval[j] * x[acolidx[j]];
645:       sum[1] += aval[j+1] * x[acolidx[j+1]];
646:       sum[2] += aval[j+2] * x[acolidx[j+2]];
647:       sum[3] += aval[j+3] * x[acolidx[j+3]];
648:       sum[4] += aval[j+4] * x[acolidx[j+4]];
649:       sum[5] += aval[j+5] * x[acolidx[j+5]];
650:       sum[6] += aval[j+6] * x[acolidx[j+6]];
651:       sum[7] += aval[j+7] * x[acolidx[j+7]];
652:     }
653:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
654:       for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
655:     } else {
656:       for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
657:     }
658:   }
659: #endif

661:   PetscLogFlops(2.0*a->nz);
662:   VecRestoreArrayRead(xx,&x);
663:   VecRestoreArrayPair(yy,zz,&y,&z);
664:   return(0);
665: }

667: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
668: {
669:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
670:   PetscScalar       *y;
671:   const PetscScalar *x;
672:   const MatScalar   *aval=a->val;
673:   const PetscInt    *acolidx=a->colidx;
674:   PetscInt          i,j,r,row,nnz_in_row,totalslices=a->totalslices;
675:   PetscErrorCode    ierr;

677: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
678: #pragma disjoint(*x,*y,*aval)
679: #endif

682:   if (A->symmetric) {
683:     MatMultAdd_SeqSELL(A,xx,zz,yy);
684:     return(0);
685:   }
686:   if (zz != yy) { VecCopy(zz,yy); }
687:   VecGetArrayRead(xx,&x);
688:   VecGetArray(yy,&y);
689:   for (i=0; i<a->totalslices; i++) { /* loop over slices */
690:     if (i == totalslices-1 && (A->rmap->n & 0x07)) {
691:       for (r=0; r<(A->rmap->n & 0x07); ++r) {
692:         row        = 8*i + r;
693:         nnz_in_row = a->rlen[row];
694:         for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
695:       }
696:       break;
697:     }
698:     for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
699:       y[acolidx[j]]   += aval[j] * x[8*i];
700:       y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
701:       y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
702:       y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
703:       y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
704:       y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
705:       y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
706:       y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
707:     }
708:   }
709:   PetscLogFlops(2.0*a->sliidx[a->totalslices]);
710:   VecRestoreArrayRead(xx,&x);
711:   VecRestoreArray(yy,&y);
712:   return(0);
713: }

715: PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
716: {

720:   if (A->symmetric) {
721:     MatMult_SeqSELL(A,xx,yy);
722:   } else {
723:     VecSet(yy,0.0);
724:     MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);
725:   }
726:   return(0);
727: }

729: /*
730:      Checks for missing diagonals
731: */
732: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool  *missing,PetscInt *d)
733: {
734:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
735:   PetscInt    *diag,i;

738:   *missing = PETSC_FALSE;
739:   if (A->rmap->n > 0 && !(a->colidx)) {
740:     *missing = PETSC_TRUE;
741:     if (d) *d = 0;
742:     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
743:   } else {
744:     diag = a->diag;
745:     for (i=0; i<A->rmap->n; i++) {
746:       if (diag[i] == -1) {
747:         *missing = PETSC_TRUE;
748:         if (d) *d = i;
749:         PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
750:         break;
751:       }
752:     }
753:   }
754:   return(0);
755: }

757: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
758: {
759:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
760:   PetscInt       i,j,m=A->rmap->n,shift;

764:   if (!a->diag) {
765:     PetscMalloc1(m,&a->diag);
766:     PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
767:     a->free_diag = PETSC_TRUE;
768:   }
769:   for (i=0; i<m; i++) { /* loop over rows */
770:     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
771:     a->diag[i] = -1;
772:     for (j=0; j<a->rlen[i]; j++) {
773:       if (a->colidx[shift+j*8] == i) {
774:         a->diag[i] = shift+j*8;
775:         break;
776:       }
777:     }
778:   }
779:   return(0);
780: }

782: /*
783:   Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
784: */
785: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
786: {
787:   Mat_SeqSELL    *a=(Mat_SeqSELL*) A->data;
788:   PetscInt       i,*diag,m = A->rmap->n;
789:   MatScalar      *val = a->val;
790:   PetscScalar    *idiag,*mdiag;

794:   if (a->idiagvalid) return(0);
795:   MatMarkDiagonal_SeqSELL(A);
796:   diag = a->diag;
797:   if (!a->idiag) {
798:     PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);
799:     PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));
800:     val  = a->val;
801:   }
802:   mdiag = a->mdiag;
803:   idiag = a->idiag;

805:   if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
806:     for (i=0; i<m; i++) {
807:       mdiag[i] = val[diag[i]];
808:       if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
809:         if (PetscRealPart(fshift)) {
810:           PetscInfo1(A,"Zero diagonal on row %D\n",i);
811:           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
812:           A->factorerror_zeropivot_value = 0.0;
813:           A->factorerror_zeropivot_row   = i;
814:         } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
815:       }
816:       idiag[i] = 1.0/val[diag[i]];
817:     }
818:     PetscLogFlops(m);
819:   } else {
820:     for (i=0; i<m; i++) {
821:       mdiag[i] = val[diag[i]];
822:       idiag[i] = omega/(fshift + val[diag[i]]);
823:     }
824:     PetscLogFlops(2.0*m);
825:   }
826:   a->idiagvalid = PETSC_TRUE;
827:   return(0);
828: }

830: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
831: {
832:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;

836:   PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));
837:   MatSeqSELLInvalidateDiagonal(A);
838:   return(0);
839: }

841: PetscErrorCode MatDestroy_SeqSELL(Mat A)
842: {
843:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;

847: #if defined(PETSC_USE_LOG)
848:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
849: #endif
850:   MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);
851:   ISDestroy(&a->row);
852:   ISDestroy(&a->col);
853:   PetscFree(a->diag);
854:   PetscFree(a->rlen);
855:   PetscFree(a->sliidx);
856:   PetscFree3(a->idiag,a->mdiag,a->ssor_work);
857:   PetscFree(a->solve_work);
858:   ISDestroy(&a->icol);
859:   PetscFree(a->saved_values);
860:   PetscFree2(a->getrowcols,a->getrowvals);

862:   PetscFree(A->data);

864:   PetscObjectChangeTypeName((PetscObject)A,0);
865:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
866:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
867: #if defined(PETSC_HAVE_ELEMENTAL)
868: #endif
869:   PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);
870:   return(0);
871: }

873: PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
874: {
875:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;

879:   switch (op) {
880:   case MAT_ROW_ORIENTED:
881:     a->roworiented = flg;
882:     break;
883:   case MAT_KEEP_NONZERO_PATTERN:
884:     a->keepnonzeropattern = flg;
885:     break;
886:   case MAT_NEW_NONZERO_LOCATIONS:
887:     a->nonew = (flg ? 0 : 1);
888:     break;
889:   case MAT_NEW_NONZERO_LOCATION_ERR:
890:     a->nonew = (flg ? -1 : 0);
891:     break;
892:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893:     a->nonew = (flg ? -2 : 0);
894:     break;
895:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
896:     a->nounused = (flg ? -1 : 0);
897:     break;
898:   case MAT_NEW_DIAGONALS:
899:   case MAT_IGNORE_OFF_PROC_ENTRIES:
900:   case MAT_USE_HASH_TABLE:
901:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
902:     break;
903:   case MAT_SPD:
904:   case MAT_SYMMETRIC:
905:   case MAT_STRUCTURALLY_SYMMETRIC:
906:   case MAT_HERMITIAN:
907:   case MAT_SYMMETRY_ETERNAL:
908:     /* These options are handled directly by MatSetOption() */
909:     break;
910:   default:
911:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
912:   }
913:   return(0);
914: }

916: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
917: {
918:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
919:   PetscInt       i,j,n,shift;
920:   PetscScalar    *x,zero=0.0;

924:   VecGetLocalSize(v,&n);
925:   if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");

927:   if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
928:     PetscInt *diag=a->diag;
929:     VecGetArray(v,&x);
930:     for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
931:     VecRestoreArray(v,&x);
932:     return(0);
933:   }

935:   VecSet(v,zero);
936:   VecGetArray(v,&x);
937:   for (i=0; i<n; i++) { /* loop over rows */
938:     shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
939:     x[i] = 0;
940:     for (j=0; j<a->rlen[i]; j++) {
941:       if (a->colidx[shift+j*8] == i) {
942:         x[i] = a->val[shift+j*8];
943:         break;
944:       }
945:     }
946:   }
947:   VecRestoreArray(v,&x);
948:   return(0);
949: }

951: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
952: {
953:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
954:   const PetscScalar *l,*r;
955:   PetscInt          i,j,m,n,row;
956:   PetscErrorCode    ierr;

959:   if (ll) {
960:     /* The local size is used so that VecMPI can be passed to this routine
961:        by MatDiagonalScale_MPISELL */
962:     VecGetLocalSize(ll,&m);
963:     if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
964:     VecGetArrayRead(ll,&l);
965:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
966:       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
967:         a->val[j] *= l[8*i+row];
968:       }
969:     }
970:     VecRestoreArrayRead(ll,&l);
971:     PetscLogFlops(a->nz);
972:   }
973:   if (rr) {
974:     VecGetLocalSize(rr,&n);
975:     if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
976:     VecGetArrayRead(rr,&r);
977:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
978:       for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
979:         a->val[j] *= r[a->colidx[j]];
980:       }
981:     }
982:     VecRestoreArrayRead(rr,&r);
983:     PetscLogFlops(a->nz);
984:   }
985:   MatSeqSELLInvalidateDiagonal(A);
986:   return(0);
987: }

989: extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

991: PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
992: {
993:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
994:   PetscInt    *cp,i,k,low,high,t,row,col,l;
995:   PetscInt    shift;
996:   MatScalar   *vp;

999:   for (k=0; k<m; k++) { /* loop over requested rows */
1000:     row = im[k];
1001:     if (row<0) continue;
1002: #if defined(PETSC_USE_DEBUG)
1003:     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1004: #endif
1005:     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1006:     cp = a->colidx+shift; /* pointer to the row */
1007:     vp = a->val+shift; /* pointer to the row */
1008:     for (l=0; l<n; l++) { /* loop over requested columns */
1009:       col = in[l];
1010:       if (col<0) continue;
1011: #if defined(PETSC_USE_DEBUG)
1012:       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1013: #endif
1014:       high = a->rlen[row]; low = 0; /* assume unsorted */
1015:       while (high-low > 5) {
1016:         t = (low+high)/2;
1017:         if (*(cp+t*8) > col) high = t;
1018:         else low = t;
1019:       }
1020:       for (i=low; i<high; i++) {
1021:         if (*(cp+8*i) > col) break;
1022:         if (*(cp+8*i) == col) {
1023:           *v++ = *(vp+8*i);
1024:           goto finished;
1025:         }
1026:       }
1027:       *v++ = 0.0;
1028:     finished:;
1029:     }
1030:   }
1031:   return(0);
1032: }

1034: PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1035: {
1036:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1037:   PetscInt          i,j,m=A->rmap->n,shift;
1038:   const char        *name;
1039:   PetscViewerFormat format;
1040:   PetscErrorCode    ierr;

1043:   PetscViewerGetFormat(viewer,&format);
1044:   if (format == PETSC_VIEWER_ASCII_MATLAB) {
1045:     PetscInt nofinalvalue = 0;
1046:     /*
1047:     if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1048:       nofinalvalue = 1;
1049:     }
1050:     */
1051:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1052:     PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);
1053:     PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);
1054: #if defined(PETSC_USE_COMPLEX)
1055:     PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);
1056: #else
1057:     PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);
1058: #endif
1059:     PetscViewerASCIIPrintf(viewer,"zzz = [\n");

1061:     for (i=0; i<m; i++) {
1062:       shift = a->sliidx[i>>3]+(i&0x07);
1063:       for (j=0; j<a->rlen[i]; j++) {
1064: #if defined(PETSC_USE_COMPLEX)
1065:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1066: #else
1067:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);
1068: #endif
1069:       }
1070:     }
1071:     /*
1072:     if (nofinalvalue) {
1073: #if defined(PETSC_USE_COMPLEX)
1074:       PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e %18.16e\n",m,A->cmap->n,0.,0.);
1075: #else
1076:       PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",m,A->cmap->n,0.0);
1077: #endif
1078:     }
1079:     */
1080:     PetscObjectGetName((PetscObject)A,&name);
1081:     PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);
1082:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1083:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1084:     return(0);
1085:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1086:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1087:     for (i=0; i<m; i++) {
1088:       PetscViewerASCIIPrintf(viewer,"row %D:",i);
1089:       shift = a->sliidx[i>>3]+(i&0x07);
1090:       for (j=0; j<a->rlen[i]; j++) {
1091: #if defined(PETSC_USE_COMPLEX)
1092:         if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1093:           PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1094:         } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1095:           PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1096:         } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1097:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1098:         }
1099: #else
1100:         if (a->val[shift+8*j] != 0.0) {PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);}
1101: #endif
1102:       }
1103:       PetscViewerASCIIPrintf(viewer,"\n");
1104:     }
1105:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1106:   } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1107:     PetscInt    cnt=0,jcnt;
1108:     PetscScalar value;
1109: #if defined(PETSC_USE_COMPLEX)
1110:     PetscBool   realonly=PETSC_TRUE;
1111:     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1112:       if (PetscImaginaryPart(a->val[i]) != 0.0) {
1113:         realonly = PETSC_FALSE;
1114:         break;
1115:       }
1116:     }
1117: #endif

1119:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1120:     for (i=0; i<m; i++) {
1121:       jcnt = 0;
1122:       shift = a->sliidx[i>>3]+(i&0x07);
1123:       for (j=0; j<A->cmap->n; j++) {
1124:         if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1125:           value = a->val[cnt++];
1126:           jcnt++;
1127:         } else {
1128:           value = 0.0;
1129:         }
1130: #if defined(PETSC_USE_COMPLEX)
1131:         if (realonly) {
1132:           PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));
1133:         } else {
1134:           PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));
1135:         }
1136: #else
1137:         PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);
1138: #endif
1139:       }
1140:       PetscViewerASCIIPrintf(viewer,"\n");
1141:     }
1142:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1143:   } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1144:     PetscInt fshift=1;
1145:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1146: #if defined(PETSC_USE_COMPLEX)
1147:     PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");
1148: #else
1149:     PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");
1150: #endif
1151:     PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);
1152:     for (i=0; i<m; i++) {
1153:       shift = a->sliidx[i>>3]+(i&0x07);
1154:       for (j=0; j<a->rlen[i]; j++) {
1155: #if defined(PETSC_USE_COMPLEX)
1156:         PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1157: #else
1158:         PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);
1159: #endif
1160:       }
1161:     }
1162:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1163:   } else if (format == PETSC_VIEWER_NATIVE) {
1164:     for (i=0; i<a->totalslices; i++) { /* loop over slices */
1165:       PetscInt row;
1166:       PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);
1167:       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1168: #if defined(PETSC_USE_COMPLEX)
1169:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1170:           PetscViewerASCIIPrintf(viewer,"  %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1171:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1172:           PetscViewerASCIIPrintf(viewer,"  %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));
1173:         } else {
1174:           PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));
1175:         }
1176: #else
1177:         PetscViewerASCIIPrintf(viewer,"  %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);
1178: #endif
1179:       }
1180:     }
1181:   } else {
1182:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1183:     if (A->factortype) {
1184:       for (i=0; i<m; i++) {
1185:         shift = a->sliidx[i>>3]+(i&0x07);
1186:         PetscViewerASCIIPrintf(viewer,"row %D:",i);
1187:         /* L part */
1188:         for (j=shift; j<a->diag[i]; j+=8) {
1189: #if defined(PETSC_USE_COMPLEX)
1190:           if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1191:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1192:           } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1193:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1194:           } else {
1195:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1196:           }
1197: #else
1198:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1199: #endif
1200:         }
1201:         /* diagonal */
1202:         j = a->diag[i];
1203: #if defined(PETSC_USE_COMPLEX)
1204:         if (PetscImaginaryPart(a->val[j]) > 0.0) {
1205:           PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));
1206:         } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1207:           PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));
1208:         } else {
1209:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));
1210:         }
1211: #else
1212:         PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));
1213: #endif

1215:         /* U part */
1216:         for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1217: #if defined(PETSC_USE_COMPLEX)
1218:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1219:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1220:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1221:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1222:           } else {
1223:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1224:           }
1225: #else
1226:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1227: #endif
1228:         }
1229:         PetscViewerASCIIPrintf(viewer,"\n");
1230:       }
1231:     } else {
1232:       for (i=0; i<m; i++) {
1233:         shift = a->sliidx[i>>3]+(i&0x07);
1234:         PetscViewerASCIIPrintf(viewer,"row %D:",i);
1235:         for (j=0; j<a->rlen[i]; j++) {
1236: #if defined(PETSC_USE_COMPLEX)
1237:           if (PetscImaginaryPart(a->val[j]) > 0.0) {
1238:             PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1239:           } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1240:             PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1241:           } else {
1242:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1243:           }
1244: #else
1245:           PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);
1246: #endif
1247:         }
1248:         PetscViewerASCIIPrintf(viewer,"\n");
1249:       }
1250:     }
1251:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1252:   }
1253:   PetscViewerFlush(viewer);
1254:   return(0);
1255: }

1257:  #include <petscdraw.h>
1258: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1259: {
1260:   Mat               A=(Mat)Aa;
1261:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1262:   PetscInt          i,j,m=A->rmap->n,shift;
1263:   int               color;
1264:   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1265:   PetscViewer       viewer;
1266:   PetscViewerFormat format;
1267:   PetscErrorCode    ierr;

1270:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1271:   PetscViewerGetFormat(viewer,&format);
1272:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1274:   /* loop over matrix elements drawing boxes */

1276:   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1277:     PetscDrawCollectiveBegin(draw);
1278:     /* Blue for negative, Cyan for zero and  Red for positive */
1279:     color = PETSC_DRAW_BLUE;
1280:     for (i=0; i<m; i++) {
1281:       shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1282:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1283:       for (j=0; j<a->rlen[i]; j++) {
1284:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1285:         if (PetscRealPart(a->val[shift+8*j]) >=  0.) continue;
1286:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1287:       }
1288:     }
1289:     color = PETSC_DRAW_CYAN;
1290:     for (i=0; i<m; i++) {
1291:       shift = a->sliidx[i>>3]+(i&0x07);
1292:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1293:       for (j=0; j<a->rlen[i]; j++) {
1294:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1295:         if (a->val[shift+8*j] !=  0.) continue;
1296:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1297:       }
1298:     }
1299:     color = PETSC_DRAW_RED;
1300:     for (i=0; i<m; i++) {
1301:       shift = a->sliidx[i>>3]+(i&0x07);
1302:       y_l = m - i - 1.0; y_r = y_l + 1.0;
1303:       for (j=0; j<a->rlen[i]; j++) {
1304:         x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1305:         if (PetscRealPart(a->val[shift+8*j]) <=  0.) continue;
1306:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1307:       }
1308:     }
1309:     PetscDrawCollectiveEnd(draw);
1310:   } else {
1311:     /* use contour shading to indicate magnitude of values */
1312:     /* first determine max of all nonzero values */
1313:     PetscReal minv=0.0,maxv=0.0;
1314:     PetscInt  count=0;
1315:     PetscDraw popup;
1316:     for (i=0; i<a->sliidx[a->totalslices]; i++) {
1317:       if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1318:     }
1319:     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1320:     PetscDrawGetPopup(draw,&popup);
1321:     PetscDrawScalePopup(popup,minv,maxv);

1323:     PetscDrawCollectiveBegin(draw);
1324:     for (i=0; i<m; i++) {
1325:       shift = a->sliidx[i>>3]+(i&0x07);
1326:       y_l = m - i - 1.0;
1327:       y_r = y_l + 1.0;
1328:       for (j=0; j<a->rlen[i]; j++) {
1329:         x_l = a->colidx[shift+j*8];
1330:         x_r = x_l + 1.0;
1331:         color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1332:         PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1333:         count++;
1334:       }
1335:     }
1336:     PetscDrawCollectiveEnd(draw);
1337:   }
1338:   return(0);
1339: }

1341:  #include <petscdraw.h>
1342: PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1343: {
1344:   PetscDraw      draw;
1345:   PetscReal      xr,yr,xl,yl,h,w;
1346:   PetscBool      isnull;

1350:   PetscViewerDrawGetDraw(viewer,0,&draw);
1351:   PetscDrawIsNull(draw,&isnull);
1352:   if (isnull) return(0);

1354:   xr   = A->cmap->n; yr  = A->rmap->n; h = yr/10.0; w = xr/10.0;
1355:   xr  += w;          yr += h;         xl = -w;     yl = -h;
1356:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1357:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1358:   PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);
1359:   PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1360:   PetscDrawSave(draw);
1361:   return(0);
1362: }

1364: PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1365: {
1366:   PetscBool      iascii,isbinary,isdraw;

1370:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1371:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1372:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1373:   if (iascii) {
1374:     MatView_SeqSELL_ASCII(A,viewer);
1375:   } else if (isbinary) {
1376:     /* MatView_SeqSELL_Binary(A,viewer); */
1377:   } else if (isdraw) {
1378:     MatView_SeqSELL_Draw(A,viewer);
1379:   }
1380:   return(0);
1381: }

1383: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1384: {
1385:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1386:   PetscInt       i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1387:   MatScalar      *vp;

1391:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1392:   /* To do: compress out the unused elements */
1393:   MatMarkDiagonal_SeqSELL(A);
1394:   PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);
1395:   PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
1396:   PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);
1397:   /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1398:   for (i=0; i<a->totalslices; ++i) {
1399:     shift = a->sliidx[i];    /* starting index of the slice */
1400:     cp    = a->colidx+shift; /* pointer to the column indices of the slice */
1401:     vp    = a->val+shift;    /* pointer to the nonzero values of the slice */
1402:     for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1403:       row  = 8*i + row_in_slice;
1404:       nrow = a->rlen[row]; /* number of nonzeros in row */
1405:       /*
1406:         Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1407:         But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1408:       */
1409:       lastcol = 0;
1410:       if (nrow>0) { /* nonempty row */
1411:         lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1412:       } else if (!row_in_slice) { /* first row of the currect slice is empty */
1413:         for (j=1;j<8;j++) {
1414:           if (a->rlen[8*i+j]) {
1415:             lastcol = cp[j];
1416:             break;
1417:           }
1418:         }
1419:       } else {
1420:         if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1421:       }

1423:       for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1424:         cp[8*k+row_in_slice] = lastcol;
1425:         vp[8*k+row_in_slice] = (MatScalar)0;
1426:       }
1427:     }
1428:   }

1430:   A->info.mallocs += a->reallocs;
1431:   a->reallocs      = 0;

1433:   MatSeqSELLInvalidateDiagonal(A);
1434:   return(0);
1435: }

1437: PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1438: {
1439:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;

1442:   info->block_size   = 1.0;
1443:   info->nz_allocated = (double)a->maxallocmat;
1444:   info->nz_used      = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1445:   info->nz_unneeded  = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1446:   info->assemblies   = (double)A->num_ass;
1447:   info->mallocs      = (double)A->info.mallocs;
1448:   info->memory       = ((PetscObject)A)->mem;
1449:   if (A->factortype) {
1450:     info->fill_ratio_given  = A->info.fill_ratio_given;
1451:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1452:     info->factor_mallocs    = A->info.factor_mallocs;
1453:   } else {
1454:     info->fill_ratio_given  = 0;
1455:     info->fill_ratio_needed = 0;
1456:     info->factor_mallocs    = 0;
1457:   }
1458:   return(0);
1459: }

1461: PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1462: {
1463:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1464:   PetscInt       shift,i,k,l,low,high,t,ii,row,col,nrow;
1465:   PetscInt       *cp,nonew=a->nonew,lastcol=-1;
1466:   MatScalar      *vp,value;

1470:   for (k=0; k<m; k++) { /* loop over added rows */
1471:     row = im[k];
1472:     if (row < 0) continue;
1473: #if defined(PETSC_USE_DEBUG)
1474:     if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1475: #endif
1476:     shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1477:     cp    = a->colidx+shift; /* pointer to the row */
1478:     vp    = a->val+shift; /* pointer to the row */
1479:     nrow  = a->rlen[row];
1480:     low   = 0;
1481:     high  = nrow;

1483:     for (l=0; l<n; l++) { /* loop over added columns */
1484:       col = in[l];
1485:       if (col<0) continue;
1486: #if defined(PETSC_USE_DEBUG)
1487:       if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1488: #endif
1489:       if (a->roworiented) {
1490:         value = v[l+k*n];
1491:       } else {
1492:         value = v[k+l*m];
1493:       }
1494:       if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;

1496:       /* search in this row for the specified colmun, i indicates the column to be set */
1497:       if (col <= lastcol) low = 0;
1498:       else high = nrow;
1499:       lastcol = col;
1500:       while (high-low > 5) {
1501:         t = (low+high)/2;
1502:         if (*(cp+t*8) > col) high = t;
1503:         else low = t;
1504:       }
1505:       for (i=low; i<high; i++) {
1506:         if (*(cp+i*8) > col) break;
1507:         if (*(cp+i*8) == col) {
1508:           if (is == ADD_VALUES) *(vp+i*8) += value;
1509:           else *(vp+i*8) = value;
1510:           low = i + 1;
1511:           goto noinsert;
1512:         }
1513:       }
1514:       if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1515:       if (nonew == 1) goto noinsert;
1516:       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1517:       /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1518:       MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1519:       /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1520:       for (ii=nrow-1; ii>=i; ii--) {
1521:         *(cp+(ii+1)*8) = *(cp+ii*8);
1522:         *(vp+(ii+1)*8) = *(vp+ii*8);
1523:       }
1524:       a->rlen[row]++;
1525:       *(cp+i*8) = col;
1526:       *(vp+i*8) = value;
1527:       a->nz++;
1528:       A->nonzerostate++;
1529:       low = i+1; high++; nrow++;
1530: noinsert:;
1531:     }
1532:     a->rlen[row] = nrow;
1533:   }
1534:   return(0);
1535: }

1537: PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1538: {

1542:   /* If the two matrices have the same copy implementation, use fast copy. */
1543:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1544:     Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1545:     Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;

1547:     if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1548:     PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1549:   } else {
1550:     MatCopy_Basic(A,B,str);
1551:   }
1552:   return(0);
1553: }

1555: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1556: {

1560:   MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);
1561:   return(0);
1562: }

1564: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1565: {
1566:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;

1569:   *array = a->val;
1570:   return(0);
1571: }

1573: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1574: {
1576:   return(0);
1577: }

1579: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1580: {
1581:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1582:   PetscInt    i;
1583:   MatScalar   *aval=a->val;

1586:   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1587:   return(0);
1588: }

1590: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1591: {
1592:   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
1593:   PetscInt       i;
1594:   MatScalar      *aval=a->val;

1598:   for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1599:   MatSeqSELLInvalidateDiagonal(A);
1600:   return(0);
1601: }

1603: PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1604: {
1605:   Mat_SeqSELL    *a=(Mat_SeqSELL*)inA->data;
1606:   MatScalar      *aval=a->val;
1607:   PetscScalar    oalpha=alpha;
1608:   PetscBLASInt   one=1,size;

1612:   PetscBLASIntCast(a->sliidx[a->totalslices],&size);
1613:   PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1614:   PetscLogFlops(a->nz);
1615:   MatSeqSELLInvalidateDiagonal(inA);
1616:   return(0);
1617: }

1619: PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1620: {
1621:   Mat_SeqSELL    *y=(Mat_SeqSELL*)Y->data;

1625:   if (!Y->preallocated || !y->nz) {
1626:     MatSeqSELLSetPreallocation(Y,1,NULL);
1627:   }
1628:   MatShift_Basic(Y,a);
1629:   return(0);
1630: }

1632: PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1633: {
1634:   Mat_SeqSELL       *a=(Mat_SeqSELL*)A->data;
1635:   PetscScalar       *x,sum,*t;
1636:   const MatScalar   *idiag=0,*mdiag;
1637:   const PetscScalar *b,*xb;
1638:   PetscInt          n,m=A->rmap->n,i,j,shift;
1639:   const PetscInt    *diag;
1640:   PetscErrorCode    ierr;

1643:   its = its*lits;

1645:   if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1646:   if (!a->idiagvalid) {MatInvertDiagonal_SeqSELL(A,omega,fshift);}
1647:   a->fshift = fshift;
1648:   a->omega  = omega;

1650:   diag  = a->diag;
1651:   t     = a->ssor_work;
1652:   idiag = a->idiag;
1653:   mdiag = a->mdiag;

1655:   VecGetArray(xx,&x);
1656:   VecGetArrayRead(bb,&b);
1657:   /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1658:   if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1659:   if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1660:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");

1662:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1663:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1664:       for (i=0; i<m; i++) {
1665:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1666:         sum   = b[i];
1667:         n     = (diag[i]-shift)/8;
1668:         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1669:         t[i]  = sum;
1670:         x[i]  = sum*idiag[i];
1671:       }
1672:       xb   = t;
1673:       PetscLogFlops(a->nz);
1674:     } else xb = b;
1675:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1676:       for (i=m-1; i>=0; i--) {
1677:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1678:         sum   = xb[i];
1679:         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1680:         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1681:         if (xb == b) {
1682:           x[i] = sum*idiag[i];
1683:         } else {
1684:           x[i] = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1685:         }
1686:       }
1687:       PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1688:     }
1689:     its--;
1690:   }
1691:   while (its--) {
1692:     if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1693:       for (i=0; i<m; i++) {
1694:         /* lower */
1695:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1696:         sum   = b[i];
1697:         n     = (diag[i]-shift)/8;
1698:         for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1699:         t[i]  = sum;             /* save application of the lower-triangular part */
1700:         /* upper */
1701:         n     = a->rlen[i]-(diag[i]-shift)/8-1;
1702:         for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1703:         x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1704:       }
1705:       xb   = t;
1706:       PetscLogFlops(2.0*a->nz);
1707:     } else xb = b;
1708:     if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1709:       for (i=m-1; i>=0; i--) {
1710:         shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1711:         sum = xb[i];
1712:         if (xb == b) {
1713:           /* whole matrix (no checkpointing available) */
1714:           n     = a->rlen[i];
1715:           for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1716:           x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1717:         } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1718:           n     = a->rlen[i]-(diag[i]-shift)/8-1;
1719:           for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1720:           x[i]  = (1.-omega)*x[i]+sum*idiag[i];  /* omega in idiag */
1721:         }
1722:       }
1723:       if (xb == b) {
1724:         PetscLogFlops(2.0*a->nz);
1725:       } else {
1726:         PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1727:       }
1728:     }
1729:   }
1730:   VecRestoreArray(xx,&x);
1731:   VecRestoreArrayRead(bb,&b);
1732:   return(0);
1733: }

1735: /* -------------------------------------------------------------------*/
1736: static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1737:                                        MatGetRow_SeqSELL,
1738:                                        MatRestoreRow_SeqSELL,
1739:                                        MatMult_SeqSELL,
1740:                                /* 4*/  MatMultAdd_SeqSELL,
1741:                                        MatMultTranspose_SeqSELL,
1742:                                        MatMultTransposeAdd_SeqSELL,
1743:                                        0,
1744:                                        0,
1745:                                        0,
1746:                                /* 10*/ 0,
1747:                                        0,
1748:                                        0,
1749:                                        MatSOR_SeqSELL,
1750:                                        0,
1751:                                /* 15*/ MatGetInfo_SeqSELL,
1752:                                        MatEqual_SeqSELL,
1753:                                        MatGetDiagonal_SeqSELL,
1754:                                        MatDiagonalScale_SeqSELL,
1755:                                        0,
1756:                                /* 20*/ 0,
1757:                                        MatAssemblyEnd_SeqSELL,
1758:                                        MatSetOption_SeqSELL,
1759:                                        MatZeroEntries_SeqSELL,
1760:                                /* 24*/ 0,
1761:                                        0,
1762:                                        0,
1763:                                        0,
1764:                                        0,
1765:                                /* 29*/ MatSetUp_SeqSELL,
1766:                                        0,
1767:                                        0,
1768:                                        0,
1769:                                        0,
1770:                                /* 34*/ MatDuplicate_SeqSELL,
1771:                                        0,
1772:                                        0,
1773:                                        0,
1774:                                        0,
1775:                                /* 39*/ 0,
1776:                                        0,
1777:                                        0,
1778:                                        MatGetValues_SeqSELL,
1779:                                        MatCopy_SeqSELL,
1780:                                /* 44*/ 0,
1781:                                        MatScale_SeqSELL,
1782:                                        MatShift_SeqSELL,
1783:                                        0,
1784:                                        0,
1785:                                /* 49*/ 0,
1786:                                        0,
1787:                                        0,
1788:                                        0,
1789:                                        0,
1790:                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
1791:                                        0,
1792:                                        0,
1793:                                        0,
1794:                                        0,
1795:                                /* 59*/ 0,
1796:                                        MatDestroy_SeqSELL,
1797:                                        MatView_SeqSELL,
1798:                                        0,
1799:                                        0,
1800:                                /* 64*/ 0,
1801:                                        0,
1802:                                        0,
1803:                                        0,
1804:                                        0,
1805:                                /* 69*/ 0,
1806:                                        0,
1807:                                        0,
1808:                                        0,
1809:                                        0,
1810:                                /* 74*/ 0,
1811:                                        MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1812:                                        0,
1813:                                        0,
1814:                                        0,
1815:                                /* 79*/ 0,
1816:                                        0,
1817:                                        0,
1818:                                        0,
1819:                                        0,
1820:                                /* 84*/ 0,
1821:                                        0,
1822:                                        0,
1823:                                        0,
1824:                                        0,
1825:                                /* 89*/ 0,
1826:                                        0,
1827:                                        0,
1828:                                        0,
1829:                                        0,
1830:                                /* 94*/ 0,
1831:                                        0,
1832:                                        0,
1833:                                        0,
1834:                                        0,
1835:                                /* 99*/ 0,
1836:                                        0,
1837:                                        0,
1838:                                        MatConjugate_SeqSELL,
1839:                                        0,
1840:                                /*104*/ 0,
1841:                                        0,
1842:                                        0,
1843:                                        0,
1844:                                        0,
1845:                                /*109*/ 0,
1846:                                        0,
1847:                                        0,
1848:                                        0,
1849:                                        MatMissingDiagonal_SeqSELL,
1850:                                /*114*/ 0,
1851:                                        0,
1852:                                        0,
1853:                                        0,
1854:                                        0,
1855:                                /*119*/ 0,
1856:                                        0,
1857:                                        0,
1858:                                        0,
1859:                                        0,
1860:                                /*124*/ 0,
1861:                                        0,
1862:                                        0,
1863:                                        0,
1864:                                        0,
1865:                                /*129*/ 0,
1866:                                        0,
1867:                                        0,
1868:                                        0,
1869:                                        0,
1870:                                /*134*/ 0,
1871:                                        0,
1872:                                        0,
1873:                                        0,
1874:                                        0,
1875:                                /*139*/ 0,
1876:                                        0,
1877:                                        0,
1878:                                        MatFDColoringSetUp_SeqXAIJ,
1879:                                        0,
1880:                                 /*144*/0
1881: };

1883: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1884: {
1885:   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;

1889:   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");

1891:   /* allocate space for values if not already there */
1892:   if (!a->saved_values) {
1893:     PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);
1894:     PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));
1895:   }

1897:   /* copy values over */
1898:   PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1899:   return(0);
1900: }

1902: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1903: {
1904:   Mat_SeqSELL    *a=(Mat_SeqSELL*)mat->data;

1908:   if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1909:   if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1910:   /* copy values over */
1911:   PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1912:   return(0);
1913: }

1915: /*@C
1916:  MatSeqSELLRestoreArray - returns access to the array where the data for a MATSEQSELL matrix is stored obtained by MatSeqSELLGetArray()

1918:  Not Collective

1920:  Input Parameters:
1921:  .  mat - a MATSEQSELL matrix
1922:  .  array - pointer to the data

1924:  Level: intermediate

1926:  .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1927:  @*/
1928: PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1929: {

1933:   PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));
1934:   return(0);
1935: }

1937: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1938: {
1939:   Mat_SeqSELL    *b;
1940:   PetscMPIInt    size;

1944:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
1945:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");

1947:   PetscNewLog(B,&b);

1949:   B->data = (void*)b;

1951:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

1953:   b->row                = 0;
1954:   b->col                = 0;
1955:   b->icol               = 0;
1956:   b->reallocs           = 0;
1957:   b->ignorezeroentries  = PETSC_FALSE;
1958:   b->roworiented        = PETSC_TRUE;
1959:   b->nonew              = 0;
1960:   b->diag               = 0;
1961:   b->solve_work         = 0;
1962:   B->spptr              = 0;
1963:   b->saved_values       = 0;
1964:   b->idiag              = 0;
1965:   b->mdiag              = 0;
1966:   b->ssor_work          = 0;
1967:   b->omega              = 1.0;
1968:   b->fshift             = 0.0;
1969:   b->idiagvalid         = PETSC_FALSE;
1970:   b->keepnonzeropattern = PETSC_FALSE;

1972:   PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);
1973:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);
1974:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);
1975:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);
1976:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);
1977:   PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);
1978:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);
1979:   return(0);
1980: }

1982: /*
1983:  Given a matrix generated with MatGetFactor() duplicates all the information in A into B
1984:  */
1985: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1986: {
1987:   Mat_SeqSELL    *c,*a=(Mat_SeqSELL*)A->data;
1988:   PetscInt       i,m=A->rmap->n;
1989:   PetscInt       totalslices=a->totalslices;

1993:   c = (Mat_SeqSELL*)C->data;

1995:   C->factortype = A->factortype;
1996:   c->row        = 0;
1997:   c->col        = 0;
1998:   c->icol       = 0;
1999:   c->reallocs   = 0;

2001:   C->assembled = PETSC_TRUE;

2003:   PetscLayoutReference(A->rmap,&C->rmap);
2004:   PetscLayoutReference(A->cmap,&C->cmap);

2006:   PetscMalloc1(8*totalslices,&c->rlen);
2007:   PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2008:   PetscMalloc1(totalslices+1,&c->sliidx);
2009:   PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));

2011:   for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2012:   for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];

2014:   /* allocate the matrix space */
2015:   if (mallocmatspace) {
2016:     PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);
2017:     PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));

2019:     c->singlemalloc = PETSC_TRUE;

2021:     if (m > 0) {
2022:       PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));
2023:       if (cpvalues == MAT_COPY_VALUES) {
2024:         PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));
2025:       } else {
2026:         PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));
2027:       }
2028:     }
2029:   }

2031:   c->ignorezeroentries = a->ignorezeroentries;
2032:   c->roworiented       = a->roworiented;
2033:   c->nonew             = a->nonew;
2034:   if (a->diag) {
2035:     PetscMalloc1(m,&c->diag);
2036:     PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2037:     for (i=0; i<m; i++) {
2038:       c->diag[i] = a->diag[i];
2039:     }
2040:   } else c->diag = 0;

2042:   c->solve_work         = 0;
2043:   c->saved_values       = 0;
2044:   c->idiag              = 0;
2045:   c->ssor_work          = 0;
2046:   c->keepnonzeropattern = a->keepnonzeropattern;
2047:   c->free_val           = PETSC_TRUE;
2048:   c->free_colidx        = PETSC_TRUE;

2050:   c->maxallocmat  = a->maxallocmat;
2051:   c->maxallocrow  = a->maxallocrow;
2052:   c->rlenmax      = a->rlenmax;
2053:   c->nz           = a->nz;
2054:   C->preallocated = PETSC_TRUE;

2056:   c->nonzerorowcnt = a->nonzerorowcnt;
2057:   C->nonzerostate  = A->nonzerostate;

2059:   PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2060:   return(0);
2061: }

2063: PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2064: {

2068:   MatCreate(PetscObjectComm((PetscObject)A),B);
2069:   MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);
2070:   if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2071:     MatSetBlockSizesFromMats(*B,A,A);
2072:   }
2073:   MatSetType(*B,((PetscObject)A)->type_name);
2074:   MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);
2075:   return(0);
2076: }

2078: /*@C
2079:  MatCreateSeqSELL - Creates a sparse matrix in SELL format.

2081:  Collective on MPI_Comm

2083:  Input Parameters:
2084:  +  comm - MPI communicator, set to PETSC_COMM_SELF
2085:  .  m - number of rows
2086:  .  n - number of columns
2087:  .  rlenmax - maximum number of nonzeros in a row
2088:  -  rlen - array containing the number of nonzeros in the various rows
2089:  (possibly different for each row) or NULL

2091:  Output Parameter:
2092:  .  A - the matrix

2094:  It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2095:  MatXXXXSetPreallocation() paradgm instead of this routine directly.
2096:  [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]

2098:  Notes:
2099:  If nnz is given then nz is ignored

2101:  Specify the preallocated storage with either rlenmax or rlen (not both).
2102:  Set rlenmax=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2103:  allocation.  For large problems you MUST preallocate memory or you
2104:  will get TERRIBLE performance, see the users' manual chapter on matrices.

2106:  Level: intermediate

2108:  .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()

2110:  @*/
2111: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2112: {

2116:   MatCreate(comm,A);
2117:   MatSetSizes(*A,m,n,m,n);
2118:   MatSetType(*A,MATSEQSELL);
2119:   MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);
2120:   return(0);
2121: }

2123: PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2124: {
2125:   Mat_SeqSELL     *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2126:   PetscInt       totalslices=a->totalslices;

2130:   /* If the  matrix dimensions are not equal,or no of nonzeros */
2131:   if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2132:     *flg = PETSC_FALSE;
2133:     return(0);
2134:   }
2135:   /* if the a->colidx are the same */
2136:   PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);
2137:   if (!*flg) return(0);
2138:   /* if a->val are the same */
2139:   PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);
2140:   return(0);
2141: }

2143: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2144: {
2145:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;

2148:   a->idiagvalid  = PETSC_FALSE;
2149:   return(0);
2150: }

2152: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2153: {
2154: #if defined(PETSC_USE_COMPLEX)
2155:   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2156:   PetscInt    i;
2157:   PetscScalar *val = a->val;

2160:   for (i=0; i<a->sliidx[a->totalslices]; i++) {
2161:     val[i] = PetscConj(val[i]);
2162:   }
2163: #else
2165: #endif
2166:   return(0);
2167: }