Actual source code: sell.c
petsc-3.9.0 2018-04-07
2: /*
3: Defines the basic matrix operations for the SELL matrix storage format.
4: */
5: #include <../src/mat/impls/sell/seq/sell.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/kernels/blocktranspose.h>
8: #if defined(PETSC_HAVE_IMMINTRIN_H)
9: #include <immintrin.h>
11: #if !defined(_MM_SCALE_8)
12: #define _MM_SCALE_8 8
13: #endif
15: #if defined(__AVX512F__)
16: /* these do not work
17: vec_idx = _mm512_loadunpackhi_epi32(vec_idx,acolidx);
18: vec_vals = _mm512_loadunpackhi_pd(vec_vals,aval);
19: */
/*
   AVX512_Mult_Private - one slice-column step of the AVX-512 kernels.
   Loads 256 bits of column indices from acolidx and 512 bits of matrix values from
   aval (both unaligned), gathers the matching entries of x, and accumulates
   vec_x*vec_vals into vec_y with a fused multiply-add.
   The macro picks up acolidx, aval and x from the calling scope by name and does not
   advance them; the callers in this file step acolidx and aval by 8 after each use.
*/
20: #define AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
21: /* plain (unmasked) unaligned load of the column indices of this slice column */ \
22: vec_idx = _mm256_loadu_si256((__m256i const*)acolidx); \
23: vec_vals = _mm512_loadu_pd(aval); \
24: vec_x = _mm512_i32gather_pd(vec_idx,x,_MM_SCALE_8); \
25: vec_y = _mm512_fmadd_pd(vec_x,vec_vals,vec_y)
26: #elif defined(__AVX2__)
/*
   AVX2_Mult_Private - half-slice-column step of the AVX2 kernel.
   Loads 4 matrix values from aval and 128 bits of column indices from acolidx,
   gathers the matching entries of x, and accumulates vec_x*vec_vals into vec_y with a
   fused multiply-add. Like the AVX-512 variant it uses acolidx, aval and x from the
   calling scope and leaves the pointers untouched.
*/
27: #define AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y) \
28: vec_vals = _mm256_loadu_pd(aval); \
29: vec_idx = _mm_loadu_si128((__m128i const*)acolidx); /* SSE2 */ \
30: vec_x = _mm256_i32gather_pd(x,vec_idx,_MM_SCALE_8); \
31: vec_y = _mm256_fmadd_pd(vec_x,vec_vals,vec_y)
32: #endif
33: #endif /* PETSC_HAVE_IMMINTRIN_H */
35: /*@C
36: MatSeqSELLSetPreallocation - For good matrix assembly performance
37: the user should preallocate the matrix storage by setting the parameter nz
38: (or the array nnz). By setting these parameters accurately, performance
39: during matrix assembly can be increased significantly.
41: Collective on MPI_Comm
43: Input Parameters:
44: + B - The matrix
45: . rlenmax - number of nonzeros per row (same for all rows); called nz in the notes below
46: - rlen - array containing the number of nonzeros in the various rows
47: (possibly different for each row) or NULL; called nnz in the notes below
49: Notes:
50: If nnz is given then nz is ignored.
52: Specify the preallocated storage with either nz or nnz (not both).
53: Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
54: allocation. For large problems you MUST preallocate memory or you
55: will get TERRIBLE performance, see the users' manual chapter on matrices.
57: You can call MatGetInfo() to get information on how effective the preallocation was;
58: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
59: You can also run with the option -info and look for messages with the string
60: malloc in them to see if additional memory allocation was needed.
62: Developers: Use nz of MAT_SKIP_ALLOCATION to not allocate any space for the matrix
63: entries or columns indices.
65: The maximum number of nonzeros in any row should be as accurate as possible.
66: If it is underestimated, you will get bad performance due to reallocation
67: (MatSeqXSELLReallocateSELL).
69: Level: intermediate
71: .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatGetInfo()
73: @*/
74: PetscErrorCode MatSeqSELLSetPreallocation(Mat B,PetscInt rlenmax,const PetscInt rlen[])
75: {
/* Public entry point: hands (B,rlenmax,rlen) to the routine the matrix has composed under
   the name "MatSeqSELLSetPreallocation_C". NOTE(review): presumably a no-op for matrix
   types that do not compose that name - confirm against the PetscTryMethod documentation. */
81: PetscTryMethod(B,"MatSeqSELLSetPreallocation_C",(Mat,PetscInt,const PetscInt[]),(B,rlenmax,rlen));
82: return(0);
83: }
85: PetscErrorCode MatSeqSELLSetPreallocation_SeqSELL(Mat B,PetscInt maxallocrow,const PetscInt rlen[])
86: {
87: Mat_SeqSELL *b;
88: PetscInt i,j,totalslices;
89: PetscBool skipallocation=PETSC_FALSE,realalloc=PETSC_FALSE;
93: if (maxallocrow >= 0 || rlen) realalloc = PETSC_TRUE;
94: if (maxallocrow == MAT_SKIP_ALLOCATION) {
95: skipallocation = PETSC_TRUE;
96: maxallocrow = 0;
97: }
99: PetscLayoutSetUp(B->rmap);
100: PetscLayoutSetUp(B->cmap);
102: /* FIXME: if one preallocates more space than needed, the matrix does not shrink automatically, but for best performance it should */
103: if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 5;
104: if (maxallocrow < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"maxallocrow cannot be less than 0: value %D",maxallocrow);
105: if (rlen) {
106: for (i=0; i<B->rmap->n; i++) {
107: if (rlen[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be less than 0: local row %D value %D",i,rlen[i]);
108: if (rlen[i] > B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"rlen cannot be greater than row length: local row %D value %D rowlength %D",i,rlen[i],B->cmap->n);
109: }
110: }
112: B->preallocated = PETSC_TRUE;
114: b = (Mat_SeqSELL*)B->data;
116: totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
117: b->totalslices = totalslices;
118: if (!skipallocation) {
119: if (B->rmap->n & 0x07) PetscInfo1(B,"Padding rows to the SEQSELL matrix because the number of rows is not the multiple of 8 (value %D)\n",B->rmap->n);
121: if (!b->sliidx) { /* sliidx gives the starting index of each slice, the last element is the total space allocated */
122: PetscMalloc1(totalslices+1,&b->sliidx);
123: PetscLogObjectMemory((PetscObject)B,(totalslices+1)*sizeof(PetscInt));
124: }
125: if (!rlen) { /* if rlen is not provided, allocate same space for all the slices */
126: if (maxallocrow == PETSC_DEFAULT || maxallocrow == PETSC_DECIDE) maxallocrow = 10;
127: else if (maxallocrow < 0) maxallocrow = 1;
128: for (i=0; i<=totalslices; i++) b->sliidx[i] = i*8*maxallocrow;
129: } else {
130: maxallocrow = 0;
131: b->sliidx[0] = 0;
132: for (i=1; i<totalslices; i++) {
133: b->sliidx[i] = 0;
134: for (j=0;j<8;j++) {
135: b->sliidx[i] = PetscMax(b->sliidx[i],rlen[8*(i-1)+j]);
136: }
137: maxallocrow = PetscMax(b->sliidx[i],maxallocrow);
138: b->sliidx[i] = b->sliidx[i-1] + 8*b->sliidx[i];
139: }
140: /* last slice */
141: b->sliidx[totalslices] = 0;
142: for (j=(totalslices-1)*8;j<B->rmap->n;j++) b->sliidx[totalslices] = PetscMax(b->sliidx[totalslices],rlen[j]);
143: maxallocrow = PetscMax(b->sliidx[totalslices],maxallocrow);
144: b->sliidx[totalslices] = b->sliidx[totalslices-1] + 8*b->sliidx[totalslices];
145: }
147: /* allocate space for val, colidx, rlen */
148: /* FIXME: should B's old memory be unlogged? */
149: MatSeqXSELLFreeSELL(B,&b->val,&b->colidx);
150: /* FIXME: assuming an element of the bit array takes 8 bits */
151: PetscMalloc2(b->sliidx[totalslices],&b->val,b->sliidx[totalslices],&b->colidx);
152: PetscLogObjectMemory((PetscObject)B,b->sliidx[totalslices]*(sizeof(PetscScalar)+sizeof(PetscInt)));
153: /* b->rlen will count nonzeros in each row so far. We dont copy rlen to b->rlen because the matrix has not been set. */
154: PetscCalloc1(8*totalslices,&b->rlen);
155: PetscLogObjectMemory((PetscObject)B,8*totalslices*sizeof(PetscInt));
157: b->singlemalloc = PETSC_TRUE;
158: b->free_val = PETSC_TRUE;
159: b->free_colidx = PETSC_TRUE;
160: } else {
161: b->free_val = PETSC_FALSE;
162: b->free_colidx = PETSC_FALSE;
163: }
165: b->nz = 0;
166: b->maxallocrow = maxallocrow;
167: b->rlenmax = maxallocrow;
168: b->maxallocmat = b->sliidx[totalslices];
169: B->info.nz_unneeded = (double)b->maxallocmat;
170: if (realalloc) {
171: MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
172: }
173: return(0);
174: }
176: PetscErrorCode MatGetRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
177: {
178: Mat_SeqSELL *a = (Mat_SeqSELL*)A->data;
179: PetscInt shift;
182: if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
183: if (nz) *nz = a->rlen[row];
184: shift = a->sliidx[row>>3]+(row&0x07);
185: if (!a->getrowcols) {
188: PetscMalloc2(a->rlenmax,&a->getrowcols,a->rlenmax,&a->getrowvals);
189: }
190: if (idx) {
191: PetscInt j;
192: for (j=0; j<a->rlen[row]; j++) a->getrowcols[j] = a->colidx[shift+8*j];
193: *idx = a->getrowcols;
194: }
195: if (v) {
196: PetscInt j;
197: for (j=0; j<a->rlen[row]; j++) a->getrowvals[j] = a->val[shift+8*j];
198: *v = a->getrowvals;
199: }
200: return(0);
201: }
203: PetscErrorCode MatRestoreRow_SeqSELL(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
204: {
/* Counterpart of MatGetRow_SeqSELL: nothing to release, the arguments are left untouched. */
206: return(0);
207: }
209: PetscErrorCode MatConvert_SeqSELL_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
210: {
211: Mat B;
212: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
213: PetscInt i;
217: if (reuse == MAT_REUSE_MATRIX) {
218: B = *newmat;
219: MatZeroEntries(B);
220: } else {
221: MatCreate(PetscObjectComm((PetscObject)A),&B);
222: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
223: MatSetType(B,MATSEQAIJ);
224: MatSeqAIJSetPreallocation(B,0,a->rlen);
225: }
227: for (i=0; i<A->rmap->n; i++) {
228: PetscInt nz,*cols;
229: PetscScalar *vals;
231: MatGetRow_SeqSELL(A,i,&nz,&cols,&vals);
232: MatSetValues(B,1,&i,nz,cols,vals,INSERT_VALUES);
233: MatRestoreRow_SeqSELL(A,i,&nz,&cols,&vals);
234: }
236: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
237: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
238: B->rmap->bs = A->rmap->bs;
240: if (reuse == MAT_INPLACE_MATRIX) {
241: MatHeaderReplace(A,&B);
242: } else {
243: *newmat = B;
244: }
245: return(0);
246: }
248: #include <../src/mat/impls/aij/seq/aij.h>
250: PetscErrorCode MatConvert_SeqAIJ_SeqSELL(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
251: {
252: Mat B;
253: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
254: PetscInt *ai=a->i,m=A->rmap->N,n=A->cmap->N,i,*rowlengths,row,ncols;
255: const PetscInt *cols;
256: const PetscScalar *vals;
257: PetscErrorCode ierr;
260: if (A->rmap->bs > 1) {
261: MatConvert_Basic(A,newtype,reuse,newmat);
262: return(0);
263: }
265: if (reuse == MAT_REUSE_MATRIX) {
266: B = *newmat;
267: } else {
268: /* Can we just use ilen? */
269: PetscMalloc1(m,&rowlengths);
270: for (i=0; i<m; i++) {
271: rowlengths[i] = ai[i+1] - ai[i];
272: }
274: MatCreate(PetscObjectComm((PetscObject)A),&B);
275: MatSetSizes(B,m,n,m,n);
276: MatSetType(B,MATSEQSELL);
277: MatSeqSELLSetPreallocation(B,0,rowlengths);
278: PetscFree(rowlengths);
279: }
281: for (row=0; row<m; row++) {
282: MatGetRow(A,row,&ncols,&cols,&vals);
283: MatSetValues(B,1,&row,ncols,cols,vals,INSERT_VALUES);
284: MatRestoreRow(A,row,&ncols,&cols,&vals);
285: }
286: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
287: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
288: B->rmap->bs = A->rmap->bs;
290: if (reuse == MAT_INPLACE_MATRIX) {
291: MatHeaderReplace(A,&B);
292: } else {
293: *newmat = B;
294: }
295: return(0);
296: }
298: PetscErrorCode MatMult_SeqSELL(Mat A,Vec xx,Vec yy)
299: {
300: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
301: PetscScalar *y;
302: const PetscScalar *x;
303: const MatScalar *aval=a->val;
304: PetscInt totalslices=a->totalslices;
305: const PetscInt *acolidx=a->colidx;
306: PetscInt i,j;
307: PetscErrorCode ierr;
308: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
309: __m512d vec_x,vec_y,vec_vals;
310: __m256i vec_idx;
311: __mmask8 mask;
312: __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
313: __m256i vec_idx2,vec_idx3,vec_idx4;
314: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
315: __m128i vec_idx;
316: __m256d vec_x,vec_y,vec_y2,vec_vals;
317: MatScalar yval;
318: PetscInt r,rows_left,row,nnz_in_row;
319: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
320: __m128d vec_x_tmp;
321: __m256d vec_x,vec_y,vec_y2,vec_vals;
322: MatScalar yval;
323: PetscInt r,rows_left,row,nnz_in_row;
324: #else
325: PetscScalar sum[8];
326: #endif
328: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
329: #pragma disjoint(*x,*y,*aval)
330: #endif
333: VecGetArrayRead(xx,&x);
334: VecGetArray(yy,&y);
335: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
336: for (i=0; i<totalslices; i++) { /* loop over slices */
337: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
338: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
340: vec_y = _mm512_setzero_pd();
341: vec_y2 = _mm512_setzero_pd();
342: vec_y3 = _mm512_setzero_pd();
343: vec_y4 = _mm512_setzero_pd();
345: j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice columnn */
346: switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
347: case 3:
348: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
349: acolidx += 8; aval += 8;
350: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
351: acolidx += 8; aval += 8;
352: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
353: acolidx += 8; aval += 8;
354: j += 3;
355: break;
356: case 2:
357: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
358: acolidx += 8; aval += 8;
359: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
360: acolidx += 8; aval += 8;
361: j += 2;
362: break;
363: case 1:
364: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
365: acolidx += 8; aval += 8;
366: j += 1;
367: break;
368: }
369: #pragma novector
370: for (; j<(a->sliidx[i+1]>>3); j+=4) {
371: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
372: acolidx += 8; aval += 8;
373: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
374: acolidx += 8; aval += 8;
375: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
376: acolidx += 8; aval += 8;
377: AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
378: acolidx += 8; aval += 8;
379: }
381: vec_y = _mm512_add_pd(vec_y,vec_y2);
382: vec_y = _mm512_add_pd(vec_y,vec_y3);
383: vec_y = _mm512_add_pd(vec_y,vec_y4);
384: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
385: mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
386: _mm512_mask_storeu_pd(&y[8*i],mask,vec_y);
387: } else {
388: _mm512_storeu_pd(&y[8*i],vec_y);
389: }
390: }
391: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
392: for (i=0; i<totalslices; i++) { /* loop over full slices */
393: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
394: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
396: /* last slice may have padding rows. Don't use vectorization. */
397: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
398: rows_left = A->rmap->n - 8*i;
399: for (r=0; r<rows_left; ++r) {
400: yval = (MatScalar)0;
401: row = 8*i + r;
402: nnz_in_row = a->rlen[row];
403: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
404: y[row] = yval;
405: }
406: break;
407: }
409: vec_y = _mm256_setzero_pd();
410: vec_y2 = _mm256_setzero_pd();
412: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
413: #pragma novector
414: #pragma unroll(2)
415: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
416: AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
417: aval += 4; acolidx += 4;
418: AVX2_Mult_Private(vec_idx,vec_x,vec_vals,vec_y2);
419: aval += 4; acolidx += 4;
420: }
422: _mm256_storeu_pd(y+i*8,vec_y);
423: _mm256_storeu_pd(y+i*8+4,vec_y2);
424: }
425: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
426: for (i=0; i<totalslices; i++) { /* loop over full slices */
427: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
428: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
430: vec_y = _mm256_setzero_pd();
431: vec_y2 = _mm256_setzero_pd();
433: /* last slice may have padding rows. Don't use vectorization. */
434: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
435: rows_left = A->rmap->n - 8*i;
436: for (r=0; r<rows_left; ++r) {
437: yval = (MatScalar)0;
438: row = 8*i + r;
439: nnz_in_row = a->rlen[row];
440: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j + r] * x[acolidx[8*j + r]];
441: y[row] = yval;
442: }
443: break;
444: }
446: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
447: #pragma novector
448: #pragma unroll(2)
449: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
450: vec_vals = _mm256_loadu_pd(aval);
451: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
452: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
453: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
454: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
455: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
456: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
457: vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
458: aval += 4;
460: vec_vals = _mm256_loadu_pd(aval);
461: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
462: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
463: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
464: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
465: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
466: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
467: vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
468: aval += 4;
469: }
471: _mm256_storeu_pd(y + i*8, vec_y);
472: _mm256_storeu_pd(y + i*8 + 4, vec_y2);
473: }
474: #else
475: for (i=0; i<totalslices; i++) { /* loop over slices */
476: for (j=0; j<8; j++) sum[j] = 0.0;
477: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
478: sum[0] += aval[j] * x[acolidx[j]];
479: sum[1] += aval[j+1] * x[acolidx[j+1]];
480: sum[2] += aval[j+2] * x[acolidx[j+2]];
481: sum[3] += aval[j+3] * x[acolidx[j+3]];
482: sum[4] += aval[j+4] * x[acolidx[j+4]];
483: sum[5] += aval[j+5] * x[acolidx[j+5]];
484: sum[6] += aval[j+6] * x[acolidx[j+6]];
485: sum[7] += aval[j+7] * x[acolidx[j+7]];
486: }
487: if (i == totalslices-1 && (A->rmap->n & 0x07)) { /* if last slice has padding rows */
488: for(j=0; j<(A->rmap->n & 0x07); j++) y[8*i+j] = sum[j];
489: } else {
490: for(j=0; j<8; j++) y[8*i+j] = sum[j];
491: }
492: }
493: #endif
495: PetscLogFlops(2.0*a->nz-a->nonzerorowcnt); /* theoretical minimal FLOPs */
496: VecRestoreArrayRead(xx,&x);
497: VecRestoreArray(yy,&y);
498: return(0);
499: }
501: #include <../src/mat/impls/aij/seq/ftn-kernels/fmultadd.h>
/*
   MatMultAdd_SeqSELL - computes zz = yy + A*xx for a SeqSELL matrix.

   The matrix is traversed slice by slice (8 rows per slice, entries stored column by column
   inside a slice). Three code paths are selected at compile time: AVX-512, AVX, and a
   generic scalar path. In the two SIMD paths acolidx and aval are advanced while the slices
   are consumed; the generic path indexes them with the absolute offsets from a->sliidx.
   In every path only the first (n mod 8) rows of a padded last slice are written to zz.
*/
502: PetscErrorCode MatMultAdd_SeqSELL(Mat A,Vec xx,Vec yy,Vec zz)
503: {
504: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
505: PetscScalar *y,*z;
506: const PetscScalar *x;
507: const MatScalar *aval=a->val;
508: PetscInt totalslices=a->totalslices;
509: const PetscInt *acolidx=a->colidx;
510: PetscInt i,j;
511: PetscErrorCode ierr;
512: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
513: __m512d vec_x,vec_y,vec_vals;
514: __m256i vec_idx;
515: __mmask8 mask;
516: __m512d vec_x2,vec_y2,vec_vals2,vec_x3,vec_y3,vec_vals3,vec_x4,vec_y4,vec_vals4;
517: __m256i vec_idx2,vec_idx3,vec_idx4;
518: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
519: __m128d vec_x_tmp;
520: __m256d vec_x,vec_y,vec_y2,vec_vals;
521: MatScalar yval;
522: PetscInt r,row,nnz_in_row;
523: #else
524: PetscScalar sum[8];
525: #endif
527: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
528: #pragma disjoint(*x,*y,*aval)
529: #endif
532: VecGetArrayRead(xx,&x);
533: VecGetArrayPair(yy,zz,&y,&z);
534: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
535: for (i=0; i<totalslices; i++) { /* loop over slices */
536: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
537: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
/* the first accumulator starts from the 8 entries of y that belong to this slice;
   for a padded last slice only the low (n mod 8) lanes are loaded from y */
539: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
540: mask = (__mmask8)(0xff >> (8-(A->rmap->n & 0x07)));
541: vec_y = _mm512_mask_loadu_pd(vec_y,mask,&y[8*i]);
542: } else {
543: vec_y = _mm512_loadu_pd(&y[8*i]);
544: }
545: vec_y2 = _mm512_setzero_pd();
546: vec_y3 = _mm512_setzero_pd();
547: vec_y4 = _mm512_setzero_pd();
549: j = a->sliidx[i]>>3; /* 8 bytes are read at each time, corresponding to a slice column */
/* peel (number of slice columns mod 4) columns so that the loop below can be unrolled by 4 */
550: switch ((a->sliidx[i+1]-a->sliidx[i])/8 & 3) {
551: case 3:
552: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
553: acolidx += 8; aval += 8;
554: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
555: acolidx += 8; aval += 8;
556: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
557: acolidx += 8; aval += 8;
558: j += 3;
559: break;
560: case 2:
561: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
562: acolidx += 8; aval += 8;
563: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
564: acolidx += 8; aval += 8;
565: j += 2;
566: break;
567: case 1:
568: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
569: acolidx += 8; aval += 8;
570: j += 1;
571: break;
572: }
573: #pragma novector
574: for (; j<(a->sliidx[i+1]>>3); j+=4) {
575: AVX512_Mult_Private(vec_idx,vec_x,vec_vals,vec_y);
576: acolidx += 8; aval += 8;
577: AVX512_Mult_Private(vec_idx2,vec_x2,vec_vals2,vec_y2);
578: acolidx += 8; aval += 8;
579: AVX512_Mult_Private(vec_idx3,vec_x3,vec_vals3,vec_y3);
580: acolidx += 8; aval += 8;
581: AVX512_Mult_Private(vec_idx4,vec_x4,vec_vals4,vec_y4);
582: acolidx += 8; aval += 8;
583: }
/* combine the four partial accumulators */
585: vec_y = _mm512_add_pd(vec_y,vec_y2);
586: vec_y = _mm512_add_pd(vec_y,vec_y3);
587: vec_y = _mm512_add_pd(vec_y,vec_y4);
/* mask was set above under the same condition, so it is valid whenever it is used here */
588: if (i == totalslices-1 && A->rmap->n & 0x07) { /* if last slice has padding rows */
589: _mm512_mask_storeu_pd(&z[8*i],mask,vec_y);
590: } else {
591: _mm512_storeu_pd(&z[8*i],vec_y);
592: }
593: }
594: #elif defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
595: for (i=0; i<totalslices; i++) { /* loop over full slices */
596: PetscPrefetchBlock(acolidx,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
597: PetscPrefetchBlock(aval,a->sliidx[i+1]-a->sliidx[i],0,PETSC_PREFETCH_HINT_T0);
599: /* last slice may have padding rows. Don't use vectorization. */
600: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
601: for (r=0; r<(A->rmap->n & 0x07); ++r) {
602: row = 8*i + r;
603: yval = (MatScalar)0.0;
604: nnz_in_row = a->rlen[row];
/* aval/acolidx have been advanced to the start of this slice by the iterations before */
605: for (j=0; j<nnz_in_row; ++j) yval += aval[8*j+r] * x[acolidx[8*j+r]];
606: z[row] = y[row] + yval;
607: }
608: break;
609: }
/* accumulators start from the 8 entries of y that belong to this slice */
611: vec_y = _mm256_loadu_pd(y+8*i);
612: vec_y2 = _mm256_loadu_pd(y+8*i+4);
614: /* Process slice of height 8 (512 bits) via two subslices of height 4 (256 bits) via AVX */
615: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
/* rows 0-3 of the slice: x is assembled two doubles at a time */
616: vec_vals = _mm256_loadu_pd(aval);
617: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
618: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
619: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
620: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
621: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
622: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
623: vec_y = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y);
624: aval += 4;
/* rows 4-7 of the slice */
626: vec_vals = _mm256_loadu_pd(aval);
627: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
628: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
629: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,0);
630: vec_x_tmp = _mm_loadl_pd(vec_x_tmp, x + *acolidx++);
631: vec_x_tmp = _mm_loadh_pd(vec_x_tmp, x + *acolidx++);
632: vec_x = _mm256_insertf128_pd(vec_x,vec_x_tmp,1);
633: vec_y2 = _mm256_add_pd(_mm256_mul_pd(vec_x,vec_vals),vec_y2);
634: aval += 4;
635: }
637: _mm256_storeu_pd(z+i*8,vec_y);
638: _mm256_storeu_pd(z+i*8+4,vec_y2);
639: }
640: #else
641: for (i=0; i<totalslices; i++) { /* loop over slices */
/* sum[k] accumulates the product for row 8*i+k */
642: for (j=0; j<8; j++) sum[j] = 0.0;
643: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
644: sum[0] += aval[j] * x[acolidx[j]];
645: sum[1] += aval[j+1] * x[acolidx[j+1]];
646: sum[2] += aval[j+2] * x[acolidx[j+2]];
647: sum[3] += aval[j+3] * x[acolidx[j+3]];
648: sum[4] += aval[j+4] * x[acolidx[j+4]];
649: sum[5] += aval[j+5] * x[acolidx[j+5]];
650: sum[6] += aval[j+6] * x[acolidx[j+6]];
651: sum[7] += aval[j+7] * x[acolidx[j+7]];
652: }
/* a padded last slice contributes only its first (n mod 8) rows */
653: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
654: for (j=0; j<(A->rmap->n & 0x07); j++) z[8*i+j] = y[8*i+j] + sum[j];
655: } else {
656: for (j=0; j<8; j++) z[8*i+j] = y[8*i+j] + sum[j];
657: }
658: }
659: #endif
661: PetscLogFlops(2.0*a->nz);
662: VecRestoreArrayRead(xx,&x);
663: VecRestoreArrayPair(yy,zz,&y,&z);
664: return(0);
665: }
667: PetscErrorCode MatMultTransposeAdd_SeqSELL(Mat A,Vec xx,Vec zz,Vec yy)
668: {
669: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
670: PetscScalar *y;
671: const PetscScalar *x;
672: const MatScalar *aval=a->val;
673: const PetscInt *acolidx=a->colidx;
674: PetscInt i,j,r,row,nnz_in_row,totalslices=a->totalslices;
675: PetscErrorCode ierr;
677: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
678: #pragma disjoint(*x,*y,*aval)
679: #endif
682: if (A->symmetric) {
683: MatMultAdd_SeqSELL(A,xx,zz,yy);
684: return(0);
685: }
686: if (zz != yy) { VecCopy(zz,yy); }
687: VecGetArrayRead(xx,&x);
688: VecGetArray(yy,&y);
689: for (i=0; i<a->totalslices; i++) { /* loop over slices */
690: if (i == totalslices-1 && (A->rmap->n & 0x07)) {
691: for (r=0; r<(A->rmap->n & 0x07); ++r) {
692: row = 8*i + r;
693: nnz_in_row = a->rlen[row];
694: for (j=0; j<nnz_in_row; ++j) y[acolidx[8*j+r]] += aval[8*j+r] * x[row];
695: }
696: break;
697: }
698: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j+=8) {
699: y[acolidx[j]] += aval[j] * x[8*i];
700: y[acolidx[j+1]] += aval[j+1] * x[8*i+1];
701: y[acolidx[j+2]] += aval[j+2] * x[8*i+2];
702: y[acolidx[j+3]] += aval[j+3] * x[8*i+3];
703: y[acolidx[j+4]] += aval[j+4] * x[8*i+4];
704: y[acolidx[j+5]] += aval[j+5] * x[8*i+5];
705: y[acolidx[j+6]] += aval[j+6] * x[8*i+6];
706: y[acolidx[j+7]] += aval[j+7] * x[8*i+7];
707: }
708: }
709: PetscLogFlops(2.0*a->sliidx[a->totalslices]);
710: VecRestoreArrayRead(xx,&x);
711: VecRestoreArray(yy,&y);
712: return(0);
713: }
715: PetscErrorCode MatMultTranspose_SeqSELL(Mat A,Vec xx,Vec yy)
716: {
720: if (A->symmetric) {
721: MatMult_SeqSELL(A,xx,yy);
722: } else {
723: VecSet(yy,0.0);
724: MatMultTransposeAdd_SeqSELL(A,xx,yy,yy);
725: }
726: return(0);
727: }
729: /*
730: Checks for missing diagonals
731: */
732: PetscErrorCode MatMissingDiagonal_SeqSELL(Mat A,PetscBool *missing,PetscInt *d)
733: {
734: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
735: PetscInt *diag,i;
738: *missing = PETSC_FALSE;
739: if (A->rmap->n > 0 && !(a->colidx)) {
740: *missing = PETSC_TRUE;
741: if (d) *d = 0;
742: PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
743: } else {
744: diag = a->diag;
745: for (i=0; i<A->rmap->n; i++) {
746: if (diag[i] == -1) {
747: *missing = PETSC_TRUE;
748: if (d) *d = i;
749: PetscInfo1(A,"Matrix is missing diagonal number %D\n",i);
750: break;
751: }
752: }
753: }
754: return(0);
755: }
757: PetscErrorCode MatMarkDiagonal_SeqSELL(Mat A)
758: {
759: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
760: PetscInt i,j,m=A->rmap->n,shift;
764: if (!a->diag) {
765: PetscMalloc1(m,&a->diag);
766: PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));
767: a->free_diag = PETSC_TRUE;
768: }
769: for (i=0; i<m; i++) { /* loop over rows */
770: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
771: a->diag[i] = -1;
772: for (j=0; j<a->rlen[i]; j++) {
773: if (a->colidx[shift+j*8] == i) {
774: a->diag[i] = shift+j*8;
775: break;
776: }
777: }
778: }
779: return(0);
780: }
782: /*
783: Negative shift indicates do not generate an error if there is a zero diagonal, just invert it anyways
784: */
785: PetscErrorCode MatInvertDiagonal_SeqSELL(Mat A,PetscScalar omega,PetscScalar fshift)
786: {
787: Mat_SeqSELL *a=(Mat_SeqSELL*) A->data;
788: PetscInt i,*diag,m = A->rmap->n;
789: MatScalar *val = a->val;
790: PetscScalar *idiag,*mdiag;
794: if (a->idiagvalid) return(0);
795: MatMarkDiagonal_SeqSELL(A);
796: diag = a->diag;
797: if (!a->idiag) {
798: PetscMalloc3(m,&a->idiag,m,&a->mdiag,m,&a->ssor_work);
799: PetscLogObjectMemory((PetscObject)A, 3*m*sizeof(PetscScalar));
800: val = a->val;
801: }
802: mdiag = a->mdiag;
803: idiag = a->idiag;
805: if (omega == 1.0 && PetscRealPart(fshift) <= 0.0) {
806: for (i=0; i<m; i++) {
807: mdiag[i] = val[diag[i]];
808: if (!PetscAbsScalar(mdiag[i])) { /* zero diagonal */
809: if (PetscRealPart(fshift)) {
810: PetscInfo1(A,"Zero diagonal on row %D\n",i);
811: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
812: A->factorerror_zeropivot_value = 0.0;
813: A->factorerror_zeropivot_row = i;
814: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Zero diagonal on row %D",i);
815: }
816: idiag[i] = 1.0/val[diag[i]];
817: }
818: PetscLogFlops(m);
819: } else {
820: for (i=0; i<m; i++) {
821: mdiag[i] = val[diag[i]];
822: idiag[i] = omega/(fshift + val[diag[i]]);
823: }
824: PetscLogFlops(2.0*m);
825: }
826: a->idiagvalid = PETSC_TRUE;
827: return(0);
828: }
830: PetscErrorCode MatZeroEntries_SeqSELL(Mat A)
831: {
832: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
836: PetscMemzero(a->val,(a->sliidx[a->totalslices])*sizeof(PetscScalar));
837: MatSeqSELLInvalidateDiagonal(A);
838: return(0);
839: }
/*
   MatDestroy_SeqSELL - releases everything owned by the SeqSELL implementation of A:
   the value/column-index storage, the index sets, the per-row and per-slice bookkeeping
   arrays, the cached diagonal work arrays, the MatGetRow scratch buffers and finally the
   Mat_SeqSELL structure itself. Afterwards the type name is cleared and the functions
   composed on the object under the names listed below are removed.
*/
841: PetscErrorCode MatDestroy_SeqSELL(Mat A)
842: {
843: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
847: #if defined(PETSC_USE_LOG)
848: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->n,A->cmap->n,a->nz);
849: #endif
/* matrix storage: values and column indices */
850: MatSeqXSELLFreeSELL(A,&a->val,&a->colidx);
851: ISDestroy(&a->row);
852: ISDestroy(&a->col);
853: PetscFree(a->diag);
854: PetscFree(a->rlen);
855: PetscFree(a->sliidx);
/* idiag, mdiag and ssor_work were obtained together with PetscMalloc3 in MatInvertDiagonal_SeqSELL */
856: PetscFree3(a->idiag,a->mdiag,a->ssor_work);
857: PetscFree(a->solve_work);
858: ISDestroy(&a->icol);
859: PetscFree(a->saved_values);
/* scratch buffers handed out by MatGetRow_SeqSELL */
860: PetscFree2(a->getrowcols,a->getrowvals);
/* a must not be used past this point */
862: PetscFree(A->data);
864: PetscObjectChangeTypeName((PetscObject)A,0);
865: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);
866: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);
867: #if defined(PETSC_HAVE_ELEMENTAL)
868: #endif
869: PetscObjectComposeFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",NULL);
870: return(0);
871: }
873: PetscErrorCode MatSetOption_SeqSELL(Mat A,MatOption op,PetscBool flg)
874: {
875: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
879: switch (op) {
880: case MAT_ROW_ORIENTED:
881: a->roworiented = flg;
882: break;
883: case MAT_KEEP_NONZERO_PATTERN:
884: a->keepnonzeropattern = flg;
885: break;
886: case MAT_NEW_NONZERO_LOCATIONS:
887: a->nonew = (flg ? 0 : 1);
888: break;
889: case MAT_NEW_NONZERO_LOCATION_ERR:
890: a->nonew = (flg ? -1 : 0);
891: break;
892: case MAT_NEW_NONZERO_ALLOCATION_ERR:
893: a->nonew = (flg ? -2 : 0);
894: break;
895: case MAT_UNUSED_NONZERO_LOCATION_ERR:
896: a->nounused = (flg ? -1 : 0);
897: break;
898: case MAT_NEW_DIAGONALS:
899: case MAT_IGNORE_OFF_PROC_ENTRIES:
900: case MAT_USE_HASH_TABLE:
901: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
902: break;
903: case MAT_SPD:
904: case MAT_SYMMETRIC:
905: case MAT_STRUCTURALLY_SYMMETRIC:
906: case MAT_HERMITIAN:
907: case MAT_SYMMETRY_ETERNAL:
908: /* These options are handled directly by MatSetOption() */
909: break;
910: default:
911: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
912: }
913: return(0);
914: }
916: PetscErrorCode MatGetDiagonal_SeqSELL(Mat A,Vec v)
917: {
918: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
919: PetscInt i,j,n,shift;
920: PetscScalar *x,zero=0.0;
924: VecGetLocalSize(v,&n);
925: if (n != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
927: if (A->factortype == MAT_FACTOR_ILU || A->factortype == MAT_FACTOR_LU) {
928: PetscInt *diag=a->diag;
929: VecGetArray(v,&x);
930: for (i=0; i<n; i++) x[i] = 1.0/a->val[diag[i]];
931: VecRestoreArray(v,&x);
932: return(0);
933: }
935: VecSet(v,zero);
936: VecGetArray(v,&x);
937: for (i=0; i<n; i++) { /* loop over rows */
938: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
939: x[i] = 0;
940: for (j=0; j<a->rlen[i]; j++) {
941: if (a->colidx[shift+j*8] == i) {
942: x[i] = a->val[shift+j*8];
943: break;
944: }
945: }
946: }
947: VecRestoreArray(v,&x);
948: return(0);
949: }
951: PetscErrorCode MatDiagonalScale_SeqSELL(Mat A,Vec ll,Vec rr)
952: {
953: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
954: const PetscScalar *l,*r;
955: PetscInt i,j,m,n,row;
956: PetscErrorCode ierr;
959: if (ll) {
960: /* The local size is used so that VecMPI can be passed to this routine
961: by MatDiagonalScale_MPISELL */
962: VecGetLocalSize(ll,&m);
963: if (m != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
964: VecGetArrayRead(ll,&l);
965: for (i=0; i<a->totalslices; i++) { /* loop over slices */
966: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
967: a->val[j] *= l[8*i+row];
968: }
969: }
970: VecRestoreArrayRead(ll,&l);
971: PetscLogFlops(a->nz);
972: }
973: if (rr) {
974: VecGetLocalSize(rr,&n);
975: if (n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
976: VecGetArrayRead(rr,&r);
977: for (i=0; i<a->totalslices; i++) { /* loop over slices */
978: for (j=a->sliidx[i]; j<a->sliidx[i+1]; j++) {
979: a->val[j] *= r[a->colidx[j]];
980: }
981: }
982: VecRestoreArrayRead(rr,&r);
983: PetscLogFlops(a->nz);
984: }
985: MatSeqSELLInvalidateDiagonal(A);
986: return(0);
987: }
989: extern PetscErrorCode MatSetValues_SeqSELL(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
991: PetscErrorCode MatGetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
992: {
993: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
994: PetscInt *cp,i,k,low,high,t,row,col,l;
995: PetscInt shift;
996: MatScalar *vp;
999: for (k=0; k<m; k++) { /* loop over requested rows */
1000: row = im[k];
1001: if (row<0) continue;
1002: #if defined(PETSC_USE_DEBUG)
1003: if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1004: #endif
1005: shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1006: cp = a->colidx+shift; /* pointer to the row */
1007: vp = a->val+shift; /* pointer to the row */
1008: for (l=0; l<n; l++) { /* loop over requested columns */
1009: col = in[l];
1010: if (col<0) continue;
1011: #if defined(PETSC_USE_DEBUG)
1012: if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: row %D max %D",col,A->cmap->n-1);
1013: #endif
1014: high = a->rlen[row]; low = 0; /* assume unsorted */
1015: while (high-low > 5) {
1016: t = (low+high)/2;
1017: if (*(cp+t*8) > col) high = t;
1018: else low = t;
1019: }
1020: for (i=low; i<high; i++) {
1021: if (*(cp+8*i) > col) break;
1022: if (*(cp+8*i) == col) {
1023: *v++ = *(vp+8*i);
1024: goto finished;
1025: }
1026: }
1027: *v++ = 0.0;
1028: finished:;
1029: }
1030: }
1031: return(0);
1032: }
1034: PetscErrorCode MatView_SeqSELL_ASCII(Mat A,PetscViewer viewer)
1035: {
1036: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1037: PetscInt i,j,m=A->rmap->n,shift;
1038: const char *name;
1039: PetscViewerFormat format;
1040: PetscErrorCode ierr;
1043: PetscViewerGetFormat(viewer,&format);
1044: if (format == PETSC_VIEWER_ASCII_MATLAB) {
1045: PetscInt nofinalvalue = 0;
1046: /*
1047: if (m && ((a->i[m] == a->i[m-1]) || (a->j[a->nz-1] != A->cmap->n-1))) {
1048: nofinalvalue = 1;
1049: }
1050: */
1051: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1052: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D \n",m,A->cmap->n);
1053: PetscViewerASCIIPrintf(viewer,"%% Nonzeros = %D \n",a->nz);
1054: #if defined(PETSC_USE_COMPLEX)
1055: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,4);\n",a->nz+nofinalvalue);
1056: #else
1057: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",a->nz+nofinalvalue);
1058: #endif
1059: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
1061: for (i=0; i<m; i++) {
1062: shift = a->sliidx[i>>3]+(i&0x07);
1063: for (j=0; j<a->rlen[i]; j++) {
1064: #if defined(PETSC_USE_COMPLEX)
1065: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1066: #else
1067: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,a->colidx[shift+8*j]+1,(double)a->val[shift+8*j]);
1068: #endif
1069: }
1070: }
1071: /*
1072: if (nofinalvalue) {
1073: #if defined(PETSC_USE_COMPLEX)
1074: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e %18.16e\n",m,A->cmap->n,0.,0.);
1075: #else
1076: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",m,A->cmap->n,0.0);
1077: #endif
1078: }
1079: */
1080: PetscObjectGetName((PetscObject)A,&name);
1081: PetscViewerASCIIPrintf(viewer,"];\n %s = spconvert(zzz);\n",name);
1082: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1083: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
1084: return(0);
1085: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1086: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1087: for (i=0; i<m; i++) {
1088: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1089: shift = a->sliidx[i>>3]+(i&0x07);
1090: for (j=0; j<a->rlen[i]; j++) {
1091: #if defined(PETSC_USE_COMPLEX)
1092: if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1093: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1094: } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0 && PetscRealPart(a->val[shift+8*j]) != 0.0) {
1095: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1096: } else if (PetscRealPart(a->val[shift+8*j]) != 0.0) {
1097: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1098: }
1099: #else
1100: if (a->val[shift+8*j] != 0.0) {PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);}
1101: #endif
1102: }
1103: PetscViewerASCIIPrintf(viewer,"\n");
1104: }
1105: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1106: } else if (format == PETSC_VIEWER_ASCII_DENSE) {
1107: PetscInt cnt=0,jcnt;
1108: PetscScalar value;
1109: #if defined(PETSC_USE_COMPLEX)
1110: PetscBool realonly=PETSC_TRUE;
1111: for (i=0; i<a->sliidx[a->totalslices]; i++) {
1112: if (PetscImaginaryPart(a->val[i]) != 0.0) {
1113: realonly = PETSC_FALSE;
1114: break;
1115: }
1116: }
1117: #endif
1119: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1120: for (i=0; i<m; i++) {
1121: jcnt = 0;
1122: shift = a->sliidx[i>>3]+(i&0x07);
1123: for (j=0; j<A->cmap->n; j++) {
1124: if (jcnt < a->rlen[i] && j == a->colidx[shift+8*j]) {
1125: value = a->val[cnt++];
1126: jcnt++;
1127: } else {
1128: value = 0.0;
1129: }
1130: #if defined(PETSC_USE_COMPLEX)
1131: if (realonly) {
1132: PetscViewerASCIIPrintf(viewer," %7.5e ",(double)PetscRealPart(value));
1133: } else {
1134: PetscViewerASCIIPrintf(viewer," %7.5e+%7.5e i ",(double)PetscRealPart(value),(double)PetscImaginaryPart(value));
1135: }
1136: #else
1137: PetscViewerASCIIPrintf(viewer," %7.5e ",(double)value);
1138: #endif
1139: }
1140: PetscViewerASCIIPrintf(viewer,"\n");
1141: }
1142: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1143: } else if (format == PETSC_VIEWER_ASCII_MATRIXMARKET) {
1144: PetscInt fshift=1;
1145: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1146: #if defined(PETSC_USE_COMPLEX)
1147: PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate complex general\n");
1148: #else
1149: PetscViewerASCIIPrintf(viewer,"%%%%MatrixMarket matrix coordinate real general\n");
1150: #endif
1151: PetscViewerASCIIPrintf(viewer,"%D %D %D\n", m, A->cmap->n, a->nz);
1152: for (i=0; i<m; i++) {
1153: shift = a->sliidx[i>>3]+(i&0x07);
1154: for (j=0; j<a->rlen[i]; j++) {
1155: #if defined(PETSC_USE_COMPLEX)
1156: PetscViewerASCIIPrintf(viewer,"%D %D %g %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1157: #else
1158: PetscViewerASCIIPrintf(viewer,"%D %D %g\n",i+fshift,a->colidx[shift+8*j]+fshift,(double)a->val[shift+8*j]);
1159: #endif
1160: }
1161: }
1162: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1163: } else if (format == PETSC_VIEWER_NATIVE) {
1164: for (i=0; i<a->totalslices; i++) { /* loop over slices */
1165: PetscInt row;
1166: PetscViewerASCIIPrintf(viewer,"slice %D: %D %D\n",i,a->sliidx[i],a->sliidx[i+1]);
1167: for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
1168: #if defined(PETSC_USE_COMPLEX)
1169: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1170: PetscViewerASCIIPrintf(viewer," %D %D %g + %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1171: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1172: PetscViewerASCIIPrintf(viewer," %D %D %g - %g i\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]),-(double)PetscImaginaryPart(a->val[j]));
1173: } else {
1174: PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)PetscRealPart(a->val[j]));
1175: }
1176: #else
1177: PetscViewerASCIIPrintf(viewer," %D %D %g\n",8*i+row,a->colidx[j],(double)a->val[j]);
1178: #endif
1179: }
1180: }
1181: } else {
1182: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
1183: if (A->factortype) {
1184: for (i=0; i<m; i++) {
1185: shift = a->sliidx[i>>3]+(i&0x07);
1186: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1187: /* L part */
1188: for (j=shift; j<a->diag[i]; j+=8) {
1189: #if defined(PETSC_USE_COMPLEX)
1190: if (PetscImaginaryPart(a->val[shift+8*j]) > 0.0) {
1191: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1192: } else if (PetscImaginaryPart(a->val[shift+8*j]) < 0.0) {
1193: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1194: } else {
1195: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1196: }
1197: #else
1198: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1199: #endif
1200: }
1201: /* diagonal */
1202: j = a->diag[i];
1203: #if defined(PETSC_USE_COMPLEX)
1204: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1205: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)PetscImaginaryPart(1.0/a->val[j]));
1206: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1207: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]),(double)(-PetscImaginaryPart(1.0/a->val[j])));
1208: } else {
1209: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(1.0/a->val[j]));
1210: }
1211: #else
1212: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)(1.0/a->val[j]));
1213: #endif
1215: /* U part */
1216: for (j=a->diag[i]+1; j<shift+8*a->rlen[i]; j+=8) {
1217: #if defined(PETSC_USE_COMPLEX)
1218: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1219: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)PetscImaginaryPart(a->val[j]));
1220: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1221: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[j],(double)PetscRealPart(a->val[j]),(double)(-PetscImaginaryPart(a->val[j])));
1222: } else {
1223: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)PetscRealPart(a->val[j]));
1224: }
1225: #else
1226: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[j],(double)a->val[j]);
1227: #endif
1228: }
1229: PetscViewerASCIIPrintf(viewer,"\n");
1230: }
1231: } else {
1232: for (i=0; i<m; i++) {
1233: shift = a->sliidx[i>>3]+(i&0x07);
1234: PetscViewerASCIIPrintf(viewer,"row %D:",i);
1235: for (j=0; j<a->rlen[i]; j++) {
1236: #if defined(PETSC_USE_COMPLEX)
1237: if (PetscImaginaryPart(a->val[j]) > 0.0) {
1238: PetscViewerASCIIPrintf(viewer," (%D, %g + %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)PetscImaginaryPart(a->val[shift+8*j]));
1239: } else if (PetscImaginaryPart(a->val[j]) < 0.0) {
1240: PetscViewerASCIIPrintf(viewer," (%D, %g - %g i)",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]),(double)-PetscImaginaryPart(a->val[shift+8*j]));
1241: } else {
1242: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)PetscRealPart(a->val[shift+8*j]));
1243: }
1244: #else
1245: PetscViewerASCIIPrintf(viewer," (%D, %g) ",a->colidx[shift+8*j],(double)a->val[shift+8*j]);
1246: #endif
1247: }
1248: PetscViewerASCIIPrintf(viewer,"\n");
1249: }
1250: }
1251: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
1252: }
1253: PetscViewerFlush(viewer);
1254: return(0);
1255: }
1257: #include <petscdraw.h>
1258: PetscErrorCode MatView_SeqSELL_Draw_Zoom(PetscDraw draw,void *Aa)
1259: {
1260: Mat A=(Mat)Aa;
1261: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1262: PetscInt i,j,m=A->rmap->n,shift;
1263: int color;
1264: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1265: PetscViewer viewer;
1266: PetscViewerFormat format;
1267: PetscErrorCode ierr;
1270: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1271: PetscViewerGetFormat(viewer,&format);
1272: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1274: /* loop over matrix elements drawing boxes */
1276: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1277: PetscDrawCollectiveBegin(draw);
1278: /* Blue for negative, Cyan for zero and Red for positive */
1279: color = PETSC_DRAW_BLUE;
1280: for (i=0; i<m; i++) {
1281: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1282: y_l = m - i - 1.0; y_r = y_l + 1.0;
1283: for (j=0; j<a->rlen[i]; j++) {
1284: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1285: if (PetscRealPart(a->val[shift+8*j]) >= 0.) continue;
1286: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1287: }
1288: }
1289: color = PETSC_DRAW_CYAN;
1290: for (i=0; i<m; i++) {
1291: shift = a->sliidx[i>>3]+(i&0x07);
1292: y_l = m - i - 1.0; y_r = y_l + 1.0;
1293: for (j=0; j<a->rlen[i]; j++) {
1294: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1295: if (a->val[shift+8*j] != 0.) continue;
1296: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1297: }
1298: }
1299: color = PETSC_DRAW_RED;
1300: for (i=0; i<m; i++) {
1301: shift = a->sliidx[i>>3]+(i&0x07);
1302: y_l = m - i - 1.0; y_r = y_l + 1.0;
1303: for (j=0; j<a->rlen[i]; j++) {
1304: x_l = a->colidx[shift+j*8]; x_r = x_l + 1.0;
1305: if (PetscRealPart(a->val[shift+8*j]) <= 0.) continue;
1306: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1307: }
1308: }
1309: PetscDrawCollectiveEnd(draw);
1310: } else {
1311: /* use contour shading to indicate magnitude of values */
1312: /* first determine max of all nonzero values */
1313: PetscReal minv=0.0,maxv=0.0;
1314: PetscInt count=0;
1315: PetscDraw popup;
1316: for (i=0; i<a->sliidx[a->totalslices]; i++) {
1317: if (PetscAbsScalar(a->val[i]) > maxv) maxv = PetscAbsScalar(a->val[i]);
1318: }
1319: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1320: PetscDrawGetPopup(draw,&popup);
1321: PetscDrawScalePopup(popup,minv,maxv);
1323: PetscDrawCollectiveBegin(draw);
1324: for (i=0; i<m; i++) {
1325: shift = a->sliidx[i>>3]+(i&0x07);
1326: y_l = m - i - 1.0;
1327: y_r = y_l + 1.0;
1328: for (j=0; j<a->rlen[i]; j++) {
1329: x_l = a->colidx[shift+j*8];
1330: x_r = x_l + 1.0;
1331: color = PetscDrawRealToColor(PetscAbsScalar(a->val[count]),minv,maxv);
1332: PetscDrawRectangle(draw,x_l,y_l,x_r,y_r,color,color,color,color);
1333: count++;
1334: }
1335: }
1336: PetscDrawCollectiveEnd(draw);
1337: }
1338: return(0);
1339: }
1341: #include <petscdraw.h>
1342: PetscErrorCode MatView_SeqSELL_Draw(Mat A,PetscViewer viewer)
1343: {
1344: PetscDraw draw;
1345: PetscReal xr,yr,xl,yl,h,w;
1346: PetscBool isnull;
1350: PetscViewerDrawGetDraw(viewer,0,&draw);
1351: PetscDrawIsNull(draw,&isnull);
1352: if (isnull) return(0);
1354: xr = A->cmap->n; yr = A->rmap->n; h = yr/10.0; w = xr/10.0;
1355: xr += w; yr += h; xl = -w; yl = -h;
1356: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1357: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1358: PetscDrawZoom(draw,MatView_SeqSELL_Draw_Zoom,A);
1359: PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);
1360: PetscDrawSave(draw);
1361: return(0);
1362: }
1364: PetscErrorCode MatView_SeqSELL(Mat A,PetscViewer viewer)
1365: {
1366: PetscBool iascii,isbinary,isdraw;
1370: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1371: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1372: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1373: if (iascii) {
1374: MatView_SeqSELL_ASCII(A,viewer);
1375: } else if (isbinary) {
1376: /* MatView_SeqSELL_Binary(A,viewer); */
1377: } else if (isdraw) {
1378: MatView_SeqSELL_Draw(A,viewer);
1379: }
1380: return(0);
1381: }
1383: PetscErrorCode MatAssemblyEnd_SeqSELL(Mat A,MatAssemblyType mode)
1384: {
1385: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1386: PetscInt i,shift,row_in_slice,row,nrow,*cp,lastcol,j,k;
1387: MatScalar *vp;
1391: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1392: /* To do: compress out the unused elements */
1393: MatMarkDiagonal_SeqSELL(A);
1394: PetscInfo6(A,"Matrix size: %D X %D; storage space: %D allocated %D used (%D nonzeros+%D paddedzeros)\n",A->rmap->n,A->cmap->n,a->maxallocmat,a->sliidx[a->totalslices],a->nz,a->sliidx[a->totalslices]-a->nz);
1395: PetscInfo1(A,"Number of mallocs during MatSetValues() is %D\n",a->reallocs);
1396: PetscInfo1(A,"Maximum nonzeros in any row is %D\n",a->rlenmax);
1397: /* Set unused slots for column indices to last valid column index. Set unused slots for values to zero. This allows for a use of unmasked intrinsics -> higher performance */
1398: for (i=0; i<a->totalslices; ++i) {
1399: shift = a->sliidx[i]; /* starting index of the slice */
1400: cp = a->colidx+shift; /* pointer to the column indices of the slice */
1401: vp = a->val+shift; /* pointer to the nonzero values of the slice */
1402: for (row_in_slice=0; row_in_slice<8; ++row_in_slice) { /* loop over rows in the slice */
1403: row = 8*i + row_in_slice;
1404: nrow = a->rlen[row]; /* number of nonzeros in row */
1405: /*
1406: Search for the nearest nonzero. Normally setting the index to zero may cause extra communication.
1407: But if the entire slice are empty, it is fine to use 0 since the index will not be loaded.
1408: */
1409: lastcol = 0;
1410: if (nrow>0) { /* nonempty row */
1411: lastcol = cp[8*(nrow-1)+row_in_slice]; /* use the index from the last nonzero at current row */
1412: } else if (!row_in_slice) { /* first row of the currect slice is empty */
1413: for (j=1;j<8;j++) {
1414: if (a->rlen[8*i+j]) {
1415: lastcol = cp[j];
1416: break;
1417: }
1418: }
1419: } else {
1420: if (a->sliidx[i+1] != shift) lastcol = cp[row_in_slice-1]; /* use the index from the previous row */
1421: }
1423: for (k=nrow; k<(a->sliidx[i+1]-shift)/8; ++k) {
1424: cp[8*k+row_in_slice] = lastcol;
1425: vp[8*k+row_in_slice] = (MatScalar)0;
1426: }
1427: }
1428: }
1430: A->info.mallocs += a->reallocs;
1431: a->reallocs = 0;
1433: MatSeqSELLInvalidateDiagonal(A);
1434: return(0);
1435: }
1437: PetscErrorCode MatGetInfo_SeqSELL(Mat A,MatInfoType flag,MatInfo *info)
1438: {
1439: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1442: info->block_size = 1.0;
1443: info->nz_allocated = (double)a->maxallocmat;
1444: info->nz_used = (double)a->sliidx[a->totalslices]; /* include padding zeros */
1445: info->nz_unneeded = (double)(a->maxallocmat-a->sliidx[a->totalslices]);
1446: info->assemblies = (double)A->num_ass;
1447: info->mallocs = (double)A->info.mallocs;
1448: info->memory = ((PetscObject)A)->mem;
1449: if (A->factortype) {
1450: info->fill_ratio_given = A->info.fill_ratio_given;
1451: info->fill_ratio_needed = A->info.fill_ratio_needed;
1452: info->factor_mallocs = A->info.factor_mallocs;
1453: } else {
1454: info->fill_ratio_given = 0;
1455: info->fill_ratio_needed = 0;
1456: info->factor_mallocs = 0;
1457: }
1458: return(0);
1459: }
1461: PetscErrorCode MatSetValues_SeqSELL(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1462: {
1463: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1464: PetscInt shift,i,k,l,low,high,t,ii,row,col,nrow;
1465: PetscInt *cp,nonew=a->nonew,lastcol=-1;
1466: MatScalar *vp,value;
1470: for (k=0; k<m; k++) { /* loop over added rows */
1471: row = im[k];
1472: if (row < 0) continue;
1473: #if defined(PETSC_USE_DEBUG)
1474: if (row >= A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->n-1);
1475: #endif
1476: shift = a->sliidx[row>>3]+(row&0x07); /* starting index of the row */
1477: cp = a->colidx+shift; /* pointer to the row */
1478: vp = a->val+shift; /* pointer to the row */
1479: nrow = a->rlen[row];
1480: low = 0;
1481: high = nrow;
1483: for (l=0; l<n; l++) { /* loop over added columns */
1484: col = in[l];
1485: if (col<0) continue;
1486: #if defined(PETSC_USE_DEBUG)
1487: if (col >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Col too large: row %D max %D",col,A->cmap->n-1);
1488: #endif
1489: if (a->roworiented) {
1490: value = v[l+k*n];
1491: } else {
1492: value = v[k+l*m];
1493: }
1494: if ((value == 0.0 && a->ignorezeroentries) && (is == ADD_VALUES)) continue;
1496: /* search in this row for the specified colmun, i indicates the column to be set */
1497: if (col <= lastcol) low = 0;
1498: else high = nrow;
1499: lastcol = col;
1500: while (high-low > 5) {
1501: t = (low+high)/2;
1502: if (*(cp+t*8) > col) high = t;
1503: else low = t;
1504: }
1505: for (i=low; i<high; i++) {
1506: if (*(cp+i*8) > col) break;
1507: if (*(cp+i*8) == col) {
1508: if (is == ADD_VALUES) *(vp+i*8) += value;
1509: else *(vp+i*8) = value;
1510: low = i + 1;
1511: goto noinsert;
1512: }
1513: }
1514: if (value == 0.0 && a->ignorezeroentries) goto noinsert;
1515: if (nonew == 1) goto noinsert;
1516: if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1517: /* If the current row length exceeds the slice width (e.g. nrow==slice_width), allocate a new space, otherwise do nothing */
1518: MatSeqXSELLReallocateSELL(A,A->rmap->n,1,nrow,a->sliidx,row/8,row,col,a->colidx,a->val,cp,vp,nonew,MatScalar);
1519: /* add the new nonzero to the high position, shift the remaining elements in current row to the right by one slot */
1520: for (ii=nrow-1; ii>=i; ii--) {
1521: *(cp+(ii+1)*8) = *(cp+ii*8);
1522: *(vp+(ii+1)*8) = *(vp+ii*8);
1523: }
1524: a->rlen[row]++;
1525: *(cp+i*8) = col;
1526: *(vp+i*8) = value;
1527: a->nz++;
1528: A->nonzerostate++;
1529: low = i+1; high++; nrow++;
1530: noinsert:;
1531: }
1532: a->rlen[row] = nrow;
1533: }
1534: return(0);
1535: }
1537: PetscErrorCode MatCopy_SeqSELL(Mat A,Mat B,MatStructure str)
1538: {
1542: /* If the two matrices have the same copy implementation, use fast copy. */
1543: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1544: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1545: Mat_SeqSELL *b=(Mat_SeqSELL*)B->data;
1547: if (a->sliidx[a->totalslices] != b->sliidx[b->totalslices]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1548: PetscMemcpy(b->val,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1549: } else {
1550: MatCopy_Basic(A,B,str);
1551: }
1552: return(0);
1553: }
1555: PetscErrorCode MatSetUp_SeqSELL(Mat A)
1556: {
1560: MatSeqSELLSetPreallocation(A,PETSC_DEFAULT,0);
1561: return(0);
1562: }
1564: PetscErrorCode MatSeqSELLGetArray_SeqSELL(Mat A,PetscScalar *array[])
1565: {
1566: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1569: *array = a->val;
1570: return(0);
1571: }
1573: PetscErrorCode MatSeqSELLRestoreArray_SeqSELL(Mat A,PetscScalar *array[])
1574: {
1576: return(0);
1577: }
1579: PetscErrorCode MatRealPart_SeqSELL(Mat A)
1580: {
1581: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1582: PetscInt i;
1583: MatScalar *aval=a->val;
1586: for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i]=PetscRealPart(aval[i]);
1587: return(0);
1588: }
1590: PetscErrorCode MatImaginaryPart_SeqSELL(Mat A)
1591: {
1592: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1593: PetscInt i;
1594: MatScalar *aval=a->val;
1598: for (i=0; i<a->sliidx[a->totalslices]; i++) aval[i] = PetscImaginaryPart(aval[i]);
1599: MatSeqSELLInvalidateDiagonal(A);
1600: return(0);
1601: }
1603: PetscErrorCode MatScale_SeqSELL(Mat inA,PetscScalar alpha)
1604: {
1605: Mat_SeqSELL *a=(Mat_SeqSELL*)inA->data;
1606: MatScalar *aval=a->val;
1607: PetscScalar oalpha=alpha;
1608: PetscBLASInt one=1,size;
1612: PetscBLASIntCast(a->sliidx[a->totalslices],&size);
1613: PetscStackCallBLAS("BLASscal",BLASscal_(&size,&oalpha,aval,&one));
1614: PetscLogFlops(a->nz);
1615: MatSeqSELLInvalidateDiagonal(inA);
1616: return(0);
1617: }
1619: PetscErrorCode MatShift_SeqSELL(Mat Y,PetscScalar a)
1620: {
1621: Mat_SeqSELL *y=(Mat_SeqSELL*)Y->data;
1625: if (!Y->preallocated || !y->nz) {
1626: MatSeqSELLSetPreallocation(Y,1,NULL);
1627: }
1628: MatShift_Basic(Y,a);
1629: return(0);
1630: }
1632: PetscErrorCode MatSOR_SeqSELL(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1633: {
1634: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
1635: PetscScalar *x,sum,*t;
1636: const MatScalar *idiag=0,*mdiag;
1637: const PetscScalar *b,*xb;
1638: PetscInt n,m=A->rmap->n,i,j,shift;
1639: const PetscInt *diag;
1640: PetscErrorCode ierr;
1643: its = its*lits;
1645: if (fshift != a->fshift || omega != a->omega) a->idiagvalid = PETSC_FALSE; /* must recompute idiag[] */
1646: if (!a->idiagvalid) {MatInvertDiagonal_SeqSELL(A,omega,fshift);}
1647: a->fshift = fshift;
1648: a->omega = omega;
1650: diag = a->diag;
1651: t = a->ssor_work;
1652: idiag = a->idiag;
1653: mdiag = a->mdiag;
1655: VecGetArray(xx,&x);
1656: VecGetArrayRead(bb,&b);
1657: /* We count flops by assuming the upper triangular and lower triangular parts have the same number of nonzeros */
1658: if (flag == SOR_APPLY_UPPER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_UPPER is not implemented");
1659: if (flag == SOR_APPLY_LOWER) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SOR_APPLY_LOWER is not implemented");
1660: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
1662: if (flag & SOR_ZERO_INITIAL_GUESS) {
1663: if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1664: for (i=0; i<m; i++) {
1665: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1666: sum = b[i];
1667: n = (diag[i]-shift)/8;
1668: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1669: t[i] = sum;
1670: x[i] = sum*idiag[i];
1671: }
1672: xb = t;
1673: PetscLogFlops(a->nz);
1674: } else xb = b;
1675: if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1676: for (i=m-1; i>=0; i--) {
1677: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1678: sum = xb[i];
1679: n = a->rlen[i]-(diag[i]-shift)/8-1;
1680: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1681: if (xb == b) {
1682: x[i] = sum*idiag[i];
1683: } else {
1684: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1685: }
1686: }
1687: PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1688: }
1689: its--;
1690: }
1691: while (its--) {
1692: if ((flag & SOR_FORWARD_SWEEP) || (flag & SOR_LOCAL_FORWARD_SWEEP)) {
1693: for (i=0; i<m; i++) {
1694: /* lower */
1695: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1696: sum = b[i];
1697: n = (diag[i]-shift)/8;
1698: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1699: t[i] = sum; /* save application of the lower-triangular part */
1700: /* upper */
1701: n = a->rlen[i]-(diag[i]-shift)/8-1;
1702: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1703: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1704: }
1705: xb = t;
1706: PetscLogFlops(2.0*a->nz);
1707: } else xb = b;
1708: if ((flag & SOR_BACKWARD_SWEEP) || (flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1709: for (i=m-1; i>=0; i--) {
1710: shift = a->sliidx[i>>3]+(i&0x07); /* starting index of the row i */
1711: sum = xb[i];
1712: if (xb == b) {
1713: /* whole matrix (no checkpointing available) */
1714: n = a->rlen[i];
1715: for (j=0; j<n; j++) sum -= a->val[shift+j*8]*x[a->colidx[shift+j*8]];
1716: x[i] = (1.-omega)*x[i]+(sum+mdiag[i]*x[i])*idiag[i];
1717: } else { /* lower-triangular part has been saved, so only apply upper-triangular */
1718: n = a->rlen[i]-(diag[i]-shift)/8-1;
1719: for (j=1; j<=n; j++) sum -= a->val[diag[i]+j*8]*x[a->colidx[diag[i]+j*8]];
1720: x[i] = (1.-omega)*x[i]+sum*idiag[i]; /* omega in idiag */
1721: }
1722: }
1723: if (xb == b) {
1724: PetscLogFlops(2.0*a->nz);
1725: } else {
1726: PetscLogFlops(a->nz); /* assumes 1/2 in upper */
1727: }
1728: }
1729: }
1730: VecRestoreArray(xx,&x);
1731: VecRestoreArrayRead(bb,&b);
1732: return(0);
1733: }
1735: /* -------------------------------------------------------------------*/
/*
   Operation table for the sequential SELL matrix type. MatCreate_SeqSELL() copies
   it into B->ops with PetscMemcpy(). The initializer is positional: a 0 leaves the
   slot without an implementation, and the numbered markers give the position of
   the entry on that line. NOTE(review): the order presumably has to follow the
   field order of struct _MatOps - confirm before inserting or removing entries.
*/
1736: static struct _MatOps MatOps_Values = {MatSetValues_SeqSELL,
1737: MatGetRow_SeqSELL,
1738: MatRestoreRow_SeqSELL,
1739: MatMult_SeqSELL,
1740: /* 4*/ MatMultAdd_SeqSELL,
1741: MatMultTranspose_SeqSELL,
1742: MatMultTransposeAdd_SeqSELL,
1743: 0,
1744: 0,
1745: 0,
1746: /* 10*/ 0,
1747: 0,
1748: 0,
1749: MatSOR_SeqSELL,
1750: 0,
1751: /* 15*/ MatGetInfo_SeqSELL,
1752: MatEqual_SeqSELL,
1753: MatGetDiagonal_SeqSELL,
1754: MatDiagonalScale_SeqSELL,
1755: 0,
1756: /* 20*/ 0,
1757: MatAssemblyEnd_SeqSELL,
1758: MatSetOption_SeqSELL,
1759: MatZeroEntries_SeqSELL,
1760: /* 24*/ 0,
1761: 0,
1762: 0,
1763: 0,
1764: 0,
1765: /* 29*/ MatSetUp_SeqSELL,
1766: 0,
1767: 0,
1768: 0,
1769: 0,
1770: /* 34*/ MatDuplicate_SeqSELL,
1771: 0,
1772: 0,
1773: 0,
1774: 0,
1775: /* 39*/ 0,
1776: 0,
1777: 0,
1778: MatGetValues_SeqSELL,
1779: MatCopy_SeqSELL,
1780: /* 44*/ 0,
1781: MatScale_SeqSELL,
1782: MatShift_SeqSELL,
1783: 0,
1784: 0,
1785: /* 49*/ 0,
1786: 0,
1787: 0,
1788: 0,
1789: 0,
1790: /* 54*/ MatFDColoringCreate_SeqXAIJ,
1791: 0,
1792: 0,
1793: 0,
1794: 0,
1795: /* 59*/ 0,
1796: MatDestroy_SeqSELL,
1797: MatView_SeqSELL,
1798: 0,
1799: 0,
1800: /* 64*/ 0,
1801: 0,
1802: 0,
1803: 0,
1804: 0,
1805: /* 69*/ 0,
1806: 0,
1807: 0,
1808: 0,
1809: 0,
1810: /* 74*/ 0,
1811: MatFDColoringApply_AIJ, /* reuse the FDColoring function for AIJ */
1812: 0,
1813: 0,
1814: 0,
1815: /* 79*/ 0,
1816: 0,
1817: 0,
1818: 0,
1819: 0,
1820: /* 84*/ 0,
1821: 0,
1822: 0,
1823: 0,
1824: 0,
1825: /* 89*/ 0,
1826: 0,
1827: 0,
1828: 0,
1829: 0,
1830: /* 94*/ 0,
1831: 0,
1832: 0,
1833: 0,
1834: 0,
1835: /* 99*/ 0,
1836: 0,
1837: 0,
1838: MatConjugate_SeqSELL,
1839: 0,
1840: /*104*/ 0,
1841: 0,
1842: 0,
1843: 0,
1844: 0,
1845: /*109*/ 0,
1846: 0,
1847: 0,
1848: 0,
1849: MatMissingDiagonal_SeqSELL,
1850: /*114*/ 0,
1851: 0,
1852: 0,
1853: 0,
1854: 0,
1855: /*119*/ 0,
1856: 0,
1857: 0,
1858: 0,
1859: 0,
1860: /*124*/ 0,
1861: 0,
1862: 0,
1863: 0,
1864: 0,
1865: /*129*/ 0,
1866: 0,
1867: 0,
1868: 0,
1869: 0,
1870: /*134*/ 0,
1871: 0,
1872: 0,
1873: 0,
1874: 0,
1875: /*139*/ 0,
1876: 0,
1877: 0,
1878: MatFDColoringSetUp_SeqXAIJ,
1879: 0,
1880: /*144*/0
1881: };
1883: PetscErrorCode MatStoreValues_SeqSELL(Mat mat)
1884: {
1885: Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1889: if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
1891: /* allocate space for values if not already there */
1892: if (!a->saved_values) {
1893: PetscMalloc1(a->sliidx[a->totalslices]+1,&a->saved_values);
1894: PetscLogObjectMemory((PetscObject)mat,(a->sliidx[a->totalslices]+1)*sizeof(PetscScalar));
1895: }
1897: /* copy values over */
1898: PetscMemcpy(a->saved_values,a->val,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1899: return(0);
1900: }
/*
   MatRetrieveValues_SeqSELL - Copies the values kept in a->saved_values back
   into a->val.

   Input Parameter:
.  mat - the matrix; mat->data is used as a Mat_SeqSELL

   Notes:
   Raises PETSC_ERR_ORDER when a->nonew is not set or when a->saved_values has
   not been allocated (the message asks for MatStoreValues() first).
   MatCreate_SeqSELL() composes this routine on the matrix as "MatRetrieveValues_C".
*/
1902: PetscErrorCode MatRetrieveValues_SeqSELL(Mat mat)
1903: {
1904: Mat_SeqSELL *a=(Mat_SeqSELL*)mat->data;
1908: if (!a->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
/* there is nothing to restore until a backup buffer exists */
1909: if (!a->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
1910: /* copy a->sliidx[a->totalslices] entries from the backup buffer back into a->val */
1911: PetscMemcpy(a->val,a->saved_values,a->sliidx[a->totalslices]*sizeof(PetscScalar));
1912: return(0);
1913: }
1915: /*@C
1916: MatSeqSELLRestoreArray - returns access to the array in which the data of a MATSEQSELL matrix is stored, previously obtained with MatSeqSELLGetArray()
1918: Not Collective
1920: Input Parameters:
1921: + A - a MATSEQSELL matrix
1922: - array - pointer to the data
1924: Level: intermediate
1926: .seealso: MatSeqSELLGetArray(), MatSeqSELLRestoreArrayF90()
1927: @*/
1928: PetscErrorCode MatSeqSELLRestoreArray(Mat A,PetscScalar **array)
1929: {
/* dispatch to whatever is composed on A as "MatSeqSELLRestoreArray_C"; MatCreate_SeqSELL() composes MatSeqSELLRestoreArray_SeqSELL under that name */
1933: PetscUseMethod(A,"MatSeqSELLRestoreArray_C",(Mat,PetscScalar**),(A,array));
1934: return(0);
1935: }
/*
   MatCreate_SeqSELL - Turns the matrix object B into a MATSEQSELL matrix.

   Input Parameter:
.  B - the matrix object to set up

   Notes:
   Raises PETSC_ERR_ARG_OUTOFRANGE when the communicator of B has more than one rank.
   Allocates the Mat_SeqSELL data structure, installs the MatOps_Values
   operation table, puts the SeqSELL fields into their initial state, sets the
   type name to MATSEQSELL and composes the type-specific methods on B.
   No matrix storage is allocated in this routine.
*/
1937: PETSC_EXTERN PetscErrorCode MatCreate_SeqSELL(Mat B)
1938: {
1939: Mat_SeqSELL *b;
1940: PetscMPIInt size;
/* this type is only valid on a communicator with a single rank */
1944: MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
1945: if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Comm must be of size 1");
/* allocate the type-specific data and attach it to B */
1947: PetscNewLog(B,&b);
1949: B->data = (void*)b;
/* install the operation table for this type */
1951: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
/* initial state of the SeqSELL fields (and of B->spptr) */
1953: b->row = 0;
1954: b->col = 0;
1955: b->icol = 0;
1956: b->reallocs = 0;
1957: b->ignorezeroentries = PETSC_FALSE;
1958: b->roworiented = PETSC_TRUE;
1959: b->nonew = 0;
1960: b->diag = 0;
1961: b->solve_work = 0;
1962: B->spptr = 0;
1963: b->saved_values = 0;
1964: b->idiag = 0;
1965: b->mdiag = 0;
1966: b->ssor_work = 0;
1967: b->omega = 1.0;
1968: b->fshift = 0.0;
1969: b->idiagvalid = PETSC_FALSE;
1970: b->keepnonzeropattern = PETSC_FALSE;
/* name the type and compose the methods that are looked up by string name */
1972: PetscObjectChangeTypeName((PetscObject)B,MATSEQSELL);
1973: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLGetArray_C",MatSeqSELLGetArray_SeqSELL);
1974: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLRestoreArray_C",MatSeqSELLRestoreArray_SeqSELL);
1975: PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqSELL);
1976: PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqSELL);
1977: PetscObjectComposeFunction((PetscObject)B,"MatSeqSELLSetPreallocation_C",MatSeqSELLSetPreallocation_SeqSELL);
1978: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsell_seqaij_C",MatConvert_SeqSELL_SeqAIJ);
1979: return(0);
1980: }
1982: /*
1983: MatDuplicateNoCreate_SeqSELL - Given an already created matrix C (for example one generated with MatGetFactor()), duplicates all the information in A into C

   Input Parameters:
+  C              - the destination matrix; C->data is used as a Mat_SeqSELL
.  A              - the matrix to duplicate
.  cpvalues       - with MAT_COPY_VALUES the values of A are copied, with any other option the values of C are zeroed
-  mallocmatspace - when true, the value and column index arrays of C are allocated and filled here

   Notes:
   The row and column layouts of A are shared with C by reference.

   NOTE(review): c->rlen is allocated with 8*totalslices entries, but the memory
   log records m*sizeof(PetscInt) and only the first m entries are filled -
   confirm that the remaining entries are never read.

   NOTE(review): when mallocmatspace is false, c->val, c->colidx and
   c->singlemalloc are not assigned here although free_val and free_colidx are
   set to PETSC_TRUE - presumably the caller provides the arrays; confirm.

   NOTE(review): c->mdiag is not reset here, unlike c->idiag and c->ssor_work - confirm.
1984: */
1985: PetscErrorCode MatDuplicateNoCreate_SeqSELL(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
1986: {
1987: Mat_SeqSELL *c,*a=(Mat_SeqSELL*)A->data;
1988: PetscInt i,m=A->rmap->n;
1989: PetscInt totalslices=a->totalslices;
1993: c = (Mat_SeqSELL*)C->data;
1995: C->factortype = A->factortype;
1996: c->row = 0;
1997: c->col = 0;
1998: c->icol = 0;
1999: c->reallocs = 0;
2001: C->assembled = PETSC_TRUE;
/* C shares the layouts of A instead of building its own */
2003: PetscLayoutReference(A->rmap,&C->rmap);
2004: PetscLayoutReference(A->cmap,&C->cmap);
/* per-row lengths (8 entries per slice) and the slice offsets (totalslices+1 entries) */
2006: PetscMalloc1(8*totalslices,&c->rlen);
2007: PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2008: PetscMalloc1(totalslices+1,&c->sliidx);
2009: PetscLogObjectMemory((PetscObject)C, (totalslices+1)*sizeof(PetscInt));
2011: for (i=0; i<m; i++) c->rlen[i] = a->rlen[i];
2012: for (i=0; i<totalslices+1; i++) c->sliidx[i] = a->sliidx[i];
2014: /* allocate the matrix space: values and column indices, a->maxallocmat entries each, in one PetscMalloc2() call */
2015: if (mallocmatspace) {
2016: PetscMalloc2(a->maxallocmat,&c->val,a->maxallocmat,&c->colidx);
2017: PetscLogObjectMemory((PetscObject)C,a->maxallocmat*(sizeof(PetscScalar)+sizeof(PetscInt)));
2019: c->singlemalloc = PETSC_TRUE;
/* the column indices are always copied; the values are copied or zeroed depending on cpvalues */
2021: if (m > 0) {
2022: PetscMemcpy(c->colidx,a->colidx,(a->maxallocmat)*sizeof(PetscInt));
2023: if (cpvalues == MAT_COPY_VALUES) {
2024: PetscMemcpy(c->val,a->val,a->maxallocmat*sizeof(PetscScalar));
2025: } else {
2026: PetscMemzero(c->val,a->maxallocmat*sizeof(PetscScalar));
2027: }
2028: }
2029: }
2031: c->ignorezeroentries = a->ignorezeroentries;
2032: c->roworiented = a->roworiented;
2033: c->nonew = a->nonew;
/* duplicate the diag array only when A has one */
2034: if (a->diag) {
2035: PetscMalloc1(m,&c->diag);
2036: PetscLogObjectMemory((PetscObject)C,m*sizeof(PetscInt));
2037: for (i=0; i<m; i++) {
2038: c->diag[i] = a->diag[i];
2039: }
2040: } else c->diag = 0;
/* work arrays and saved values are not carried over from A */
2042: c->solve_work = 0;
2043: c->saved_values = 0;
2044: c->idiag = 0;
2045: c->ssor_work = 0;
2046: c->keepnonzeropattern = a->keepnonzeropattern;
2047: c->free_val = PETSC_TRUE;
2048: c->free_colidx = PETSC_TRUE;
2050: c->maxallocmat = a->maxallocmat;
2051: c->maxallocrow = a->maxallocrow;
2052: c->rlenmax = a->rlenmax;
2053: c->nz = a->nz;
2054: C->preallocated = PETSC_TRUE;
2056: c->nonzerorowcnt = a->nonzerorowcnt;
2057: C->nonzerostate = A->nonzerostate;
/* carry over the list of functions composed on A */
2059: PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
2060: return(0);
2061: }
/*
   MatDuplicate_SeqSELL - Creates a new matrix *B of the same type and sizes as A
   and fills it with MatDuplicateNoCreate_SeqSELL().

   Input Parameters:
+  A        - the matrix to duplicate
-  cpvalues - duplication option, passed on unchanged to MatDuplicateNoCreate_SeqSELL()

   Output Parameter:
.  B - the newly created matrix
*/
2063: PetscErrorCode MatDuplicate_SeqSELL(Mat A,MatDuplicateOption cpvalues,Mat *B)
2064: {
2068: MatCreate(PetscObjectComm((PetscObject)A),B);
/* A->rmap->n and A->cmap->n are passed for both pairs of size arguments */
2069: MatSetSizes(*B,A->rmap->n,A->cmap->n,A->rmap->n,A->cmap->n);
/* take the block sizes from A only when they divide the row and column sizes evenly */
2070: if (!(A->rmap->n % A->rmap->bs) && !(A->cmap->n % A->cmap->bs)) {
2071: MatSetBlockSizesFromMats(*B,A,A);
2072: }
2073: MatSetType(*B,((PetscObject)A)->type_name);
/* always ask for the value and column index arrays to be allocated */
2074: MatDuplicateNoCreate_SeqSELL(*B,A,cpvalues,PETSC_TRUE);
2075: return(0);
2076: }
2078: /*@C
2079: MatCreateSeqSELL - Creates a sparse matrix in SELL format.
2081: Collective on MPI_Comm
2083: Input Parameters:
2084: + comm - MPI communicator, set to PETSC_COMM_SELF
2085: . m - number of rows
2086: . n - number of columns
2087: . maxallocrow - maximum number of nonzeros in a row
2088: - rlen - array containing the number of nonzeros in the various rows
2089: (possibly different for each row) or NULL
2091: Output Parameter:
2092: . A - the matrix
2094: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2095: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2096: [MatXXXXSetPreallocation() is, for example, MatSeqSELLSetPreallocation]
2098: Notes:
2099: If rlen is given then maxallocrow is ignored.
2101: Specify the preallocated storage with either maxallocrow or rlen (not both).
2102: Set maxallocrow=PETSC_DEFAULT and rlen=NULL for PETSc to control dynamic memory
2103: allocation. For large problems you MUST preallocate memory or you
2104: will get TERRIBLE performance, see the users' manual chapter on matrices.
2106: Level: intermediate
2108: .seealso: MatCreate(), MatCreateSELL(), MatSetValues(), MatCreateSeqSELLWithArrays()
2110: @*/
2111: PetscErrorCode MatCreateSeqSELL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt maxallocrow,const PetscInt rlen[],Mat *A)
2112: {
2116: MatCreate(comm,A);
/* m and n are passed for both pairs of size arguments */
2117: MatSetSizes(*A,m,n,m,n);
2118: MatSetType(*A,MATSEQSELL);
/* the type was just set to MATSEQSELL, so the SeqSELL preallocation routine is called directly */
2119: MatSeqSELLSetPreallocation_SeqSELL(*A,maxallocrow,rlen);
2120: return(0);
2121: }
/*
   MatEqual_SeqSELL - Compares two SeqSELL matrices.

   Input Parameters:
+  A - the first matrix
-  B - the second matrix

   Output Parameter:
.  flg - set to PETSC_FALSE when the sizes, nz or rlenmax differ; otherwise
         set from a bytewise comparison of the column index arrays and then,
         only when those agree, of the value arrays

   NOTE(review): the number of compared entries is a->sliidx[a->totalslices]
   and is applied to the arrays of B as well; nothing here checks that B has
   the same slice layout - confirm that equal sizes, nz and rlenmax guarantee it.
*/
2123: PetscErrorCode MatEqual_SeqSELL(Mat A,Mat B,PetscBool * flg)
2124: {
2125: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data,*b=(Mat_SeqSELL*)B->data;
2126: PetscInt totalslices=a->totalslices;
2130: /* cheap rejection: matrix dimensions, number of nonzeros or maximum row length differ */
2131: if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n) ||(a->nz != b->nz) || (a->rlenmax != b->rlenmax)) {
2132: *flg = PETSC_FALSE;
2133: return(0);
2134: }
2135: /* check whether the column index arrays are the same */
2136: PetscMemcmp(a->colidx,b->colidx,a->sliidx[totalslices]*sizeof(PetscInt),flg);
2137: if (!*flg) return(0);
2138: /* check whether the value arrays are the same */
2139: PetscMemcmp(a->val,b->val,a->sliidx[totalslices]*sizeof(PetscScalar),flg);
2140: return(0);
2141: }
/*
   MatSeqSELLInvalidateDiagonal - Clears the idiagvalid flag of a SeqSELL matrix.

   Input Parameter:
.  A - the matrix; A->data is used as a Mat_SeqSELL
*/
2143: PetscErrorCode MatSeqSELLInvalidateDiagonal(Mat A)
2144: {
2145: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2148: a->idiagvalid = PETSC_FALSE;
2149: return(0);
2150: }
/*
   MatConjugate_SeqSELL - Replaces the stored values of a SeqSELL matrix by
   their complex conjugates, in place.

   Input Parameter:
.  A - the matrix; A->data is used as a Mat_SeqSELL

   Notes:
   Only does work when PETSC_USE_COMPLEX is defined; the other branch is empty
   in this view.
*/
2152: PetscErrorCode MatConjugate_SeqSELL(Mat A)
2153: {
2154: #if defined(PETSC_USE_COMPLEX)
2155: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
2156: PetscInt i;
2157: PetscScalar *val = a->val;
/* conjugate each of the first a->sliidx[a->totalslices] entries of the value array */
2160: for (i=0; i<a->sliidx[a->totalslices]; i++) {
2161: val[i] = PetscConj(val[i]);
2162: }
2163: #else
2165: #endif
2166: return(0);
2167: }