Actual source code: matscalapack.c
petsc-3.14.1 2020-11-03
1: #include <petsc/private/petscscalapack.h>
3: #define DEFAULT_BLOCKSIZE 64
5: /*
6: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7: is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
8: */
9: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
11: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12: {
13: PetscErrorCode ierr;
14: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
15: PetscBool iascii;
16: PetscViewerFormat format;
17: Mat Adense;
20: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
21: if (iascii) {
22: PetscViewerGetFormat(viewer,&format);
23: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24: PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
25: PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
26: PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
27: PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
28: return(0);
29: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30: return(0);
31: }
32: }
33: /* convert to dense format and call MatView() */
34: MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
35: MatView(Adense,viewer);
36: MatDestroy(&Adense);
37: return(0);
38: }
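
/*
   For example, running an application with the option

     -mat_view ::ascii_info

   prints the summary above (block sizes, process grid, local storage); any other
   ASCII format is handled by converting to MATDENSE and viewing that, as done at
   the end of this routine.
*/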
40: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41: {
43: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
44: PetscLogDouble isend[2],irecv[2];
47: info->block_size = 1.0;
49: isend[0] = a->lld*a->locc; /* locally allocated */
50: isend[1] = a->locr*a->locc; /* used submatrix */
51: if (flag == MAT_LOCAL) {
52: info->nz_allocated = isend[0];
53: info->nz_used = isend[1];
54: } else if (flag == MAT_GLOBAL_MAX) {
55: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
56: info->nz_allocated = irecv[0];
57: info->nz_used = irecv[1];
58: } else if (flag == MAT_GLOBAL_SUM) {
59: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
60: info->nz_allocated = irecv[0];
61: info->nz_used = irecv[1];
62: }
64: info->nz_unneeded = 0;
65: info->assemblies = A->num_ass;
66: info->mallocs = 0;
67: info->memory = ((PetscObject)A)->mem;
68: info->fill_ratio_given = 0;
69: info->fill_ratio_needed = 0;
70: info->factor_mallocs = 0;
71: return(0);
72: }
74: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75: {
77: switch (op) {
78: case MAT_NEW_NONZERO_LOCATIONS:
79: case MAT_NEW_NONZERO_LOCATION_ERR:
80: case MAT_NEW_NONZERO_ALLOCATION_ERR:
81: case MAT_SYMMETRIC:
82: case MAT_SORTED_FULL:
83: case MAT_HERMITIAN:
84: break;
85: default:
86: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87: }
88: return(0);
89: }
91: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
92: {
93: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
95: PetscInt i,j;
96: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
99: for (i=0;i<nr;i++) {
100: if (rows[i] < 0) continue;
101: PetscBLASIntCast(rows[i]+1,&gridx);
102: for (j=0;j<nc;j++) {
103: if (cols[j] < 0) continue;
104: PetscBLASIntCast(cols[j]+1,&gcidx);
105: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
106: if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
107: switch (imode) {
108: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
109: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
110: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
111: }
112: } else {
113: if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
114: A->assembled = PETSC_FALSE;
115: MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
116: }
117: }
118: }
119: return(0);
120: }
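
/*
   A minimal usage sketch (matrix A, indices i,j and the value are arbitrary): any
   global entry may be set from any process; entries that the 2d block-cyclic layout
   maps to another process are stashed here and applied in MatAssemblyEnd_ScaLAPACK().

     MatSetValue(A,i,j,1.0,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   With MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) an off-process entry is
   an error instead, as checked above.
*/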
122: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
123: {
125: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
126: PetscScalar *x2d,*y2d,alpha=1.0;
127: const PetscInt *ranges;
128: PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
131: if (transpose) {
133: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
134: PetscLayoutGetRanges(A->rmap,&ranges);
135: PetscBLASIntCast(ranges[1],&mb); /* x block size */
136: xlld = PetscMax(1,A->rmap->n);
137: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
139: PetscLayoutGetRanges(A->cmap,&ranges);
140: PetscBLASIntCast(ranges[1],&nb); /* y block size */
141: ylld = 1;
142: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
145: /* allocate 2d vectors */
146: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
147: lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
148: PetscMalloc2(lszx,&x2d,lszy,&y2d);
149: xlld = PetscMax(1,lszx);
151: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
152: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
154: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
157: /* redistribute x as a column of a 2d matrix */
158: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
160: /* redistribute y as a row of a 2d matrix */
161: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
163: /* call PBLAS subroutine */
164: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
166: /* redistribute y from a row of a 2d matrix */
167: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
169: } else { /* non-transpose */
171: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
172: PetscLayoutGetRanges(A->cmap,&ranges);
173: PetscBLASIntCast(ranges[1],&nb); /* x block size */
174: xlld = 1;
175: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
177: PetscLayoutGetRanges(A->rmap,&ranges);
178: PetscBLASIntCast(ranges[1],&mb); /* y block size */
179: ylld = PetscMax(1,A->rmap->n);
180: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
183: /* allocate 2d vectors */
184: lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
185: lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
186: PetscMalloc2(lszx,&x2d,lszy,&y2d);
187: ylld = PetscMax(1,lszy);
189: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
190: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
192: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
195: /* redistribute x as a row of a 2d matrix */
196: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
198: /* redistribute y as a column of a 2d matrix */
199: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
201: /* call PBLAS subroutine */
202: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
204: /* redistribute y from a column of a 2d matrix */
205: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
207: }
208: PetscFree2(x2d,y2d);
209: return(0);
210: }
212: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213: {
214: PetscErrorCode ierr;
215: const PetscScalar *xarray;
216: PetscScalar *yarray;
219: VecGetArrayRead(x,&xarray);
220: VecGetArray(y,&yarray);
221: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
222: VecRestoreArrayRead(x,&xarray);
223: VecRestoreArray(y,&yarray);
224: return(0);
225: }
227: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228: {
229: PetscErrorCode ierr;
230: const PetscScalar *xarray;
231: PetscScalar *yarray;
234: VecGetArrayRead(x,&xarray);
235: VecGetArray(y,&yarray);
236: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
237: VecRestoreArrayRead(x,&xarray);
238: VecRestoreArray(y,&yarray);
239: return(0);
240: }
242: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243: {
244: PetscErrorCode ierr;
245: const PetscScalar *xarray;
246: PetscScalar *zarray;
249: if (y != z) { VecCopy(y,z); }
250: VecGetArrayRead(x,&xarray);
251: VecGetArray(z,&zarray);
252: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
253: VecRestoreArrayRead(x,&xarray);
254: VecRestoreArray(z,&zarray);
255: return(0);
256: }
258: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259: {
260: PetscErrorCode ierr;
261: const PetscScalar *xarray;
262: PetscScalar *zarray;
265: if (y != z) { VecCopy(y,z); }
266: VecGetArrayRead(x,&xarray);
267: VecGetArray(z,&zarray);
268: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
269: VecRestoreArrayRead(x,&xarray);
270: VecRestoreArray(z,&zarray);
271: return(0);
272: }
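
/*
   A minimal sketch of exercising the four kernels above (assumes an assembled
   MATSCALAPACK matrix A). MatCreateVecs() returns vectors whose 1d layouts match
   A->cmap and A->rmap, which is what the 1d block descriptors built in
   MatMultXXXYYY_ScaLAPACK() require.

     Vec x,y;
     MatCreateVecs(A,&x,&y);
     VecSet(x,1.0);
     MatMult(A,x,y);             dispatches to MatMult_ScaLAPACK()
     MatMultTranspose(A,y,x);    dispatches to MatMultTranspose_ScaLAPACK()
     VecDestroy(&x);
     VecDestroy(&y);
*/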
274: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275: {
276: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279: PetscScalar sone=1.0,zero=0.0;
280: PetscBLASInt one=1;
283: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284: C->assembled = PETSC_TRUE;
285: return(0);
286: }
288: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289: {
293: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
294: MatSetType(C,MATSCALAPACK);
295: MatSetUp(C);
296: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297: return(0);
298: }
300: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301: {
302: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305: PetscScalar sone=1.0,zero=0.0;
306: PetscBLASInt one=1;
309: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310: C->assembled = PETSC_TRUE;
311: return(0);
312: }
314: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315: {
319: MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
320: MatSetType(C,MATSCALAPACK);
321: MatSetUp(C);
322: return(0);
323: }
325: /* --------------------------------------- */
326: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327: {
329: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330: C->ops->productsymbolic = MatProductSymbolic_AB;
331: return(0);
332: }
334: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335: {
337: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338: C->ops->productsymbolic = MatProductSymbolic_ABt;
339: return(0);
340: }
342: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343: {
345: Mat_Product *product = C->product;
348: switch (product->type) {
349: case MATPRODUCT_AB:
350: MatProductSetFromOptions_ScaLAPACK_AB(C);
351: break;
352: case MATPRODUCT_ABt:
353: MatProductSetFromOptions_ScaLAPACK_ABt(C);
354: break;
355: default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356: }
357: return(0);
358: }
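
/*
   A minimal sketch of reaching these routines through the MatProduct interface
   (assumes assembled MATSCALAPACK matrices A and B with compatible sizes):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_AB);    or MATPRODUCT_ABt
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
     MatDestroy(&C);
*/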
359: /* --------------------------------------- */
361: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
362: {
363: PetscErrorCode ierr;
364: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
365: PetscScalar *darray,*d2d,v;
366: const PetscInt *ranges;
367: PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
370: VecGetArray(D,&darray);
372: if (A->rmap->N<=A->cmap->N) { /* row version */
374: /* create ScaLAPACK descriptor for vector (1d block distribution) */
375: PetscLayoutGetRanges(A->rmap,&ranges);
376: PetscBLASIntCast(ranges[1],&mb); /* D block size */
377: dlld = PetscMax(1,A->rmap->n);
378: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
381: /* allocate 2d vector */
382: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
383: PetscCalloc1(lszd,&d2d);
384: dlld = PetscMax(1,lszd);
386: /* create ScaLAPACK descriptor for vector (2d block distribution) */
387: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
390: /* collect diagonal */
391: for (j=1;j<=a->M;j++) {
392: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
393: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
394: }
396: /* redistribute d from a column of a 2d matrix */
397: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
398: PetscFree(d2d);
400: } else { /* column version */
402: /* create ScaLAPACK descriptor for vector (1d block distribution) */
403: PetscLayoutGetRanges(A->cmap,&ranges);
404: PetscBLASIntCast(ranges[1],&nb); /* D block size */
405: dlld = 1;
406: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
409: /* allocate 2d vector */
410: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
411: PetscCalloc1(lszd,&d2d);
413: /* create ScaLAPACK descriptor for vector (2d block distribution) */
414: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
417: /* collect diagonal */
418: for (j=1;j<=a->N;j++) {
419: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
420: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
421: }
423: /* redistribute d from a row of a 2d matrix */
424: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
425: PetscFree(d2d);
426: }
428: VecRestoreArray(D,&darray);
429: VecAssemblyBegin(D);
430: VecAssemblyEnd(D);
431: return(0);
432: }
434: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
435: {
436: PetscErrorCode ierr;
437: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
438: const PetscScalar *d;
439: const PetscInt *ranges;
440: PetscScalar *d2d;
441: PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
444: if (R) {
445: VecGetArrayRead(R,(const PetscScalar **)&d);
446: /* create ScaLAPACK descriptor for vector (1d block distribution) */
447: PetscLayoutGetRanges(A->cmap,&ranges);
448: PetscBLASIntCast(ranges[1],&nb); /* D block size */
449: dlld = 1;
450: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
453: /* allocate 2d vector */
454: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
455: PetscCalloc1(lszd,&d2d);
457: /* create ScaLAPACK descriptor for vector (2d block distribution) */
458: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
461: /* redistribute d to a row of a 2d matrix */
462: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
464: /* broadcast along process columns */
465: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
466: else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
468: /* local scaling */
469: for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
471: PetscFree(d2d);
472: VecRestoreArrayRead(R,(const PetscScalar **)&d);
473: }
474: if (L) {
475: VecGetArrayRead(L,(const PetscScalar **)&d);
476: /* create ScaLAPACK descriptor for vector (1d block distribution) */
477: PetscLayoutGetRanges(A->rmap,&ranges);
478: PetscBLASIntCast(ranges[1],&mb); /* D block size */
479: dlld = PetscMax(1,A->rmap->n);
480: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
483: /* allocate 2d vector */
484: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
485: PetscCalloc1(lszd,&d2d);
486: dlld = PetscMax(1,lszd);
488: /* create ScaLAPACK descriptor for vector (2d block distribution) */
489: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
492: /* redistribute d to a column of a 2d matrix */
493: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
495: /* broadcast along process rows */
496: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
497: else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
499: /* local scaling */
500: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
502: PetscFree(d2d);
503: VecRestoreArrayRead(L,(const PetscScalar **)&d);
504: }
505: return(0);
506: }
508: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509: {
511: *missing = PETSC_FALSE;
512: return(0);
513: }
515: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516: {
517: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518: PetscBLASInt n,one=1;
521: n = x->lld*x->locc;
522: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523: return(0);
524: }
526: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527: {
528: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529: PetscBLASInt i,n;
530: PetscScalar v;
533: n = PetscMin(x->M,x->N);
534: for (i=1;i<=n;i++) {
535: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536: v += alpha;
537: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538: }
539: return(0);
540: }
542: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543: {
545: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
546: Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
547: PetscBLASInt one=1;
548: PetscScalar beta=1.0;
551: MatScaLAPACKCheckDistribution(Y,1,X,3);
552: PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553: PetscObjectStateIncrease((PetscObject)Y);
554: return(0);
555: }
557: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558: {
560: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
561: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
564: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
565: PetscObjectStateIncrease((PetscObject)B);
566: return(0);
567: }
569: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570: {
571: Mat Bs;
572: MPI_Comm comm;
573: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
577: PetscObjectGetComm((PetscObject)A,&comm);
578: MatCreate(comm,&Bs);
579: MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
580: MatSetType(Bs,MATSCALAPACK);
581: b = (Mat_ScaLAPACK*)Bs->data;
582: b->M = a->M;
583: b->N = a->N;
584: b->mb = a->mb;
585: b->nb = a->nb;
586: b->rsrc = a->rsrc;
587: b->csrc = a->csrc;
588: MatSetUp(Bs);
589: *B = Bs;
590: if (op == MAT_COPY_VALUES) {
591: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
592: }
593: Bs->assembled = PETSC_TRUE;
594: return(0);
595: }
597: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
598: {
600: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
601: Mat Bs = *B;
602: PetscBLASInt one=1;
603: PetscScalar sone=1.0,zero=0.0;
604: #if defined(PETSC_USE_COMPLEX)
605: PetscInt i,j;
606: #endif
609: if (reuse == MAT_INITIAL_MATRIX) {
610: MatCreate(PetscObjectComm((PetscObject)A),&Bs);
611: MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
612: MatSetType(Bs,MATSCALAPACK);
613: MatSetUp(Bs);
614: *B = Bs;
615: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
616: b = (Mat_ScaLAPACK*)Bs->data;
617: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
618: #if defined(PETSC_USE_COMPLEX)
619: /* undo conjugation */
620: for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
621: #endif
622: Bs->assembled = PETSC_TRUE;
623: return(0);
624: }
626: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627: {
628: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629: PetscInt i,j;
632: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633: return(0);
634: }
636: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637: {
639: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
640: Mat Bs = *B;
641: PetscBLASInt one=1;
642: PetscScalar sone=1.0,zero=0.0;
645: if (reuse == MAT_INITIAL_MATRIX) {
646: MatCreate(PetscObjectComm((PetscObject)A),&Bs);
647: MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
648: MatSetType(Bs,MATSCALAPACK);
649: MatSetUp(Bs);
650: *B = Bs;
651: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652: b = (Mat_ScaLAPACK*)Bs->data;
653: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654: Bs->assembled = PETSC_TRUE;
655: return(0);
656: }
658: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
659: {
661: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
662: PetscScalar *x,*x2d;
663: const PetscInt *ranges;
664: PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
667: VecCopy(B,X);
668: VecGetArray(X,&x);
670: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
671: PetscLayoutGetRanges(A->rmap,&ranges);
672: PetscBLASIntCast(ranges[1],&mb); /* x block size */
673: xlld = PetscMax(1,A->rmap->n);
674: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
677: /* allocate 2d vector */
678: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
679: PetscMalloc1(lszx,&x2d);
680: xlld = PetscMax(1,lszx);
682: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
683: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
686: /* redistribute x as a column of a 2d matrix */
687: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
689: /* call ScaLAPACK subroutine */
690: switch (A->factortype) {
691: case MAT_FACTOR_LU:
692: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
694: break;
695: case MAT_FACTOR_CHOLESKY:
696: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
698: break;
699: default:
700: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
701: break;
702: }
704: /* redistribute x from a column of a 2d matrix */
705: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
707: PetscFree(x2d);
708: VecRestoreArray(X,&x);
709: return(0);
710: }
712: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713: {
717: MatSolve_ScaLAPACK(A,B,X);
718: VecAXPY(X,1,Y);
719: return(0);
720: }
722: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
723: {
725: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
726: PetscBool flg1,flg2;
727: PetscBLASInt one=1,info;
730: PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
731: PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
732: if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
733: MatScaLAPACKCheckDistribution(B,1,X,2);
734: b = (Mat_ScaLAPACK*)B->data;
735: x = (Mat_ScaLAPACK*)X->data;
736: PetscArraycpy(x->loc,b->loc,b->lld*b->locc);
738: switch (A->factortype) {
739: case MAT_FACTOR_LU:
740: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
742: break;
743: case MAT_FACTOR_CHOLESKY:
744: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
746: break;
747: default:
748: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
749: break;
750: }
751: return(0);
752: }
754: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755: {
757: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
758: PetscBLASInt one=1,info;
761: if (!a->pivots) {
762: PetscMalloc1(a->locr+a->mb,&a->pivots);
763: PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
764: }
765: PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
767: A->factortype = MAT_FACTOR_LU;
768: A->assembled = PETSC_TRUE;
770: PetscFree(A->solvertype);
771: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
772: return(0);
773: }
775: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776: {
780: MatCopy(A,F,SAME_NONZERO_PATTERN);
781: MatLUFactor_ScaLAPACK(F,0,0,info);
782: return(0);
783: }
785: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786: {
788: /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
789: return(0);
790: }
792: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793: {
795: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
796: PetscBLASInt one=1,info;
799: PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
801: A->factortype = MAT_FACTOR_CHOLESKY;
802: A->assembled = PETSC_TRUE;
804: PetscFree(A->solvertype);
805: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
806: return(0);
807: }
809: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810: {
814: MatCopy(A,F,SAME_NONZERO_PATTERN);
815: MatCholeskyFactor_ScaLAPACK(F,0,info);
816: return(0);
817: }
819: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820: {
822: /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
823: return(0);
824: }
826: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827: {
829: *type = MATSOLVERSCALAPACK;
830: return(0);
831: }
833: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834: {
835: Mat B;
839: /* Create the factorization matrix */
840: MatCreate(PetscObjectComm((PetscObject)A),&B);
841: MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
842: MatSetType(B,MATSCALAPACK);
843: MatSetUp(B);
844: B->factortype = ftype;
845: PetscFree(B->solvertype);
846: PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);
848: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
849: *F = B;
850: return(0);
851: }
853: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854: {
858: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
859: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
860: return(0);
861: }
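
/*
   A minimal sketch of using the factorizations registered above through the factor
   interface (assumes an assembled square MATSCALAPACK matrix A and vectors b,x that
   conform to its layout; the ordering ISs are ignored by this solver):

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERSCALAPACK,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);    a no-op here, see above
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatDestroy(&F);

   From KSP the same solver is selected with -pc_type lu -pc_factor_mat_solver_type scalapack.
*/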
863: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864: {
866: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
867: PetscBLASInt one=1,lwork=0;
868: const char *ntype;
869: PetscScalar *work=NULL,dummy;
872: switch (type){
873: case NORM_1:
874: ntype = "1";
875: lwork = PetscMax(a->locr,a->locc);
876: break;
877: case NORM_FROBENIUS:
878: ntype = "F";
879: work = &dummy;
880: break;
881: case NORM_INFINITY:
882: ntype = "I";
883: lwork = PetscMax(a->locr,a->locc);
884: break;
885: default:
886: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887: }
888: if (lwork) { PetscMalloc1(lwork,&work); }
889: *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890: if (lwork) { PetscFree(work); }
891: return(0);
892: }
894: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895: {
896: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
900: PetscArrayzero(a->loc,a->lld*a->locc);
901: return(0);
902: }
904: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905: {
906: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
908: PetscInt i,n,nb,isrc,nproc,iproc,*idx;
911: if (rows) {
912: n = a->locr;
913: nb = a->mb;
914: isrc = a->rsrc;
915: nproc = a->grid->nprow;
916: iproc = a->grid->myrow;
917: PetscMalloc1(n,&idx);
918: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
920: }
921: if (cols) {
922: n = a->locc;
923: nb = a->nb;
924: isrc = a->csrc;
925: nproc = a->grid->npcol;
926: iproc = a->grid->mycol;
927: PetscMalloc1(n,&idx);
928: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
930: }
931: return(0);
932: }
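
/*
   The idx[] formula above enumerates, in local order, the global indices owned by
   this process in one dimension of the 2d block-cyclic layout: local entry i sits
   at offset i%nb within local block i/nb, and local block k corresponds to global
   block k*nproc + ((iproc-isrc) mod nproc). A minimal sketch of querying the result
   (assumes an assembled MATSCALAPACK matrix A):

     IS rows,cols;
     MatGetOwnershipIS(A,&rows,&cols);
     ISView(rows,PETSC_VIEWER_STDOUT_SELF);
     ISDestroy(&rows);
     ISDestroy(&cols);
*/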
934: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
935: {
937: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
938: Mat Bmpi;
939: MPI_Comm comm;
940: PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
941: const PetscInt *ranges,*branges,*cwork;
942: const PetscScalar *vwork;
943: PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info;
944: PetscScalar *barray;
945: PetscBool differ=PETSC_FALSE;
946: PetscMPIInt size;
949: PetscObjectGetComm((PetscObject)A,&comm);
950: PetscLayoutGetRanges(A->rmap,&ranges);
952: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
953: MPI_Comm_size(comm,&size);
954: PetscLayoutGetRanges((*B)->rmap,&branges);
955: for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
956: }
958: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
959: MatCreate(comm,&Bmpi);
960: m = PETSC_DECIDE;
961: PetscSplitOwnershipEqual(comm,&m,&M);
962: n = PETSC_DECIDE;
963: PetscSplitOwnershipEqual(comm,&n,&N);
964: MatSetSizes(Bmpi,m,n,M,N);
965: MatSetType(Bmpi,MATDENSE);
966: MatSetUp(Bmpi);
968: /* create ScaLAPACK descriptor for B (1d block distribution) */
969: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
970: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
971: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
974: /* redistribute matrix */
975: MatDenseGetArray(Bmpi,&barray);
976: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
977: MatDenseRestoreArray(Bmpi,&barray);
978: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
979: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
981: /* transfer rows of auxiliary matrix to the final matrix B */
982: MatGetOwnershipRange(Bmpi,&rstart,&rend);
983: for (i=rstart;i<rend;i++) {
984: MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
985: MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
986: MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
987: }
988: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
989: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
990: MatDestroy(&Bmpi);
992: } else { /* normal cases */
994: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
995: else {
996: MatCreate(comm,&Bmpi);
997: m = PETSC_DECIDE;
998: PetscSplitOwnershipEqual(comm,&m,&M);
999: n = PETSC_DECIDE;
1000: PetscSplitOwnershipEqual(comm,&n,&N);
1001: MatSetSizes(Bmpi,m,n,M,N);
1002: MatSetType(Bmpi,MATDENSE);
1003: MatSetUp(Bmpi);
1004: }
1006: /* create ScaLAPACK descriptor for B (1d block distribution) */
1007: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
1008: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
1009: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
1012: /* redistribute matrix */
1013: MatDenseGetArray(Bmpi,&barray);
1014: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
1015: MatDenseRestoreArray(Bmpi,&barray);
1017: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1018: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1019: if (reuse == MAT_INPLACE_MATRIX) {
1020: MatHeaderReplace(A,&Bmpi);
1021: } else *B = Bmpi;
1022: }
1023: return(0);
1024: }
1026: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
1027: {
1029: Mat_ScaLAPACK *b;
1030: Mat Bmpi;
1031: MPI_Comm comm;
1032: PetscInt M=A->rmap->N,N=A->cmap->N,m,n;
1033: const PetscInt *ranges;
1034: PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info;
1035: PetscScalar *aarray;
1036: PetscInt lda;
1039: PetscObjectGetComm((PetscObject)A,&comm);
1041: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1042: else {
1043: MatCreate(comm,&Bmpi);
1044: m = PETSC_DECIDE;
1045: PetscSplitOwnershipEqual(comm,&m,&M);
1046: n = PETSC_DECIDE;
1047: PetscSplitOwnershipEqual(comm,&n,&N);
1048: MatSetSizes(Bmpi,m,n,M,N);
1049: MatSetType(Bmpi,MATSCALAPACK);
1050: MatSetUp(Bmpi);
1051: }
1052: b = (Mat_ScaLAPACK*)Bmpi->data;
1054: /* create ScaLAPACK descriptor for A (1d block distribution) */
1055: PetscLayoutGetRanges(A->rmap,&ranges);
1056: PetscBLASIntCast(ranges[1],&amb); /* row block size */
1057: MatDenseGetLDA(A,&lda);
1058: lld = PetscMax(lda,1); /* local leading dimension */
1059: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
1062: /* redistribute matrix */
1063: MatDenseGetArray(A,&aarray);
1064: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
1065: MatDenseRestoreArray(A,&aarray);
1067: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
1068: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
1069: if (reuse == MAT_INPLACE_MATRIX) {
1070: MatHeaderReplace(A,&Bmpi);
1071: } else *B = Bmpi;
1072: return(0);
1073: }
1075: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1076: {
1077: Mat mat_scal;
1078: PetscErrorCode ierr;
1079: PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1080: const PetscInt *cols;
1081: const PetscScalar *vals;
1084: if (reuse == MAT_REUSE_MATRIX) {
1085: mat_scal = *newmat;
1086: MatZeroEntries(mat_scal);
1087: } else {
1088: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1089: m = PETSC_DECIDE;
1090: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1091: n = PETSC_DECIDE;
1092: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1093: MatSetSizes(mat_scal,m,n,M,N);
1094: MatSetType(mat_scal,MATSCALAPACK);
1095: MatSetUp(mat_scal);
1096: }
1097: for (row=rstart;row<rend;row++) {
1098: MatGetRow(A,row,&ncols,&cols,&vals);
1099: MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1100: MatRestoreRow(A,row,&ncols,&cols,&vals);
1101: }
1102: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1103: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1105: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1106: else *newmat = mat_scal;
1107: return(0);
1108: }
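
/*
   A minimal sketch of invoking the conversion above (assumes an assembled AIJ matrix
   A; the result is stored densely, so this is only practical when A fits in dense
   storage):

     Mat As;
     MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
     MatDestroy(&As);
*/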
1110: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1111: {
1112: Mat mat_scal;
1113: PetscErrorCode ierr;
1114: PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1115: const PetscInt *cols;
1116: const PetscScalar *vals;
1117: PetscScalar v;
1120: if (reuse == MAT_REUSE_MATRIX) {
1121: mat_scal = *newmat;
1122: MatZeroEntries(mat_scal);
1123: } else {
1124: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1125: m = PETSC_DECIDE;
1126: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1127: n = PETSC_DECIDE;
1128: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1129: MatSetSizes(mat_scal,m,n,M,N);
1130: MatSetType(mat_scal,MATSCALAPACK);
1131: MatSetUp(mat_scal);
1132: }
1133: MatGetRowUpperTriangular(A);
1134: for (row=rstart;row<rend;row++) {
1135: MatGetRow(A,row,&ncols,&cols,&vals);
1136: MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1137: for (j=0;j<ncols;j++) { /* lower triangular part */
1138: if (cols[j] == row) continue;
1139: v = A->hermitian ? PetscConj(vals[j]) : vals[j];
1140: MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1141: }
1142: MatRestoreRow(A,row,&ncols,&cols,&vals);
1143: }
1144: MatRestoreRowUpperTriangular(A);
1145: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1146: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1148: if (reuse == MAT_INPLACE_MATRIX) { MatHeaderReplace(A,&mat_scal); }
1149: else *newmat = mat_scal;
1150: return(0);
1151: }
1153: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1154: {
1155: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1157: PetscInt sz=0;
1160: PetscLayoutSetUp(A->rmap);
1161: PetscLayoutSetUp(A->cmap);
1162: if (!a->lld) a->lld = a->locr;
1164: PetscFree(a->loc);
1165: PetscIntMultError(a->lld,a->locc,&sz);
1166: PetscCalloc1(sz,&a->loc);
1167: PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));
1169: A->preallocated = PETSC_TRUE;
1170: return(0);
1171: }
1173: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1174: {
1175: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1176: PetscErrorCode ierr;
1177: Mat_ScaLAPACK_Grid *grid;
1178: PetscBool flg;
1179: MPI_Comm icomm;
1182: MatStashDestroy_Private(&A->stash);
1183: PetscFree(a->loc);
1184: PetscFree(a->pivots);
1185: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1186: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1187: if (--grid->grid_refct == 0) {
1188: Cblacs_gridexit(grid->ictxt);
1189: Cblacs_gridexit(grid->ictxrow);
1190: Cblacs_gridexit(grid->ictxcol);
1191: PetscFree(grid);
1192: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
1193: }
1194: PetscCommDestroy(&icomm);
1195: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1196: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1197: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1198: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1199: PetscFree(A->data);
1200: return(0);
1201: }
1203: PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1204: {
1206: const PetscInt *ranges;
1207: PetscMPIInt size;
1208: PetscInt i,n;
1211: MPI_Comm_size(map->comm,&size);
1212: if (size>2) {
1213: PetscLayoutGetRanges(map,&ranges);
1214: n = ranges[1]-ranges[0];
1215: for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1216: if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1217: }
1218: return(0);
1219: }
1221: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1222: {
1223: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1225: PetscBLASInt info=0;
1228: PetscLayoutSetUp(A->rmap);
1229: PetscLayoutSetUp(A->cmap);
1231: /* check that the layout is as enforced by MatCreateScaLAPACK */
1232: MatScaLAPACKCheckLayout(A->rmap);
1233: MatScaLAPACKCheckLayout(A->cmap);
1235: /* compute local sizes */
1236: PetscBLASIntCast(A->rmap->N,&a->M);
1237: PetscBLASIntCast(A->cmap->N,&a->N);
1238: a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1239: a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1240: a->lld = PetscMax(1,a->locr);
1242: /* allocate local array */
1243: MatScaLAPACKSetPreallocation(A);
1245: /* set up ScaLAPACK descriptor */
1246: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1248: return(0);
1249: }
1251: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1252: {
1254: PetscInt nstash,reallocs;
1257: if (A->nooffprocentries) return(0);
1258: MatStashScatterBegin_Private(A,&A->stash,NULL);
1259: MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1260: PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
1261: return(0);
1262: }
1264: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1265: {
1267: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1268: PetscMPIInt n;
1269: PetscInt i,flg,*row,*col;
1270: PetscScalar *val;
1271: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1274: if (A->nooffprocentries) return(0);
1275: while (1) {
1276: MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1277: if (!flg) break;
1278: for (i=0;i<n;i++) {
1279: PetscBLASIntCast(row[i]+1,&gridx);
1280: PetscBLASIntCast(col[i]+1,&gcidx);
1281: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1282: if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
1283: switch (A->insertmode) {
1284: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1285: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1286: default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1287: }
1288: }
1289: }
1290: MatStashScatterEnd_Private(&A->stash);
1291: return(0);
1292: }
1294: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1295: {
1297: Mat Adense,As;
1298: MPI_Comm comm;
1301: PetscObjectGetComm((PetscObject)newMat,&comm);
1302: MatCreate(comm,&Adense);
1303: MatSetType(Adense,MATDENSE);
1304: MatLoad(Adense,viewer);
1305: MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1306: MatDestroy(&Adense);
1307: MatHeaderReplace(newMat,&As);
1308: return(0);
1309: }
1311: /* -------------------------------------------------------------------*/
1312: static struct _MatOps MatOps_Values = {
1313: MatSetValues_ScaLAPACK,
1314: 0,
1315: 0,
1316: MatMult_ScaLAPACK,
1317: /* 4*/ MatMultAdd_ScaLAPACK,
1318: MatMultTranspose_ScaLAPACK,
1319: MatMultTransposeAdd_ScaLAPACK,
1320: MatSolve_ScaLAPACK,
1321: MatSolveAdd_ScaLAPACK,
1322: 0,
1323: /*10*/ 0,
1324: MatLUFactor_ScaLAPACK,
1325: MatCholeskyFactor_ScaLAPACK,
1326: 0,
1327: MatTranspose_ScaLAPACK,
1328: /*15*/ MatGetInfo_ScaLAPACK,
1329: 0,
1330: MatGetDiagonal_ScaLAPACK,
1331: MatDiagonalScale_ScaLAPACK,
1332: MatNorm_ScaLAPACK,
1333: /*20*/ MatAssemblyBegin_ScaLAPACK,
1334: MatAssemblyEnd_ScaLAPACK,
1335: MatSetOption_ScaLAPACK,
1336: MatZeroEntries_ScaLAPACK,
1337: /*24*/ 0,
1338: MatLUFactorSymbolic_ScaLAPACK,
1339: MatLUFactorNumeric_ScaLAPACK,
1340: MatCholeskyFactorSymbolic_ScaLAPACK,
1341: MatCholeskyFactorNumeric_ScaLAPACK,
1342: /*29*/ MatSetUp_ScaLAPACK,
1343: 0,
1344: 0,
1345: 0,
1346: 0,
1347: /*34*/ MatDuplicate_ScaLAPACK,
1348: 0,
1349: 0,
1350: 0,
1351: 0,
1352: /*39*/ MatAXPY_ScaLAPACK,
1353: 0,
1354: 0,
1355: 0,
1356: MatCopy_ScaLAPACK,
1357: /*44*/ 0,
1358: MatScale_ScaLAPACK,
1359: MatShift_ScaLAPACK,
1360: 0,
1361: 0,
1362: /*49*/ 0,
1363: 0,
1364: 0,
1365: 0,
1366: 0,
1367: /*54*/ 0,
1368: 0,
1369: 0,
1370: 0,
1371: 0,
1372: /*59*/ 0,
1373: MatDestroy_ScaLAPACK,
1374: MatView_ScaLAPACK,
1375: 0,
1376: 0,
1377: /*64*/ 0,
1378: 0,
1379: 0,
1380: 0,
1381: 0,
1382: /*69*/ 0,
1383: 0,
1384: MatConvert_ScaLAPACK_Dense,
1385: 0,
1386: 0,
1387: /*74*/ 0,
1388: 0,
1389: 0,
1390: 0,
1391: 0,
1392: /*79*/ 0,
1393: 0,
1394: 0,
1395: 0,
1396: MatLoad_ScaLAPACK,
1397: /*84*/ 0,
1398: 0,
1399: 0,
1400: 0,
1401: 0,
1402: /*89*/ 0,
1403: 0,
1404: MatMatMultNumeric_ScaLAPACK,
1405: 0,
1406: 0,
1407: /*94*/ 0,
1408: 0,
1409: 0,
1410: MatMatTransposeMultNumeric_ScaLAPACK,
1411: 0,
1412: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1413: 0,
1414: 0,
1415: MatConjugate_ScaLAPACK,
1416: 0,
1417: /*104*/0,
1418: 0,
1419: 0,
1420: 0,
1421: 0,
1422: /*109*/MatMatSolve_ScaLAPACK,
1423: 0,
1424: 0,
1425: 0,
1426: MatMissingDiagonal_ScaLAPACK,
1427: /*114*/0,
1428: 0,
1429: 0,
1430: 0,
1431: 0,
1432: /*119*/0,
1433: MatHermitianTranspose_ScaLAPACK,
1434: 0,
1435: 0,
1436: 0,
1437: /*124*/0,
1438: 0,
1439: 0,
1440: 0,
1441: 0,
1442: /*129*/0,
1443: 0,
1444: 0,
1445: 0,
1446: 0,
1447: /*134*/0,
1448: 0,
1449: 0,
1450: 0,
1451: 0,
1452: 0,
1453: /*140*/0,
1454: 0,
1455: 0,
1456: 0,
1457: 0,
1458: /*145*/0,
1459: 0,
1460: 0
1461: };
1463: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1464: {
1465: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1466: PetscInt size=stash->size,nsends;
1467: PetscErrorCode ierr;
1468: PetscInt count,*sindices,**rindices,i,j,l;
1469: PetscScalar **rvalues,*svalues;
1470: MPI_Comm comm = stash->comm;
1471: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1472: PetscMPIInt *sizes,*nlengths,nreceives;
1473: PetscInt *sp_idx,*sp_idy;
1474: PetscScalar *sp_val;
1475: PetscMatStashSpace space,space_next;
1476: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1477: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data;
1480: { /* make sure all processes use either INSERT_VALUES or ADD_VALUES */
1481: InsertMode addv;
1482: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1483: if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
1484: mat->insertmode = addv; /* in case this processor had no cache */
1485: }
1487: bs2 = stash->bs*stash->bs;
1489: /* first count number of contributors to each processor */
1490: PetscCalloc1(size,&nlengths);
1491: PetscMalloc1(stash->n+1,&owner);
1493: i = j = 0;
1494: space = stash->space_head;
1495: while (space) {
1496: space_next = space->next;
1497: for (l=0; l<space->local_used; l++) {
1498: PetscBLASIntCast(space->idx[l]+1,&gridx);
1499: PetscBLASIntCast(space->idy[l]+1,&gcidx);
1500: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1501: j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1502: nlengths[j]++; owner[i] = j;
1503: i++;
1504: }
1505: space = space_next;
1506: }
1508: /* Now check what procs get messages - and compute nsends. */
1509: PetscCalloc1(size,&sizes);
1510: for (i=0, nsends=0; i<size; i++) {
1511: if (nlengths[i]) {
1512: sizes[i] = 1; nsends++;
1513: }
1514: }
1516: {PetscMPIInt *onodes,*olengths;
1517: /* Determine the number of messages to expect, their lengths, and the processes they come from */
1518: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1519: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1520: /* row and column indices are sent together, so lengths are multiplied by 2 */
1521: for (i=0; i<nreceives; i++) olengths[i] *=2;
1522: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1523: /* values have 'bs2' entries each (remove the earlier factor of 2) */
1524: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1525: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1526: PetscFree(onodes);
1527: PetscFree(olengths);}
1529: /* do sends:
1530: 1) starts[i] gives the starting index in svalues for stuff going to
1531: the ith processor
1532: */
1533: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1534: PetscMalloc1(2*nsends,&send_waits);
1535: PetscMalloc2(size,&startv,size,&starti);
1536: /* use 2 sends: the first with all values, the second with all row and column indices */
1537: startv[0] = 0; starti[0] = 0;
1538: for (i=1; i<size; i++) {
1539: startv[i] = startv[i-1] + nlengths[i-1];
1540: starti[i] = starti[i-1] + 2*nlengths[i-1];
1541: }
1543: i = 0;
1544: space = stash->space_head;
1545: while (space) {
1546: space_next = space->next;
1547: sp_idx = space->idx;
1548: sp_idy = space->idy;
1549: sp_val = space->val;
1550: for (l=0; l<space->local_used; l++) {
1551: j = owner[i];
1552: if (bs2 == 1) {
1553: svalues[startv[j]] = sp_val[l];
1554: } else {
1555: PetscInt k;
1556: PetscScalar *buf1,*buf2;
1557: buf1 = svalues+bs2*startv[j];
1558: buf2 = space->val + bs2*l;
1559: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1560: }
1561: sindices[starti[j]] = sp_idx[l];
1562: sindices[starti[j]+nlengths[j]] = sp_idy[l];
1563: startv[j]++;
1564: starti[j]++;
1565: i++;
1566: }
1567: space = space_next;
1568: }
1569: startv[0] = 0;
1570: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1572: for (i=0,count=0; i<size; i++) {
1573: if (sizes[i]) {
1574: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1575: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1576: }
1577: }
1578: #if defined(PETSC_USE_INFO)
1579: PetscInfo1(NULL,"No of messages: %d \n",nsends);
1580: for (i=0; i<size; i++) {
1581: if (sizes[i]) {
1582: PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
1583: }
1584: }
1585: #endif
1586: PetscFree(nlengths);
1587: PetscFree(owner);
1588: PetscFree2(startv,starti);
1589: PetscFree(sizes);
1591: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1592: PetscMalloc1(2*nreceives,&recv_waits);
1594: for (i=0; i<nreceives; i++) {
1595: recv_waits[2*i] = recv_waits1[i];
1596: recv_waits[2*i+1] = recv_waits2[i];
1597: }
1598: stash->recv_waits = recv_waits;
1600: PetscFree(recv_waits1);
1601: PetscFree(recv_waits2);
1603: stash->svalues = svalues;
1604: stash->sindices = sindices;
1605: stash->rvalues = rvalues;
1606: stash->rindices = rindices;
1607: stash->send_waits = send_waits;
1608: stash->nsends = nsends;
1609: stash->nrecvs = nreceives;
1610: stash->reproduce_count = 0;
1611: return(0);
1612: }
1614: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615: {
1617: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1620: if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1621: if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1622: if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1623: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1624: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1625: return(0);
1626: }
1628: /*@
1629: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1630: the ScaLAPACK matrix
1632: Logically Collective on A
1634: Input Parameters:
1635: + A - a MATSCALAPACK matrix
1636: . mb - the row block size
1637: - nb - the column block size
1639: Level: intermediate
1641: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1642: @*/
1643: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1644: {
1651: PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1652: return(0);
1653: }
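
/*
   A minimal usage sketch (block sizes 16 and 32 are arbitrary); the call must come
   before MatSetUp(), as enforced above, and the local sizes must follow the layout
   checked by MatScaLAPACKCheckLayout():

     PetscInt m=PETSC_DECIDE,n=PETSC_DECIDE;
     PetscSplitOwnershipEqual(comm,&m,&M);
     PetscSplitOwnershipEqual(comm,&n,&N);
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATSCALAPACK);
     MatScaLAPACKSetBlockSizes(A,16,32);
     MatSetUp(A);
*/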
1655: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1656: {
1657: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1660: if (mb) *mb = a->mb;
1661: if (nb) *nb = a->nb;
1662: return(0);
1663: }
1665: /*@
1666: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1667: the ScaLAPACK matrix
1669: Not collective
1671: Input Parameter:
1672: . A - a MATSCALAPACK matrix
1674: Output Parameters:
1675: + mb - the row block size
1676: - nb - the column block size
1678: Level: intermediate
1680: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1681: @*/
1682: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1683: {
1688: PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1689: return(0);
1690: }
1692: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1693: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1695: /*MC
1696: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1698: Use ./configure --download-scalapack to build PETSc with ScaLAPACK support
1700: Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1702: Options Database Keys:
1703: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1704: . -mat_scalapack_grid_height - sets the grid height to use for the 2D block-cyclic distribution of the internal matrix
1705: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1707: Level: beginner
1709: .seealso: MATDENSE, MATELEMENTAL
1710: M*/
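
/*
   For example (program name and option values are illustrative), a run that creates
   matrices of this type from the options database and solves with the ScaLAPACK LU:

     ./app -mat_type scalapack -mat_scalapack_block_sizes 32,32 \
           -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type scalapack
*/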
1712: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1713: {
1714: Mat_ScaLAPACK *a;
1715: PetscErrorCode ierr;
1716: PetscBool flg,flg1;
1717: Mat_ScaLAPACK_Grid *grid;
1718: MPI_Comm icomm;
1719: PetscBLASInt nprow,npcol,myrow,mycol;
1720: PetscInt optv1,k=2,array[2]={0,0};
1721: PetscMPIInt size;
1724: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1725: A->insertmode = NOT_SET_VALUES;
1727: MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1728: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1729: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1730: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1731: A->stash.ScatterDestroy = NULL;
1733: PetscNewLog(A,&a);
1734: A->data = (void*)a;
1736: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1737: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1738: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1739: }
1740: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1741: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1742: if (!flg) {
1743: PetscNewLog(A,&grid);
1745: MPI_Comm_size(icomm,&size);
1746: grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1748: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1749: PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1750: if (flg1) {
1751: if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1752: grid->nprow = optv1;
1753: }
1754: PetscOptionsEnd();
1756: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1757: grid->npcol = size/grid->nprow;
1758: PetscBLASIntCast(grid->nprow,&nprow);
1759: PetscBLASIntCast(grid->npcol,&npcol);
1760: Cblacs_get(-1,0,&grid->ictxt);
1761: Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1762: Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1763: grid->grid_refct = 1;
1764: grid->nprow = nprow;
1765: grid->npcol = npcol;
1766: grid->myrow = myrow;
1767: grid->mycol = mycol;
1768: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1769: Cblacs_get(-1,0,&grid->ictxrow);
1770: Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1771: Cblacs_get(-1,0,&grid->ictxcol);
1772: Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1773: MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
1775: } else grid->grid_refct++;
1776: PetscCommDestroy(&icomm);
1777: a->grid = grid;
1778: a->mb = DEFAULT_BLOCKSIZE;
1779: a->nb = DEFAULT_BLOCKSIZE;
1781: PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1782: PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1783: if (flg) {
1784: a->mb = array[0];
1785: a->nb = (k>1)? array[1]: a->mb;
1786: }
1787: PetscOptionsEnd();
1789: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1790: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1791: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1792: PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1793: return(0);
1794: }
1796: /*@C
1797: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1798: (2D block cyclic distribution).
1800: Collective
1802: Input Parameters:
1803: + comm - MPI communicator
1804: . mb - row block size (or PETSC_DECIDE to have it set automatically)
1805: . nb - column block size (or PETSC_DECIDE to have it set automatically)
1806: . M - number of global rows
1807: . N - number of global columns
1808: . rsrc - coordinate of process that owns the first row of the distributed matrix
1809: - csrc - coordinate of process that owns the first column of the distributed matrix
1811: Output Parameter:
1812: . A - the matrix
1814: Options Database Keys:
1815: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1817: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1818: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1819: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1821: Notes:
1822: If PETSC_DECIDE is used for the block sizes, then an appropriate value
1823: is chosen.
1825: Storage Information:
1826: Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1827: configured with ScaLAPACK. In particular, PETSc's local sizes lose
1828: significance and are thus ignored. The block sizes refer to the values
1829: used for the distributed matrix, not the same meaning as in BAIJ.
1831: Level: intermediate
1833: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1834: @*/
1835: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1836: {
1838: Mat_ScaLAPACK *a;
1839: PetscInt m,n;
1842: MatCreate(comm,A);
1843: MatSetType(*A,MATSCALAPACK);
1844: if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
1845: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1846: m = PETSC_DECIDE;
1847: PetscSplitOwnershipEqual(comm,&m,&M);
1848: n = PETSC_DECIDE;
1849: PetscSplitOwnershipEqual(comm,&n,&N);
1850: MatSetSizes(*A,m,n,M,N);
1851: a = (Mat_ScaLAPACK*)(*A)->data;
1852: PetscBLASIntCast(M,&a->M);
1853: PetscBLASIntCast(N,&a->N);
1854: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1855: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1856: PetscBLASIntCast(rsrc,&a->rsrc);
1857: PetscBLASIntCast(csrc,&a->csrc);
1858: MatSetUp(*A);
1859: return(0);
1860: }
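
/*
   A minimal usage sketch (global size 100 and the diagonal value are arbitrary);
   rstart/rend come from the 1d row layout, and entries that are off-process in the
   2d cyclic sense are routed through the stash during assembly:

     Mat         A;
     PetscInt    i,rstart,rend;
     PetscScalar v = 2.0;
     MatCreateScaLAPACK(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,0,0,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart;i<rend;i++) MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/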