Actual source code: fftw.c
petsc-3.12.0 2019-09-29
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an fftw plan should be
23: executed only for the arrays with which the plan was created */
24: } Mat_FFTW;
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34: /* MatMult_SeqFFTW performs serial forward DFT
35: Input parameters:
36: A - the matrix
37: x - the vector on which FDFT will be performed
39: Output parameter:
40: y - vector that stores result of FDFT
41: */
42: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
43: {
45: Mat_FFT *fft = (Mat_FFT*)A->data;
46: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
47: const PetscScalar *x_array;
48: PetscScalar *y_array;
49: #if defined(PETSC_USE_COMPLEX)
50: #if defined(PETSC_USE_64BIT_INDICES)
51: fftw_iodim64 *iodims;
52: #else
53: fftw_iodim *iodims;
54: #endif
55: PetscInt i;
56: #endif
57: PetscInt ndim = fft->ndim,*dim = fft->dim;
60: VecGetArrayRead(x,&x_array);
61: VecGetArray(y,&y_array);
62: if (!fftw->p_forward) { /* create a plan, then execute it */
63: switch (ndim) {
64: case 1:
65: #if defined(PETSC_USE_COMPLEX)
66: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67: #else
68: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69: #endif
70: break;
71: case 2:
72: #if defined(PETSC_USE_COMPLEX)
73: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74: #else
75: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
76: #endif
77: break;
78: case 3:
79: #if defined(PETSC_USE_COMPLEX)
80: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
81: #else
82: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83: #endif
84: break;
85: default:
86: #if defined(PETSC_USE_COMPLEX)
87: iodims = fftw->iodims;
88: #if defined(PETSC_USE_64BIT_INDICES)
89: if (ndim) {
90: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
91: iodims[ndim-1].is = iodims[ndim-1].os = 1;
92: for (i=ndim-2; i>=0; --i) {
93: iodims[i].n = (ptrdiff_t)dim[i];
94: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
95: }
96: }
97: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98: #else
99: if (ndim) {
100: iodims[ndim-1].n = (int)dim[ndim-1];
101: iodims[ndim-1].is = iodims[ndim-1].os = 1;
102: for (i=ndim-2; i>=0; --i) {
103: iodims[i].n = (int)dim[i];
104: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105: }
106: }
107: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108: #endif
110: #else
111: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
112: #endif
113: break;
114: }
115: fftw->finarray = (PetscScalar *) x_array;
116: fftw->foutarray = y_array;
117: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE), the data in the in/out arrays is overwritten during planning!
118: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
119: fftw_execute(fftw->p_forward);
120: } else { /* use existing plan */
121: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122: #if defined(PETSC_USE_COMPLEX)
123: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
124: #else
125: fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
126: #endif
127: } else {
128: fftw_execute(fftw->p_forward);
129: }
130: }
131: VecRestoreArray(y,&y_array);
132: VecRestoreArrayRead(x,&x_array);
133: return(0);
134: }
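/* Illustrative usage sketch (not part of the PETSc source): the forward DFT is
   applied through the standard MatMult() interface. Assumes a complex build of
   PETSc; error checking with CHKERRQ() is elided.

     Mat      A;
     Vec      x,y,z;
     PetscInt dim[1] = {16};

     MatCreateFFT(PETSC_COMM_SELF,1,dim,MATFFTW,&A);
     MatCreateVecsFFTW(A,&x,&y,&z);
     VecSet(x,1.0);
     MatMult(A,x,y);    // y holds the (unnormalized) forward DFT of x
*/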
136: /* MatMultTranspose_SeqFFTW performs serial backward DFT
137: Input parameters:
138: A - the matrix
139: x - the vector on which BDFT will be performed
141: Output parameter:
142: y - vector that stores result of BDFT
143: */
145: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
146: {
148: Mat_FFT *fft = (Mat_FFT*)A->data;
149: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
150: const PetscScalar *x_array;
151: PetscScalar *y_array;
152: PetscInt ndim=fft->ndim,*dim=fft->dim;
153: #if defined(PETSC_USE_COMPLEX)
154: #if defined(PETSC_USE_64BIT_INDICES)
155: fftw_iodim64 *iodims=fftw->iodims;
156: #else
157: fftw_iodim *iodims=fftw->iodims;
158: #endif
159: #endif
162: VecGetArrayRead(x,&x_array);
163: VecGetArray(y,&y_array);
164: if (!fftw->p_backward) { /* create a plan, then execute it */
165: switch (ndim) {
166: case 1:
167: #if defined(PETSC_USE_COMPLEX)
168: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
169: #else
170: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
171: #endif
172: break;
173: case 2:
174: #if defined(PETSC_USE_COMPLEX)
175: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
176: #else
177: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
178: #endif
179: break;
180: case 3:
181: #if defined(PETSC_USE_COMPLEX)
182: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
183: #else
184: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
185: #endif
186: break;
187: default:
188: #if defined(PETSC_USE_COMPLEX)
189: #if defined(PETSC_USE_64BIT_INDICES)
190: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
191: #else
192: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
193: #endif
194: #else
195: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
196: #endif
197: break;
198: }
199: fftw->binarray = (PetscScalar *) x_array;
200: fftw->boutarray = y_array;
201: fftw_execute(fftw->p_backward);
202: } else { /* use existing plan */
203: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
204: #if defined(PETSC_USE_COMPLEX)
205: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
206: #else
207: fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
208: #endif
209: } else {
210: fftw_execute(fftw->p_backward);
211: }
212: }
213: VecRestoreArray(y,&y_array);
214: VecRestoreArrayRead(x,&x_array);
215: return(0);
216: }
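/* Illustrative sketch (not part of the PETSc source): FFTW transforms are
   unnormalized, so the forward transform followed by the backward transform
   returns the input scaled by the total number of grid points. Assumes A, x,
   y, z were created as in the sketch above and N is the product of the
   dimensions.

     MatMult(A,x,y);              // y = F x
     MatMultTranspose(A,y,z);     // z = F^T y = N*x
     VecScale(z,1.0/N);           // recover x
*/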
218: /* MatMult_MPIFFTW performs forward DFT in parallel
219: Input parameters:
220: A - the matrix
221: x - the vector on which FDFT will be performed
223: Output parameter:
224: y - vector that stores result of FDFT
225: */
226: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
227: {
229: Mat_FFT *fft = (Mat_FFT*)A->data;
230: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
231: const PetscScalar *x_array;
232: PetscScalar *y_array;
233: PetscInt ndim=fft->ndim,*dim=fft->dim;
234: MPI_Comm comm;
237: PetscObjectGetComm((PetscObject)A,&comm);
238: VecGetArrayRead(x,&x_array);
239: VecGetArray(y,&y_array);
240: if (!fftw->p_forward) { /* create a plan, then execute it */
241: switch (ndim) {
242: case 1:
243: #if defined(PETSC_USE_COMPLEX)
244: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
245: #else
246: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
247: #endif
248: break;
249: case 2:
250: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
252: #else
253: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
254: #endif
255: break;
256: case 3:
257: #if defined(PETSC_USE_COMPLEX)
258: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
259: #else
260: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
261: #endif
262: break;
263: default:
264: #if defined(PETSC_USE_COMPLEX)
265: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
266: #else
267: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
268: #endif
269: break;
270: }
271: fftw->finarray = (PetscScalar *) x_array;
272: fftw->foutarray = y_array;
273: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE), the data in the in/out arrays is overwritten during planning!
274: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
275: fftw_execute(fftw->p_forward);
276: } else { /* use existing plan */
277: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
278: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
279: } else {
280: fftw_execute(fftw->p_forward);
281: }
282: }
283: VecRestoreArray(y,&y_array);
284: VecRestoreArrayRead(x,&x_array);
285: return(0);
286: }
288: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
289: Input parameters:
290: A - the matrix
291: x - the vector on which BDFT will be performed
293: Output parameter:
294: y - vector that stores result of BDFT
295: */
296: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
297: {
299: Mat_FFT *fft = (Mat_FFT*)A->data;
300: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
301: const PetscScalar *x_array;
302: PetscScalar *y_array;
303: PetscInt ndim=fft->ndim,*dim=fft->dim;
304: MPI_Comm comm;
307: PetscObjectGetComm((PetscObject)A,&comm);
308: VecGetArrayRead(x,&x_array);
309: VecGetArray(y,&y_array);
310: if (!fftw->p_backward) { /* create a plan, then execute it */
311: switch (ndim) {
312: case 1:
313: #if defined(PETSC_USE_COMPLEX)
314: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
315: #else
316: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
317: #endif
318: break;
319: case 2:
320: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
322: #else
323: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
324: #endif
325: break;
326: case 3:
327: #if defined(PETSC_USE_COMPLEX)
328: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
329: #else
330: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
331: #endif
332: break;
333: default:
334: #if defined(PETSC_USE_COMPLEX)
335: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
336: #else
337: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
338: #endif
339: break;
340: }
341: fftw->binarray = (PetscScalar *) x_array;
342: fftw->boutarray = y_array;
343: fftw_execute(fftw->p_backward);
344: } else { /* use existing plan */
345: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
346: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
347: } else {
348: fftw_execute(fftw->p_backward);
349: }
350: }
351: VecRestoreArray(y,&y_array);
352: VecRestoreArrayRead(x,&x_array);
353: return(0);
354: }
356: PetscErrorCode MatDestroy_FFTW(Mat A)
357: {
358: Mat_FFT *fft = (Mat_FFT*)A->data;
359: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
363: fftw_destroy_plan(fftw->p_forward);
364: fftw_destroy_plan(fftw->p_backward);
365: if (fftw->iodims) {
366: free(fftw->iodims);
367: }
368: PetscFree(fftw->dim_fftw);
369: PetscFree(fft->data);
370: fftw_mpi_cleanup();
371: return(0);
372: }
374: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
375: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
376: {
378: PetscScalar *array;
381: VecGetArray(v,&array);
382: fftw_free((fftw_complex*)array);
383: VecRestoreArray(v,&array);
384: VecDestroy_MPI(v);
385: return(0);
386: }
389: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
390: {
392: Mat A;
395: PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);
396: MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);
397: return(0);
398: }
400: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
401: {
403: Mat A;
406: PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);
407: MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);
408: return(0);
409: }
411: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
412: {
414: Mat A;
417: PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);
418: MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);
419: return(0);
420: }
423: /*@
424: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
425: parallel layout determined by FFTW
427: Collective on Mat
429: Input Parameter:
430: . A - the matrix
432: Output Parameters:
433: + x - (optional) input vector of forward FFTW
434: . y - (optional) output vector of forward FFTW
435: - z - (optional) output vector of backward FFTW
437: Level: advanced
439: Note: The parallel layout of the output of the forward FFTW is always the same as the input
440: of the backward FFTW, but the parallel layout of the input vector of the forward
441: FFTW might not be the same as the output of the backward FFTW.
442: Also note that one must provide enough space when doing a parallel real transform:
443: extra zeros must be padded at the end of the last dimension. For this reason one needs to
444: invoke the fftw_mpi_local_size_transposed() routine, remembering to change the
445: last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
446: depends on whether the last dimension is even or odd: if it is even, two zeros
447: are needed; if it is odd, only one.
448: Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
449: figures out how much space is needed, i.e. it computes the data+scratch space for
450: each process and returns that.
452: .seealso: MatCreateFFT()
453: @*/
454: PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
455: {
459: PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
460: return(0);
461: }
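/* Illustrative usage sketch (not part of the PETSc source): obtain work
   vectors whose parallel layouts match what FFTW expects. fin/fout are the
   input and output of the forward transform, bout is the output of the
   backward transform. Error checking elided.

     Mat      F;
     Vec      fin,fout,bout;
     PetscInt dim[2] = {32,32};

     MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&F);
     MatCreateVecsFFTW(F,&fin,&fout,&bout);
*/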
463: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
464: {
466: PetscMPIInt size,rank;
467: MPI_Comm comm;
468: Mat_FFT *fft = (Mat_FFT*)A->data;
469: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
470: PetscInt N = fft->N;
471: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
476: PetscObjectGetComm((PetscObject)A,&comm);
478: MPI_Comm_size(comm, &size);
479: MPI_Comm_rank(comm, &rank);
480: if (size == 1) { /* sequential case */
481: #if defined(PETSC_USE_COMPLEX)
482: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
483: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
484: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
485: #else
486: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
487: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
488: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
489: #endif
490: } else { /* parallel cases */
491: ptrdiff_t alloc_local,local_n0,local_0_start;
492: ptrdiff_t local_n1;
493: fftw_complex *data_fout;
494: ptrdiff_t local_1_start;
495: #if defined(PETSC_USE_COMPLEX)
496: fftw_complex *data_fin,*data_bout;
497: #else
498: double *data_finr,*data_boutr;
499: PetscInt n1,N1;
500: ptrdiff_t temp;
501: #endif
503: switch (ndim) {
504: case 1:
505: #if !defined(PETSC_USE_COMPLEX)
506: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
507: #else
508: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
509: if (fin) {
510: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
512: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
513: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
514: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
515: }
516: if (fout) {
517: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
518: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
519: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
520: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
521: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
522: }
523: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
524: if (bout) {
525: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
527: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
528: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
529: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
530: }
531: break;
532: #endif
533: case 2:
534: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
535: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
536: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
537: if (fin) {
538: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
539: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
540: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
541: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
542: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
543: }
544: if (fout) {
545: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
546: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
547: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
548: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
549: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
550: }
551: if (bout) {
552: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
553: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
554: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
555: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
556: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
557: }
558: #else
559: /* Get local size */
560: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
561: if (fin) {
562: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
564: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
565: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
566: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
567: }
568: if (fout) {
569: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
571: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
572: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
573: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
574: }
575: if (bout) {
576: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
578: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
579: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
580: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
581: }
582: #endif
583: break;
584: case 3:
585: #if !defined(PETSC_USE_COMPLEX)
586: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
587: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
588: if (fin) {
589: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
590: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
591: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
592: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
593: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: if (fout) {
596: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
597: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
598: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
599: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
600: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
601: }
602: if (bout) {
603: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
604: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
605: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
606: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
607: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: #else
610: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
611: if (fin) {
612: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
613: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
614: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
615: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
616: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
617: }
618: if (fout) {
619: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
620: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
621: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
622: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
623: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
624: }
625: if (bout) {
626: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
627: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
628: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
629: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
630: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: #endif
633: break;
634: default:
635: #if !defined(PETSC_USE_COMPLEX)
636: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
638: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
640: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
641: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
643: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
645: if (fin) {
646: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
647: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
648: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
649: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
650: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
651: }
652: if (fout) {
653: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
654: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
655: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
656: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
657: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
658: }
659: if (bout) {
660: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
661: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
662: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
663: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
664: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
665: }
666: #else
667: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
668: if (fin) {
669: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
670: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
671: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
672: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
673: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
674: }
675: if (fout) {
676: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
677: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
678: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
679: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
680: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
681: }
682: if (bout) {
683: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
684: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
685: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
686: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
687: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
688: }
689: #endif
690: break;
691: }
692: }
693: return(0);
694: }
696: /*@
697: VecScatterPetscToFFTW - Copies the PETSc vector into the vector layout that FFTW requires.
699: Collective on Mat
701: Input Parameters:
702: + A - FFTW matrix
703: - x - the PETSc vector
705: Output Parameters:
706: . y - the FFTW vector
708: Options Database Keys:
709: . -mat_fftw_plannerflags - set FFTW planner flags
711: Level: intermediate
713: Note: For a parallel real FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
714: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
715: zeros. This routine does that job with a scatter operation.
717: .seealso: VecScatterFFTWToPetsc()
718: @*/
719: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
720: {
724: PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
725: return(0);
726: }
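/* Illustrative sketch (not part of the PETSc source): xnat is assumed to be a
   user vector in PETSc's natural layout with global size fft->N; the scatter
   moves it into the (possibly padded) layout FFTW expects before the forward
   transform. F, fin, fout are as in the MatCreateVecsFFTW() sketch above.
   Error checking elided.

     VecScatterPetscToFFTW(F,xnat,fin);   // PETSc layout -> FFTW layout
     MatMult(F,fin,fout);                 // forward transform
*/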
728: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
729: {
731: MPI_Comm comm;
732: Mat_FFT *fft = (Mat_FFT*)A->data;
733: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
734: PetscInt N =fft->N;
735: PetscInt ndim =fft->ndim,*dim=fft->dim;
736: PetscInt low;
737: PetscMPIInt rank,size;
738: PetscInt vsize,vsize1;
739: ptrdiff_t local_n0,local_0_start;
740: ptrdiff_t local_n1,local_1_start;
741: VecScatter vecscat;
742: IS list1,list2;
743: #if !defined(PETSC_USE_COMPLEX)
744: PetscInt i,j,k,partial_dim;
745: PetscInt *indx1, *indx2, tempindx, tempindx1;
746: PetscInt NM;
747: ptrdiff_t temp;
748: #endif
751: PetscObjectGetComm((PetscObject)A,&comm);
752: MPI_Comm_size(comm, &size);
753: MPI_Comm_rank(comm, &rank);
754: VecGetOwnershipRange(y,&low,NULL);
756: if (size==1) {
757: VecGetSize(x,&vsize);
758: VecGetSize(y,&vsize1);
759: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
760: VecScatterCreate(x,list1,y,list1,&vecscat);
761: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
762: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
763: VecScatterDestroy(&vecscat);
764: ISDestroy(&list1);
765: } else {
766: switch (ndim) {
767: case 1:
768: #if defined(PETSC_USE_COMPLEX)
769: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
771: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
772: ISCreateStride(comm,local_n0,low,1,&list2);
773: VecScatterCreate(x,list1,y,list2,&vecscat);
774: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
775: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
776: VecScatterDestroy(&vecscat);
777: ISDestroy(&list1);
778: ISDestroy(&list2);
779: #else
780: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
781: #endif
782: break;
783: case 2:
784: #if defined(PETSC_USE_COMPLEX)
785: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
787: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
788: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
789: VecScatterCreate(x,list1,y,list2,&vecscat);
790: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
791: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
792: VecScatterDestroy(&vecscat);
793: ISDestroy(&list1);
794: ISDestroy(&list2);
795: #else
796: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
798: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
799: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
801: if (dim[1]%2==0) {
802: NM = dim[1]+2;
803: } else {
804: NM = dim[1]+1;
805: }
806: for (i=0; i<local_n0; i++) {
807: for (j=0; j<dim[1]; j++) {
808: tempindx = i*dim[1] + j;
809: tempindx1 = i*NM + j;
811: indx1[tempindx]=local_0_start*dim[1]+tempindx;
812: indx2[tempindx]=low+tempindx1;
813: }
814: }
816: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
817: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
819: VecScatterCreate(x,list1,y,list2,&vecscat);
820: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
821: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
822: VecScatterDestroy(&vecscat);
823: ISDestroy(&list1);
824: ISDestroy(&list2);
825: PetscFree(indx1);
826: PetscFree(indx2);
827: #endif
828: break;
830: case 3:
831: #if defined(PETSC_USE_COMPLEX)
832: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
834: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
835: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
836: VecScatterCreate(x,list1,y,list2,&vecscat);
837: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
838: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
839: VecScatterDestroy(&vecscat);
840: ISDestroy(&list1);
841: ISDestroy(&list2);
842: #else
843: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
844: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
845: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
847: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
848: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
850: if (dim[2]%2==0) NM = dim[2]+2;
851: else NM = dim[2]+1;
853: for (i=0; i<local_n0; i++) {
854: for (j=0; j<dim[1]; j++) {
855: for (k=0; k<dim[2]; k++) {
856: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
857: tempindx1 = i*dim[1]*NM + j*NM + k;
859: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
860: indx2[tempindx]=low+tempindx1;
861: }
862: }
863: }
865: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
866: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
867: VecScatterCreate(x,list1,y,list2,&vecscat);
868: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
869: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
870: VecScatterDestroy(&vecscat);
871: ISDestroy(&list1);
872: ISDestroy(&list2);
873: PetscFree(indx1);
874: PetscFree(indx2);
875: #endif
876: break;
878: default:
879: #if defined(PETSC_USE_COMPLEX)
880: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
882: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
883: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
884: VecScatterCreate(x,list1,y,list2,&vecscat);
885: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
886: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
887: VecScatterDestroy(&vecscat);
888: ISDestroy(&list1);
889: ISDestroy(&list2);
890: #else
891: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
892: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
893: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
895: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
897: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
899: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
901: partial_dim = fftw->partial_dim;
903: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
904: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
906: if (dim[ndim-1]%2==0) NM = 2;
907: else NM = 1;
909: j = low;
910: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
911: indx1[i] = local_0_start*partial_dim + i;
912: indx2[i] = j;
913: if (k%dim[ndim-1]==0) j+=NM;
914: j++;
915: }
916: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
917: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
918: VecScatterCreate(x,list1,y,list2,&vecscat);
919: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
920: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
921: VecScatterDestroy(&vecscat);
922: ISDestroy(&list1);
923: ISDestroy(&list2);
924: PetscFree(indx1);
925: PetscFree(indx2);
926: #endif
927: break;
928: }
929: }
930: return(0);
931: }
933: /*@
934: VecScatterFFTWToPetsc - Copies the FFTW vector back to a PETSc vector.
936: Collective on Mat
938: Input Parameters:
939: + A - FFTW matrix
940: - x - FFTW vector
942: Output Parameters:
943: . y - PETSc vector
945: Level: intermediate
947: Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
948: VecScatterFFTWToPetsc() removes those extra zeros.
950: .seealso: VecScatterPetscToFFTW()
951: @*/
952: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
953: {
957: PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
958: return(0);
959: }
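/* Illustrative round-trip sketch (not part of the PETSc source): after the
   backward transform the trailing padding is stripped back into a vector ynat
   in PETSc's natural layout; N is the total number of grid points. Error
   checking elided.

     MatMultTranspose(F,fout,bout);       // backward transform
     VecScatterFFTWToPetsc(F,bout,ynat);  // FFTW layout -> PETSc layout
     VecScale(ynat,1.0/N);                // undo FFTW's N scaling
*/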
961: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
962: {
964: MPI_Comm comm;
965: Mat_FFT *fft = (Mat_FFT*)A->data;
966: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
967: PetscInt N = fft->N;
968: PetscInt ndim = fft->ndim,*dim=fft->dim;
969: PetscInt low;
970: PetscMPIInt rank,size;
971: ptrdiff_t local_n0,local_0_start;
972: ptrdiff_t local_n1,local_1_start;
973: VecScatter vecscat;
974: IS list1,list2;
975: #if !defined(PETSC_USE_COMPLEX)
976: PetscInt i,j,k,partial_dim;
977: PetscInt *indx1, *indx2, tempindx, tempindx1;
978: PetscInt NM;
979: ptrdiff_t temp;
980: #endif
983: PetscObjectGetComm((PetscObject)A,&comm);
984: MPI_Comm_size(comm, &size);
985: MPI_Comm_rank(comm, &rank);
986: VecGetOwnershipRange(x,&low,NULL);
988: if (size==1) {
989: ISCreateStride(comm,N,0,1,&list1);
990: VecScatterCreate(x,list1,y,list1,&vecscat);
991: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
992: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
993: VecScatterDestroy(&vecscat);
994: ISDestroy(&list1);
996: } else {
997: switch (ndim) {
998: case 1:
999: #if defined(PETSC_USE_COMPLEX)
1000: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1002: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
1003: ISCreateStride(comm,local_n1,low,1,&list2);
1004: VecScatterCreate(x,list1,y,list2,&vecscat);
1005: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1006: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1007: VecScatterDestroy(&vecscat);
1008: ISDestroy(&list1);
1009: ISDestroy(&list2);
1010: #else
1011: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1012: #endif
1013: break;
1014: case 2:
1015: #if defined(PETSC_USE_COMPLEX)
1016: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1018: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1019: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
1020: VecScatterCreate(x,list2,y,list1,&vecscat);
1021: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1022: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1023: VecScatterDestroy(&vecscat);
1024: ISDestroy(&list1);
1025: ISDestroy(&list2);
1026: #else
1027: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1029: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
1030: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
1032: if (dim[1]%2==0) NM = dim[1]+2;
1033: else NM = dim[1]+1;
1035: for (i=0; i<local_n0; i++) {
1036: for (j=0; j<dim[1]; j++) {
1037: tempindx = i*dim[1] + j;
1038: tempindx1 = i*NM + j;
1040: indx1[tempindx]=local_0_start*dim[1]+tempindx;
1041: indx2[tempindx]=low+tempindx1;
1042: }
1043: }
1045: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
1046: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
1048: VecScatterCreate(x,list2,y,list1,&vecscat);
1049: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1050: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1051: VecScatterDestroy(&vecscat);
1052: ISDestroy(&list1);
1053: ISDestroy(&list2);
1054: PetscFree(indx1);
1055: PetscFree(indx2);
1056: #endif
1057: break;
1058: case 3:
1059: #if defined(PETSC_USE_COMPLEX)
1060: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1062: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1063: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1064: VecScatterCreate(x,list1,y,list2,&vecscat);
1065: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1066: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1067: VecScatterDestroy(&vecscat);
1068: ISDestroy(&list1);
1069: ISDestroy(&list2);
1070: #else
1071: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1073: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1074: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1076: if (dim[2]%2==0) NM = dim[2]+2;
1077: else NM = dim[2]+1;
1079: for (i=0; i<local_n0; i++) {
1080: for (j=0; j<dim[1]; j++) {
1081: for (k=0; k<dim[2]; k++) {
1082: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1083: tempindx1 = i*dim[1]*NM + j*NM + k;
1085: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1086: indx2[tempindx]=low+tempindx1;
1087: }
1088: }
1089: }
1091: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1092: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1094: VecScatterCreate(x,list2,y,list1,&vecscat);
1095: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1096: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1097: VecScatterDestroy(&vecscat);
1098: ISDestroy(&list1);
1099: ISDestroy(&list2);
1100: PetscFree(indx1);
1101: PetscFree(indx2);
1102: #endif
1103: break;
1104: default:
1105: #if defined(PETSC_USE_COMPLEX)
1106: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1108: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1109: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1110: VecScatterCreate(x,list1,y,list2,&vecscat);
1111: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1112: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1113: VecScatterDestroy(&vecscat);
1114: ISDestroy(&list1);
1115: ISDestroy(&list2);
1116: #else
1117: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1119: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1121: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1123: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1125: partial_dim = fftw->partial_dim;
1127: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
1128: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
1130: if (dim[ndim-1]%2==0) NM = 2;
1131: else NM = 1;
1133: j = low;
1134: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1135: indx1[i] = local_0_start*partial_dim + i;
1136: indx2[i] = j;
1137: if (k%dim[ndim-1]==0) j+=NM;
1138: j++;
1139: }
1140: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1141: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1143: VecScatterCreate(x,list2,y,list1,&vecscat);
1144: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1145: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1146: VecScatterDestroy(&vecscat);
1147: ISDestroy(&list1);
1148: ISDestroy(&list2);
1149: PetscFree(indx1);
1150: PetscFree(indx2);
1151: #endif
1152: break;
1153: }
1154: }
1155: return(0);
1156: }
1158: /*
1159: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1161: Options Database Keys:
1162: . -mat_fftw_plannerflags - set FFTW planner flags
1164: Level: intermediate
1166: */
1167: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1168: {
1170: MPI_Comm comm;
1171: Mat_FFT *fft=(Mat_FFT*)A->data;
1172: Mat_FFTW *fftw;
1173: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1174: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1175: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1176: PetscBool flg;
1177: PetscInt p_flag,partial_dim=1,ctr;
1178: PetscMPIInt size,rank;
1179: ptrdiff_t *pdim;
1180: ptrdiff_t local_n1,local_1_start;
1181: #if !defined(PETSC_USE_COMPLEX)
1182: ptrdiff_t temp;
1183: PetscInt N1,tot_dim;
1184: #else
1185: PetscInt n1;
1186: #endif
1189: PetscObjectGetComm((PetscObject)A,&comm);
1190: MPI_Comm_size(comm, &size);
1191: MPI_Comm_rank(comm, &rank);
1193: fftw_mpi_init();
1194: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1195: pdim[0] = dim[0];
1196: #if !defined(PETSC_USE_COMPLEX)
1197: tot_dim = 2*dim[0];
1198: #endif
1199: for (ctr=1; ctr<ndim; ctr++) {
1200: partial_dim *= dim[ctr];
1201: pdim[ctr] = dim[ctr];
1202: #if !defined(PETSC_USE_COMPLEX)
1203: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1204: else tot_dim *= dim[ctr];
1205: #endif
1206: }
1208: if (size == 1) {
1209: #if defined(PETSC_USE_COMPLEX)
1210: MatSetSizes(A,N,N,N,N);
1211: n = N;
1212: #else
1213: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1214: n = tot_dim;
1215: #endif
1217: } else {
1218: ptrdiff_t local_n0,local_0_start;
1219: switch (ndim) {
1220: case 1:
1221: #if !defined(PETSC_USE_COMPLEX)
1222: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1223: #else
1224: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1226: n = (PetscInt)local_n0;
1227: n1 = (PetscInt)local_n1;
1228: MatSetSizes(A,n1,n,N,N);
1229: #endif
1230: break;
1231: case 2:
1232: #if defined(PETSC_USE_COMPLEX)
1233: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1234: /*
1235: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1236: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1237: */
1238: n = (PetscInt)local_n0*dim[1];
1239: MatSetSizes(A,n,n,N,N);
1240: #else
1241: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1243: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1244: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1245: #endif
1246: break;
1247: case 3:
1248: #if defined(PETSC_USE_COMPLEX)
1249: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1251: n = (PetscInt)local_n0*dim[1]*dim[2];
1252: MatSetSizes(A,n,n,N,N);
1253: #else
1254: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1256: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1257: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1258: #endif
1259: break;
1260: default:
1261: #if defined(PETSC_USE_COMPLEX)
1262: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1264: n = (PetscInt)local_n0*partial_dim;
1265: MatSetSizes(A,n,n,N,N);
1266: #else
1267: temp = pdim[ndim-1];
1269: pdim[ndim-1] = temp/2 + 1;
1271: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1273: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1274: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1276: pdim[ndim-1] = temp;
1278: MatSetSizes(A,n,n,N1,N1);
1279: #endif
1280: break;
1281: }
1282: }
1283: free(pdim);
1284: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1285: PetscNewLog(A,&fftw);
1286: fft->data = (void*)fftw;
1288: fft->n = n;
1289: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1290: fftw->partial_dim = partial_dim;
1292: PetscMalloc1(ndim, &(fftw->dim_fftw));
1293: if (size == 1) {
1294: #if defined(PETSC_USE_64BIT_INDICES)
1295: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1296: #else
1297: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1298: #endif
1299: }
1301: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1303: fftw->p_forward = 0;
1304: fftw->p_backward = 0;
1305: fftw->p_flag = FFTW_ESTIMATE;
1307: if (size == 1) {
1308: A->ops->mult = MatMult_SeqFFTW;
1309: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1310: } else {
1311: A->ops->mult = MatMult_MPIFFTW;
1312: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1313: }
1314: fft->matdestroy = MatDestroy_FFTW;
1315: A->assembled = PETSC_TRUE;
1316: A->preallocated = PETSC_TRUE;
1318: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1319: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1320: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1322: /* get runtime options */
1323: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1324: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1325: if (flg) {
1326: fftw->p_flag = iplans[p_flag];
1327: }
1328: PetscOptionsEnd();
1329: return(0);
1330: }
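/* Illustrative sketch (not part of the PETSc source): the planner flag is
   chosen at runtime through the options database, e.g. (program name is
   hypothetical)

     ./myapp -mat_fftw_plannerflags FFTW_MEASURE

   Note the warning in MatMult_SeqFFTW(): with any flag other than
   FFTW_ESTIMATE, planning overwrites the contents of the in/out arrays, and
   here plans are created lazily on the first MatMult()/MatMultTranspose(). */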