HOPS
HOPS class reference
MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh
Go to the documentation of this file.
1 #ifndef MHO_OpenCLBatchedMultidimensionalFastFourierTransform_HH__
2 #define MHO_OpenCLBatchedMultidimensionalFastFourierTransform_HH__
3 
4 #include <complex>
5 #include <cstdlib>
6 #include <cstring>
7 #include <fstream>
8 #include <sstream>
9 
10 //core (opencl)
12 #include "MHO_NDArrayWrapper.hh"
13 #include "MHO_OpenCLInterface.hh"
15 #include "MHO_UnaryOperator.hh"
16 
17 #define ENFORCE_CL_FINISH
18 
19 namespace hops
20 {
21 
22 template< typename XArgType > class MHO_OpenCLBatchedMultidimensionalFastFourierTransform: public MHO_Operator
23 {
24  public:
26  "Array element type must be a complex floating point type.");
27  using complex_value_type = typename XArgType::value_type;
28  using floating_point_value_type = typename XArgType::value_type::value_type;
29 
31  : MHO_Operator(), fIsValid(false), fIsForward(true), fInitialized(false), fAllSpatialDimensionsAreEqual(true),
32  fFillFromHostData(true), fReadOutDataToHost(true), fMaxBufferSize(0), fTotalDataSize(0), fOpenCLFlags(""),
33  fFFTKernel(nullptr), fSpatialDimensionBufferCL(nullptr), fTwiddleBufferCL(nullptr),
34  fConjugateTwiddleBufferCL(nullptr), fScaleBufferCL(nullptr), fCirculantBufferCL(nullptr), fDataBufferCL(nullptr),
35  fPermuationArrayCL(nullptr), fWorkspaceBufferCL(nullptr), fNLocal(0)
36  {
37  ;
38  };
39 
41  {
42  delete fFFTKernel;
43  delete fSpatialDimensionBufferCL;
44  delete fTwiddleBufferCL;
45  delete fConjugateTwiddleBufferCL;
46  delete fScaleBufferCL;
47  delete fCirculantBufferCL;
48  delete fDataBufferCL;
49  delete fPermuationArrayCL;
50  delete fWorkspaceBufferCL;
51  };
52 
53  virtual void SetInput(XArgType* in) { fInput = in; };
54 
55  virtual void SetOutput(XArgType* out) { fOutput = out; };
56 
57  private:
58  XArgType* fInput;
59  XArgType* fOutput;
60 
61  public:
62  //control direction of FFT
63  void SetForward() { fIsForward = true; }
64 
65  void SetBackward() { fIsForward = false; };
66 
67  //control whether GPU buffer is initialized from host data (fInput)
68  void SetWriteOutHostDataTrue() { fFillFromHostData = true; };
69 
70  void SetWriteOutHostDataFalse() { fFillFromHostData = false; };
71 
72  //control whether result is read back to host (fOutput)
73  void SetReadOutDataToHostTrue() { fReadOutDataToHost = true; };
74 
75  void SetReadOutDataToHostFalse() { fReadOutDataToHost = false; };
76 
78 
79  //opencl build flags used
80  std::string GetOpenCLFlags() const { return fOpenCLFlags; };
81 
83 
84  //raw access to the OpenCL data buffer...use carefully
85  cl::Buffer* GetDataBuffer() { return fDataBufferCL; };
86 
88 
89  //overides automatic determination of local workgroup size if set
90  //do NOT provide any sanity checks
91  void ForceLocalSize(unsigned int local) { fNLocal = local; };
92 
94 
95  virtual bool Initialize()
96  {
97  //input and output must be set before being called
98  std::cout << "initializing" << std::endl;
99  if(!fInitialized) //can only be initialized once!
100  {
101  if(DoInputOutputDimensionsMatch())
102  {
103  fIsValid = true;
104  this->fInput->GetDimensions(fDimensionSize);
105  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
106  {
107  fSpatialDim[i] = fDimensionSize[i + 1];
108  }
109  }
110  else
111  {
112  std::cout << "dim mismatch" << std::endl;
113  fIsValid = false;
114  fInitialized = false;
115  }
116 
117  if(fIsValid)
118  {
119  std::cout << "its valid" << std::endl;
120  ConstructWorkspace();
121  ConstructOpenCLKernels();
122  BuildBuffers();
123  AssignBuffers();
124  fInitialized = true;
125  }
126  }
127  return fInitialized;
128  }
129 
131 
132  virtual bool Execute()
133  {
134  if(fIsValid && fInitialized)
135  {
136  //set the basic arguments
137  unsigned int n_multdim_ffts = fDimensionSize[0];
138  fFFTKernel->setArg(0, n_multdim_ffts); //number of complete multidimensional fft's to perform
139  if(fIsForward)
140  {
141  fFFTKernel->setArg(2, 1); //direction of FFT is forward
142  }
143  else
144  {
145  fFFTKernel->setArg(2, 0); //direction of FFT is backward (inverse)
146  }
147  //arg 3 is dimensions, already written to GPU
148 
149  //write the data to the buffer if necessary
150  FillDataBuffer();
151 
152  for(unsigned int D = 0; D < XArgType::rank::value; D++)
153  {
154  //compute number of 1d fft's needed (n-global)
155  unsigned int n_global = fDimensionSize[0];
156  unsigned int n_local_1d_transforms = 1;
157  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
158  {
159  if(i != D)
160  {
161  n_global *= fSpatialDim[i];
162  n_local_1d_transforms *= fSpatialDim[i];
163  };
164  };
165 
166  //pad out n-global to be a multiple of the n-local
167  unsigned int nDummy = fNLocal - (n_global % fNLocal);
168  if(nDummy == fNLocal)
169  {
170  nDummy = 0;
171  };
172  n_global += nDummy;
173 
174  cl::NDRange global(n_global);
175  cl::NDRange local(fNLocal);
176 
177  fFFTKernel->setArg(1, D); //(index of the selected dimension) updated at each stage
178 
179  if(fAllSpatialDimensionsAreEqual)
180  {
181  //no need to write the constants, as they are already on the gpu
182  //just fire up the kernel
183  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueNDRangeKernel(*fFFTKernel, cl::NullRange, global,
184  local);
185 #ifdef ENFORCE_CL_FINISH
187 #endif
188  }
189  else
190  {
191  //we enqueue write the needed constants for this dimension
192  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueWriteBuffer(
193  *fTwiddleBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fTwiddle[D][0]));
194 #ifdef ENFORCE_CL_FINISH
196 #endif
197  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueWriteBuffer(*fConjugateTwiddleBufferCL, CL_TRUE,
198  0, fMaxBufferSize * sizeof(CL_TYPE2),
199  &(fConjugateTwiddle[D][0]));
200 #ifdef ENFORCE_CL_FINISH
202 #endif
203  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueWriteBuffer(
204  *fScaleBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fScale[D][0]));
205 #ifdef ENFORCE_CL_FINISH
207 #endif
208  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueWriteBuffer(
209  *fCirculantBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fCirculant[D][0]));
210 #ifdef ENFORCE_CL_FINISH
212 #endif
213  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueWriteBuffer(
214  *fPermuationArrayCL, CL_TRUE, 0, fMaxBufferSize * sizeof(unsigned int), &(fPermuationArray[D][0]));
215 #ifdef ENFORCE_CL_FINISH
217 #endif
218 
219  //now enqueue the kernel
220  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueNDRangeKernel(*fFFTKernel, cl::NullRange, global,
221  local);
222 #ifdef ENFORCE_CL_FINISH
224 #endif
225  }
226  }
227 
228  //read the data from the buffer if necessary
229  ReadOutDataBuffer();
230  return true;
231  }
232  else
233  {
234  std::cout
235  << "MHO_OpenCLBatchedMultidimensionalFastFourierTransform::Execute: Not valid and initialized. Aborting."
236  << std::endl;
237  return false;
238  }
239  }
240 
242 
243  private:
244  void ConstructWorkspace()
245  {
246  //figure out the size of all the data
247  fTotalDataSize = MHO_NDArrayMath::TotalArraySize< XArgType::rank::value >(fDimensionSize);
248 
249  //figure out the size of the private buffers needed
250  fMaxBufferSize = 0;
251  fAllSpatialDimensionsAreEqual = true;
252  unsigned int previous_dim = fSpatialDim[0];
253  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
254  {
255  if(previous_dim != fSpatialDim[i])
256  {
257  fAllSpatialDimensionsAreEqual = false;
258  };
259 
260  if(fSpatialDim[i] > fMaxBufferSize)
261  {
262  fMaxBufferSize = fSpatialDim[i];
263  }
264 
265  if(!(MHO_BitReversalPermutation::IsPowerOfTwo(fSpatialDim[i])))
266  {
268  fSpatialDim[i]) > fMaxBufferSize)
269  {
270  fMaxBufferSize =
272  fSpatialDim[i]);
273  }
274  }
275  }
276 
277  //create the build flags
278  std::stringstream ss;
279  ss << " -D FFT_NDIM=" << XArgType::rank::value;
280  ss << " -D FFT_BUFFERSIZE=" << fMaxBufferSize;
281 
282  // //determine the size of the device's constant memory buffer, if it is too small we do not
283  // //use it, and use global memory instead
284  // size_t const_mem_size =
285  // MHO_OpenCLInterface::GetInstance()->GetDevice().getInfo<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE>();
286  // if (fMaxBufferSize * sizeof(CL_TYPE2) < const_mem_size) {
287  // ss << " -D FFT_USE_CONST_MEM";
288  // }
289 
290  ss << " -I " << MHO_OpenCLInterface::GetInstance()->GetKernelPath();
291  fOpenCLFlags = ss.str();
292 
293  //now we need to compute all the auxiliary variables we need for the FFT
294  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
295  {
296  unsigned int N = fSpatialDim[i];
297  unsigned int M = N;
298 
300  {
302  }
303 
304  //resize to appropriate lengths
305  fTwiddle[i].resize(M);
306  fConjugateTwiddle[i].resize(M);
307  fScale[i].resize(M);
308  fCirculant[i].resize(M);
309  fPermuationArray[i].resize(M);
310 
311  //compute the twiddle factors for this dimension
313 
314  //compute the conjugate twiddle factors
316  M, &(fConjugateTwiddle[i][0]));
317 
318  //compute the bluestein scale factors for this dimension
320  &(fScale[i][0]));
321  //compute the circulant vector for this dimension
323  N, M, &(fTwiddle[i][0]), &(fScale[i][0]), &(fCirculant[i][0]));
324 
326 
327  std::cout << "build workspace" << std::endl;
328  }
329  }
330 
332 
333  void ConstructOpenCLKernels()
334  {
335  std::cout << "opencl kernels" << std::endl;
336  //Get name of kernel source file
337  std::stringstream clFile;
339  << "/MHO_MultidimensionalFastFourierTransform_kernel.cl";
340 
341  //set the build options
342  std::stringstream options;
343  options << GetOpenCLFlags();
344 
345  MHO_OpenCLKernelBuilder k_builder;
346  fFFTKernel =
347  k_builder.BuildKernel(clFile.str(), std::string("MultidimensionalFastFourierTransform_Stage"), options.str());
348 
349  //get n-local
350  if(fNLocal == 0) //if fNLocal has already been set externally, do nothing
351  {
352  fNLocal =
353  fFFTKernel->getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >(MHO_OpenCLInterface::GetInstance()->GetDevice());
354 
355  fPreferredWorkgroupMultiple = fFFTKernel->getWorkGroupInfo< CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE >(
357 
358  if(fPreferredWorkgroupMultiple < fNLocal)
359  {
360  fNLocal = fPreferredWorkgroupMultiple;
361  }
362  }
363  }
364 
366 
367  void BuildBuffers()
368  {
369  std::cout << "building buffers" << std::endl;
370  //buffer for the 'spatial' dimensions of the array
371  fSpatialDimensionBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
372  XArgType::rank::value * sizeof(unsigned int));
373 
374  //buffer for the FFT twiddle factors
375  fTwiddleBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
376  fMaxBufferSize * sizeof(CL_TYPE2));
377 
378  //buffer for the conjugate FFT twiddle factors
379  fConjugateTwiddleBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
380  fMaxBufferSize * sizeof(CL_TYPE2));
381 
382  //buffer for the bluestein scale factors
383  fScaleBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
384  fMaxBufferSize * sizeof(CL_TYPE2));
385 
386  //buffer for the bluestein circulant vector
387  fCirculantBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
388  fMaxBufferSize * sizeof(CL_TYPE2));
389 
390  //buffer for the data to be transformed
391  fDataBufferCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_WRITE,
392  fTotalDataSize * sizeof(CL_TYPE2));
393 
394  //buffer for the permutation_array
395  fPermuationArrayCL = new cl::Buffer(MHO_OpenCLInterface::GetInstance()->GetContext(), CL_MEM_READ_ONLY,
396  fMaxBufferSize * sizeof(unsigned int));
397 
398  //determine the largest global worksize
399  fMaxNWorkItems = 0;
400  for(unsigned int D = 0; D < XArgType::rank::value; D++)
401  {
402  //compute number of 1d fft's needed (n-global)
403  unsigned int n_global = fDimensionSize[0];
404  unsigned int n_local_1d_transforms = 1;
405  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
406  {
407  if(i != D)
408  {
409  n_global *= fSpatialDim[i];
410  n_local_1d_transforms *= fSpatialDim[i];
411  };
412  };
413 
414  //pad out n-global to be a multiple of the n-local
415  unsigned int nDummy = fNLocal - (n_global % fNLocal);
416  if(nDummy == fNLocal)
417  {
418  nDummy = 0;
419  };
420  n_global += nDummy;
421 
422  if(fMaxNWorkItems < n_global)
423  {
424  fMaxNWorkItems = n_global;
425  };
426  }
427  }
428 
430 
431  void AssignBuffers()
432  {
433  cl::CommandQueue& Q = MHO_OpenCLInterface::GetInstance()->GetQueue();
434 
435  //assign buffers and set kernel arguments
436  unsigned int n_multdim_ffts = fDimensionSize[0];
437  //small arguments that do not need to be enqueued/written
438  fFFTKernel->setArg(0, n_multdim_ffts); //number of multidimensional fft's to perform
439 
440  //assign buffers and set kernel arguments
441  fFFTKernel->setArg(1, 0); //(index of the selected dimension) updated at each stage
442 
443  //assign buffers and set kernel arguments
444  fFFTKernel->setArg(2, 0); //updated at each execution
445 
446  //array dimensionality written once
447  fFFTKernel->setArg(3, *fSpatialDimensionBufferCL);
448  Q.enqueueWriteBuffer(*fSpatialDimensionBufferCL, CL_TRUE, 0, (XArgType::rank::value - 1) * sizeof(unsigned int),
449  fSpatialDim);
450 #ifdef ENFORCE_CL_FINISH
452 #endif
453 
454  //following are updated at each stage when necessary
455  //however in the special case where all spatial dimensions are the same
456  //we can write them to the GPU now and need not re-send them during execution
457  fFFTKernel->setArg(4, *fTwiddleBufferCL);
458  fFFTKernel->setArg(5, *fConjugateTwiddleBufferCL);
459  fFFTKernel->setArg(6, *fScaleBufferCL);
460  fFFTKernel->setArg(7, *fCirculantBufferCL);
461  fFFTKernel->setArg(8, *fPermuationArrayCL);
462 
463  if(fAllSpatialDimensionsAreEqual)
464  {
465  //write the constant buffers
466  Q.enqueueWriteBuffer(*fTwiddleBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fTwiddle[0][0]));
467 #ifdef ENFORCE_CL_FINISH
469 #endif
470  Q.enqueueWriteBuffer(*fConjugateTwiddleBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2),
471  &(fConjugateTwiddle[0][0]));
472 #ifdef ENFORCE_CL_FINISH
474 #endif
475  Q.enqueueWriteBuffer(*fScaleBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fScale[0][0]));
476 #ifdef ENFORCE_CL_FINISH
478 #endif
479  Q.enqueueWriteBuffer(*fCirculantBufferCL, CL_TRUE, 0, fMaxBufferSize * sizeof(CL_TYPE2), &(fCirculant[0][0]));
480 #ifdef ENFORCE_CL_FINISH
482 #endif
483  Q.enqueueWriteBuffer(*fPermuationArrayCL, CL_TRUE, 0, fMaxBufferSize * sizeof(unsigned int),
484  &(fPermuationArray[0][0]));
485 #ifdef ENFORCE_CL_FINISH
487 #endif
488  }
489 
490  //the data is updated once per execution
491  fFFTKernel->setArg(9, *fDataBufferCL);
492  }
493 
495 
496  void FillDataBuffer()
497  {
498  if(fFillFromHostData)
499  {
500  //initialize data on the GPU from host memory
501  cl::CommandQueue& Q = MHO_OpenCLInterface::GetInstance()->GetQueue();
502  auto* ptr = (CL_TYPE2*)(&((this->fInput->GetData())[0]));
503  std::cout << "total data size = " << fTotalDataSize << std::endl;
504  Q.enqueueWriteBuffer(*fDataBufferCL, CL_TRUE, 0, fTotalDataSize * sizeof(CL_TYPE2), ptr);
505 #ifdef ENFORCE_CL_FINISH
507 #endif
508  }
509 
510  //otherwise assume data is already on GPU from previous result
511  }
512 
514 
515  void ReadOutDataBuffer()
516  {
517  if(fReadOutDataToHost)
518  {
519  //read out data from the GPU to the host memory
520  cl::CommandQueue& Q = MHO_OpenCLInterface::GetInstance()->GetQueue();
521  auto* ptr = (CL_TYPE2*)(&((this->fInput->GetData())[0]));
522  Q.enqueueReadBuffer(*fDataBufferCL, CL_TRUE, 0, fTotalDataSize * sizeof(CL_TYPE2), ptr);
523 #ifdef ENFORCE_CL_FINISH
525 #endif
526  }
527  //otherwise assume we want to leave the data on the GPU for further processing
528  }
529 
531 
532  virtual bool DoInputOutputDimensionsMatch()
533  {
534  size_t in[XArgType::rank::value];
535  size_t out[XArgType::rank::value];
536 
537  this->fInput->GetDimensions(in);
538  this->fOutput->GetDimensions(out);
539 
540  for(unsigned int i = 0; i < XArgType::rank::value; i++)
541  {
542  if(in[i] != out[i])
543  {
544  return false;
545  }
546  }
547  return true;
548  }
549 
551 
552  bool fIsValid;
553  bool fIsForward;
554  bool fInitialized;
555  bool fAllSpatialDimensionsAreEqual;
556  bool fFillFromHostData;
557  bool fReadOutDataToHost;
558  size_t fDimensionSize[XArgType::rank::value];
559  unsigned int fSpatialDim[XArgType::rank::value - 1];
560  unsigned int fMaxNWorkItems;
561 
562  unsigned int fMaxBufferSize;
563  unsigned int fTotalDataSize;
564 
566  //Workspace for needed coefficients
567  std::vector< std::complex< double > > fTwiddle[XArgType::rank::value];
568  std::vector< std::complex< double > > fConjugateTwiddle[XArgType::rank::value];
569  std::vector< std::complex< double > > fScale[XArgType::rank::value];
570  std::vector< std::complex< double > > fCirculant[XArgType::rank::value];
571  std::vector< unsigned int > fPermuationArray[XArgType::rank::value];
572 
574 
575  std::string fOpenCLFlags;
576 
577  mutable cl::Kernel* fFFTKernel;
578 
579  //buffer for the spatial dimensions of each block to be transformed
580  cl::Buffer* fSpatialDimensionBufferCL;
581 
582  //buffer for the FFT twiddle factors
583  cl::Buffer* fTwiddleBufferCL;
584 
585  //buffer for the conjugate FFT twiddle factors
586  cl::Buffer* fConjugateTwiddleBufferCL;
587 
588  //buffer for the bluestein scale factors
589  cl::Buffer* fScaleBufferCL;
590 
591  //buffer for the bluestein circulant vector
592  cl::Buffer* fCirculantBufferCL;
593 
594  //buffer for the data to be transformed
595  cl::Buffer* fDataBufferCL;
596 
597  //buffer for the permutation array
598  cl::Buffer* fPermuationArrayCL;
599 
600  //buffer to global workspace
601  cl::Buffer* fWorkspaceBufferCL;
602 
603  unsigned int fNLocal;
604  unsigned int fNGlobal;
605  unsigned int fPreferredWorkgroupMultiple;
606 
608 };
609 
610 } // namespace hops
611 
612 #endif
#define CL_TYPE2
Definition: MHO_OpenCLInterface.hh:54
static void ComputeBitReversedIndicesBaseTwo(unsigned int N, unsigned int *index_arr)
Computes bit-reversed indices using Buneman algorithm for input N, must have N = 2^P,...
Definition: MHO_BitReversalPermutation.cc:119
static bool IsPowerOfTwo(unsigned int N)
Checks if an unsigned integer is a power of two.
Definition: MHO_BitReversalPermutation.cc:10
static void ComputeBluesteinScaleFactors(unsigned int N, std::complex< XFloatType > *scale)
Function ComputeBluesteinScaleFactors.
static unsigned int ComputeBluesteinArraySize(unsigned int N)
Function ComputeBluesteinArraySize Computes the array size needed to perform a Bluestein/Chirp-Z FFT ...
Definition: MHO_FastFourierTransformUtilities.hh:331
static void ComputeConjugateTwiddleFactors(unsigned int N, std::complex< XFloatType > *conj_twiddle)
Computes the conjugate twiddle factors for given size N and stores them in provided array.
Definition: MHO_FastFourierTransformUtilities.hh:76
static void ComputeTwiddleFactors(unsigned int N, std::complex< XFloatType > *twiddle)
Compute twiddle factors for a Fast Fourier Transform. computes all the twiddle factors e^{i*2*pi/N} f...
static void ComputeBluesteinCirculantVector(unsigned int N, unsigned int M, std::complex< XFloatType > *twiddle, std::complex< XFloatType > *scale, std::complex< XFloatType > *circulant)
Function ComputeBluesteinCirculantVector twiddle and circulant array must be length M = 2^p >= (2N - ...
Definition: MHO_FastFourierTransformUtilities.hh:363
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:23
virtual void SetOutput(XArgType *out)
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:55
void SetForward()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:63
void SetReadOutDataToHostFalse()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:75
virtual bool Initialize()
Function Initialize.
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:95
MHO_OpenCLBatchedMultidimensionalFastFourierTransform()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:30
virtual ~MHO_OpenCLBatchedMultidimensionalFastFourierTransform()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:40
typename XArgType::value_type complex_value_type
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:27
void SetWriteOutHostDataFalse()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:70
void SetBackward()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:65
virtual void SetInput(XArgType *in)
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:53
std::string GetOpenCLFlags() const
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:80
typename XArgType::value_type::value_type floating_point_value_type
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:28
void SetReadOutDataToHostTrue()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:73
void ForceLocalSize(unsigned int local)
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:91
virtual bool Execute()
Function Execute.
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:132
void SetWriteOutHostDataTrue()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:68
cl::Buffer * GetDataBuffer()
Definition: MHO_OpenCLBatchedMultidimensionalFastFourierTransform.hh:85
std::string GetKernelPath() const
Definition: MHO_OpenCLInterface.hh:133
cl::Device GetDevice() const
Definition: MHO_OpenCLInterface.hh:119
cl::CommandQueue & GetQueue(int i=-1) const
Definition: MHO_OpenCLInterface.cc:125
static MHO_OpenCLInterface * GetInstance()
Definition: MHO_OpenCLInterface.cc:32
Class MHO_Operator.
Definition: MHO_Operator.hh:21
Definition: MHO_ChannelLabeler.hh:17
Definition: MHO_Meta.hh:341