HOPS
HOPS class reference
MHO_OpenCLMultidimensionalFastFourierTransform.hh
Go to the documentation of this file.
1 #ifndef MHO_OpenCLMultidimensionalFastFourierTransform_HH__
2 #define MHO_OpenCLMultidimensionalFastFourierTransform_HH__
3 
4 #include <cstring>
5 
6 #include "MHO_Message.hh"
7 #include "MHO_Meta.hh"
8 #include "MHO_NDArrayWrapper.hh"
10 
15 
16 #include "MHO_OpenCLInterface.hh"
18 
19 #include "MHO_TableContainer.hh"
20 
21 namespace hops
22 {
23 
24 template< typename XArgType >
27 {
28  public:
30  "Array element type must be a complex floating point type.");
// Element type of the array argument; the accompanying static_assert message requires it
// to be a complex floating point type.
31  using complex_value_type = typename XArgType::value_type;
// The value_type nested inside the element type (presumably the real scalar type of the
// complex number, as with std::complex — confirm against MHO_Meta.hh).
32  using floating_point_value_type = typename XArgType::value_type::value_type;
33 
35  {
36 
38  };
39 
40  virtual ~MHO_OpenCLMultidimensionalFastFourierTransform() { DeallocateDeviceWorkspace(); };
41 
42  protected:
43  virtual bool InitializeInPlace(XArgType* in) override
44  {
45  if(in != nullptr)
46  {
47  fIsValid = true;
48  }
49  else
50  {
51  fIsValid = false;
52  }
53 
54  if(fIsValid)
55  {
56  //check if the current transform sizes are the same as the input
57  bool need_to_resize = false;
58  for(std::size_t i = 0; i < XArgType::rank::value; i++)
59  {
60  if(fDimensionSize[i] != in->GetDimension(i))
61  {
62  need_to_resize = true;
63  break;
64  }
65  }
66  if(need_to_resize)
67  {
68  in->GetDimensions(fDimensionSize);
69 
70  InitHostWorkspace();
71  InitDeviceWorkspace();
72  // DeallocateDeviceWorkspace();
73  // AllocateDeviceWorkspace();
74  }
75  fInitialized = true;
76  }
77  return (fInitialized && fIsValid);
78  }
79 
80  virtual bool ExecuteInPlace(XArgType* in) override {}
81 
82  private:
83  // bool fIsValid;
84  // bool fForward;
85  // bool fInitialized;
86  // bool fTransformAxisLabels;
87  // size_t fDimensionSize[XArgType::rank::value];
88  // bool fAxesToXForm[XArgType::rank::value];
89 
90  //host workspace
91  unsigned int fTotalDataSize;
92  MHO_FastFourierTransformWorkspace fHostPlans[XArgType::rank::value];
93 
94  //device related parameters ////////////////////////////////////////////
95  cl::Context fContext; //access to the OpenCL device context
96  unsigned int fNLocal;
97  unsigned int fNGlobal;
98  unsigned int fPreferredWorkgroupMultiple;
99  mutable cl::Kernel* fFFTRadix2Kernel;
100  mutable cl::Kernel* fFFTBluesteinKernel;
101  std::string fOpenCLFlags;
102 
103  unsigned int fMaxBufferSize; //we use the same size for all of the buffers (max across all dimensions)
104  cl::Buffer* fDimensionBufferCL; //buffer for the dimensions of the array
105  cl::Buffer* fTwiddleBufferCL; //buffer for the FFT twiddle factors
106  cl::Buffer* fConjugateTwiddleBufferCL; //buffer for the conjugate FFT twiddle factors
107  cl::Buffer* fScaleBufferCL; //buffer for the bluestein scale factors
108  cl::Buffer* fCirculantBufferCL; //buffer for the bluestein circulant vector
109  cl::Buffer* fDataBufferCL; //buffer for the data to be transformed
110  cl::Buffer* fPermuationArrayCL; //buffer for the permutation array
111  cl::Buffer* fWorkspaceBufferCL; //buffer to global workspace
112 
113  void InitHostWorkspace()
114  {
115  fMaxBufferSize = 0;
116  for(std::size_t i = 0; i < XArgType::rank::value; i++)
117  {
118  if(fAxesToXForm[i])
119  {
120  fHostPlans[i].Resize(fDimensionSize[i]);
121  if(fMaxBufferSize < fHostPlans[i].GetN())
122  {
123  fMaxBufferSize = fHostPlans[i].GetN();
124  }
125  if(fMaxBufferSize < fHostPlans[i].GetM())
126  {
127  fMaxBufferSize = fHostPlans[i].GetM();
128  }
129  }
130  }
131  }
132 
133  void InitDeviceWorkspace()
134  {
135  DeallocateDeviceWorkspace();
136  AllocateDeviceWorkspace();
137  }
138 
139  void AllocateDeviceWorkspace()
140  {
141  std::cout << "building CL buffers" << std::endl;
142  fDimensionBufferCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, XArgType::rank::value * sizeof(unsigned int));
143  fTwiddleBufferCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize * sizeof(CL_TYPE2));
144  fConjugateTwiddleBufferCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize * sizeof(CL_TYPE2));
145  fScaleBufferCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize * sizeof(CL_TYPE2));
146  fCirculantBufferCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize * sizeof(CL_TYPE2));
147  fDataBufferCL = new cl::Buffer(fContext, CL_MEM_READ_WRITE, fTotalDataSize * sizeof(CL_TYPE2));
148  fPermuationArrayCL = new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize * sizeof(unsigned int));
149  }
150 
151  void DeallocateDeviceWorkspace()
152  {
153  std::cout << "deleting CL buffers" << std::endl;
154  delete fFFTRadix2Kernel;
155  delete fFFTBluesteinKernel;
156  delete fDimensionBufferCL;
157  delete fTwiddleBufferCL;
158  delete fConjugateTwiddleBufferCL;
159  delete fScaleBufferCL;
160  delete fCirculantBufferCL;
161  delete fDataBufferCL;
162  delete fPermuationArrayCL;
163  delete fWorkspaceBufferCL;
164  }
165 
// Builds the two OpenCL kernels used by this transform (the radix-2 stage and the
// Bluestein stage) from the kernel source file, then derives the local work-group size
// and the largest padded global work size over all array axes.
// NOTE(review): no call to this function is visible in this listing — confirm where the
// kernels are meant to be built.
// NOTE(review): fFFTKernel, fMaxNWorkItems and fSpatialDim are used below but are not
// among the members declared in this listing — confirm whether they are inherited or
// whether they should be fFFTRadix2Kernel/fFFTBluesteinKernel, fNGlobal and fDimensionSize.
166  void ConstructOpenCLKernels()
167  {
168  std::cout << "building opencl kernels" << std::endl;
169  //Get name of kernel source file
170  std::stringstream clFile;
// NOTE(review): listing line 171 is absent here; the statement below is the tail of a
// stream insertion into clFile (presumably prefixed with the kernel directory — confirm).
172  << "/MHO_MultidimensionalFastFourierTransform_kernel.cl";
173 
174  //set the build options
175  std::stringstream options;
176  options << GetOpenCLFlags();
177 
// both kernels are compiled from the same source file with the same option string
178  MHO_OpenCLKernelBuilder k_builder;
179  fFFTRadix2Kernel = k_builder.BuildKernel(
180  clFile.str(), std::string("MultidimensionalFastFourierTransform_Radix2Stage"), options.str());
181  fFFTBluesteinKernel = k_builder.BuildKernel(
182  clFile.str(), std::string("MultidimensionalFastFourierTransform_BluesteinStage"), options.str());
183 
184  //get n-local
185  fNLocal =
186  fFFTKernel->getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >(MHO_OpenCLInterface::GetInstance()->GetDevice());
187  fPreferredWorkgroupMultiple = fFFTKernel->getWorkGroupInfo< CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE >(
// NOTE(review): listing line 188 (the argument closing the call above) is absent here.
// the local size is capped at the preferred work-group multiple when that is smaller
189  if(fPreferredWorkgroupMultiple < fNLocal)
190  {
191  fNLocal = fPreferredWorkgroupMultiple;
192  }
193 
194  //determine the largest global worksize
195  fMaxNWorkItems = 0;
196  for(unsigned int D = 0; D < XArgType::rank::value; D++)
197  {
198  //compute number of 1d fft's needed (n-global)
// NOTE(review): the product is seeded with fDimensionSize[0] and the inner loop stops
// at rank - 1, so the last axis never enters it — confirm this is intended.
199  unsigned int n_global = fDimensionSize[0];
// n_local_1d_transforms is accumulated below but never read afterwards in this function
200  unsigned int n_local_1d_transforms = 1;
201  for(unsigned int i = 0; i < XArgType::rank::value - 1; i++)
202  {
203  if(i != D)
204  {
205  n_global *= fSpatialDim[i];
206  n_local_1d_transforms *= fSpatialDim[i];
207  };
208  };
209 
210  //pad out n-global to be a multiple of the n-local
// nDummy is the shortfall to the next multiple of fNLocal; it is zeroed when n_global
// is already an exact multiple
211  unsigned int nDummy = fNLocal - (n_global % fNLocal);
212  if(nDummy == fNLocal)
213  {
214  nDummy = 0;
215  };
216  n_global += nDummy;
217 
// keep the largest padded global size seen over all axes D
218  if(fMaxNWorkItems < n_global)
219  {
220  fMaxNWorkItems = n_global;
221  };
222  }
223  }
224 
225  //data
226 };
227 
228 } // namespace hops
229 
230 #endif
template meta-programming helper functions, mostly tuple access/modification
#define CL_TYPE2
Definition: MHO_OpenCLInterface.hh:54
native FFT workspace definitions
Definition: MHO_FastFourierTransformWorkspace.hh:25
void Resize(unsigned int n)
Resizes the internal data structure to a new size and fills it.
Definition: MHO_FastFourierTransformWorkspace.hh:72
unsigned int GetN()
Getter for n.
Definition: MHO_FastFourierTransformWorkspace.hh:94
unsigned int GetM()
Getter for m.
Definition: MHO_FastFourierTransformWorkspace.hh:101
Class MHO_MultidimensionalFastFourierTransformInterface.
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:25
size_t fDimensionSize[XArgType::rank::value]
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:236
typename XArgType::value_type::value_type floating_point_value_type
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:30
typename XArgType::value_type complex_value_type
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:29
bool fIsValid
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:231
bool fAxesToXForm[XArgType::rank::value]
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:237
bool fInitialized
Definition: MHO_MultidimensionalFastFourierTransformInterface.hh:233
std::string GetKernelPath() const
Definition: MHO_OpenCLInterface.hh:133
cl::Device GetDevice() const
Definition: MHO_OpenCLInterface.hh:119
static MHO_OpenCLInterface * GetInstance()
Definition: MHO_OpenCLInterface.cc:32
cl::Context GetContext() const
Definition: MHO_OpenCLInterface.hh:115
Definition: MHO_OpenCLMultidimensionalFastFourierTransform.hh:27
virtual ~MHO_OpenCLMultidimensionalFastFourierTransform()
Definition: MHO_OpenCLMultidimensionalFastFourierTransform.hh:40
MHO_OpenCLMultidimensionalFastFourierTransform()
Definition: MHO_OpenCLMultidimensionalFastFourierTransform.hh:34
virtual bool ExecuteInPlace(XArgType *in) override
Executes an operation in-place using input argument.
Definition: MHO_OpenCLMultidimensionalFastFourierTransform.hh:80
virtual bool InitializeInPlace(XArgType *in) override
Initializes in-place operation using input argument.
Definition: MHO_OpenCLMultidimensionalFastFourierTransform.hh:43
Class MHO_UnaryInPlaceOperator.
Definition: MHO_UnaryInPlaceOperator.hh:23
Definition: MHO_ChannelLabeler.hh:17
Definition: MHO_Meta.hh:341