1 #ifndef MHO_OpenCLMultidimensionalFastFourierTransform_HH__
2 #define MHO_OpenCLMultidimensionalFastFourierTransform_HH__
24 template<
typename XArgType >
30 "Array element type must be a complex floating point type.");
57 bool need_to_resize =
false;
58 for(std::size_t i = 0; i < XArgType::rank::value; i++)
62 need_to_resize =
true;
71 InitDeviceWorkspace();
91 unsigned int fTotalDataSize;
97 unsigned int fNGlobal;
98 unsigned int fPreferredWorkgroupMultiple;
99 mutable cl::Kernel* fFFTRadix2Kernel;
100 mutable cl::Kernel* fFFTBluesteinKernel;
101 std::string fOpenCLFlags;
103 unsigned int fMaxBufferSize;
104 cl::Buffer* fDimensionBufferCL;
105 cl::Buffer* fTwiddleBufferCL;
106 cl::Buffer* fConjugateTwiddleBufferCL;
107 cl::Buffer* fScaleBufferCL;
108 cl::Buffer* fCirculantBufferCL;
109 cl::Buffer* fDataBufferCL;
110 cl::Buffer* fPermuationArrayCL;
111 cl::Buffer* fWorkspaceBufferCL;
113 void InitHostWorkspace()
116 for(std::size_t i = 0; i < XArgType::rank::value; i++)
121 if(fMaxBufferSize < fHostPlans[i].GetN())
123 fMaxBufferSize = fHostPlans[i].
GetN();
125 if(fMaxBufferSize < fHostPlans[i].GetM())
127 fMaxBufferSize = fHostPlans[i].
GetM();
133 void InitDeviceWorkspace()
135 DeallocateDeviceWorkspace();
136 AllocateDeviceWorkspace();
139 void AllocateDeviceWorkspace()
141 std::cout <<
"building CL buffers" << std::endl;
142 fDimensionBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, XArgType::rank::value *
sizeof(
unsigned int));
143 fTwiddleBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize *
sizeof(
CL_TYPE2));
144 fConjugateTwiddleBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize *
sizeof(
CL_TYPE2));
145 fScaleBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize *
sizeof(
CL_TYPE2));
146 fCirculantBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize *
sizeof(
CL_TYPE2));
147 fDataBufferCL =
new cl::Buffer(fContext, CL_MEM_READ_WRITE, fTotalDataSize *
sizeof(
CL_TYPE2));
148 fPermuationArrayCL =
new cl::Buffer(fContext, CL_MEM_READ_ONLY, fMaxBufferSize *
sizeof(
unsigned int));
151 void DeallocateDeviceWorkspace()
153 std::cout <<
"deleting CL buffers" << std::endl;
154 delete fFFTRadix2Kernel;
155 delete fFFTBluesteinKernel;
156 delete fDimensionBufferCL;
157 delete fTwiddleBufferCL;
158 delete fConjugateTwiddleBufferCL;
159 delete fScaleBufferCL;
160 delete fCirculantBufferCL;
161 delete fDataBufferCL;
162 delete fPermuationArrayCL;
163 delete fWorkspaceBufferCL;
166 void ConstructOpenCLKernels()
168 std::cout <<
"building opencl kernels" << std::endl;
170 std::stringstream clFile;
172 <<
"/MHO_MultidimensionalFastFourierTransform_kernel.cl";
175 std::stringstream options;
176 options << GetOpenCLFlags();
178 MHO_OpenCLKernelBuilder k_builder;
179 fFFTRadix2Kernel = k_builder.BuildKernel(
180 clFile.str(), std::string(
"MultidimensionalFastFourierTransform_Radix2Stage"), options.str());
181 fFFTBluesteinKernel = k_builder.BuildKernel(
182 clFile.str(), std::string(
"MultidimensionalFastFourierTransform_BluesteinStage"), options.str());
187 fPreferredWorkgroupMultiple = fFFTKernel->getWorkGroupInfo< CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE >(
189 if(fPreferredWorkgroupMultiple < fNLocal)
191 fNLocal = fPreferredWorkgroupMultiple;
196 for(
unsigned int D = 0; D < XArgType::rank::value; D++)
200 unsigned int n_local_1d_transforms = 1;
201 for(
unsigned int i = 0; i < XArgType::rank::value - 1; i++)
205 n_global *= fSpatialDim[i];
206 n_local_1d_transforms *= fSpatialDim[i];
211 unsigned int nDummy = fNLocal - (n_global % fNLocal);
212 if(nDummy == fNLocal)
218 if(fMaxNWorkItems < n_global)
220 fMaxNWorkItems = n_global;
#define CL_TYPE2
Definition: MHO_OpenCLInterface.hh:54
std::string GetKernelPath() const
Definition: MHO_OpenCLInterface.hh:133
cl::Device GetDevice() const
Definition: MHO_OpenCLInterface.hh:119
static MHO_OpenCLInterface * GetInstance()
Definition: MHO_OpenCLInterface.cc:32
cl::Context GetContext() const
Definition: MHO_OpenCLInterface.hh:115
Class MHO_UnaryInPlaceOperator.
Definition: MHO_UnaryInPlaceOperator.hh:23
Definition: MHO_ChannelLabeler.hh:17
Definition: MHO_Meta.hh:341