HOPS
HOPS class reference
MHO_OpenCLScalarMultiply.hh
Go to the documentation of this file.
1 #ifndef MHO_OpenCLScalarMultiply_HH__
2 #define MHO_OpenCLScalarMultiply_HH__
3 
4 #include <algorithm>
5 
6 #include "MHO_Message.hh"
7 #include "MHO_NDArrayWrapper.hh"
8 #include "MHO_UnaryOperator.hh"
9 
10 #include "MHO_OpenCLInterface.hh"
13 
14 namespace hops
15 {
16 
25 template< typename XFactorType, class XArrayType > class MHO_OpenCLScalarMultiply: public MHO_UnaryOperator< XArrayType >
26 {
27  public:
29  : fInitialized(false), fFactor(0), fNLocal(0), fNGlobal(0), fKernel(nullptr),
30  fWriteOut(true), //default is always to write host -> device
31  fReadBack(true) //default is always to read device -> host
32  {
33  BuildKernel();
34  };
35 
36  virtual ~MHO_OpenCLScalarMultiply() { delete fKernel; };
37 
38  void SetFactor(XFactorType factor) { fFactor = factor; };
39 
40  XFactorType GetFactor() const { return fFactor; };
41 
42  //sometimes there is no need to read the data back from device -> host
43  //for example, if there is another kernel using the same data that
44  //is running immediately afterwards, we can just leave the data there
45  void SetReadTrue() { fReadBack = true; };
46 
47  void SetReadFalse() { fReadBack = false; };
48 
49  //sometimes there is no need to write the data from host -> device
50  //for example, if another kernel has just run, and the data is already
51  //present on the device, we can use it without doing a transferr
52  void SetWriteTrue() { fWriteOut = true; };
53 
54  void SetWriteFalse() { fWriteOut = false; };
55 
56  protected:
57  virtual bool InitializeInPlace(XArrayType* in)
58  {
59  if(in != nullptr)
60  {
61  if(in->template HasExtension< MHO_OpenCLNDArrayBuffer< XArrayType > >())
62  {
63  fArrayBuffer = in->template AsExtension< MHO_OpenCLNDArrayBuffer< XArrayType > >();
64  }
65  else
66  {
67  fArrayBuffer = in->template MakeExtension< MHO_OpenCLNDArrayBuffer< XArrayType > >();
68  }
69 
70  unsigned int array_size = in->GetSize();
71 
72  fKernel->setArg(0, array_size);
73  fKernel->setArg(1, fFactor);
74  fKernel->setArg(2, *(fArrayBuffer->GetDataBuffer()));
75 
76  //pad out n-global to be a multiple of the n-local
77  fNGlobal = array_size;
78  unsigned int dummy = fNLocal - (array_size % fNLocal);
79  if(dummy == fNLocal)
80  {
81  dummy = 0;
82  };
83  fNGlobal += dummy;
84 
85  fInitialized = true;
86  return true;
87  }
88  return false;
89  }
90 
91  virtual bool ExecuteInPlace(XArrayType* in)
92  {
93  if(fInitialized)
94  {
95  //write out the data to the device if we must, otherwise assume it is already on device
96  if(fWriteOut)
97  {
98  fArrayBuffer->WriteDataBuffer();
99  }
100 
101  //now fire up the kernel
102  MHO_OpenCLInterface::GetInstance()->GetQueue().enqueueNDRangeKernel(*fKernel, cl::NullRange, fNGlobal, fNLocal);
103 #ifdef ENFORCE_CL_FINISH
105 #endif
106 
107  //read back data to the host if we must, otherwise, leave it on the device for the next kernel
108  if(fReadBack)
109  {
110  fArrayBuffer->ReadDataBuffer();
111  }
112  return true;
113  }
114  return false;
115  }
116 
117  virtual bool InitializeOutOfPlace(const XArrayType* in, XArrayType* out)
118  {
119  ConditionallyResizeOutput(in->GetDimensionArray(), out);
120  return InitializeInPlace(out);
121  }
122 
123  virtual bool ExecuteOutOfPlace(const XArrayType* in, XArrayType* out)
124  {
125  //This may not be the most efficient way to do this
126  out->Copy(*in);
127  bool cached_value = fWriteOut;
128  fWriteOut = true;
129  bool ret_val = ExecuteInPlace(out);
130  fWriteOut = cached_value;
131  return ret_val;
132  }
133 
134  private:
135  void BuildKernel()
136  {
137  //Get name of kernel source file
138  std::stringstream clFile;
139  clFile << MHO_OpenCLInterface::GetInstance()->GetKernelPath() << "/MHO_VectorScale_kernel.cl";
140 
141  std::string flags = GetOpenCLFlags();
142 
143  MHO_OpenCLKernelBuilder k_builder;
144  fKernel = k_builder.BuildKernel(clFile.str(), std::string("VectorScale"), flags);
145 
146  fNLocal = fKernel->getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >(MHO_OpenCLInterface::GetInstance()->GetDevice());
147  unsigned int preferredWorkgroupMultiple = fKernel->getWorkGroupInfo< CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE >(
149  if(preferredWorkgroupMultiple < fNLocal)
150  {
151  fNLocal = preferredWorkgroupMultiple;
152  }
153  }
154 
155  std::string GetOpenCLFlags()
156  {
157  //set the build options
158  std::stringstream options;
159  options << " -I " << MHO_OpenCLInterface::GetInstance()->GetKernelPath();
160 
161  std::string factor_type = MHO_ClassName< XFactorType >();
162  std::string data_type = MHO_ClassName< typename XArrayType::value_type >();
163 
164  //pass flag that we have to do complex multiply
165  if(factor_type.find(std::string("complex")) != std::string::npos &&
166  data_type.find(std::string("complex")) != std::string::npos)
167  {
168  options << " -D COMPLEX_COMPLEX";
169  }
170 
171  //figure out the data type defines to insert in the OpenCL kernel
172  options << " -D CL_FACTOR_TYPE=" << MHO_ClassName< typename MHO_OpenCLTypeMap< XFactorType >::mapped_type >();
173  options << " -D CL_DATA_TYPE="
174  << MHO_ClassName< typename MHO_OpenCLTypeMap< typename XArrayType::value_type >::mapped_type >();
175 
176  return options.str();
177  }
178 
179  void ConditionallyResizeOutput(const std::array< std::size_t, XArrayType::rank::value >& dims, XArrayType* out)
180  {
181  auto out_dim = out->GetDimensionArray();
182  bool have_to_resize = false;
183  for(std::size_t i = 0; i < XArrayType::rank::value; i++)
184  {
185  if(out_dim[i] != dims[i])
186  {
187  have_to_resize = true;
188  }
189  }
190  if(have_to_resize)
191  {
192  out->Resize(&(dims[0]));
193  }
194  }
195 
196  bool fInitialized;
197  XFactorType fFactor;
198  unsigned int fNLocal;
199  unsigned int fNGlobal;
200  cl::Kernel* fKernel;
201  bool fWriteOut;
202  bool fReadBack;
203 
204  MHO_OpenCLNDArrayBuffer< XArrayType >* fArrayBuffer;
205 };
206 
207 } // namespace hops
208 
209 #endif
MHO_TableContainer< elem_type, ax_pack > data_type
Definition: TestSignalFFT.cc:25
std::string GetKernelPath() const
Definition: MHO_OpenCLInterface.hh:133
cl::Device GetDevice() const
Definition: MHO_OpenCLInterface.hh:119
cl::CommandQueue & GetQueue(int i=-1) const
Definition: MHO_OpenCLInterface.cc:125
static MHO_OpenCLInterface * GetInstance()
Definition: MHO_OpenCLInterface.cc:32
Definition: MHO_OpenCLKernelBuilder.hh:11
cl::Kernel * BuildKernel(std::string SourceFileName, std::string KernelName, std::string BuildFlags=std::string(""))
Definition: MHO_OpenCLKernelBuilder.cc:17
Definition: MHO_OpenCLNDArrayBuffer.hh:19
Definition: MHO_OpenCLScalarMultiply.hh:26
void SetReadFalse()
Definition: MHO_OpenCLScalarMultiply.hh:47
void SetWriteFalse()
Definition: MHO_OpenCLScalarMultiply.hh:54
MHO_OpenCLScalarMultiply()
Definition: MHO_OpenCLScalarMultiply.hh:28
virtual bool ExecuteOutOfPlace(const XArrayType *in, XArrayType *out)
Function ExecuteOutOfPlace.
Definition: MHO_OpenCLScalarMultiply.hh:123
virtual bool InitializeOutOfPlace(const XArrayType *in, XArrayType *out)
Function InitializeOutOfPlace.
Definition: MHO_OpenCLScalarMultiply.hh:117
XFactorType GetFactor() const
Definition: MHO_OpenCLScalarMultiply.hh:40
void SetFactor(XFactorType factor)
Definition: MHO_OpenCLScalarMultiply.hh:38
virtual ~MHO_OpenCLScalarMultiply()
Definition: MHO_OpenCLScalarMultiply.hh:36
void SetWriteTrue()
Definition: MHO_OpenCLScalarMultiply.hh:52
virtual bool InitializeInPlace(XArrayType *in)
Function InitializeInPlace.
Definition: MHO_OpenCLScalarMultiply.hh:57
void SetReadTrue()
Definition: MHO_OpenCLScalarMultiply.hh:45
virtual bool ExecuteInPlace(XArrayType *in)
Function ExecuteInPlace.
Definition: MHO_OpenCLScalarMultiply.hh:91
Class MHO_UnaryOperator.
Definition: MHO_UnaryOperator.hh:24
def dummy(fringe_data_interface)
Definition: example1.py:3
Definition: MHO_ChannelLabeler.hh:17