HOPS
HOPS class reference
MHO_EndZeroPadderOptimized.hh
Go to the documentation of this file.
1 #ifndef MHO_EndZeroPadderOptimized_HH__
2 #define MHO_EndZeroPadderOptimized_HH__
3 
4 #include <bitset>
5 #include <cstring>
6 #include <set>
7 #include <vector>
8 
9 #include "MHO_Message.hh"
10 #include "MHO_NDArrayWrapper.hh"
11 #include "MHO_TableContainer.hh"
12 #include "MHO_UnaryOperator.hh"
13 
14 namespace hops
15 {
16 
28 template< typename XArgType > class MHO_EndZeroPadderOptimized: public MHO_UnaryOperator< XArgType >
29 {
30  public:
32  {
33  fPaddingFactor = 1;
34  fPaddedSize = 0;
35  for(std::size_t i = 0; i < XArgType::rank::value; i++)
36  {
37  fInputDimensionSize[i] = 0;
38  fOutputDimensionSize[i] = 0;
39  fAxesToXForm[i] = true;
40  }
41 
42  fIsValid = false;
43  fInitialized = false;
44  fFlipped = false; //chooses which end we pad
45  fNormFXMode = true; //when enabled, then when flipped, copies the last element to N/2
46  fPreserveWorkspace = false; //when false tmp workspace will be destroyed after every use
47  fCopyTags = true; //copy tags if available is enabled by default
48  fTmpWorkspace = nullptr;
49  fHasPaddedAxis = false;
50  fInnermostPaddedAxis = 0;
51  fBlockElems = 0;
52  fNRows = 0;
53  fTotalInputElems = 0;
54  for(std::size_t i = 0; i < XArgType::rank::value; i++)
55  {
56  fInStride[i] = 0;
57  fOutStride[i] = 0;
58  }
59  };
60 
61  virtual ~MHO_EndZeroPadderOptimized() { delete fTmpWorkspace; };
62 
70  virtual void SetPaddingFactor(std::size_t factor) { fPaddingFactor = factor; };
71 
79  virtual void SetPaddedSize(std::size_t new_size)
80  {
81  fPaddedSize = new_size;
82  fPaddingFactor = 1;
83  }
84 
89  virtual void SetEndPadded() { fFlipped = false; }; //
90 
95  virtual void SetReverseEndPadded() { fFlipped = true; };
96 
101  virtual void DisableNormFXMode() { fNormFXMode = false; };
102 
107  virtual void EnableNormFXMode() { fNormFXMode = true; };
108 
113  virtual void PreserveWorkspace()
114  {
115  fPreserveWorkspace = true;
116  } //keep the memory reserved for the workspace around after exectution
117 
122  virtual void DoNotPreserveWorkspace() { fPreserveWorkspace = false; }
123 
128  virtual void DisableTagCopy() { fCopyTags = false; }
129 
134  virtual void EnableTagCopy() { fCopyTags = true; }
135 
142  {
143  for(std::size_t i = 0; i < XArgType::rank::value; i++)
144  {
145  fAxesToXForm[i] = true;
146  }
147  }
148 
153  {
154  for(std::size_t i = 0; i < XArgType::rank::value; i++)
155  {
156  fAxesToXForm[i] = false;
157  }
158  }
159 
165  void SelectAxis(std::size_t axis_index)
166  {
167  if(axis_index < XArgType::rank::value)
168  {
169  fAxesToXForm[axis_index] = true;
170  }
171  else
172  {
173  msg_error("operators", "Cannot transform axis with index: " << axis_index << "for array with rank: "
174  << XArgType::rank::value << eom);
175  }
176  }
177 
178  protected:
186  virtual bool InitializeInPlace(XArgType* in) override
187  {
188  if(fTmpWorkspace == nullptr)
189  {
190  fTmpWorkspace = new XArgType();
191  }
192  return InitializeOutOfPlace(in, fTmpWorkspace);
193  }
194 
201  virtual bool ExecuteInPlace(XArgType* in) override
202  {
203  bool status = ExecuteOutOfPlace(in, fTmpWorkspace);
204  //"in-place" execution requires a copy from the workspace back to the object we are modifying
205  in->Copy(*fTmpWorkspace);
206  if(!fPreserveWorkspace)
207  {
208  //destroy the temporary workspace when we are done
209  delete fTmpWorkspace;
210  fTmpWorkspace = nullptr;
211  }
212  return status;
213  }
214 
223  virtual bool InitializeOutOfPlace(const XArgType* in, XArgType* out) override
224  {
225  if(in != nullptr && out != nullptr && in != out)
226  {
227  fIsValid = true;
228  }
229 
230  if(fIsValid)
231  {
232  //output dimensions must be factor of fPaddingFactor bigger than input dims
233  in->GetDimensions(fInputDimensionSize);
234  out->GetDimensions(fOutputDimensionSize);
235  ConditionallyResizeOutput(in->GetDimensionArray(), out);
236  PrecomputeOffsetTables();
237  fInitialized = true;
238  }
239  return (fInitialized && fIsValid);
240  }
241 
249  virtual bool ExecuteOutOfPlace(const XArgType* in, XArgType* out) override
250  {
251  if(fIsValid && fInitialized)
252  {
253  profiler_scope();
254  constexpr std::size_t RANK = XArgType::rank::value;
255  const auto* in_data = in->GetData();
256  auto* out_data = out->GetData();
257 
258  if(!fFlipped)
259  {
260  //zero padding is placed at the end of the array
261  out->ZeroArray();
262  if(fHasPaddedAxis)
263  {
264  //row-wise memcpy: each "row" is a contiguous block starting at the
265  //innermost padded axis (fInnermostPaddedAxis); outer indices are
266  //tracked incrementally to avoid per-element integer divisions
267  const std::size_t block_bytes = fBlockElems * sizeof(*in_data);
268  std::size_t idx[RANK] = {};
269  std::size_t in_offset = 0, out_offset = 0;
270  for(std::size_t row = 0; row < fNRows; row++)
271  {
272  std::memcpy(out_data + out_offset, in_data + in_offset, block_bytes);
273  //increment outer indices (d < fInnermostPaddedAxis) with carry
274  for(int d = (int)fInnermostPaddedAxis - 1; d >= 0; d--)
275  {
276  idx[d]++;
277  in_offset += fInStride[d];
278  out_offset += fOutStride[d];
279  if(idx[d] < fInputDimensionSize[d])
280  break;
281  idx[d] = 0;
282  in_offset -= fInputDimensionSize[d] * fInStride[d];
283  out_offset -= fInputDimensionSize[d] * fOutStride[d];
284  }
285  }
286  }
287  else
288  {
289  //no axes are padded: in and out have the same dimensions, just copy
290  std::memcpy(out_data, in_data, fTotalInputElems * sizeof(*in_data));
291  }
292  }
293  else
294  {
295  //zero padding is placed at the start of the array (flipped mode)
296  //use precomputed per-axis offset tables to avoid per-element index arithmetic
297  out->ZeroArray();
298  std::size_t axis_out_offset[RANK];
299  std::size_t out_flat = 0;
300  for(std::size_t d = 0; d < RANK; d++)
301  {
302  axis_out_offset[d] = fOutAxisOffset[d][0];
303  out_flat += axis_out_offset[d];
304  }
305  std::size_t idx[RANK] = {};
306  for(std::size_t n = 0; n < fTotalInputElems; n++)
307  {
308  out_data[out_flat] = in_data[n];
309  //increment innermost-first with carry, updating out_flat incrementally
310  for(int d = (int)RANK - 1; d >= 0; d--)
311  {
312  idx[d]++;
313  if(idx[d] < fInputDimensionSize[d])
314  {
315  std::size_t new_contrib = fOutAxisOffset[d][idx[d]];
316  out_flat = out_flat - axis_out_offset[d] + new_contrib;
317  axis_out_offset[d] = new_contrib;
318  break;
319  }
320  else
321  {
322  idx[d] = 0;
323  std::size_t new_contrib = fOutAxisOffset[d][0];
324  out_flat = out_flat - axis_out_offset[d] + new_contrib;
325  axis_out_offset[d] = new_contrib;
326  //continue carry to next outer dimension
327  }
328  }
329  }
330  }
331 
332  IfTableTransformAxis(in, out);
333  return true;
334  }
335  else
336  {
337  msg_error("operators", "Array dimensions are not valid or intialization failed. Aborting zero padding." << eom);
338  return false;
339  }
340  }
341 
342  private:
343  //default...does nothing
351  template< typename XCheckType = XArgType >
352  typename std::enable_if< !std::is_base_of< MHO_TableContainerBase, XCheckType >::value, void >::type
353  IfTableTransformAxis(const XArgType* , XArgType* ){};
354 
355  //use SFINAE to generate specialization for MHO_TableContainer types
356  template< typename XCheckType = XArgType >
357  typename std::enable_if< std::is_base_of< MHO_TableContainerBase, XCheckType >::value, void >::type
358  IfTableTransformAxis(const XArgType* in, XArgType* out)
359  {
360  for(size_t i = 0; i < XArgType::rank::value; i++) //apply to all axes
361  {
362  TransformAxis axis_transformer(fAxesToXForm[i], fFlipped, fCopyTags);
363  apply_at2< typename XArgType::axis_pack_tuple_type, TransformAxis >(*in, *out, i, axis_transformer);
364  }
365  out->CopyTags(*in); //make sure the table tags get copied
366  }
367 
368  class TransformAxis
369  {
370  public:
371  TransformAxis(bool modify, bool flipped, bool copy_tags)
372  : fModify(modify), fFlipped(flipped), fCopyTags(copy_tags){};
373 
374  ~TransformAxis(){};
375 
376  //generic axis, do nothing
377  template< typename XAxisType > void operator()(const XAxisType& axis1, XAxisType& axis2)
378  {
379  if(!fCopyTags)
380  {
381  CopyLabelsWithoutTags(axis1, axis2);
382  }
383  else
384  {
385  axis2.Copy(axis1);
386  }
387  if(!fModify)
388  {
389  return;
390  } //just copy this axis
391  };
392 
393  void operator()(const MHO_Axis< double >& axis1, MHO_Axis< double >& axis2)
394  {
395  std::size_t ax1_size = axis1.GetSize();
396  std::size_t ax2_size = axis2.GetSize();
397  if(!fCopyTags)
398  {
399  CopyLabelsWithoutTags(axis1, axis2);
400  }
401  else
402  {
403  axis2.Copy(axis1);
404  }
405  if(!fModify)
406  {
407  return;
408  } //just copy this axis
409  axis2.Resize(ax2_size);
410  //assumes uniform labeling, probably ok as we only need this for FFTs
411  double delta = axis1(1) - axis1(0);
412  if(!fFlipped)
413  {
414  for(std::size_t i = 0; i < ax1_size; i++)
415  {
416  axis2(i) = axis1(i);
417  }
418  for(std::size_t i = ax1_size; i < ax2_size; i++)
419  {
420  axis2(i) = axis1(ax1_size - 1) + (i - (ax1_size - 1)) * delta;
421  }
422  }
423  else
424  {
425  for(std::size_t i = 0; i < ax1_size; i++)
426  {
427  axis2(i) = axis1(ax1_size - 1 - i);
428  }
429  for(std::size_t i = ax1_size; i < ax2_size; i++)
430  {
431  axis2(i) = axis1(0) - (i - (ax1_size - 1)) * delta;
432  }
433  }
434  }
435 
436  void operator()(const MHO_Axis< float >& axis1, MHO_Axis< float >& axis2)
437  {
438  std::size_t ax1_size = axis1.GetSize();
439  std::size_t ax2_size = axis2.GetSize();
440  if(!fCopyTags)
441  {
442  CopyLabelsWithoutTags(axis1, axis2);
443  }
444  else
445  {
446  axis2.Copy(axis1);
447  }
448  if(!fModify)
449  {
450  return;
451  } //just copy this axis
452  axis2.Resize(ax2_size);
453  //assumes uniform labeling, probably ok as we only need this for FFTs
454  double delta = axis1(1) - axis1(0);
455  if(!fFlipped)
456  {
457  for(std::size_t i = 0; i < ax1_size; i++)
458  {
459  axis2(i) = axis1(i);
460  }
461  for(std::size_t i = ax1_size; i < ax2_size; i++)
462  {
463  axis2(i) = axis1(ax1_size - 1) + (i - (ax1_size - 1)) * delta;
464  }
465  }
466  else
467  {
468  for(std::size_t i = 0; i < ax1_size; i++)
469  {
470  axis2(i) = axis1(ax1_size - 1 - i);
471  }
472  for(std::size_t i = ax1_size; i < ax2_size; i++)
473  {
474  axis2(i) = axis1(0) - (i - (ax1_size - 1)) * delta;
475  }
476  }
477  }
478 
479  private:
480  template< typename XAxisType > void CopyLabelsWithoutTags(const XAxisType& axis1, XAxisType& axis2)
481  {
482  //just copy axis labels
483  std::size_t ax1_size = axis1.GetSize();
484  std::size_t ax2_size = axis2.GetSize();
485  std::size_t s = std::min(ax1_size, ax2_size);
486  for(std::size_t i = 0; i < s; i++)
487  {
488  axis2(i) = axis1(i);
489  }
490  }
491 
492  bool fModify;
493  bool fFlipped;
494  bool fCopyTags;
495  };
496 
497  void PrecomputeOffsetTables()
498  {
499  constexpr std::size_t RANK = XArgType::rank::value;
500 
501  //compute element strides for both input and output arrays
502  fInStride[RANK - 1] = 1;
503  fOutStride[RANK - 1] = 1;
504  for(int d = (int)RANK - 2; d >= 0; d--)
505  {
506  fInStride[d] = fInStride[d + 1] * fInputDimensionSize[d + 1];
507  fOutStride[d] = fOutStride[d + 1] * fOutputDimensionSize[d + 1];
508  }
509 
510  //find the innermost (highest-index) axis that is being padded
511  fHasPaddedAxis = false;
512  fInnermostPaddedAxis = 0;
513  for(std::size_t d = 0; d < RANK; d++)
514  {
515  if(fAxesToXForm[d])
516  {
517  fInnermostPaddedAxis = d;
518  fHasPaddedAxis = true;
519  }
520  }
521 
522  //for the !fFlipped path: block size (contiguous elements per row) and row count
523  fBlockElems = 1;
524  fNRows = 1;
525  if(fHasPaddedAxis)
526  {
527  for(std::size_t d = fInnermostPaddedAxis; d < RANK; d++)
528  fBlockElems *= fInputDimensionSize[d];
529  for(std::size_t d = 0; d < fInnermostPaddedAxis; d++)
530  fNRows *= fInputDimensionSize[d];
531  }
532 
533  //total input elements (used for fFlipped path and no-padded-axis fast copy)
534  fTotalInputElems = 1;
535  for(std::size_t d = 0; d < RANK; d++)
536  fTotalInputElems *= fInputDimensionSize[d];
537 
538  //for the fFlipped path: precompute per-axis, per-index contributions to the
539  //output flat offset, so Execute needs only a table lookup + addition per element
540  fOutAxisOffset.resize(RANK);
541  for(std::size_t d = 0; d < RANK; d++)
542  {
543  fOutAxisOffset[d].resize(fInputDimensionSize[d]);
544  for(std::size_t k = 0; k < fInputDimensionSize[d]; k++)
545  {
546  if(!fAxesToXForm[d])
547  {
548  fOutAxisOffset[d][k] = k * fOutStride[d];
549  }
550  else if(!fNormFXMode)
551  {
552  fOutAxisOffset[d][k] = (fOutputDimensionSize[d] - 1 - k) * fOutStride[d];
553  }
554  else
555  {
556  //fNormFXMode: n=0 maps to N/2 for compatibility with norm_fx
557  if(k == 0)
558  fOutAxisOffset[d][k] = (fOutputDimensionSize[d] / 2) * fOutStride[d];
559  else
560  fOutAxisOffset[d][k] = (fOutputDimensionSize[d] - k) * fOutStride[d];
561  }
562  }
563  }
564  }
565 
566  void ConditionallyResizeOutput(const std::array< std::size_t, XArgType::rank::value >& dims, XArgType* out)
567  {
568  auto out_dim = out->GetDimensionArray();
569  bool have_to_resize = false;
570  for(std::size_t i = 0; i < XArgType::rank::value; i++)
571  {
572  if(fAxesToXForm[i])
573  {
574  if(dims[i] * fPaddingFactor != out_dim[i])
575  {
576  have_to_resize = true;
577  out_dim[i] = dims[i] * fPaddingFactor;
578  }
579  if(fPaddingFactor == 1)
580  {
581  if(dims[i] != fPaddedSize)
582  {
583  have_to_resize = true;
584  out_dim[i] = fPaddedSize;
585  }
586  }
587  }
588  else
589  {
590  if(dims[i] != out_dim[i])
591  {
592  have_to_resize = true;
593  out_dim[i] = dims[i];
594  }
595  }
596  }
597  if(have_to_resize)
598  {
599  out->Resize(&(out_dim[0]));
600  }
601  out->GetDimensions(fOutputDimensionSize);
602  }
603 
604  bool fIsValid; //set by InitializeOutOfPlace when in/out are non-null and distinct
605  bool fInitialized; //set by InitializeOutOfPlace once the output is sized and the offset tables are built
606  bool fFlipped; //chooses which end we pad (true: reverse end padding)
607  bool fNormFXMode; //flipped path only: input index 0 is mapped to N/2 instead of the plain reversal
608  bool fPreserveWorkspace; //when false the in-place workspace is deleted after every ExecuteInPlace
609  bool fCopyTags; //when true axes are copied with Copy(), otherwise only plain labels are copied
610 
611  std::size_t fPaddingFactor; //selected axes of the output get (input size) * fPaddingFactor
612  std::size_t fPaddedSize; //explicit output size of selected axes, consulted when fPaddingFactor == 1
613  std::size_t fInputDimensionSize[XArgType::rank::value]; //input dimensions captured at initialization
614  std::size_t fOutputDimensionSize[XArgType::rank::value]; //output dimensions after the conditional resize
615  bool fAxesToXForm[XArgType::rank::value]; //per-axis selection flag: true if the axis is padded
616 
617  //precomputed for fast ExecuteOutOfPlace
618  bool fHasPaddedAxis;
619  std::size_t fInnermostPaddedAxis; //max axis index where fAxesToXForm[d]==true
620  std::size_t fBlockElems; //contiguous input elements per row (!fFlipped path)
621  std::size_t fNRows; //number of rows to copy (!fFlipped path)
622  std::size_t fTotalInputElems; //total number of input elements
623  std::size_t fInStride[XArgType::rank::value];
624  std::size_t fOutStride[XArgType::rank::value];
625  std::vector< std::vector< std::size_t > > fOutAxisOffset; //[dim][idx] -> out flat offset contribution (fFlipped path)
626 
627  XArgType* fTmpWorkspace; //owned scratch array for in-place use: created in InitializeInPlace, deleted in ExecuteInPlace or the destructor
628 };
629 
630 } // namespace hops
631 
632 #endif
#define msg_error(xKEY, xCONTENT)
Definition: MHO_Message.hh:238
#define profiler_scope()
Definition: MHO_Profiler.hh:237
virtual void Copy(const MHO_Axis &rhs)
Expensive copy for MHO_Axis that handles special treatment of index/interval labels.
Definition: MHO_Axis.hh:200
Class MHO_EndZeroPadderOptimized.
Definition: MHO_EndZeroPadderOptimized.hh:29
virtual bool ExecuteInPlace(XArgType *in) override
Executes operation in-place by copying temporary workspace back to input object.
Definition: MHO_EndZeroPadderOptimized.hh:201
void DeselectAllAxes()
Deselects all axes by setting each axis to false.
Definition: MHO_EndZeroPadderOptimized.hh:152
virtual void SetReverseEndPadded()
Setter for reverse end padded, place data at end of array and zero pad out to start.
Definition: MHO_EndZeroPadderOptimized.hh:95
virtual void SetPaddedSize(std::size_t new_size)
Setter for padded size, instead of a multiplicative factor, the original array, length N is padded ou...
Definition: MHO_EndZeroPadderOptimized.hh:79
virtual void DisableTagCopy()
Disables copying tags by setting fCopyTags to false.
Definition: MHO_EndZeroPadderOptimized.hh:128
virtual bool InitializeOutOfPlace(const XArgType *in, XArgType *out) override
Initializes out-of-place processing for input and output arrays.
Definition: MHO_EndZeroPadderOptimized.hh:223
virtual void DisableNormFXMode()
Disables NormFX mode (the norm_fx-compatible index mapping used when reverse end padding) by setting fNormFXMode to false. UNUSED - TODO REMOVE ME!
Definition: MHO_EndZeroPadderOptimized.hh:101
virtual ~MHO_EndZeroPadderOptimized()
Definition: MHO_EndZeroPadderOptimized.hh:61
MHO_EndZeroPadderOptimized()
Definition: MHO_EndZeroPadderOptimized.hh:31
void SelectAllAxes()
Selects all axes for transformation. sometimes we may want to select/deselect particular dimensions o...
Definition: MHO_EndZeroPadderOptimized.hh:141
virtual void SetEndPadded()
Setter for end padded, zero padding from end of data out to end of the array.
Definition: MHO_EndZeroPadderOptimized.hh:89
virtual void EnableNormFXMode()
Enables NormFX mode (the norm_fx-compatible index mapping used when reverse end padding) by setting fNormFXMode to true. UNUSED - TODO REMOVE ME!
Definition: MHO_EndZeroPadderOptimized.hh:107
virtual void EnableTagCopy()
Enables copying of tags.
Definition: MHO_EndZeroPadderOptimized.hh:134
virtual void SetPaddingFactor(std::size_t factor)
Setter for padding factor, the factor M by which the new array will be extended (original array,...
Definition: MHO_EndZeroPadderOptimized.hh:70
virtual void PreserveWorkspace()
Sets a flag to preserve workspace memory after execution.
Definition: MHO_EndZeroPadderOptimized.hh:113
void SelectAxis(std::size_t axis_index)
Selects an axis for transformation if its index is within the array rank.
Definition: MHO_EndZeroPadderOptimized.hh:165
virtual bool ExecuteOutOfPlace(const XArgType *in, XArgType *out) override
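Zero-pads the input array into the output array (out-of-place); returns false if the operator has not been successfully initialized.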
Definition: MHO_EndZeroPadderOptimized.hh:249
virtual void DoNotPreserveWorkspace()
Sets preserve workspace flag to false, delete memory after execution.
Definition: MHO_EndZeroPadderOptimized.hh:122
virtual bool InitializeInPlace(XArgType *in) override
Initializes in-place by creating a temporary workspace and calling InitializeOutOfPlace.
Definition: MHO_EndZeroPadderOptimized.hh:186
virtual void Resize(const std::size_t *dim)
Resize an externally managed array using provided dimensions.
Definition: MHO_NDArrayWrapper_1.hh:52
std::size_t GetSize() const
Getter for size.
Definition: MHO_NDArrayWrapper_1.hh:99
Class MHO_UnaryOperator.
Definition: MHO_UnaryOperator.hh:24
struct type_status status
Definition: fourfit3.c:53
#define min(a, b)
Definition: max555.c:9
Definition: MHO_AdhocFlagging.hh:18