HOPS
HOPS class reference
vector_mbp_reduce.h
Go to the documentation of this file.
1 #ifndef H_VECTOR_MBP_REDUCE_H__
2 #define H_VECTOR_MBP_REDUCE_H__
3 
4 // CUDA headers (plus <stdint.h> for fixed-width integer types)
5 #include <cuComplex.h>
6 #include <cufft.h>
7 #include <stdint.h>
8 #include <cuda_runtime_api.h>
9 #include <cuda.h>
10 
11 static const int wholeArraySize = 100000000; // NOTE(review): presumably the element count of the array being reduced -- confirm against the implementation
12 static const int blockSize = 1024; // NOTE(review): presumably the number of threads per block used for the kernel launch -- confirm
13 static const int gridSize = 24; // hardware-dependent; about twice the number of streaming multiprocessors (#SM*2) is usually a good choice. NOTE(review): presumably the number of blocks launched -- confirm
14 
15 
16 // Sums a single vector, reducing it to a single value via a parallel block reduction. NOTE(review): this comment originally said "single-block", but the "mbp" file name and the gridSize constant suggest a multi-block launch -- confirm against the implementation.
17 __global__ void cuda_vector_mbp_reduce(const float* a, float* out); // device kernel: reads from a, writes to out
18 extern "C" void vector_mbp_reduce(float* a, float* b, int n); // C-linkage host function; NOTE(review): presumably a is the input of n floats and b receives the result -- confirm
19 
20 #endif
__global__ void cuda_vector_mbp_reduce(const float *a, float *out)
void vector_mbp_reduce(float *a, float *b, int n)