Commit 8d54f5e5 authored by James Sutherland's avatar James Sutherland
Browse files

Merge branch 'GPU_Particle_Ops_Test' into 'master'

Improve test file by adding GPU sanity check

Add CUDA checkpoint
Remove unnecessary type changes

See merge request !27
parents d5de1648 0e0ede28
......@@ -25,6 +25,18 @@
#define Index3D_Mem(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k)))
#define GPU_ERROR_CHECK(err) { gpu_error( err, __FILE__, __LINE__); }
// Translate a CUDA runtime status into a C++ exception.
// Does nothing on cudaSuccess; otherwise throws std::runtime_error carrying
// the CUDA message plus the file/line where the check was made (normally
// supplied by the GPU_ERROR_CHECK macro via __FILE__/__LINE__).
inline void gpu_error(cudaError err, const char *file, int line)
{
  if( err == cudaSuccess ) return;  // fast path: nothing to report
  std::ostringstream msg;
  msg << "GPU failed!\n"
      << "Cuda message : " << cudaGetErrorString(err) << "\n"
      << "\t - " << file << " : " << line << std::endl;
  throw(std::runtime_error(msg.str()));
}
//==================================================================
namespace SpatialOps{
namespace Particle{
......@@ -70,7 +82,6 @@ namespace SpatialOps{
const int j = py ? ( py[iprt] - yloface ) / dy : 0;
const int k = pz ? ( pz[iprt] - zloface ) / dz : 0;
atomic_add(&dest[Index3D_Mem(nmax1, nmax2, i, j, k)], 1.0);
__syncthreads();
}
//------------------------------------------------------------------
......@@ -113,9 +124,9 @@ namespace SpatialOps{
const int izlo = ( pzlo - zloface ) / dz;
// hi indices are 1 past the end
const int ixhi = px ? int(fmin( double(nmax1), double(int(( pxhi - xloface ) / dx) + 1 ))) : ixlo+1;
const int iyhi = py ? int(fmin( double(nmax2), double(int(( pyhi - yloface ) / dy) + 1 ))) : iylo+1;
const int izhi = pz ? int(fmin( double(nmax3), double(int(( pzhi - zloface ) / dz) + 1 ))) : izlo+1;
const int ixhi = px ? fmin(nmax1, ( pxhi - xloface ) / dx + 1 ) : ixlo+1;
const int iyhi = py ? fmin(nmax2, ( pyhi - yloface ) / dy + 1 ) : iylo+1;
const int izhi = pz ? fmin(nmax3, ( pzhi - zloface ) / dz + 1 ) : izlo+1;
const double pvol = (px ? 2*rp : 1) * (py ? 2*rp : 1) * (pz ? 2*rp : 1);
......@@ -208,8 +219,8 @@ namespace SpatialOps{
break;
}
cudaDeviceSynchronize();
GPU_ERROR_CHECK(cudaGetLastError());
}
//==================================================================
......@@ -240,7 +251,6 @@ namespace SpatialOps{
const int j = py ? ( py[iprt] - yloface ) / dy : 0;
const int k = pz ? ( pz[iprt] - zloface ) / dz : 0;
atomic_add(&dest[Index3D_Mem(nmax1, nmax2, i, j, k)], src[iprt]);
__syncthreads();
}
//------------------------------------------------------------------
......@@ -284,9 +294,9 @@ namespace SpatialOps{
const int izlo = ( pzlo - zloface ) / dz;
// hi indices are 1 past the end
const int ixhi = px ? int(fmin( double(nmax1), double(int(( pxhi - xloface ) / dx) + 1 ))) : ixlo+1;
const int iyhi = py ? int(fmin( double(nmax2), double(int(( pyhi - yloface ) / dy) + 1 ))) : iylo+1;
const int izhi = pz ? int(fmin( double(nmax3), double(int(( pzhi - zloface ) / dz) + 1 ))) : izlo+1;
const int ixhi = px ? fmin(nmax1, ( pxhi - xloface ) / dx + 1 ) : ixlo+1;
const int iyhi = py ? fmin(nmax2, ( pyhi - yloface ) / dy + 1 ) : iylo+1;
const int izhi = pz ? fmin(nmax3, ( pzhi - zloface ) / dz + 1 ) : izlo+1;
const double pvol = (px ? 2*rp : 1) * (py ? 2*rp : 1) * (pz ? 2*rp : 1);
......@@ -382,8 +392,8 @@ namespace SpatialOps{
break;
}
cudaDeviceSynchronize();
GPU_ERROR_CHECK(cudaGetLastError());
}
//==================================================================
......@@ -416,7 +426,6 @@ namespace SpatialOps{
const int j = py ? ( py[iprt] - ylo ) / dy : 0;
const int k = pz ? ( pz[iprt] - zlo ) / dz : 0;
dest[iprt] += src[Index3D_Mem(nmax1, nmax2, i, j, k)];
__syncthreads();
}
......@@ -459,9 +468,9 @@ namespace SpatialOps{
const int izlo = ( pzlo - zloface ) / dz;
// hi indices are 1 past the end
const int ixhi = px ? int(fmin( double(nmax1), double(int(( pxhi - xloface ) / dx) + 1 ))) : ixlo+1;
const int iyhi = py ? int(fmin( double(nmax2), double(int(( pyhi - yloface ) / dy) + 1 ))) : iylo+1;
const int izhi = pz ? int(fmin( double(nmax3), double(int(( pzhi - zloface ) / dz) + 1 ))) : izlo+1;
const int ixhi = px ? fmin(nmax1, ( pxhi - xloface ) / dx + 1 ) : ixlo+1;
const int iyhi = py ? fmin(nmax2, ( pyhi - yloface ) / dy + 1 ) : iylo+1;
const int izhi = pz ? fmin(nmax3, ( pzhi - zloface ) / dz + 1 ) : izlo+1;
const double pvol = (px ? 2*rp : 1) * (py ? 2*rp : 1) * (pz ? 2*rp : 1);
......@@ -557,6 +566,7 @@ namespace SpatialOps{
break;
}
cudaDeviceSynchronize();
GPU_ERROR_CHECK(cudaGetLastError());
}
//==================================================================
......
......@@ -10,12 +10,160 @@ using std::endl;
#include <cmath>

#include <spatialops/particles/ParticleOperatorsImplementation.h>
#include <spatialops/structured/FieldHelper.h>
#include <spatialops/structured/Grid.h>
#include <util/TimeLogger.h>
#include <test/TestHelper.h>
typedef SpatialOps::SVolField CellField;
using namespace SpatialOps;
#ifdef ENABLE_CUDA
/**
 * Exercise the ParticleToCell (P2C) and CellToParticle (C2P) operators on
 * both CPU and GPU for an ncell^3 grid with one particle per cell, print the
 * CPU/GPU timing ratios, and verify that the two backends agree.
 *
 * @param ncell       number of cells in each coordinate direction
 * @param interMethod interpolation option forwarded to both operators
 * @return true when every CPU/GPU result pair matches to within 1e-8
 */
bool gpu_check(const int ncell,
               const SpatialOps::Particle::InterpOptions interMethod)
{
  typedef SpatialOps::Particle::ParticleField ParticleField;

  // One particle per cell. Use exact integer arithmetic rather than
  // pow(ncell,3), which round-trips through double.
  const size_t np = size_t(ncell) * size_t(ncell) * size_t(ncell);

  const IntVec dim ( ncell, ncell, ncell );
  const IntVec dimx( dim[0], 1, 1 );
  const IntVec dimy( 1, dim[1], 1 );
  const IntVec dimz( 1, 1, dim[2] );
  const DoubleVec length( dim[0], dim[1], dim[2] );
  const Grid grid( dim, length );

  // Cell fields (3 ghost layers) plus 1-D fields holding the coordinates.
  const GhostData cg(3);
  const BoundaryCellInfo cbc = BoundaryCellInfo::build<CellField>();
  const MemoryWindow mw  = get_window_with_ghost( dim,  cg, BoundaryCellInfo::build<CellField>(false,false,false) );
  const MemoryWindow mwx = get_window_with_ghost( dimx, cg, BoundaryCellInfo::build<CellField>(false,false,false) );
  const MemoryWindow mwy = get_window_with_ghost( dimy, cg, BoundaryCellInfo::build<CellField>(false,false,false) );
  const MemoryWindow mwz = get_window_with_ghost( dimz, cg, BoundaryCellInfo::build<CellField>(false,false,false) );
  CellField xcoord ( mwx, cbc, cg, NULL );
  CellField ycoord ( mwy, cbc, cg, NULL );
  CellField zcoord ( mwz, cbc, cg, NULL );
  CellField f      ( mw,  cbc, cg, NULL );
  CellField ctmpcpu( mw,  cbc, cg, NULL );
  CellField ctmpgpu( mw,  cbc, cg, NULL );

  // Particle fields (no ghost cells).
  const GhostData pg(0);
  const BoundaryCellInfo pbc = BoundaryCellInfo::build<SpatialOps::Particle::ParticleField>();
  const MemoryWindow pmw( IntVec(np,1,1) );
  ParticleField pxCoord( pmw, pbc, pg, NULL );
  ParticleField pyCoord( pmw, pbc, pg, NULL );
  ParticleField pzCoord( pmw, pbc, pg, NULL );
  ParticleField pSize  ( pmw, pbc, pg, NULL );
  ParticleField pfield ( pmw, pbc, pg, NULL );
  ParticleField ptmpcpu( pmw, pbc, pg, NULL );
  ParticleField ptmpgpu( pmw, pbc, pg, NULL );

  grid.set_coord<SpatialOps::XDIR>( xcoord );  // set the X coordinates
  grid.set_coord<SpatialOps::YDIR>( ycoord );  // set the Y coordinates
  grid.set_coord<SpatialOps::ZDIR>( zcoord );  // set the Z coordinates

  f      <<= 10;    // set the field values
  pfield <<= 1;
  pSize  <<= 0.01;

  //
  // build the operators
  //
  typedef SpatialOps::Particle::CellToParticle<CellField> C2P;
  C2P c2p( xcoord[1]-xcoord[0], xcoord[cg.get_minus(0)],
           ycoord[1]-ycoord[0], ycoord[cg.get_minus(0)],
           zcoord[1]-zcoord[0], zcoord[cg.get_minus(0)],
           interMethod );
  typedef SpatialOps::Particle::ParticleToCell<CellField> P2C;
  P2C p2c( xcoord[1]-xcoord[0], xcoord[cg.get_minus(0)],
           ycoord[1]-ycoord[0], ycoord[cg.get_minus(0)],
           zcoord[1]-zcoord[0], zcoord[cg.get_minus(0)],
           interMethod );

  // Scatter the particles randomly through the domain. Fixed seed so the
  // test is reproducible run-to-run.
  ParticleField::iterator ipx = pxCoord.begin();
  ParticleField::iterator ipy = pyCoord.begin();
  ParticleField::iterator ipz = pzCoord.begin();
  srand(10);
  for( ; ipx != pxCoord.end(); ++ipx, ++ipy, ++ipz ){
    *ipx = (double)(rand() % (dim[0]*100))/100;
    *ipy = (double)(rand() % (dim[1]*100))/100;
    *ipz = (double)(rand() % (dim[2]*100))/100;
  }

  c2p.set_coordinate_information( &pxCoord, &pyCoord, &pzCoord, &pSize );
  p2c.set_coordinate_information( &pxCoord, &pyCoord, &pzCoord, &pSize );

  // CPU reference timings.
  Timer timer;
  timer.reset();
  p2c.apply_to_field( pfield, ctmpcpu );
  const double p2ctime_cpu = timer.stop();
  timer.reset();
  c2p.apply_to_field( f, ptmpcpu );
  const double c2ptime_cpu = timer.stop();

  // GPU timings: activate every operand field on the device first so the
  // operators run their CUDA code paths.
  pxCoord.set_device_as_active(GPU_INDEX);
  pyCoord.set_device_as_active(GPU_INDEX);
  pzCoord.set_device_as_active(GPU_INDEX);
  pSize.set_device_as_active  (GPU_INDEX);
  f.set_device_as_active      (GPU_INDEX);
  pfield.set_device_as_active (GPU_INDEX);
  ptmpgpu.set_device_as_active(GPU_INDEX);
  ctmpgpu.set_device_as_active(GPU_INDEX);

  timer.reset();
  p2c.apply_to_field( pfield, ctmpgpu );
  const double p2ctime_gpu = timer.stop();
  timer.reset();
  c2p.apply_to_field( f, ptmpgpu );
  const double c2ptime_gpu = timer.stop();

  std::cout << " CPU/GPU time for ncell: " << ncell << "^3 \n"
            << " P2C : " << p2ctime_cpu/p2ctime_gpu << endl
            << " C2P : " << c2ptime_cpu/c2ptime_gpu << endl;

  // Sanity check: bring the GPU results back to the host and compare
  // element-by-element against the CPU results.
  ptmpgpu.set_device_as_active(CPU_INDEX);
  ctmpgpu.set_device_as_active(CPU_INDEX);
  bool sanitycheck = true;

  ParticleField::iterator ipcpu = ptmpcpu.begin();
  ParticleField::iterator ipgpu = ptmpgpu.begin();
  for( ; ipcpu != ptmpcpu.end(); ++ipcpu, ++ipgpu ){
    // std::fabs, not unqualified abs(): the integer abs overload would
    // truncate sub-unit differences to zero and mask real GPU errors.
    if( std::fabs(*ipcpu - *ipgpu) > 1e-8 ){
      cout << "GPU ERROR ON PARTICLE FIELD!" << endl
           << "CPU : " << *ipcpu << ", GPU : " << *ipgpu << "\n";
      sanitycheck = false;
      break;
    }
  }

  CellField::iterator iccpu = ctmpcpu.begin();
  CellField::iterator icgpu = ctmpgpu.begin();
  for( ; iccpu != ctmpcpu.end(); ++iccpu, ++icgpu ){
    if( std::fabs(*iccpu - *icgpu) > 1e-8 ){
      cout << "GPU ERROR ON CELL FIELD!" << endl
           << "CPU : " << *iccpu << ", GPU : " << *icgpu << "\n";
      sanitycheck = false;
      break;
    }
  }
  return sanitycheck;
}
#endif
int main()
{
const IntVec dim(10,1,1);
......@@ -131,7 +279,13 @@ int main()
status( ctmpNI[ishift+7] == 0, "p2c [7] - Not Interpolated" );
status( ctmpNI[ishift+8] == 0, "p2c [8] - Not Interpolated" );
status( ctmpNI[ishift+9] == 0, "p2c [9] - Not Interpolated" );
# ifdef ENABLE_CUDA
status( gpu_check(16, SpatialOps::Particle::NO_INTERPOLATE), "GPU test: ncell= 16^3, Approximate method" );
status( gpu_check(32, SpatialOps::Particle::NO_INTERPOLATE), "GPU test: ncell= 32^3, Approximate method" );
status( gpu_check(16, SpatialOps::Particle::INTERPOLATE), "GPU test: ncell= 16^3, Interpolation" );
status( gpu_check(32, SpatialOps::Particle::INTERPOLATE), "GPU test: ncell= 32^3, Interpolation" );
# endif
if( status.ok() ) return 0;
return -1;
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment