Browse Source

fix bugs in CUDA codes

master
Dtouch 2 years ago
parent
commit
5b2ab8fc5c
  1. 8
      .idea/.gitignore
  2. 1
      .idea/.name
  3. 2
      .idea/ISDF.iml
  4. 6
      .idea/misc.xml
  5. 8
      .idea/modules.xml
  6. 6
      .idea/vcs.xml
  7. 8
      sdf_generate/CMakeLists.txt
  8. 4
      sdf_generate/include/cudaEigenTest.cuh
  9. 10
      sdf_generate/include/rod_generate.cuh
  10. 17
      sdf_generate/main.cpp
  11. 2
      sdf_generate/src/bvh.cpp
  12. 66
      sdf_generate/src/cudaEigenTest.cu
  13. 6
      sdf_generate/src/rod.cpp
  14. 136
      sdf_generate/src/rod_generate.cu

8
.idea/.gitignore

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

1
.idea/.name

@ -0,0 +1 @@
renderSDF

2
.idea/ISDF.iml

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CMake" type="CPP_MODULE" version="4" />

6
.idea/misc.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/sdf_generate">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>

8
.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ISDF.iml" filepath="$PROJECT_DIR$/.idea/ISDF.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

8
sdf_generate/CMakeLists.txt

@ -1,4 +1,9 @@
cmake_minimum_required(VERSION 3.16) cmake_minimum_required(VERSION 3.16)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 86)
endif()
project(renderSDF CXX CUDA) project(renderSDF CXX CUDA)
set(CMAKE_CUDA_STANDARD 14) set(CMAKE_CUDA_STANDARD 14)
@ -12,7 +17,8 @@ include_directories(include)
AUX_SOURCE_DIRECTORY(src DIR_SRCS) AUX_SOURCE_DIRECTORY(src DIR_SRCS)
FILE(GLOB_RECURSE DIR_INCLUDE include/*.h include/*.hpp include/*.cuh) FILE(GLOB_RECURSE DIR_INCLUDE include/*.h include/*.hpp include/*.cuh)
add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp) ADD_LIBRARY(sdfGenerate SHARED ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
#add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
target_link_libraries(sdfGenerate Eigen3::Eigen) target_link_libraries(sdfGenerate Eigen3::Eigen)

4
sdf_generate/include/cudaEigenTest.cuh

@ -8,11 +8,11 @@
#include "Eigen/Eigen" #include "Eigen/Eigen"
#include "Eigen/Dense" #include "Eigen/Dense"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "glm/glm.hpp" //#include "glm/glm.hpp"
__global__ void eigenKernel(Eigen::Matrix<double, 4, 3> factor, Eigen::Vector3d testVector); __global__ void eigenKernel(Eigen::Matrix<double, 4, 3> factor, Eigen::Vector3d testVector);
__global__ void glmKernel(glm::vec3 testVector); //__global__ void glmKernel(glm::vec3 testVector);
__host__ void testHost(); __host__ void testHost();

10
sdf_generate/include/rod_generate.cuh

@ -10,13 +10,19 @@
#include "Eigen/Eigen" #include "Eigen/Eigen"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
#ifdef _MSC_VER
#define DLL_EXPORT __declspec( dllexport )
#else
#define DLL_EXPORT
#endif
__global__ __global__
void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent* extent, const Eigen::Vector3f *sampleMin, void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent* extent, const Eigen::Vector3f *sampleMin,
const Eigen::Vector3f *sampleMax, int radius); const Eigen::Vector3f *sampleMax, int radius);
__host__ void extern "C" DLL_EXPORT __host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin, h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
const Eigen::Vector3f &sampleMax, int radius); const Eigen::Vector3f &sampleMax, float radius);
class rod_generate { class rod_generate {

17
sdf_generate/main.cpp

@ -5,11 +5,12 @@
#include "iostream" #include "iostream"
#include "cudaEigenTest.cuh" #include "cudaEigenTest.cuh"
#include "bvh.h" #include "bvh.h"
#include "rod_generate.cuh"
int main() { float *test() {
// a case with 3*3*3 points (2*2*2 resolution) // a case with 3*3*3 points (2*2*2 resolution)
Eigen::Matrix<float, Eigen::Dynamic, 3>rod_points; Eigen::Matrix<float, Eigen::Dynamic, 3> rod_points;
rod_points.resize(27, 3);
rod_points << 0, 0, 0, rod_points << 0, 0, 0,
1, 0, 0, 1, 0, 0,
2, 0, 0, 2, 0, 0,
@ -93,9 +94,13 @@ int main() {
8, 17, 8, 17,
17, 26; 17, 26;
RodCrystal rod(rod_points, rod_beams); RodCrystal rod(rod_points, rod_beams);
// RodBVH bvh(rod);
// bvh.build();
return h_rod_generate(rod, Eigen::Vector3i(10, 10, 10), Eigen::Vector3f(-100, -100, -100),
Eigen::Vector3f(100, 100, 100), 0.2f);
}
// Program entry point: runs the rod SDF smoke test once and exits successfully.
// The pointer returned by test() is intentionally not used here.
int main() {
    (void) test();
    return 0;
}

2
sdf_generate/src/bvh.cpp

@ -68,7 +68,7 @@ void RodBVH::quickSelect(std::vector<int> &rods, int axis) {
} }
std::swap(rods[right], rods[storeIndex]); std::swap(rods[right], rods[storeIndex]);
if (storeIndex == mid) { if (storeIndex == mid) {
return storeIndex; return;
} else if (storeIndex > mid) { } else if (storeIndex > mid) {
right = storeIndex - 1; right = storeIndex - 1;
} else { } else {

66
sdf_generate/src/cudaEigenTest.cu

@ -45,23 +45,23 @@ __global__ void eigenKernel(Eigen::Matrix<double, 2, 2> factor, Eigen::Matrix<do
// } // }
} }
__global__ void glmKernel(glm::vec3 testVector) { //__global__ void glmKernel(glm::vec3 testVector) {
if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) { // if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z); // printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z);
glm::mat<2, 2, float> a = {1, 2, 3, 4}; // glm::mat<2, 2, float> a = {1, 2, 3, 4};
glm::mat<2, 2, float> b = {2, 3, 4, 5}; // glm::mat<2, 2, float> b = {2, 3, 4, 5};
glm::mat<2, 2, float> c = a * b; // glm::mat<2, 2, float> c = a * b;
glm::vec2 d = {3, 4}; // glm::vec2 d = {3, 4};
glm::vec2 e = c * d; // glm::vec2 e = c * d;
glm::vec3 factor = {1, 2, 3}; // glm::vec3 factor = {1, 2, 3};
glm::vec3 result = factor * testVector; // glm::vec3 result = factor * testVector;
printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]); // printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]);
printf("e: %f, %f\n", e[0], e[1]); // printf("e: %f, %f\n", e[0], e[1]);
printf("result: %f, %f, %f\n", result[0], result[1], result[2]); // printf("result: %f, %f, %f\n", result[0], result[1], result[2]);
} // }
//
//
} //}
__global__ void testKernel(float x) { __global__ void testKernel(float x) {
printf("testVector: %f, %f, %f\n", x, x, x); printf("testVector: %f, %f, %f\n", x, x, x);
@ -76,8 +76,8 @@ __host__ void testHost() {
eigenKernel <<< 1, 32>>>(factor, testVector); eigenKernel <<< 1, 32>>>(factor, testVector);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
glm::vec3 testVectorGlm(4, 5, 6); // glm::vec3 testVectorGlm(4, 5, 6);
glmKernel<<< 1, 32 >>>(testVectorGlm); // glmKernel<<< 1, 32 >>>(testVectorGlm);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
// //
// testKernel<<< 1, 32 >>>(3); // testKernel<<< 1, 32 >>>(3);
@ -92,19 +92,19 @@ __host__ void testHost() {
// printf("result in CPU: %f, %f\n", result(2, 0), result(2, 1)); // printf("result in CPU: %f, %f\n", result(2, 0), result(2, 1));
// printf("result in CPU: %f, %f\n", result(3, 0), result(3, 1)); // printf("result in CPU: %f, %f\n", result(3, 0), result(3, 1));
glm::mat<4, 3, float> factorGlm{}; // glm::mat<4, 3, float> factorGlm{};
factorGlm[0][0] = 1; // factorGlm[0][0] = 1;
factorGlm[0][1] = 2; // factorGlm[0][1] = 2;
factorGlm[0][2] = 3; // factorGlm[0][2] = 3;
factorGlm[1][0] = 4; // factorGlm[1][0] = 4;
factorGlm[1][1] = 5; // factorGlm[1][1] = 5;
factorGlm[1][2] = 6; // factorGlm[1][2] = 6;
factorGlm[2][0] = 7; // factorGlm[2][0] = 7;
factorGlm[2][1] = 8; // factorGlm[2][1] = 8;
factorGlm[2][2] = 9; // factorGlm[2][2] = 9;
factorGlm[3][0] = 10; // factorGlm[3][0] = 10;
factorGlm[3][1] = 11; // factorGlm[3][1] = 11;
factorGlm[3][2] = 12; // factorGlm[3][2] = 12;
glm::vec4 resultGlm = testVectorGlm * factorGlm; // glm::vec4 resultGlm = testVectorGlm * factorGlm;
} }

6
sdf_generate/src/rod.cpp

@ -9,7 +9,7 @@
// Builds a rod crystal from its point coordinates (one row of x, y, z per point) and its beams
// (one row of two point indices per beam). Both matrices are taken by value and moved into the members.
RodCrystal::RodCrystal(Eigen::Matrix<float, Eigen::Dynamic, 3> _rod_points,
                       Eigen::Matrix<int, Eigen::Dynamic, 2> _rod_beams)
        : rod_points(std::move(_rod_points)),
          rod_beams(std::move(_rod_beams)) {
    // One midpoint row per beam. Only the storage is sized here: the midpoint computation below is
    // currently disabled, so the entries of rod_mid are not assigned by this constructor.
    const auto beamRows = rod_beams.rows();
    rod_mid.resize(beamRows, 3);
//    for (int i = 0; i < rod_beams.rows(); ++i) {
//        rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2;
//    }
}

136
sdf_generate/src/rod_generate.cu

@ -2,46 +2,91 @@
// Created by dtouch on 23-5-23. // Created by dtouch on 23-5-23.
// //
#include "../include/rod_generate.cuh" #include "rod_generate.cuh"
#include "device_functions.h"
#include "float.h"
/**
 * Writes, for every sample of a pitched 3-D buffer, the distance from the sample position to the
 * nearest rod beam (treated as a line segment) minus `radius`.
 *
 * Launch layout: 3-D grid of 3-D blocks; thread (ix, iy, iz) handles exactly one sample. Threads
 * whose index falls outside the extent return without touching memory.
 *
 * @param beamData   device pointer to the beam index matrix (beamCnt x 2, Eigen default storage order)
 * @param beamCnt    number of beams (rows of beamData)
 * @param pointData  device pointer to the point matrix (pointCnt x 3, Eigen default storage order)
 * @param pointCnt   number of points (rows of pointData)
 * @param sdf        pitched device buffer that receives the result; rows are `sdf.pitch` bytes apart
 * @param extent     device pointer to the buffer extent; `extent->width / floatSize` is used as the
 *                   number of samples along x (width is presumably in bytes — confirm against the
 *                   host-side allocation), height/depth are used directly as sample counts
 * @param floatSize  size in bytes of one stored element
 * @param sampleMin  device pointer to the position of sample (0, 0, 0)
 * @param sampleMax  device pointer to the upper corner of the sampled box
 * @param radius     rod radius subtracted from the distance; taken as float so that fractional
 *                   radii are not truncated to zero
 */
__global__
void
g_rod_generate(int *beamData, int beamCnt, float *pointData, int pointCnt, cudaPitchedPtr sdf, const cudaExtent *extent,
               size_t floatSize, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, float radius) {
    // View the raw device arrays as Eigen matrices without copying.
    Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, 2>> rod_beams(beamData, beamCnt, 2);
    Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, 3>> rod_points(pointData, pointCnt, 3);

    // Global 3-D sample index of this thread.
    auto ix = blockIdx.x * blockDim.x + threadIdx.x;
    auto iy = blockIdx.y * blockDim.y + threadIdx.y;
    auto iz = blockIdx.z * blockDim.z + threadIdx.z;

    // Number of samples along x; the launch may cover more threads than samples.
    const size_t sampleCntX = extent->width / floatSize;
    if (ix >= sampleCntX || iy >= extent->height || iz >= extent->depth) {
        return;
    }

    // Position of this sample inside the [sampleMin, sampleMax) box.
    const float x = sampleMin->x() +
                    static_cast<float>(ix) * (sampleMax->x() - sampleMin->x()) / static_cast<float>(sampleCntX);
    const float y = sampleMin->y() +
                    static_cast<float>(iy) * (sampleMax->y() - sampleMin->y()) / static_cast<float>(extent->height);
    const float z = sampleMin->z() +
                    static_cast<float>(iz) * (sampleMax->z() - sampleMin->z()) / static_cast<float>(extent->depth);
    const Eigen::Vector3f p(x, y, z);

    // Locate the start of row (iy, iz) in the pitched buffer: slices are pitch * height bytes apart.
    char *sdfPtr = (char *) sdf.ptr;
    const size_t pitch = sdf.pitch;
    const size_t slicePitch = pitch * extent->height;
    char *slice = sdfPtr + iz * slicePitch;
    auto *row = (float *) (slice + iy * pitch);

    // Accumulate the minimum in a local variable and store it once at the end, instead of
    // re-reading and re-writing row[ix] for every beam.
    float nearest = FLT_MAX;
    for (int i = 0; i < beamCnt; ++i) {
        // Materialize the vectors: `auto` on Eigen expressions would keep lazy expression
        // templates that are re-evaluated on every use.
        const Eigen::Vector3f a(rod_points.row(rod_beams(i, 0)));
        const Eigen::Vector3f b(rod_points.row(rod_beams(i, 1)));
        const Eigen::Vector3f ab = b - a;
        const Eigen::Vector3f ap = p - a;
        const Eigen::Vector3f bp = p - b;

        // Component-wise dot products. The z term of ab.bp must be a product (it was a sum).
        const float abDotBp = ab.x() * bp.x() + ab.y() * bp.y() + ab.z() * bp.z();
        const float abDotAp = ab.x() * ap.x() + ab.y() * ap.y() + ab.z() * ap.z();

        if (abDotBp < 0 && abDotAp > 0) {
            // The projection of p falls strictly between a and b: use the perpendicular distance.
            nearest = min(nearest, (ap.cross(bp)).norm() / ab.norm());
        } else {
            // Otherwise the closest point of the segment is one of its end points.
            nearest = min(nearest, min(ap.norm(), bp.norm()));
        }
    }
    row[ix] = nearest - radius;
}
__host__ void __host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin, h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
const Eigen::Vector3f &sampleMax, int radius) { const Eigen::Vector3f &sampleMax, float radius) {
RodCrystal *d_rodCrystal; int *d_beamData;
cudaMalloc(&d_rodCrystal, sizeof(RodCrystal)); size_t beamBytes = rodCrystal.rod_beams.rows() * rodCrystal.rod_beams.cols() * sizeof(int);
cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice); cudaMalloc(&d_beamData, beamBytes);
cudaMemcpy(d_beamData, rodCrystal.rod_beams.data(), beamBytes, cudaMemcpyHostToDevice);
float *d_pointData;
size_t pointBytes = rodCrystal.rod_points.rows() * rodCrystal.rod_points.cols() * sizeof(float);
cudaMalloc(&d_pointData, pointBytes);
cudaMemcpy(d_pointData, rodCrystal.rod_points.data(), pointBytes, cudaMemcpyHostToDevice);
// RodCrystal *d_rodCrystal;
// cudaMalloc(&d_rodCrystal, sizeof(rodCrystal));
// cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(rodCrystal), cudaMemcpyHostToDevice);
// printf("size of rodCrystal: %lu; size of class RodCrystal: %lu\n", sizeof(rodCrystal), sizeof(RodCrystal));
// printf("size of rodCrystal.rod_points: %lu\n", sizeof(rodCrystal.rod_points));
// printf("size of rodCrystal.rod_beams: %lu\n", sizeof(rodCrystal.rod_beams));
// printf("size of rodCrystal.rod_points.row(0): %lu\n", sizeof(rodCrystal.rod_points.row(0)));
int sampleCntAll = sampleCnt.x() * sampleCnt.y() * sampleCnt.z();
float *h_sdf; float *h_sdf;
h_sdf = (float *) malloc(sampleCnt.x() * sampleCnt.y() * sampleCnt.z() * sizeof(float));
for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) { for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
h_sdf[i] = i; h_sdf[i] = i;
} }
@ -58,24 +103,33 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
cudaMemcpy3D(&copyParams); cudaMemcpy3D(&copyParams);
Eigen::Vector3f *d_sampleMin; Eigen::Vector3f *d_sampleMin;
cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f)); cudaMalloc(&d_sampleMin, sizeof(sampleMin));
cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); cudaMemcpy(d_sampleMin, &sampleMin, sizeof(sampleMin), cudaMemcpyHostToDevice);
Eigen::Vector3f *d_sampleMax; Eigen::Vector3f *d_sampleMax;
cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f)); cudaMalloc(&d_sampleMax, sizeof(sampleMax));
cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); cudaMemcpy(d_sampleMax, &sampleMax, sizeof(sampleMax), cudaMemcpyHostToDevice);
cudaExtent *d_extent; cudaExtent *d_extent;
cudaMalloc(&d_extent, sizeof(Eigen::Vector3i)); cudaMalloc(&d_extent, sizeof(extent));
cudaMemcpy(d_extent, &sampleCnt, sizeof(Eigen::Vector3i), cudaMemcpyHostToDevice); cudaMemcpy(d_extent, &extent, sizeof(extent), cudaMemcpyHostToDevice);
dim3 block(64, 64, 64); dim3 grid(16, 16, 16);
dim3 grid((sampleCnt.x() + block.x - 1) / block.x, dim3 block((sampleCnt.x() + grid.x - 1) / grid.x,
(sampleCnt.y() + block.y - 1) / block.y, (sampleCnt.y() + grid.y - 1) / grid.y,
(sampleCnt.z() + block.z - 1) / block.z); (sampleCnt.z() + grid.z - 1) / grid.z);
g_rod_generate<<<grid, block>>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius); g_rod_generate<<<grid, block>>>(d_beamData, rodCrystal.rod_beams.rows(), d_pointData, rodCrystal.rod_points.rows(),
d_sdf, d_extent, sizeof(float), d_sampleMin, d_sampleMax, radius);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
// for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
// h_sdf[i] = -i;
// }
auto tmpBeam = rodCrystal.rod_beams(0, 1);
printf("tmpBeam: %d\n", tmpBeam);
printf("copy back to host\n"); printf("copy back to host\n");
copyParams = {nullptr}; copyParams = {nullptr};
copyParams.srcPtr = d_sdf; copyParams.srcPtr = d_sdf;
@ -85,9 +139,29 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
copyParams.kind = cudaMemcpyDeviceToHost; copyParams.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D(&copyParams); cudaMemcpy3D(&copyParams);
cudaFree(d_rodCrystal); // cudaFree(d_rodCrystal);
cudaFree(d_sdf.ptr); cudaFree(d_sdf.ptr);
cudaFree(d_sampleMin); cudaFree(d_sampleMin);
cudaFree(d_sampleMax); cudaFree(d_sampleMax);
cudaFree(d_extent); cudaFree(d_extent);
cudaFree(d_beamData);
cudaFree(d_pointData);
printf("[");
for (int i = 0; i < sampleCnt.x(); i++) {
printf("[");
for (int j = 0; j < sampleCnt.y(); j++) {
printf("[");
for (int k = 0; k < sampleCnt.z(); k++) {
printf("%f", h_sdf[i * sampleCnt.y() * sampleCnt.z() + j * sampleCnt.z() + k]);
if (k != sampleCnt.z() - 1)
printf(",");
}
printf("]");
if (j != sampleCnt.y() - 1)
printf(",");
}
printf("]");
}
printf("]");
free(h_sdf);
} }

Loading…
Cancel
Save