Browse Source

Fix bugs in CUDA code

master
Dtouch 2 years ago
parent
commit
5b2ab8fc5c
  1. 8
      .idea/.gitignore
  2. 1
      .idea/.name
  3. 2
      .idea/ISDF.iml
  4. 6
      .idea/misc.xml
  5. 8
      .idea/modules.xml
  6. 6
      .idea/vcs.xml
  7. 8
      sdf_generate/CMakeLists.txt
  8. 4
      sdf_generate/include/cudaEigenTest.cuh
  9. 10
      sdf_generate/include/rod_generate.cuh
  10. 17
      sdf_generate/main.cpp
  11. 2
      sdf_generate/src/bvh.cpp
  12. 66
      sdf_generate/src/cudaEigenTest.cu
  13. 6
      sdf_generate/src/rod.cpp
  14. 136
      sdf_generate/src/rod_generate.cu

8
.idea/.gitignore

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

1
.idea/.name

@ -0,0 +1 @@
renderSDF

2
.idea/ISDF.iml

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CMake" type="CPP_MODULE" version="4" />

6
.idea/misc.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$/sdf_generate">
<contentRoot DIR="$PROJECT_DIR$" />
</component>
</project>

8
.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ISDF.iml" filepath="$PROJECT_DIR$/.idea/ISDF.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

8
sdf_generate/CMakeLists.txt

@ -1,4 +1,9 @@
cmake_minimum_required(VERSION 3.16)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 86)
endif()
project(renderSDF CXX CUDA)
set(CMAKE_CUDA_STANDARD 14)
@ -12,7 +17,8 @@ include_directories(include)
AUX_SOURCE_DIRECTORY(src DIR_SRCS)
FILE(GLOB_RECURSE DIR_INCLUDE include/*.h include/*.hpp include/*.cuh)
add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
ADD_LIBRARY(sdfGenerate SHARED ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
#add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
target_link_libraries(sdfGenerate Eigen3::Eigen)

4
sdf_generate/include/cudaEigenTest.cuh

@ -8,11 +8,11 @@
#include "Eigen/Eigen"
#include "Eigen/Dense"
#include "cuda_runtime.h"
#include "glm/glm.hpp"
//#include "glm/glm.hpp"
__global__ void eigenKernel(Eigen::Matrix<double, 4, 3> factor, Eigen::Vector3d testVector);
__global__ void glmKernel(glm::vec3 testVector);
//__global__ void glmKernel(glm::vec3 testVector);
__host__ void testHost();

10
sdf_generate/include/rod_generate.cuh

@ -10,13 +10,19 @@
#include "Eigen/Eigen"
#include "unsupported/Eigen/CXX11/Tensor"
#ifdef _MSC_VER
#define DLL_EXPORT __declspec( dllexport )
#else
#define DLL_EXPORT
#endif
__global__
void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent* extent, const Eigen::Vector3f *sampleMin,
const Eigen::Vector3f *sampleMax, int radius);
__host__ void
extern "C" DLL_EXPORT __host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
const Eigen::Vector3f &sampleMax, int radius);
const Eigen::Vector3f &sampleMax, float radius);
class rod_generate {

17
sdf_generate/main.cpp

@ -5,11 +5,12 @@
#include "iostream"
#include "cudaEigenTest.cuh"
#include "bvh.h"
#include "rod_generate.cuh"
int main() {
float *test() {
// a case with 3*3*3 points (2*2*2 resolution)
Eigen::Matrix<float, Eigen::Dynamic, 3>rod_points;
Eigen::Matrix<float, Eigen::Dynamic, 3> rod_points;
rod_points.resize(27, 3);
rod_points << 0, 0, 0,
1, 0, 0,
2, 0, 0,
@ -93,9 +94,13 @@ int main() {
8, 17,
17, 26;
RodCrystal rod(rod_points, rod_beams);
// RodBVH bvh(rod);
// bvh.build();
return h_rod_generate(rod, Eigen::Vector3i(10, 10, 10), Eigen::Vector3f(-100, -100, -100),
Eigen::Vector3f(100, 100, 100), 0.2f);
}
RodBVH bvh(rod);
bvh.build();
// Program entry point: runs the sample case in test() once and exits with status 0.
int main() {
// The pointer returned by test() is discarded here.
test();
return 0;
}

2
sdf_generate/src/bvh.cpp

@ -68,7 +68,7 @@ void RodBVH::quickSelect(std::vector<int> &rods, int axis) {
}
std::swap(rods[right], rods[storeIndex]);
if (storeIndex == mid) {
return storeIndex;
return;
} else if (storeIndex > mid) {
right = storeIndex - 1;
} else {

66
sdf_generate/src/cudaEigenTest.cu

@ -45,23 +45,23 @@ __global__ void eigenKernel(Eigen::Matrix<double, 2, 2> factor, Eigen::Matrix<do
// }
}
// Debug kernel that exercises a few glm operations in device code and prints the results.
// testVector: passed by value; its components are printed and it is multiplied with a fixed vec3.
__global__ void glmKernel(glm::vec3 testVector) {
// Only the thread whose threadIdx is (0, 0, 0) prints; blockIdx is not checked,
// so one thread in every launched block passes this test.
if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z);
// Product of two 2x2 float matrices.
glm::mat<2, 2, float> a = {1, 2, 3, 4};
glm::mat<2, 2, float> b = {2, 3, 4, 5};
glm::mat<2, 2, float> c = a * b;
// Product of the 2x2 result with a 2-component vector.
glm::vec2 d = {3, 4};
glm::vec2 e = c * d;
// Product of a fixed vec3 with the kernel argument.
glm::vec3 factor = {1, 2, 3};
glm::vec3 result = factor * testVector;
printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]);
printf("e: %f, %f\n", e[0], e[1]);
printf("result: %f, %f, %f\n", result[0], result[1], result[2]);
}
}
//__global__ void glmKernel(glm::vec3 testVector) {
// if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
// printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z);
// glm::mat<2, 2, float> a = {1, 2, 3, 4};
// glm::mat<2, 2, float> b = {2, 3, 4, 5};
// glm::mat<2, 2, float> c = a * b;
// glm::vec2 d = {3, 4};
// glm::vec2 e = c * d;
// glm::vec3 factor = {1, 2, 3};
// glm::vec3 result = factor * testVector;
// printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]);
// printf("e: %f, %f\n", e[0], e[1]);
// printf("result: %f, %f, %f\n", result[0], result[1], result[2]);
// }
//
//
//}
__global__ void testKernel(float x) {
printf("testVector: %f, %f, %f\n", x, x, x);
@ -76,8 +76,8 @@ __host__ void testHost() {
eigenKernel <<< 1, 32>>>(factor, testVector);
cudaDeviceSynchronize();
glm::vec3 testVectorGlm(4, 5, 6);
glmKernel<<< 1, 32 >>>(testVectorGlm);
// glm::vec3 testVectorGlm(4, 5, 6);
// glmKernel<<< 1, 32 >>>(testVectorGlm);
cudaDeviceSynchronize();
//
// testKernel<<< 1, 32 >>>(3);
@ -92,19 +92,19 @@ __host__ void testHost() {
// printf("result in CPU: %f, %f\n", result(2, 0), result(2, 1));
// printf("result in CPU: %f, %f\n", result(3, 0), result(3, 1));
glm::mat<4, 3, float> factorGlm{};
factorGlm[0][0] = 1;
factorGlm[0][1] = 2;
factorGlm[0][2] = 3;
factorGlm[1][0] = 4;
factorGlm[1][1] = 5;
factorGlm[1][2] = 6;
factorGlm[2][0] = 7;
factorGlm[2][1] = 8;
factorGlm[2][2] = 9;
factorGlm[3][0] = 10;
factorGlm[3][1] = 11;
factorGlm[3][2] = 12;
glm::vec4 resultGlm = testVectorGlm * factorGlm;
// glm::mat<4, 3, float> factorGlm{};
// factorGlm[0][0] = 1;
// factorGlm[0][1] = 2;
// factorGlm[0][2] = 3;
// factorGlm[1][0] = 4;
// factorGlm[1][1] = 5;
// factorGlm[1][2] = 6;
// factorGlm[2][0] = 7;
// factorGlm[2][1] = 8;
// factorGlm[2][2] = 9;
// factorGlm[3][0] = 10;
// factorGlm[3][1] = 11;
// factorGlm[3][2] = 12;
// glm::vec4 resultGlm = testVectorGlm * factorGlm;
}

6
sdf_generate/src/rod.cpp

@ -9,7 +9,7 @@
// Builds a RodCrystal from a point table and a beam table.
// _rod_points: N x 3 float matrix, one row per point.
// _rod_beams:  M x 2 int matrix; each row holds two values that are used below as row indices into rod_points.
// Both matrices are taken by value and moved into the corresponding members.
RodCrystal::RodCrystal(Eigen::Matrix<float, Eigen::Dynamic, 3> _rod_points, Eigen::Matrix<int, Eigen::Dynamic, 2> _rod_beams) : rod_points(std::move(_rod_points)),
rod_beams(std::move(_rod_beams)) {
// rod_mid gets one 3-component row per beam.
rod_mid.resize(rod_beams.rows(), 3);
for (int i = 0; i < rod_beams.rows(); ++i) {
// Midpoint of beam i: the average of its two endpoint rows.
rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2;
}
// NOTE(review): the lines below are a commented-out duplicate of the loop above;
// confirm which copy is meant to be active. If the loop is disabled, rod_mid is
// resized but its contents are never written in this constructor.
// for (int i = 0; i < rod_beams.rows(); ++i) {
// rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2;
// }
}

136
sdf_generate/src/rod_generate.cu

@ -2,46 +2,91 @@
// Created by dtouch on 23-5-23.
//
#include "../include/rod_generate.cuh"
#include "rod_generate.cuh"
#include "device_functions.h"
#include "float.h"
/**
 * Writes, for every sample of a pitched 3-D float volume, the distance from the
 * sample position to the nearest beam (line segment) of a rod lattice, minus
 * `radius`.
 *
 * Launch layout: 3-D grid of 3-D blocks, one thread per sample. Threads whose
 * (ix, iy, iz) index falls outside the volume return without writing.
 *
 * All pointer arguments are dereferenced inside the kernel and therefore must be
 * device-accessible.
 *
 * @param beamData   beam table, viewed as a (beamCnt x 2) int matrix in Eigen's
 *                   default storage order; each row holds two row indices into
 *                   the point table.
 * @param beamCnt    number of beams (rows of beamData).
 * @param pointData  point table, viewed as a (pointCnt x 3) float matrix in
 *                   Eigen's default storage order.
 * @param pointCnt   number of points (rows of pointData).
 * @param sdf        pitched output volume; `sdf.ptr` and `sdf.pitch` are used to
 *                   locate each row.
 * @param extent     volume extent. `extent->width / floatSize` is used as the
 *                   number of samples along x (so width is presumably given in
 *                   bytes - confirm against the allocation code); height and
 *                   depth are used directly as sample counts.
 * @param floatSize  divisor applied to `extent->width`, see above.
 * @param sampleMin  position of sample (0, 0, 0).
 * @param sampleMax  upper corner of the sampled box; sample i along an axis lies
 *                   at min + i * (max - min) / count.
 * @param radius     value subtracted from the unsigned distance. Taken as float
 *                   so that fractional radii passed by the host are not
 *                   truncated to an integer.
 */
__global__
void
g_rod_generate(int *beamData, int beamCnt, float *pointData, int pointCnt, cudaPitchedPtr sdf, const cudaExtent *extent,
               size_t floatSize, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, float radius) {
    // View the raw device buffers as Eigen matrices without copying.
    Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, 2>> rod_beams(beamData, beamCnt, 2);
    Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, 3>> rod_points(pointData, pointCnt, 3);

    // 3-dim grid and 3-dim block
    auto ix = blockIdx.x * blockDim.x + threadIdx.x;
    auto iy = blockIdx.y * blockDim.y + threadIdx.y;
    auto iz = blockIdx.z * blockDim.z + threadIdx.z;

    // Number of samples along x; threads outside the volume do nothing.
    const size_t sampleCntX = extent->width / floatSize;
    if (ix >= sampleCntX || iy >= extent->height || iz >= extent->depth) {
        return;
    }

    // Position of this thread's sample inside the [sampleMin, sampleMax) box.
    auto x = sampleMin->x() +
             static_cast<float>(ix) * (sampleMax->x() - sampleMin->x()) / static_cast<float>(sampleCntX);
    auto y = sampleMin->y() +
             static_cast<float>(iy) * (sampleMax->y() - sampleMin->y()) / static_cast<float>(extent->height);
    auto z = sampleMin->z() +
             static_cast<float>(iz) * (sampleMax->z() - sampleMin->z()) / static_cast<float>(extent->depth);
    const Eigen::Vector3f p(x, y, z);

    // Get the pointer to the start of the row that holds element (ix, iy, iz):
    // slices are `pitch * height` bytes apart, rows are `pitch` bytes apart.
    char *sdfPtr = (char *) sdf.ptr;
    size_t pitch = sdf.pitch;
    size_t slicePitch = pitch * extent->height;
    char *slice = sdfPtr + iz * slicePitch;
    auto *row = (float *) (slice + iy * pitch);

    // Running minimum kept in a local variable; the result is stored once at the end.
    // Starts at FLT_MAX so that any real distance replaces it.
    float nearest = FLT_MAX;
    for (int i = 0; i < rod_beams.rows(); ++i) {
        auto a = Eigen::Vector3f(rod_points.row(rod_beams(i, 0)));
        auto b = Eigen::Vector3f(rod_points.row(rod_beams(i, 1)));
        const Eigen::Vector3f ab = b - a;
        const Eigen::Vector3f ap = p - a;
        const Eigen::Vector3f bp = p - b;

        // Dot products written out component-wise.
        const float abDotBp = ab.x() * bp.x() + ab.y() * bp.y() + ab.z() * bp.z();
        const float abDotAp = ab.x() * ap.x() + ab.y() * ap.y() + ab.z() * ap.z();

        float dist;
        if (abDotBp < 0 && abDotAp > 0) {
            // The projection of p onto the beam's line lies strictly between a and b:
            // use the perpendicular distance |ap x bp| / |ab|.
            dist = (ap.cross(bp)).norm() / ab.norm();
        } else {
            // Otherwise the closest point of the segment is one of its endpoints.
            dist = min(ap.norm(), bp.norm());
        }
        nearest = min(nearest, dist);
    }

    // Offset by the rod radius.
    row[ix] = nearest - radius;
}
__host__ void
__host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
const Eigen::Vector3f &sampleMax, int radius) {
RodCrystal *d_rodCrystal;
cudaMalloc(&d_rodCrystal, sizeof(RodCrystal));
cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice);
const Eigen::Vector3f &sampleMax, float radius) {
int *d_beamData;
size_t beamBytes = rodCrystal.rod_beams.rows() * rodCrystal.rod_beams.cols() * sizeof(int);
cudaMalloc(&d_beamData, beamBytes);
cudaMemcpy(d_beamData, rodCrystal.rod_beams.data(), beamBytes, cudaMemcpyHostToDevice);
float *d_pointData;
size_t pointBytes = rodCrystal.rod_points.rows() * rodCrystal.rod_points.cols() * sizeof(float);
cudaMalloc(&d_pointData, pointBytes);
cudaMemcpy(d_pointData, rodCrystal.rod_points.data(), pointBytes, cudaMemcpyHostToDevice);
// RodCrystal *d_rodCrystal;
// cudaMalloc(&d_rodCrystal, sizeof(rodCrystal));
// cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(rodCrystal), cudaMemcpyHostToDevice);
// printf("size of rodCrystal: %lu; size of class RodCrystal: %lu\n", sizeof(rodCrystal), sizeof(RodCrystal));
// printf("size of rodCrystal.rod_points: %lu\n", sizeof(rodCrystal.rod_points));
// printf("size of rodCrystal.rod_beams: %lu\n", sizeof(rodCrystal.rod_beams));
// printf("size of rodCrystal.rod_points.row(0): %lu\n", sizeof(rodCrystal.rod_points.row(0)));
int sampleCntAll = sampleCnt.x() * sampleCnt.y() * sampleCnt.z();
float *h_sdf;
h_sdf = (float *) malloc(sampleCnt.x() * sampleCnt.y() * sampleCnt.z() * sizeof(float));
for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
h_sdf[i] = i;
}
@ -58,24 +103,33 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
cudaMemcpy3D(&copyParams);
Eigen::Vector3f *d_sampleMin;
cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f));
cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice);
cudaMalloc(&d_sampleMin, sizeof(sampleMin));
cudaMemcpy(d_sampleMin, &sampleMin, sizeof(sampleMin), cudaMemcpyHostToDevice);
Eigen::Vector3f *d_sampleMax;
cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f));
cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice);
cudaExtent *d_extent;
cudaMalloc(&d_extent, sizeof(Eigen::Vector3i));
cudaMemcpy(d_extent, &sampleCnt, sizeof(Eigen::Vector3i), cudaMemcpyHostToDevice);
dim3 block(64, 64, 64);
dim3 grid((sampleCnt.x() + block.x - 1) / block.x,
(sampleCnt.y() + block.y - 1) / block.y,
(sampleCnt.z() + block.z - 1) / block.z);
g_rod_generate<<<grid, block>>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius);
cudaMalloc(&d_sampleMax, sizeof(sampleMax));
cudaMemcpy(d_sampleMax, &sampleMax, sizeof(sampleMax), cudaMemcpyHostToDevice);
cudaExtent *d_extent;
cudaMalloc(&d_extent, sizeof(extent));
cudaMemcpy(d_extent, &extent, sizeof(extent), cudaMemcpyHostToDevice);
dim3 grid(16, 16, 16);
dim3 block((sampleCnt.x() + grid.x - 1) / grid.x,
(sampleCnt.y() + grid.y - 1) / grid.y,
(sampleCnt.z() + grid.z - 1) / grid.z);
g_rod_generate<<<grid, block>>>(d_beamData, rodCrystal.rod_beams.rows(), d_pointData, rodCrystal.rod_points.rows(),
d_sdf, d_extent, sizeof(float), d_sampleMin, d_sampleMax, radius);
cudaDeviceSynchronize();
// for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
// h_sdf[i] = -i;
// }
auto tmpBeam = rodCrystal.rod_beams(0, 1);
printf("tmpBeam: %d\n", tmpBeam);
printf("copy back to host\n");
copyParams = {nullptr};
copyParams.srcPtr = d_sdf;
@ -85,9 +139,29 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
copyParams.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D(&copyParams);
cudaFree(d_rodCrystal);
// cudaFree(d_rodCrystal);
cudaFree(d_sdf.ptr);
cudaFree(d_sampleMin);
cudaFree(d_sampleMax);
cudaFree(d_extent);
cudaFree(d_beamData);
cudaFree(d_pointData);
printf("[");
for (int i = 0; i < sampleCnt.x(); i++) {
printf("[");
for (int j = 0; j < sampleCnt.y(); j++) {
printf("[");
for (int k = 0; k < sampleCnt.z(); k++) {
printf("%f", h_sdf[i * sampleCnt.y() * sampleCnt.z() + j * sampleCnt.z() + k]);
if (k != sampleCnt.z() - 1)
printf(",");
}
printf("]");
if (j != sampleCnt.y() - 1)
printf(",");
}
printf("]");
}
printf("]");
free(h_sdf);
}

Loading…
Cancel
Save