diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..ca92f41 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +renderSDF \ No newline at end of file diff --git a/.idea/ISDF.iml b/.idea/ISDF.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/ISDF.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..4b75e74 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..758005f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/sdf_generate/CMakeLists.txt b/sdf_generate/CMakeLists.txt index 50b3dea..4b2b38d 100644 --- a/sdf_generate/CMakeLists.txt +++ b/sdf_generate/CMakeLists.txt @@ -1,4 +1,9 @@ cmake_minimum_required(VERSION 3.16) + +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 86) +endif() + project(renderSDF CXX CUDA) set(CMAKE_CUDA_STANDARD 14) @@ -12,7 +17,8 @@ include_directories(include) AUX_SOURCE_DIRECTORY(src DIR_SRCS) FILE(GLOB_RECURSE DIR_INCLUDE include/*.h include/*.hpp include/*.cuh) -add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp) +ADD_LIBRARY(sdfGenerate SHARED ${DIR_SRCS} ${DIR_INCLUDE} main.cpp) +#add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp) target_link_libraries(sdfGenerate Eigen3::Eigen) diff --git a/sdf_generate/include/cudaEigenTest.cuh b/sdf_generate/include/cudaEigenTest.cuh index 4ef98f6..97cf653 100644 --- a/sdf_generate/include/cudaEigenTest.cuh +++ b/sdf_generate/include/cudaEigenTest.cuh @@ -8,11 +8,11 @@ #include "Eigen/Eigen" #include "Eigen/Dense" #include "cuda_runtime.h" -#include "glm/glm.hpp" +//#include "glm/glm.hpp" __global__ void eigenKernel(Eigen::Matrix factor, Eigen::Vector3d testVector); -__global__ void glmKernel(glm::vec3 testVector); +//__global__ void glmKernel(glm::vec3 testVector); __host__ void testHost(); diff --git a/sdf_generate/include/rod_generate.cuh b/sdf_generate/include/rod_generate.cuh index 3effb60..e5aa89f 100644 --- a/sdf_generate/include/rod_generate.cuh +++ b/sdf_generate/include/rod_generate.cuh @@ -10,13 +10,19 @@ #include "Eigen/Eigen" #include "unsupported/Eigen/CXX11/Tensor" +#ifdef _MSC_VER +#define DLL_EXPORT __declspec( dllexport ) +#else +#define DLL_EXPORT +#endif + __global__ void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent* extent, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius); -__host__ void +extern "C" DLL_EXPORT __host__ float* h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin, - const Eigen::Vector3f &sampleMax, int radius); + const Eigen::Vector3f &sampleMax, float radius); class rod_generate { diff --git a/sdf_generate/main.cpp b/sdf_generate/main.cpp index 46044c9..637a4e3 100644 --- a/sdf_generate/main.cpp +++ b/sdf_generate/main.cpp @@ -5,11 +5,12 @@ #include "iostream" #include "cudaEigenTest.cuh" #include "bvh.h" +#include "rod_generate.cuh" -int main() { - +float *test() { // a case with 3*3*3 points (2*2*2 resolution) - Eigen::Matrixrod_points; + Eigen::Matrix rod_points; + rod_points.resize(27, 3); rod_points << 0, 0, 0, 1, 0, 0, 2, 0, 0, @@ -93,9 +94,13 @@ int main() { 8, 17, 17, 26; RodCrystal rod(rod_points, rod_beams); +// RodBVH bvh(rod); +// bvh.build(); + return h_rod_generate(rod, Eigen::Vector3i(10, 10, 10), Eigen::Vector3f(-100, -100, -100), + Eigen::Vector3f(100, 100, 100), 0.2f); +} - - RodBVH bvh(rod); - bvh.build(); +int main() { + test(); return 0; } \ No newline at end of file diff --git a/sdf_generate/src/bvh.cpp b/sdf_generate/src/bvh.cpp index 389a27d..ee657c2 100644 --- a/sdf_generate/src/bvh.cpp +++ b/sdf_generate/src/bvh.cpp @@ -68,7 +68,7 @@ void RodBVH::quickSelect(std::vector &rods, int axis) { } std::swap(rods[right], rods[storeIndex]); if (storeIndex == mid) { - return storeIndex; + return; } else if (storeIndex > mid) { right = storeIndex - 1; } else { diff --git a/sdf_generate/src/cudaEigenTest.cu b/sdf_generate/src/cudaEigenTest.cu index 3d5e893..8402407 100644 --- a/sdf_generate/src/cudaEigenTest.cu +++ b/sdf_generate/src/cudaEigenTest.cu @@ -45,23 +45,23 @@ __global__ void eigenKernel(Eigen::Matrix factor, Eigen::Matrix a = {1, 2, 3, 4}; - glm::mat<2, 2, float> b = {2, 3, 4, 5}; - glm::mat<2, 2, float> c = a * b; - glm::vec2 d = {3, 4}; - glm::vec2 e = c * d; - glm::vec3 factor = {1, 2, 3}; - glm::vec3 result = factor * testVector; - printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]); - printf("e: %f, %f\n", e[0], e[1]); - printf("result: %f, %f, %f\n", result[0], result[1], result[2]); - } - - -} +//__global__ void glmKernel(glm::vec3 testVector) { +// if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) { +// printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z); +// glm::mat<2, 2, float> a = {1, 2, 3, 4}; +// glm::mat<2, 2, float> b = {2, 3, 4, 5}; +// glm::mat<2, 2, float> c = a * b; +// glm::vec2 d = {3, 4}; +// glm::vec2 e = c * d; +// glm::vec3 factor = {1, 2, 3}; +// glm::vec3 result = factor * testVector; +// printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]); +// printf("e: %f, %f\n", e[0], e[1]); +// printf("result: %f, %f, %f\n", result[0], result[1], result[2]); +// } +// +// +//} __global__ void testKernel(float x) { printf("testVector: %f, %f, %f\n", x, x, x); @@ -76,8 +76,8 @@ __host__ void testHost() { eigenKernel <<< 1, 32>>>(factor, testVector); cudaDeviceSynchronize(); - glm::vec3 testVectorGlm(4, 5, 6); - glmKernel<<< 1, 32 >>>(testVectorGlm); +// glm::vec3 testVectorGlm(4, 5, 6); +// glmKernel<<< 1, 32 >>>(testVectorGlm); cudaDeviceSynchronize(); // // testKernel<<< 1, 32 >>>(3); @@ -92,19 +92,19 @@ __host__ void testHost() { // printf("result in CPU: %f, %f\n", result(2, 0), result(2, 1)); // printf("result in CPU: %f, %f\n", result(3, 0), result(3, 1)); - glm::mat<4, 3, float> factorGlm{}; - factorGlm[0][0] = 1; - factorGlm[0][1] = 2; - factorGlm[0][2] = 3; - factorGlm[1][0] = 4; - factorGlm[1][1] = 5; - factorGlm[1][2] = 6; - factorGlm[2][0] = 7; - factorGlm[2][1] = 8; - factorGlm[2][2] = 9; - factorGlm[3][0] = 10; - factorGlm[3][1] = 11; - factorGlm[3][2] = 12; - glm::vec4 resultGlm = testVectorGlm * factorGlm; +// glm::mat<4, 3, float> factorGlm{}; +// factorGlm[0][0] = 1; +// factorGlm[0][1] = 2; +// factorGlm[0][2] = 3; +// factorGlm[1][0] = 4; +// factorGlm[1][1] = 5; +// factorGlm[1][2] = 6; +// factorGlm[2][0] = 7; +// factorGlm[2][1] = 8; +// factorGlm[2][2] = 9; +// factorGlm[3][0] = 10; +// factorGlm[3][1] = 11; +// factorGlm[3][2] = 12; +// glm::vec4 resultGlm = testVectorGlm * factorGlm; } \ No newline at end of file diff --git a/sdf_generate/src/rod.cpp b/sdf_generate/src/rod.cpp index b6a3c07..2dc353a 100644 --- a/sdf_generate/src/rod.cpp +++ b/sdf_generate/src/rod.cpp @@ -9,7 +9,7 @@ RodCrystal::RodCrystal(Eigen::Matrix _rod_points, Eigen::Matrix _rod_beams) : rod_points(std::move(_rod_points)), rod_beams(std::move(_rod_beams)) { rod_mid.resize(rod_beams.rows(), 3); - for (int i = 0; i < rod_beams.rows(); ++i) { - rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2; - } +// for (int i = 0; i < rod_beams.rows(); ++i) { +// rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2; +// } } diff --git a/sdf_generate/src/rod_generate.cu b/sdf_generate/src/rod_generate.cu index 8394e2c..537eb7d 100644 --- a/sdf_generate/src/rod_generate.cu +++ b/sdf_generate/src/rod_generate.cu @@ -2,46 +2,91 @@ // Created by dtouch on 23-5-23. // -#include "../include/rod_generate.cuh" +#include "rod_generate.cuh" +#include "device_functions.h" +#include "float.h" __global__ -void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent *extent, - const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) { +void +g_rod_generate(int *beamData, int beamCnt, float *pointData, int pointCnt, cudaPitchedPtr sdf, const cudaExtent *extent, + size_t floatSize, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) { // 3-dim grid and 3-dim block + Eigen::Map> rod_beams(beamData, beamCnt, 2); + Eigen::Map> rod_points(pointData, pointCnt, 3); auto ix = blockIdx.x * blockDim.x + threadIdx.x; auto iy = blockIdx.y * blockDim.y + threadIdx.y; auto iz = blockIdx.z * blockDim.z + threadIdx.z; +// if (ix == 0 && iy == 0 && iz == 0) { +// for (int i = 0; i < beamCnt; ++i) { +// printf("%d, %d\n", rod_beams(i, 0), rod_beams(i, 1)); +// } +// for(int i = 0; i < pointCnt; ++i) { +// printf("%f, %f, %f\n", rod_points(i, 0), rod_points(i, 1), rod_points(i, 2)); +// } +// } + if (ix >= extent->width / floatSize || iy >= extent->height || iz >= extent->depth) { + return; + } auto x = sampleMin->x() + - static_cast(ix) * (sampleMax->x() - sampleMin->x()) / static_cast(extent->width); + static_cast(ix) * (sampleMax->x() - sampleMin->x()) / static_cast(extent->width / floatSize); auto y = sampleMin->y() + static_cast(iy) * (sampleMax->y() - sampleMin->y()) / static_cast(extent->height); auto z = sampleMin->z() + static_cast(iz) * (sampleMax->z() - sampleMin->z()) / static_cast(extent->depth); +// printf("%d, %d, %d\n", ix, iy, iz); // 获取sdf中下标为(ix,iy)的元素的行首指针 - auto sdfPtr = reinterpret_cast((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height); +// auto sdfPtr = reinterpret_cast((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height); + char *sdfPtr = (char *) sdf.ptr; + size_t pitch = sdf.pitch; + size_t slicePitch = pitch * extent->height; auto p = Eigen::Vector3f(x, y, z); - for (int i = 0; i < rodCrystal->rod_beams.rows(); ++i) { - auto a = Eigen::Matrix(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 0))); - auto b = Eigen::Matrix(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 1))); + char *slice = sdfPtr + iz * slicePitch; + auto *row = (float *) (slice + iy * pitch); + // row[ix] is initialized as the max float in GPU + row[ix] = FLT_MAX; +// auto aTmp = Eigen::Vector3f(rod_points.row(rod_beams(2, 1))); +// printf("aTmp: (%f, %f, %f)\n", aTmp.x(), aTmp.y(), aTmp.z()); + for (int i = 0; i < rod_beams.rows(); ++i) { + auto a = Eigen::Vector3f(rod_points.row(rod_beams(i, 0))); + auto b = Eigen::Vector3f(rod_points.row(rod_beams(i, 1))); auto ab = b - a; auto ap = p - a; auto bp = p - b; - if (ab.dot(-bp) > 0 && ab.dot(ap) > 0) { - sdfPtr[ix] = (ap.cross(bp)).norm() / ab.norm(); + if (ab.x() * bp.x() + ab.y() * bp.y() + ab.z() + bp.z() < 0 && + ab.x() * ap.x() + ab.y() * ap.y() + ab.z() * ap.z() > 0) { + row[ix] = min(row[ix], (ap.cross(bp)).norm() / ab.norm()); } else { - sdfPtr[iz] = std::min(ap.norm(), bp.norm()); + row[ix] = min(row[ix], min(ap.norm(), bp.norm())); } } + row[ix] -= radius; } -__host__ void +__host__ float* h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin, - const Eigen::Vector3f &sampleMax, int radius) { - RodCrystal *d_rodCrystal; - cudaMalloc(&d_rodCrystal, sizeof(RodCrystal)); - cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice); + const Eigen::Vector3f &sampleMax, float radius) { + int *d_beamData; + size_t beamBytes = rodCrystal.rod_beams.rows() * rodCrystal.rod_beams.cols() * sizeof(int); + cudaMalloc(&d_beamData, beamBytes); + cudaMemcpy(d_beamData, rodCrystal.rod_beams.data(), beamBytes, cudaMemcpyHostToDevice); + + float *d_pointData; + size_t pointBytes = rodCrystal.rod_points.rows() * rodCrystal.rod_points.cols() * sizeof(float); + cudaMalloc(&d_pointData, pointBytes); + cudaMemcpy(d_pointData, rodCrystal.rod_points.data(), pointBytes, cudaMemcpyHostToDevice); + +// RodCrystal *d_rodCrystal; +// cudaMalloc(&d_rodCrystal, sizeof(rodCrystal)); +// cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(rodCrystal), cudaMemcpyHostToDevice); +// printf("size of rodCrystal: %lu; size of class RodCrystal: %lu\n", sizeof(rodCrystal), sizeof(RodCrystal)); +// printf("size of rodCrystal.rod_points: %lu\n", sizeof(rodCrystal.rod_points)); +// printf("size of rodCrystal.rod_beams: %lu\n", sizeof(rodCrystal.rod_beams)); +// printf("size of rodCrystal.rod_points.row(0): %lu\n", sizeof(rodCrystal.rod_points.row(0))); + + int sampleCntAll = sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); float *h_sdf; + h_sdf = (float *) malloc(sampleCnt.x() * sampleCnt.y() * sampleCnt.z() * sizeof(float)); for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) { h_sdf[i] = i; } @@ -58,24 +103,33 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c cudaMemcpy3D(©Params); Eigen::Vector3f *d_sampleMin; - cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f)); - cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); + cudaMalloc(&d_sampleMin, sizeof(sampleMin)); + cudaMemcpy(d_sampleMin, &sampleMin, sizeof(sampleMin), cudaMemcpyHostToDevice); Eigen::Vector3f *d_sampleMax; - cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f)); - cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); - - cudaExtent *d_extent; - cudaMalloc(&d_extent, sizeof(Eigen::Vector3i)); - cudaMemcpy(d_extent, &sampleCnt, sizeof(Eigen::Vector3i), cudaMemcpyHostToDevice); - - dim3 block(64, 64, 64); - dim3 grid((sampleCnt.x() + block.x - 1) / block.x, - (sampleCnt.y() + block.y - 1) / block.y, - (sampleCnt.z() + block.z - 1) / block.z); - g_rod_generate<<>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius); + cudaMalloc(&d_sampleMax, sizeof(sampleMax)); + cudaMemcpy(d_sampleMax, &sampleMax, sizeof(sampleMax), cudaMemcpyHostToDevice); + + cudaExtent *d_extent; + cudaMalloc(&d_extent, sizeof(extent)); + cudaMemcpy(d_extent, &extent, sizeof(extent), cudaMemcpyHostToDevice); + + dim3 grid(16, 16, 16); + dim3 block((sampleCnt.x() + grid.x - 1) / grid.x, + (sampleCnt.y() + grid.y - 1) / grid.y, + (sampleCnt.z() + grid.z - 1) / grid.z); + g_rod_generate<<>>(d_beamData, rodCrystal.rod_beams.rows(), d_pointData, rodCrystal.rod_points.rows(), + d_sdf, d_extent, sizeof(float), d_sampleMin, d_sampleMax, radius); cudaDeviceSynchronize(); +// for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) { +// h_sdf[i] = -i; +// } + + auto tmpBeam = rodCrystal.rod_beams(0, 1); + printf("tmpBeam: %d\n", tmpBeam); + + printf("copy back to host\n"); copyParams = {nullptr}; copyParams.srcPtr = d_sdf; @@ -85,9 +139,29 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c copyParams.kind = cudaMemcpyDeviceToHost; cudaMemcpy3D(©Params); - cudaFree(d_rodCrystal); +// cudaFree(d_rodCrystal); cudaFree(d_sdf.ptr); cudaFree(d_sampleMin); cudaFree(d_sampleMax); cudaFree(d_extent); + cudaFree(d_beamData); + cudaFree(d_pointData); + printf("["); + for (int i = 0; i < sampleCnt.x(); i++) { + printf("["); + for (int j = 0; j < sampleCnt.y(); j++) { + printf("["); + for (int k = 0; k < sampleCnt.z(); k++) { + printf("%f", h_sdf[i * sampleCnt.y() * sampleCnt.z() + j * sampleCnt.z() + k]); + if (k != sampleCnt.z() - 1) + printf(","); + } + printf("]"); + if (j != sampleCnt.y() - 1) + printf(","); + } + printf("]"); + } + printf("]"); + free(h_sdf); }