diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/.name b/.idea/.name
new file mode 100644
index 0000000..ca92f41
--- /dev/null
+++ b/.idea/.name
@@ -0,0 +1 @@
+renderSDF
\ No newline at end of file
diff --git a/.idea/ISDF.iml b/.idea/ISDF.iml
new file mode 100644
index 0000000..f08604b
--- /dev/null
+++ b/.idea/ISDF.iml
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..4b75e74
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..758005f
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/sdf_generate/CMakeLists.txt b/sdf_generate/CMakeLists.txt
index 50b3dea..4b2b38d 100644
--- a/sdf_generate/CMakeLists.txt
+++ b/sdf_generate/CMakeLists.txt
@@ -1,4 +1,9 @@
cmake_minimum_required(VERSION 3.16)
+
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+ set(CMAKE_CUDA_ARCHITECTURES 86)
+endif()
+
project(renderSDF CXX CUDA)
set(CMAKE_CUDA_STANDARD 14)
@@ -12,7 +17,8 @@ include_directories(include)
AUX_SOURCE_DIRECTORY(src DIR_SRCS)
FILE(GLOB_RECURSE DIR_INCLUDE include/*.h include/*.hpp include/*.cuh)
-add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
+ADD_LIBRARY(sdfGenerate SHARED ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
+#add_executable(sdfGenerate ${DIR_SRCS} ${DIR_INCLUDE} main.cpp)
target_link_libraries(sdfGenerate Eigen3::Eigen)
diff --git a/sdf_generate/include/cudaEigenTest.cuh b/sdf_generate/include/cudaEigenTest.cuh
index 4ef98f6..97cf653 100644
--- a/sdf_generate/include/cudaEigenTest.cuh
+++ b/sdf_generate/include/cudaEigenTest.cuh
@@ -8,11 +8,11 @@
#include "Eigen/Eigen"
#include "Eigen/Dense"
#include "cuda_runtime.h"
-#include "glm/glm.hpp"
+//#include "glm/glm.hpp"
__global__ void eigenKernel(Eigen::Matrix factor, Eigen::Vector3d testVector);
-__global__ void glmKernel(glm::vec3 testVector);
+//__global__ void glmKernel(glm::vec3 testVector);
__host__ void testHost();
diff --git a/sdf_generate/include/rod_generate.cuh b/sdf_generate/include/rod_generate.cuh
index 3effb60..e5aa89f 100644
--- a/sdf_generate/include/rod_generate.cuh
+++ b/sdf_generate/include/rod_generate.cuh
@@ -10,13 +10,19 @@
#include "Eigen/Eigen"
#include "unsupported/Eigen/CXX11/Tensor"
+#ifdef _MSC_VER
+#define DLL_EXPORT __declspec( dllexport )
+#else
+#define DLL_EXPORT
+#endif
+
__global__
void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent* extent, const Eigen::Vector3f *sampleMin,
const Eigen::Vector3f *sampleMax, int radius);
-__host__ void
+extern "C" DLL_EXPORT __host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
- const Eigen::Vector3f &sampleMax, int radius);
+ const Eigen::Vector3f &sampleMax, float radius);
class rod_generate {
diff --git a/sdf_generate/main.cpp b/sdf_generate/main.cpp
index 46044c9..637a4e3 100644
--- a/sdf_generate/main.cpp
+++ b/sdf_generate/main.cpp
@@ -5,11 +5,12 @@
#include "iostream"
#include "cudaEigenTest.cuh"
#include "bvh.h"
+#include "rod_generate.cuh"
-int main() {
-
+float *test() {
// a case with 3*3*3 points (2*2*2 resolution)
- Eigen::Matrixrod_points;
+ Eigen::Matrix rod_points;
+ rod_points.resize(27, 3);
rod_points << 0, 0, 0,
1, 0, 0,
2, 0, 0,
@@ -93,9 +94,13 @@ int main() {
8, 17,
17, 26;
RodCrystal rod(rod_points, rod_beams);
+// RodBVH bvh(rod);
+// bvh.build();
+ return h_rod_generate(rod, Eigen::Vector3i(10, 10, 10), Eigen::Vector3f(-100, -100, -100),
+ Eigen::Vector3f(100, 100, 100), 0.2f);
+}
-
- RodBVH bvh(rod);
- bvh.build();
+int main() {
+ test();
return 0;
}
\ No newline at end of file
diff --git a/sdf_generate/src/bvh.cpp b/sdf_generate/src/bvh.cpp
index 389a27d..ee657c2 100644
--- a/sdf_generate/src/bvh.cpp
+++ b/sdf_generate/src/bvh.cpp
@@ -68,7 +68,7 @@ void RodBVH::quickSelect(std::vector &rods, int axis) {
}
std::swap(rods[right], rods[storeIndex]);
if (storeIndex == mid) {
- return storeIndex;
+ return;
} else if (storeIndex > mid) {
right = storeIndex - 1;
} else {
diff --git a/sdf_generate/src/cudaEigenTest.cu b/sdf_generate/src/cudaEigenTest.cu
index 3d5e893..8402407 100644
--- a/sdf_generate/src/cudaEigenTest.cu
+++ b/sdf_generate/src/cudaEigenTest.cu
@@ -45,23 +45,23 @@ __global__ void eigenKernel(Eigen::Matrix factor, Eigen::Matrix a = {1, 2, 3, 4};
- glm::mat<2, 2, float> b = {2, 3, 4, 5};
- glm::mat<2, 2, float> c = a * b;
- glm::vec2 d = {3, 4};
- glm::vec2 e = c * d;
- glm::vec3 factor = {1, 2, 3};
- glm::vec3 result = factor * testVector;
- printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]);
- printf("e: %f, %f\n", e[0], e[1]);
- printf("result: %f, %f, %f\n", result[0], result[1], result[2]);
- }
-
-
-}
+//__global__ void glmKernel(glm::vec3 testVector) {
+// if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
+// printf("testVector: %f, %f, %f\n", testVector.x, testVector.y, testVector.z);
+// glm::mat<2, 2, float> a = {1, 2, 3, 4};
+// glm::mat<2, 2, float> b = {2, 3, 4, 5};
+// glm::mat<2, 2, float> c = a * b;
+// glm::vec2 d = {3, 4};
+// glm::vec2 e = c * d;
+// glm::vec3 factor = {1, 2, 3};
+// glm::vec3 result = factor * testVector;
+// printf("c: %f, %f, %f, %f\n", c[0][0], c[0][1], c[1][0], c[1][1]);
+// printf("e: %f, %f\n", e[0], e[1]);
+// printf("result: %f, %f, %f\n", result[0], result[1], result[2]);
+// }
+//
+//
+//}
__global__ void testKernel(float x) {
printf("testVector: %f, %f, %f\n", x, x, x);
@@ -76,8 +76,8 @@ __host__ void testHost() {
eigenKernel <<< 1, 32>>>(factor, testVector);
cudaDeviceSynchronize();
- glm::vec3 testVectorGlm(4, 5, 6);
- glmKernel<<< 1, 32 >>>(testVectorGlm);
+// glm::vec3 testVectorGlm(4, 5, 6);
+// glmKernel<<< 1, 32 >>>(testVectorGlm);
cudaDeviceSynchronize();
//
// testKernel<<< 1, 32 >>>(3);
@@ -92,19 +92,19 @@ __host__ void testHost() {
// printf("result in CPU: %f, %f\n", result(2, 0), result(2, 1));
// printf("result in CPU: %f, %f\n", result(3, 0), result(3, 1));
- glm::mat<4, 3, float> factorGlm{};
- factorGlm[0][0] = 1;
- factorGlm[0][1] = 2;
- factorGlm[0][2] = 3;
- factorGlm[1][0] = 4;
- factorGlm[1][1] = 5;
- factorGlm[1][2] = 6;
- factorGlm[2][0] = 7;
- factorGlm[2][1] = 8;
- factorGlm[2][2] = 9;
- factorGlm[3][0] = 10;
- factorGlm[3][1] = 11;
- factorGlm[3][2] = 12;
- glm::vec4 resultGlm = testVectorGlm * factorGlm;
+// glm::mat<4, 3, float> factorGlm{};
+// factorGlm[0][0] = 1;
+// factorGlm[0][1] = 2;
+// factorGlm[0][2] = 3;
+// factorGlm[1][0] = 4;
+// factorGlm[1][1] = 5;
+// factorGlm[1][2] = 6;
+// factorGlm[2][0] = 7;
+// factorGlm[2][1] = 8;
+// factorGlm[2][2] = 9;
+// factorGlm[3][0] = 10;
+// factorGlm[3][1] = 11;
+// factorGlm[3][2] = 12;
+// glm::vec4 resultGlm = testVectorGlm * factorGlm;
}
\ No newline at end of file
diff --git a/sdf_generate/src/rod.cpp b/sdf_generate/src/rod.cpp
index b6a3c07..2dc353a 100644
--- a/sdf_generate/src/rod.cpp
+++ b/sdf_generate/src/rod.cpp
@@ -9,7 +9,7 @@
RodCrystal::RodCrystal(Eigen::Matrix _rod_points, Eigen::Matrix _rod_beams) : rod_points(std::move(_rod_points)),
rod_beams(std::move(_rod_beams)) {
rod_mid.resize(rod_beams.rows(), 3);
- for (int i = 0; i < rod_beams.rows(); ++i) {
- rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2;
- }
+// for (int i = 0; i < rod_beams.rows(); ++i) {
+// rod_mid.row(i) = (rod_points.row(rod_beams(i, 0)) + rod_points.row(rod_beams(i, 1))) / 2;
+// }
}
diff --git a/sdf_generate/src/rod_generate.cu b/sdf_generate/src/rod_generate.cu
index 8394e2c..537eb7d 100644
--- a/sdf_generate/src/rod_generate.cu
+++ b/sdf_generate/src/rod_generate.cu
@@ -2,46 +2,91 @@
// Created by dtouch on 23-5-23.
//
-#include "../include/rod_generate.cuh"
+#include "rod_generate.cuh"
+#include "device_functions.h"
+#include "float.h"
__global__
-void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent *extent,
- const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) {
+void
+g_rod_generate(int *beamData, int beamCnt, float *pointData, int pointCnt, cudaPitchedPtr sdf, const cudaExtent *extent,
+ size_t floatSize, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) {
// 3-dim grid and 3-dim block
+ Eigen::Map> rod_beams(beamData, beamCnt, 2);
+ Eigen::Map> rod_points(pointData, pointCnt, 3);
auto ix = blockIdx.x * blockDim.x + threadIdx.x;
auto iy = blockIdx.y * blockDim.y + threadIdx.y;
auto iz = blockIdx.z * blockDim.z + threadIdx.z;
+// if (ix == 0 && iy == 0 && iz == 0) {
+// for (int i = 0; i < beamCnt; ++i) {
+// printf("%d, %d\n", rod_beams(i, 0), rod_beams(i, 1));
+// }
+// for(int i = 0; i < pointCnt; ++i) {
+// printf("%f, %f, %f\n", rod_points(i, 0), rod_points(i, 1), rod_points(i, 2));
+// }
+// }
+ if (ix >= extent->width / floatSize || iy >= extent->height || iz >= extent->depth) {
+ return;
+ }
auto x = sampleMin->x() +
- static_cast(ix) * (sampleMax->x() - sampleMin->x()) / static_cast(extent->width);
+ static_cast(ix) * (sampleMax->x() - sampleMin->x()) / static_cast(extent->width / floatSize);
auto y = sampleMin->y() +
static_cast(iy) * (sampleMax->y() - sampleMin->y()) / static_cast(extent->height);
auto z = sampleMin->z() +
static_cast(iz) * (sampleMax->z() - sampleMin->z()) / static_cast(extent->depth);
+// printf("%d, %d, %d\n", ix, iy, iz);
// 获取sdf中下标为(ix,iy)的元素的行首指针
- auto sdfPtr = reinterpret_cast((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height);
+// auto sdfPtr = reinterpret_cast((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height);
+ char *sdfPtr = (char *) sdf.ptr;
+ size_t pitch = sdf.pitch;
+ size_t slicePitch = pitch * extent->height;
auto p = Eigen::Vector3f(x, y, z);
- for (int i = 0; i < rodCrystal->rod_beams.rows(); ++i) {
- auto a = Eigen::Matrix(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 0)));
- auto b = Eigen::Matrix(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 1)));
+ char *slice = sdfPtr + iz * slicePitch;
+ auto *row = (float *) (slice + iy * pitch);
+ // row[ix] is initialized as the max float in GPU
+ row[ix] = FLT_MAX;
+// auto aTmp = Eigen::Vector3f(rod_points.row(rod_beams(2, 1)));
+// printf("aTmp: (%f, %f, %f)\n", aTmp.x(), aTmp.y(), aTmp.z());
+ for (int i = 0; i < rod_beams.rows(); ++i) {
+ auto a = Eigen::Vector3f(rod_points.row(rod_beams(i, 0)));
+ auto b = Eigen::Vector3f(rod_points.row(rod_beams(i, 1)));
auto ab = b - a;
auto ap = p - a;
auto bp = p - b;
- if (ab.dot(-bp) > 0 && ab.dot(ap) > 0) {
- sdfPtr[ix] = (ap.cross(bp)).norm() / ab.norm();
+ if (ab.x() * bp.x() + ab.y() * bp.y() + ab.z() + bp.z() < 0 &&
+ ab.x() * ap.x() + ab.y() * ap.y() + ab.z() * ap.z() > 0) {
+ row[ix] = min(row[ix], (ap.cross(bp)).norm() / ab.norm());
} else {
- sdfPtr[iz] = std::min(ap.norm(), bp.norm());
+ row[ix] = min(row[ix], min(ap.norm(), bp.norm()));
}
}
+ row[ix] -= radius;
}
-__host__ void
+__host__ float*
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin,
- const Eigen::Vector3f &sampleMax, int radius) {
- RodCrystal *d_rodCrystal;
- cudaMalloc(&d_rodCrystal, sizeof(RodCrystal));
- cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice);
+ const Eigen::Vector3f &sampleMax, float radius) {
+ int *d_beamData;
+ size_t beamBytes = rodCrystal.rod_beams.rows() * rodCrystal.rod_beams.cols() * sizeof(int);
+ cudaMalloc(&d_beamData, beamBytes);
+ cudaMemcpy(d_beamData, rodCrystal.rod_beams.data(), beamBytes, cudaMemcpyHostToDevice);
+
+ float *d_pointData;
+ size_t pointBytes = rodCrystal.rod_points.rows() * rodCrystal.rod_points.cols() * sizeof(float);
+ cudaMalloc(&d_pointData, pointBytes);
+ cudaMemcpy(d_pointData, rodCrystal.rod_points.data(), pointBytes, cudaMemcpyHostToDevice);
+
+// RodCrystal *d_rodCrystal;
+// cudaMalloc(&d_rodCrystal, sizeof(rodCrystal));
+// cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(rodCrystal), cudaMemcpyHostToDevice);
+// printf("size of rodCrystal: %lu; size of class RodCrystal: %lu\n", sizeof(rodCrystal), sizeof(RodCrystal));
+// printf("size of rodCrystal.rod_points: %lu\n", sizeof(rodCrystal.rod_points));
+// printf("size of rodCrystal.rod_beams: %lu\n", sizeof(rodCrystal.rod_beams));
+// printf("size of rodCrystal.rod_points.row(0): %lu\n", sizeof(rodCrystal.rod_points.row(0)));
+
+ int sampleCntAll = sampleCnt.x() * sampleCnt.y() * sampleCnt.z();
float *h_sdf;
+ h_sdf = (float *) malloc(sampleCnt.x() * sampleCnt.y() * sampleCnt.z() * sizeof(float));
for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
h_sdf[i] = i;
}
@@ -58,24 +103,33 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
cudaMemcpy3D(&copyParams);
Eigen::Vector3f *d_sampleMin;
- cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f));
- cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice);
+ cudaMalloc(&d_sampleMin, sizeof(sampleMin));
+ cudaMemcpy(d_sampleMin, &sampleMin, sizeof(sampleMin), cudaMemcpyHostToDevice);
Eigen::Vector3f *d_sampleMax;
- cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f));
- cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice);
-
- cudaExtent *d_extent;
- cudaMalloc(&d_extent, sizeof(Eigen::Vector3i));
- cudaMemcpy(d_extent, &sampleCnt, sizeof(Eigen::Vector3i), cudaMemcpyHostToDevice);
-
- dim3 block(64, 64, 64);
- dim3 grid((sampleCnt.x() + block.x - 1) / block.x,
- (sampleCnt.y() + block.y - 1) / block.y,
- (sampleCnt.z() + block.z - 1) / block.z);
- g_rod_generate<<<grid, block>>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius);
+ cudaMalloc(&d_sampleMax, sizeof(sampleMax));
+ cudaMemcpy(d_sampleMax, &sampleMax, sizeof(sampleMax), cudaMemcpyHostToDevice);
+
+ cudaExtent *d_extent;
+ cudaMalloc(&d_extent, sizeof(extent));
+ cudaMemcpy(d_extent, &extent, sizeof(extent), cudaMemcpyHostToDevice);
+
+ dim3 grid(16, 16, 16);
+ dim3 block((sampleCnt.x() + grid.x - 1) / grid.x,
+ (sampleCnt.y() + grid.y - 1) / grid.y,
+ (sampleCnt.z() + grid.z - 1) / grid.z);
+ g_rod_generate<<<grid, block>>>(d_beamData, rodCrystal.rod_beams.rows(), d_pointData, rodCrystal.rod_points.rows(),
+ d_sdf, d_extent, sizeof(float), d_sampleMin, d_sampleMax, radius);
cudaDeviceSynchronize();
+// for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) {
+// h_sdf[i] = -i;
+// }
+
+ auto tmpBeam = rodCrystal.rod_beams(0, 1);
+ printf("tmpBeam: %d\n", tmpBeam);
+
+
printf("copy back to host\n");
copyParams = {nullptr};
copyParams.srcPtr = d_sdf;
@@ -85,9 +139,29 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c
copyParams.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D(&copyParams);
- cudaFree(d_rodCrystal);
+// cudaFree(d_rodCrystal);
cudaFree(d_sdf.ptr);
cudaFree(d_sampleMin);
cudaFree(d_sampleMax);
cudaFree(d_extent);
+ cudaFree(d_beamData);
+ cudaFree(d_pointData);
+ printf("[");
+ for (int i = 0; i < sampleCnt.x(); i++) {
+ printf("[");
+ for (int j = 0; j < sampleCnt.y(); j++) {
+ printf("[");
+ for (int k = 0; k < sampleCnt.z(); k++) {
+ printf("%f", h_sdf[i * sampleCnt.y() * sampleCnt.z() + j * sampleCnt.z() + k]);
+ if (k != sampleCnt.z() - 1)
+ printf(",");
+ }
+ printf("]");
+ if (j != sampleCnt.y() - 1)
+ printf(",");
+ }
+ printf("]");
+ }
+ printf("]");
+ free(h_sdf);
}