From e736268cc1d445c0e6c5534e7ba1caaaea11b24b Mon Sep 17 00:00:00 2001 From: Dtouch Date: Mon, 27 Mar 2023 23:52:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E4=BA=86GPU=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E7=9A=84LoopDetection=EF=BC=8C=E9=80=90=E6=AD=A5?= =?UTF-8?q?=E7=94=A8glm::vec=E4=BB=A3=E6=9B=BFstd::vec=E6=9D=A5?= =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E4=B8=80=E4=B8=AA=E7=82=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 5 +- include/device/Nurbs/bvh.cuh | 10 +- include/device/Nurbs/loop_detection.cuh | 42 +++++ include/device/Nurbs/nurbs_curve.cuh | 8 +- include/device/Nurbs/nurbs_surface.cuh | 61 +++++-- include/device/aabb.cuh | 20 +-- include/device/device_utils.cuh | 2 + include/device/srf_mesh.cuh | 17 ++ include/utils.h | 23 ++- src/device/Nurbs/bvh.cu | 19 +- src/device/Nurbs/loop_detection.cu | 229 ++++++++++++++++++++++++ src/device/Nurbs/nurbs_common.cu | 4 +- src/device/Nurbs/nurbs_curve.cu | 38 ++-- src/device/Nurbs/nurbs_surface.cu | 220 +++++++++++++++++------ src/device/aabb.cu | 32 ++-- src/device/device_utils.cu | 2 +- src/device/srf_mesh.cu | 4 + src/device/vec.cu | 2 +- src/utils.cpp | 2 +- tests/main.cpp | 61 +++++-- 20 files changed, 643 insertions(+), 158 deletions(-) create mode 100644 include/device/Nurbs/loop_detection.cuh create mode 100644 include/device/srf_mesh.cuh create mode 100644 src/device/Nurbs/loop_detection.cu create mode 100644 src/device/srf_mesh.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 230989a..cb44c0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set(PROJECT_SOURCES src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh - ) + src/device/Nurbs/loop_detection.cu include/device/Nurbs/loop_detection.cuh src/device/srf_mesh.cu include/device/srf_mesh.cuh) add_executable(NurbsPerformer ${PROJECT_SOURCES}) @@ -30,8 +30,9 @@ add_executable(NurbsPerformer ${PROJECT_SOURCES}) #include_directories("/usr/local/device-11.8/targets/x86_64-linux/include") # windows include_directories("$ENV{CUDA_PATH}/include") - +include_directories("E:/CLib/glm") include_directories(include) +include_directories("E:/CLib/tinynurbs/include") #MESSAGE("CUDA PATH::: $ENV{CUDA_PATH}") #MESSAGE("CUDA PATH::: $ENV{LD_LIBRARY_PATH}") diff --git a/include/device/Nurbs/bvh.cuh b/include/device/Nurbs/bvh.cuh index 5c7831a..5a09ad0 100644 --- a/include/device/Nurbs/bvh.cuh +++ b/include/device/Nurbs/bvh.cuh @@ -7,6 +7,14 @@ #include "device/aabb.cuh" +struct BVHNode { + AABB bounds; // AABB包围盒 + int firstChild = -1, level = 0; // 第一个孩子节点的下标,该结点所在层次 + // 曲面片u, v的范围 + float u0 = 0., u1 = 0., v0 = 0., v1 = 0.; + int idx_u = -1, idx_v = -1; +}; + /** * 当前层的第一个节点的坐标 * @param layer 层号。根节点为第一层 @@ -29,7 +37,7 @@ public: }; __global__ void -buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, +g_buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, int sampleCnt_u, int sampleCnt_v, BVHNode *BVH); #endif //NURBSEVALUATOR_BVH_CUH diff --git a/include/device/Nurbs/loop_detection.cuh b/include/device/Nurbs/loop_detection.cuh new file mode 100644 index 0000000..48c4067 --- /dev/null +++ b/include/device/Nurbs/loop_detection.cuh @@ -0,0 +1,42 @@ +#ifndef NURBSPERFORMER_LOOP_DETECTION_CUH +#define NURBSPERFORMER_LOOP_DETECTION_CUH + +#include "device/srf_mesh.cuh" +#include "map" +#include "set" + +class LoopDetection { +public: + SrfMesh *d_s_srfMesh; + SrfMesh *d_f_srfMesh; + + float s_paramRegionSize_u; + float s_paramRegionSize_v; + + int edgeSampleCnt; + + glm::vec<2, int> *d_selectedPointsIdx; + glm::vec2 *d_vectorFields; + int *d_rotationNumbers; + + LoopDetection(glm::vec3 *s_eval, glm::vec3 *f_eval, glm::vec3 *s_tan_u, glm::vec3 *f_tan_u, glm::vec3 *s_tan_v, + glm::vec3 *f_tan_v, glm::vec3 *s_norm, glm::vec3 *f_norm, int edgeSampleCnt, + float s_paramRegionSize_u, float s_paramRegionSize_v); + + __host__ void h_initOrientedDisAndVecFields( + const std::map, std::set>> &intersectBoxPairs); + __host__ void h_getRotationNumber(); + + ~LoopDetection(); +}; + +__global__ void +g_initOrientedDisAndVecFields(SrfMesh *s_srfMesh, SrfMesh *f_srfMesh, int s_cellCnt, glm::vec<2, int> *s_cells, + glm::vec<2, int> **f_cells, const int *f_cellCnt, glm::vec<2, int> *selectedPointsIdx, + glm::vec2 *vectorFields); + +__global__ void +g_getRotationNumber(SrfMesh *s_srfMesh, float paramRegionSize_u, float paramRegionSize_v, glm::vec2 *vectorFields, + int *rotationNumbers); + +#endif //NURBSPERFORMER_LOOP_DETECTION_CUH diff --git a/include/device/Nurbs/nurbs_curve.cuh b/include/device/Nurbs/nurbs_curve.cuh index fac7c6c..28a4e5c 100644 --- a/include/device/Nurbs/nurbs_curve.cuh +++ b/include/device/Nurbs/nurbs_curve.cuh @@ -1,13 +1,9 @@ -// -// Created by 14727 on 2022/12/9. -// - #ifndef NURBSEVALUATOR_NURBS_CURVE_CUH #define NURBSEVALUATOR_NURBS_CURVE_CUH #include #include -//#include +#include namespace NurbsCurve { const int POINT_SIZE = 4; @@ -51,7 +47,7 @@ namespace NurbsCurve { * @param sampleCnt_ 在参数域内均匀采样的采样数,它会更新成员变量中的sampleCnt * @return 由 map 组成的vector{} */ - std::vector> evaluate(int sampleCnt_); + std::vector evaluate(int sampleCnt_); /** * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 diff --git a/include/device/Nurbs/nurbs_surface.cuh b/include/device/Nurbs/nurbs_surface.cuh index 8c170cf..5caa346 100644 --- a/include/device/Nurbs/nurbs_surface.cuh +++ b/include/device/Nurbs/nurbs_surface.cuh @@ -8,9 +8,16 @@ #include #include "cuda_runtime.h" #include "bvh.cuh" +#include "glm/glm.hpp" + +typedef std::vector LinePoints3; +typedef std::vector MeshPoints3; +typedef std::vector> MeshPoints4; +typedef std::pair idx2; +typedef std::pair boxPair; namespace NurbsSurface { - const int POINT_SIZE = 4; +// const int POINT_SIZE = 4; /** * 曲线计算的核函数 * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) @@ -32,7 +39,7 @@ namespace NurbsSurface { class Surface { private: - std::vector>> controlPoints; + MeshPoints4 controlPoints; float *d_points; std::vector knots_u; std::vector knots_v; @@ -50,7 +57,13 @@ namespace NurbsSurface { float *d_normals; float *d_k; // 最大曲率 + __host__ void recursiveGetRayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint, int idx, + std::vector &intersectionLeafNodes); + public: + float *h_evaluations; + float *h_derivatives; + float *h_normals; BVH bvh; BVH gauss_map; /** @@ -59,17 +72,26 @@ namespace NurbsSurface { * @param knots_u u方向knots * @param knots_v v方向knots */ - __host__ explicit Surface(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v); + __host__ explicit Surface(MeshPoints4 controlPoints, std::vector knots_u, std::vector knots_v); /** - * 供外部CPU程序使用的、负责调用gpu并行计算的方法 + * 供外部CPU程序使用的、负责调用gpu并行计算采样点xyz值的方法 * @param sampleCnt_u u方向采样数目 * @param sampleCnt_v v方向采样数目 * @return 由 map 组成的vector{<, {x, y, z}>} */ - __host__ std::vector>> - evaluate(int sampleCnt_u_, int sampleCnt_v_); + __host__ std::vector> getEvaluateVec(int sampleCnt_u, int sampleCnt_v) const; + + __host__ std::vector getDerivativeVec(int sampleCnt_u, int sampleCnt_v) const; + + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算采样点xyz值的方法 + * @param sampleCnt_u u方向采样数目 + * @param sampleCnt_v v方向采样数目 + * @return 表示为vertices形式的采样点值数组 + */ + __host__ void evaluate(int sampleCnt_u, int sampleCnt_v); /** * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法 @@ -84,13 +106,18 @@ namespace NurbsSurface { /** * 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法 */ - __host__ void buildBHV(int layerCnt, bool useK = false); + __host__ void buildBVH(int layerCnt, bool useK = false); /** * 供外部CPU程序使用的、负责调用gpu并行计算Gauss Map Tree的方法 */ __host__ void buildGaussMap(int layerCnt); + /** + * 供外部CPU程序使用的、负责调用递归函数求解BVH与ray交点的方法 + */ + __host__ std::vector rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint); + void setRecordTime(bool r); @@ -99,9 +126,9 @@ namespace NurbsSurface { }; __host__ void recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2, - std::vector> &pairs); + std::vector> &pairs); - __host__ std::vector> getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2); + __host__ std::vector getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2); /** * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交 @@ -112,8 +139,8 @@ namespace NurbsSurface { * @return true:gauss map的包围盒有交集,说明gauss map<可能>有重合;false: gauss map一定没有重合 */ __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair idxRange_u1, - std::pair idxRange_v1, std::pair idxRange_u2, - std::pair idxRange_v2); + std::pair idxRange_v1, std::pair idxRange_u2, + std::pair idxRange_v2); /** * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交 * @param range_u1 第一个gauss map的参数u范围 @@ -127,11 +154,11 @@ namespace NurbsSurface { * @return true:gauss map的包围盒有交集,说明gauss map<可能>有重合;false: gauss map一定没有重合 */ __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair range_u1, - std::pair range_v1, std::pair range_u2, - std::pair range_v2, std::pair paramRange_u1, - std::pair paramRange_v1, - std::pair paramRange_u2, - std::pair paramRange_v2); + std::pair range_v1, std::pair range_u2, + std::pair range_v2, std::pair paramRange_u1, + std::pair paramRange_v1, + std::pair paramRange_u2, + std::pair paramRange_v2); } diff --git a/include/device/aabb.cuh b/include/device/aabb.cuh index d6ee233..4d3980d 100644 --- a/include/device/aabb.cuh +++ b/include/device/aabb.cuh @@ -6,31 +6,25 @@ #define NURBSEVALUATOR_AABB_CUH #include "vec.cuh" +#include "glm/glm.hpp" #include "cstdio" class AABB { public: // 边界 - FVec3 pMin, pMax; + glm::vec3 pMin, pMax; __device__ __host__ AABB(); - __device__ __host__ explicit AABB(const FVec3& p); - __device__ __host__ AABB(const FVec3& p1, const FVec3& p2); + __device__ __host__ explicit AABB(const glm::vec3& p); + __device__ __host__ AABB(const glm::vec3& p1, const glm::vec3& p2); // aabb包围盒合并操作,包围盒和点 - __device__ __host__ AABB Union(const FVec3& p); + __device__ __host__ AABB Union(const glm::vec3& p) const; // aabb包围盒合并操作,两个包围盒 - __device__ __host__ AABB Union(const AABB& b2); + __device__ __host__ AABB Union(const AABB& b2) const; // aabb包围盒沿各坐标轴政府方向分别扩张 __device__ __host__ void expand(float k); // 判断两个aabb包围盒是否重叠 - __device__ __host__ bool IsOverlap(const AABB& b2); -}; - -struct BVHNode { - AABB bounds; // AABB包围盒 - int firstChild = -1, level = 0; // 第一个孩子节点的下标,该结点所在层次,孩子个数 - // 曲面片u, v的范围 - float u0 = 0., u1 = 0., v0 = 0., v1 = 0.; + __device__ __host__ bool IsOverlap(const AABB& b2) const; }; #endif //NURBSEVALUATOR_AABB_CUH diff --git a/include/device/device_utils.cuh b/include/device/device_utils.cuh index 3c50dfc..cbeb0a0 100644 --- a/include/device/device_utils.cuh +++ b/include/device/device_utils.cuh @@ -5,6 +5,8 @@ #ifndef NURBSEVALUATOR_DEVICE_UTILS_CUH #define NURBSEVALUATOR_DEVICE_UTILS_CUH +#define D_DBL_MAX 1.7976931348623158e+308 +#define D_FLT_MAX 3.402823466e+38F __device__ void d_safeFree(float * &p); diff --git a/include/device/srf_mesh.cuh b/include/device/srf_mesh.cuh new file mode 100644 index 0000000..8d06a1b --- /dev/null +++ b/include/device/srf_mesh.cuh @@ -0,0 +1,17 @@ +#ifndef NURBSPERFORMER_SRF_MESH_CUH +#define NURBSPERFORMER_SRF_MESH_CUH + +#include "glm/glm.hpp" + +class SrfMesh { +public: + int edgeSampleCnt; + glm::vec3 *evaluationRes; + glm::vec3 *tangent_u; + glm::vec3 *tangent_v; + glm::vec3 *normal; + SrfMesh(glm::vec3 *evalRes, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *nor, int edgeSampleCnt); +}; + + +#endif //NURBSPERFORMER_SRF_MESH_CUH diff --git a/include/utils.h b/include/utils.h index dbcb8b6..8bb2c18 100644 --- a/include/utils.h +++ b/include/utils.h @@ -8,10 +8,13 @@ #include double get_time(); #else + #include #include "device/Nurbs/bvh.cuh" +#include "device/srf_mesh.cuh" double get_time(); + #endif /** @@ -19,9 +22,27 @@ double get_time(); * 注意指针是引用传参,因为要把指针本身置空 */ void safeCudaFree(float *&p); + void safeCudaFree(BVHNode *&p); -//template + +template +void safeCudaFree(T *&p) { + if (p != nullptr) { + cudaFree(p); + p = nullptr; + } +} + +template +void safeFree(T *&p) { + if (p != nullptr) { + free(p); + p = nullptr; + } +} + void safeFree(float *&p); + void safeFree(BVHNode *&p); diff --git a/src/device/Nurbs/bvh.cu b/src/device/Nurbs/bvh.cu index bce1cc9..f8cd2f6 100644 --- a/src/device/Nurbs/bvh.cu +++ b/src/device/Nurbs/bvh.cu @@ -2,11 +2,8 @@ // Created by 14727 on 2022/12/11. // -#include "../../../include/device/Nurbs/bvh.cuh" -/** - * 当前层的第一个节点的坐标 - * @param layer 层号。根节点为第一层 - */ +#include "device/Nurbs/bvh.cuh" + __device__ __host__ int getStartIdxOfLayerN(int layer) { if (layer == 1) return 0; int num = 1; @@ -48,7 +45,7 @@ __host__ int h_getChildNodeIdx(int ix, int iy){ } __global__ void -buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, +g_buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) { // 假设还是二维grid和二维的block int ix = blockIdx.x * blockDim.x + threadIdx.x; @@ -74,22 +71,24 @@ buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot // if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]); if (step == 1) { - AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], + AABB bound(glm::vec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2])); // 最初只包含u,v点 // 叶子节点 BVH[idx].u0 = u; BVH[idx].u1 = u + step_u * singleStepVal_u; BVH[idx].v0 = v; BVH[idx].v1 = v + step_v * singleStepVal_v; + BVH[idx].idx_u = ix; + BVH[idx].idx_v = iy; BVH[idx].level = level; int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3; int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; - bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1], + bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1], evaluatedPoints[tmpIdx1 + 2])); - bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1], + bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1], evaluatedPoints[tmpIdx2 + 2])); - bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1], + bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1], evaluatedPoints[tmpIdx3 + 2])); if(k != nullptr) bound.expand(*k); BVH[idx].bounds = bound; diff --git a/src/device/Nurbs/loop_detection.cu b/src/device/Nurbs/loop_detection.cu new file mode 100644 index 0000000..467ab66 --- /dev/null +++ b/src/device/Nurbs/loop_detection.cu @@ -0,0 +1,229 @@ +#include +#include "device/Nurbs/loop_detection.cuh" +#include "utils.h" +#include "cuda_runtime.h" +#include "device/device_utils.cuh" + +LoopDetection::LoopDetection(glm::vec3 *const s_eval, glm::vec3 *const f_eval, glm::vec3 *const s_tan_u, + glm::vec3 *const f_tan_u, glm::vec3 *const s_tan_v, glm::vec3 *const f_tan_v, + glm::vec3 *const s_norm, glm::vec3 *const f_norm, int edgeSampleCnt, + float s_paramRegionSize_u, float s_paramRegionSize_v) : + edgeSampleCnt(edgeSampleCnt), + s_paramRegionSize_u(s_paramRegionSize_u), + s_paramRegionSize_v(s_paramRegionSize_v) { + auto initSrfMesh = [](SrfMesh *&d_srfMesh, glm::vec3 *eval, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *norm, + int edgeSampleCnt) { + auto h_srfMesh = new SrfMesh(eval, tan_u, tan_v, norm, edgeSampleCnt); + safeCudaFree(d_srfMesh); + auto srfMeshSize = sizeof(SrfMesh); + cudaMalloc((void **) &d_srfMesh, srfMeshSize); + cudaMemcpy(d_srfMesh, h_srfMesh, srfMeshSize, cudaMemcpyHostToDevice); + safeFree(h_srfMesh); + }; + initSrfMesh(d_s_srfMesh, s_eval, s_tan_u, s_tan_v, s_norm, edgeSampleCnt); + initSrfMesh(d_f_srfMesh, f_eval, f_tan_u, f_tan_v, f_norm, edgeSampleCnt); +} + + +LoopDetection::~LoopDetection() { + safeCudaFree(d_selectedPointsIdx); + safeCudaFree(d_vectorFields); + safeCudaFree(d_s_srfMesh); + safeCudaFree(d_f_srfMesh); + safeCudaFree(d_rotationNumbers); +} + +__host__ void LoopDetection::h_initOrientedDisAndVecFields( + const std::map, std::set>> &intersectBoxPairs) { + int sampleCnt = edgeSampleCnt * edgeSampleCnt; + + safeCudaFree(d_selectedPointsIdx); + auto selectedPointsIdxVec = std::vector(sampleCnt, glm::vec<2, int>(-1, -1)); + auto h_selectedPointsIdx = new glm::vec<2, int>[sampleCnt]; + std::copy(selectedPointsIdxVec.begin(), selectedPointsIdxVec.end(), h_selectedPointsIdx); + int selectedPtsIdxBytes = sizeof(glm::vec<2, int>) * sampleCnt; + cudaMalloc((void**)&d_selectedPointsIdx, selectedPtsIdxBytes); + cudaMemcpy(d_selectedPointsIdx, h_selectedPointsIdx, selectedPtsIdxBytes, cudaMemcpyHostToDevice); + + safeCudaFree(d_vectorFields); + cudaMalloc((void**)&d_vectorFields, sizeof(glm::vec2) * sampleCnt); + + int mapCnt = intersectBoxPairs.size(); + + auto *h_s_cells = new glm::vec<2, int>[mapCnt]; + glm::vec<2, int> *d_s_cells; + auto **h_f_cells = new glm::vec<2, int> *[mapCnt]; + glm::vec<2, int> **d_f_cells; + auto *h_f_cellsCnt = new int[mapCnt]; + int *d_f_cellsCnt; + int i = 0; + for (const auto &boxPair: intersectBoxPairs) { + h_s_cells[i] = glm::vec<2, int>(boxPair.first.first, boxPair.first.second); + h_f_cellsCnt[i] = boxPair.second.size(); + auto h_f_cellsTmp = new glm::vec<2, int>[h_f_cellsCnt[i]]; + int j = 0; + for (auto box: boxPair.second) { + h_f_cellsTmp[j] = glm::vec<2, int>(box.first, box.second); + j++; + } + auto f_relatedCellsBytes = sizeof(glm::vec<2, int>) * h_f_cellsCnt[i]; + cudaMalloc((void **) &h_f_cells[i], f_relatedCellsBytes); + cudaMemcpy(h_f_cells[i], h_f_cellsTmp, f_relatedCellsBytes, cudaMemcpyHostToDevice); + free(h_f_cellsTmp); + i++; + } + + int s_cellsBytes = sizeof(glm::vec<2, int>) * mapCnt; + cudaMalloc((void **) &d_s_cells, s_cellsBytes); + cudaMemcpy(d_s_cells, h_s_cells, s_cellsBytes, cudaMemcpyHostToDevice); + + int f_cellsPtrBytes = sizeof(glm::vec<2, int>) * mapCnt; + cudaMalloc((void **) &d_f_cells, f_cellsPtrBytes); + cudaMemcpy(d_f_cells, h_f_cells, f_cellsPtrBytes, cudaMemcpyHostToDevice); + + int f_cellsCntBytes = sizeof(int) * mapCnt; + cudaMalloc((void **) &d_f_cellsCnt, f_cellsCntBytes); + cudaMemcpy(d_f_cellsCnt, h_f_cellsCnt, f_cellsCntBytes, cudaMemcpyHostToDevice); + + dim3 block(64, 64); + dim3 grid((mapCnt + block.x * block.y - 1) / (block.x * block.y)); + g_initOrientedDisAndVecFields <<< grid, block >>>(d_s_srfMesh, d_f_srfMesh, mapCnt, d_s_cells, d_f_cells, + d_f_cellsCnt, d_selectedPointsIdx, d_vectorFields); + + cudaFree(d_s_cells); + cudaFree(d_f_cells); + cudaFree(d_f_cellsCnt); + for (int k = 0; k < mapCnt; k++) { + cudaFree(h_f_cells[k]); + } + free(h_s_cells); + free(h_f_cells); + free(h_f_cellsCnt); + free(h_selectedPointsIdx); +} + +__host__ void LoopDetection::h_getRotationNumber() { + safeCudaFree(d_rotationNumbers); + cudaMalloc((void**)&d_rotationNumbers, sizeof(int) * edgeSampleCnt * edgeSampleCnt); + + dim3 block(32, 32); + dim3 grid((edgeSampleCnt + block.x - 1) / block.x, (edgeSampleCnt + block.y - 1) / block.y); + g_getRotationNumber<<>>(d_s_srfMesh, s_paramRegionSize_u, s_paramRegionSize_v, d_vectorFields, + d_rotationNumbers); + +} + +__device__ const int neighborIdxes[4][2] = {{1, 0}, + {0, 1}, + {-1, 0}, + {0, -1}}; + +__device__ const int dirs[2][2][2] = {{{-1, 1}, {1, 1}}, + {{-1, -1}, {1, -1}}}; // 必须是左上,右上,左下,右下的顺序 + +__global__ void +g_initOrientedDisAndVecFields(SrfMesh *s_srfMesh, SrfMesh *f_srfMesh, int s_cellCnt, glm::vec<2, int> *s_cells, + glm::vec<2, int> **f_cells, const int *f_cellCnt, glm::vec<2, int> *selectedPointsIdx, + glm::vec2 *vectorFields) { +// // 二维的grid和一维的block +// int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x * blockDim.y + threadIdx.x; + // 一维的grid和二维的block + int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; + if (idx >= s_cellCnt) return; +// printf("idx: %d\n", idx); + if (idx == 0) printf("<%d, %d>, %f\n", neighborIdxes[0][0], neighborIdxes[0][1], -D_FLT_MAX); + int s_edgeSampleCnt = s_srfMesh->edgeSampleCnt; + int f_edgeSampleCnt = f_srfMesh->edgeSampleCnt; + for (auto neighborIdx: neighborIdxes) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + float minDis = -D_FLT_MAX; + int minDisFIdx_u = -1, minDisFIdx_v = -1; + int idxI = s_cells[idx].x + i + neighborIdx[0]; + int idxJ = s_cells[idx].y + j + neighborIdx[1]; + if (idxI < 0 || idxI >= s_edgeSampleCnt || idxJ < 0 || idxJ >= s_edgeSampleCnt || + selectedPointsIdx[idxI * s_edgeSampleCnt + idxJ] != glm::vec<2, int>(2, 2)) { + // 尽量不重复计算 + // 但在多线程情况下,这样还是可以能重复计算 + continue; + } + for (int fi = 0; fi < f_cellCnt[idx]; fi++) { + for (int k = 0; k < 2; k++) { + for (int l = 0; l < 2; l++) { + for (int step_u = 0; step_u <= 2; step_u++) { + for (int step_v = 0; step_v + step_u <= 2; step_v++) { + int fIdx_u = f_cells[idx][fi].x + k + step_u * dirs[k][l][0]; + int fIdx_v = f_cells[idx][fi].y + l + step_v * dirs[k][l][1]; + if (fIdx_u < 0 || fIdx_u >= f_edgeSampleCnt || fIdx_v < 0 || + fIdx_v >= f_edgeSampleCnt) + continue; + auto dis = glm::distance(s_srfMesh->evaluationRes[idxI][idxJ], + f_srfMesh->evaluationRes[fIdx_u][fIdx_v]); + if (dis < minDis) { + minDis = dis; + minDisFIdx_u = fIdx_u; + minDisFIdx_v = fIdx_v; + } + } + } + } + } + } + selectedPointsIdx[idxI * s_edgeSampleCnt + idxJ] = glm::vec<2, int>(minDisFIdx_u, minDisFIdx_v); + auto n2 = f_srfMesh->normal[minDisFIdx_u * f_edgeSampleCnt + minDisFIdx_v]; + auto ps_pu = s_srfMesh->tangent_u[idxI * s_edgeSampleCnt + idxJ]; + auto ps_pv = s_srfMesh->tangent_v[idxI * s_edgeSampleCnt + idxJ]; + vectorFields[idxI * s_edgeSampleCnt + idxJ] = glm::vec2(glm::dot(n2, ps_pu), glm::dot(n2, ps_pv)); + } + } + } +} + +__global__ void +g_getRotationNumber(SrfMesh *s_srfMesh, float paramRegionSize_u, float paramRegionSize_v, glm::vec2 *vectorFields, + int *rotationNumbers) { + // 二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + int s_edgeSampleCnt = s_srfMesh->edgeSampleCnt; + float F[2][2], G[2][2]; + for (int biasI = 0; biasI < 2; biasI++) { + for (int biasJ = 0; biasJ < 2; biasJ++) { + auto idxI = ix + biasI; + auto idxJ = iy + biasJ; + auto v = vectorFields[idxI * s_edgeSampleCnt + idxJ]; + auto vSquareSum = v.x * v.x + v.y * v.y; + auto pTheta_pChi = -v.y / vSquareSum; + auto pTheta_pPhi = v.x / vSquareSum; +// auto pChi_pu = +// (vectorFields[idxI + 1][idxJ].x - vectorFields[idxI - 1][idxJ].x) / (2 * uParamCellSize); +// auto pChi_pv = +// (vectorFields[idxI][idxJ + 1].x - vectorFields[idxI][idxJ - 1].x) / (2 * vParamCellSize); + glm::vec2 pV_pu; + glm::vec2 pV_pv; + if (idxI == 0) + pV_pu = (vectorFields[(idxI + 1) * s_edgeSampleCnt + idxJ] - + vectorFields[idxI * s_edgeSampleCnt + idxJ]) / paramRegionSize_u; + else if (idxI == s_edgeSampleCnt - 1) + pV_pu = (vectorFields[idxI * s_edgeSampleCnt + idxJ] - + vectorFields[(idxI - 1) * s_edgeSampleCnt + idxJ]) / paramRegionSize_u; + else + pV_pu = (vectorFields[(idxI + 1) * s_edgeSampleCnt + idxJ] - + vectorFields[(idxI - 1) * s_edgeSampleCnt + idxJ]) / (2 * paramRegionSize_u); + if (idxJ == 0) + pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ + 1] - + vectorFields[idxI * s_edgeSampleCnt + idxJ]) / paramRegionSize_v; + else if (idxJ == s_edgeSampleCnt - 1) + pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ] - + vectorFields[idxI * s_edgeSampleCnt + idxJ - 1]) / paramRegionSize_v; + else + pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ + 1] - + vectorFields[idxI * s_edgeSampleCnt + idxJ - 1]) / (2 * paramRegionSize_v); + F[biasI][biasJ] = pTheta_pChi * pV_pu.x + pTheta_pPhi * pV_pu.y; + G[biasI][biasJ] = pTheta_pChi * pV_pv.x + pTheta_pPhi * pV_pv.y; + } + } + rotationNumbers[ix * s_edgeSampleCnt + iy] = int( + round(((F[0][1] + F[1][1] - F[0][0] - F[1][0]) * paramRegionSize_u + + (G[0][0] + G[0][1] - G[1][0] - G[1][1]) * paramRegionSize_v)) / 2.); +} diff --git a/src/device/Nurbs/nurbs_common.cu b/src/device/Nurbs/nurbs_common.cu index b49a8d2..89380e0 100644 --- a/src/device/Nurbs/nurbs_common.cu +++ b/src/device/Nurbs/nurbs_common.cu @@ -2,8 +2,8 @@ // Created by 14727 on 2022/11/19. // -#include "../../../include/device/Nurbs/nurbs_common.cuh" -#include "../../../include/device/device_utils.cuh" +#include "device/Nurbs/nurbs_common.cuh" +#include "device/device_utils.cuh" #include __device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { diff --git a/src/device/Nurbs/nurbs_curve.cu b/src/device/Nurbs/nurbs_curve.cu index 8bc74f4..8c0be64 100644 --- a/src/device/Nurbs/nurbs_curve.cu +++ b/src/device/Nurbs/nurbs_curve.cu @@ -1,9 +1,10 @@ // // Created by 14727 on 2022/12/9. // -#include "../../../include/device/Nurbs/nurbs_common.cuh" -#include "../../../include/device/Nurbs/nurbs_curve.cuh" -#include "../../../include/utils.h" +#include "device/Nurbs/nurbs_common.cuh" +#include "device/Nurbs/nurbs_curve.cuh" +#include "utils.h" +#include "device/Nurbs/loop_detection.cuh" __global__ void NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt, @@ -107,7 +108,7 @@ __global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, } NurbsCurve::Curve::Curve(std::vector> controlPoints, - std::vector knots) { + std::vector knots) { this->knots = std::move(knots); this->controlPoints = std::move(controlPoints); recordTime = false; @@ -130,7 +131,7 @@ NurbsCurve::Curve::~Curve() { cudaDeviceReset(); } -std::vector> +std::vector NurbsCurve::Curve::evaluate(int sampleCnt) { if (POINT_SIZE != controlPoints[0].size()) { @@ -166,13 +167,14 @@ NurbsCurve::Curve::evaluate(int sampleCnt) { // 分配nTexture1的内存。只需要GPU内存 safeCudaFree(d_nTexture1); // 注意内存管理 - cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 + cudaMalloc((void **) &d_nTexture1, + sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 // 结果数组 size_t resBytes = sampleCnt * 3 * sizeof(float); float *d_res; - cudaMalloc((void **)&d_res, resBytes); - auto *h_res = (float *)malloc(resBytes); + cudaMalloc((void **) &d_res, resBytes); + auto *h_res = (float *) malloc(resBytes); // 构造g_basisTexture线程层级 @@ -198,19 +200,27 @@ NurbsCurve::Curve::evaluate(int sampleCnt) { printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device); } + // TODO 应当有一个evaluate函数单独地只用于计算d_res + // TODO 因为不是所有调用evaluate的操作都需要一个CPU上的结果,因此这一部分数据拷贝可能是多余的 cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); - std::vector> res(sampleCnt, std::vector(3, 0.)); - for(int i = 0; i < sampleCnt; i++) { + std::vector res(sampleCnt, glm::vec3()); + for (int i = 0; i < sampleCnt; i++) { int baseIdx = i * 3; - res[i][0] = h_res[baseIdx]; - res[i][1] = h_res[baseIdx + 1]; - res[i][2] = h_res[baseIdx + 2]; + res[i].x = h_res[baseIdx]; + res[i].y = h_res[baseIdx + 1]; + res[i].z = h_res[baseIdx + 2]; } - safeFree(h_points); safeFree(h_knots); safeCudaFree(d_res); safeFree(h_res); + + int cnt = 68; + dim3 block1(8, 8); + dim3 grid1((cnt + block1.x * block1.y - 1) / (block1.x * block1.y)); + +// g_initOrientedDisAndVecFields <<< grid1, block1 >>>(nullptr, nullptr, cnt, nullptr, nullptr, nullptr, nullptr, nullptr); + return res; } diff --git a/src/device/Nurbs/nurbs_surface.cu b/src/device/Nurbs/nurbs_surface.cu index 0cd8e93..141882b 100644 --- a/src/device/Nurbs/nurbs_surface.cu +++ b/src/device/Nurbs/nurbs_surface.cu @@ -7,6 +7,7 @@ #include "utils.h" #include "device/Nurbs/bvh.cuh" #include "device/device_utils.cuh" +#include "tinynurbs/tinynurbs.h" __global__ void NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, @@ -266,8 +267,8 @@ NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const } -__host__ NurbsSurface::Surface::Surface(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v) { +__host__ NurbsSurface::Surface::Surface(MeshPoints4 controlPoints, std::vector knots_u, + std::vector knots_v) { this->knots_u = std::move(knots_u); this->knots_v = std::move(knots_v); this->controlPoints = std::move(controlPoints); @@ -284,24 +285,22 @@ __host__ NurbsSurface::Surface::Surface(std::vector>> -NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { - if (POINT_SIZE != controlPoints[0][0].size()) { - printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); - return {}; - } - +__host__ void NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { // 构造指向device的controlPoints const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); - const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); + const int pointsBytes = pointsCnt_u * pointsCnt_v * sizeof(glm::vec4); auto *h_points = (float *) malloc(pointsBytes); for (int i = 0; i < pointsCnt_u; i++) { for (int j = 0; j < pointsCnt_v; j++) { for (int k = 0; k < POINT_SIZE; k++) { h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; } +// printf("%f/ %f/ %f/ %f ", controlPoints[i][j][0], controlPoints[i][j][1], controlPoints[i][j][2], controlPoints[i][j][3]); } } cudaMalloc((void **) &d_points, pointsBytes); @@ -314,6 +313,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i]; for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i]; + safeCudaFree(d_knots_u); + safeCudaFree(d_knots_v); cudaMalloc((void **) &d_knots_u, knotsBytes_u); cudaMalloc((void **) &d_knots_v, knotsBytes_v); cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice); @@ -330,10 +331,10 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { // 结果数组 size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float); - + safeCudaFree(d_evaluationRes); cudaMalloc((void **) &d_evaluationRes, resBytes); - auto *h_res = (float *) malloc(resBytes); - + safeFree(h_evaluations); + h_evaluations = (float *) malloc(resBytes); // 构造g_basisTexture线程层级 dim3 blockBasis(512); @@ -354,8 +355,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { cudaDeviceSynchronize(); g_evaluate <<>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, - POINT_SIZE, - knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); + POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, + sampleCnt_v); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { time_cost_device = get_time() - time_cost_device; @@ -363,34 +364,58 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { time_cost_device); } - cudaMemcpy(h_res, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost); - std::vector>> res(sampleCnt_u, std::vector>(sampleCnt_v, - std::vector(3, - 0.))); + cudaMemcpy(h_evaluations, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost); + + + // 释放内存 + safeFree(h_points); + safeFree(h_knots_u); + safeFree(h_knots_v); +} + + +__host__ std::vector> +NurbsSurface::Surface::getEvaluateVec(int sampleCnt_u, int sampleCnt_v) const { + std::vector> res(sampleCnt_u, std::vector(sampleCnt_v, glm::vec3())); for (int i = 0; i < sampleCnt_u; i++) { int baseIdx = i * sampleCnt_v * 3; for (int j = 0; j < sampleCnt_v; j++) { baseIdx += j * 3; - res[i][j][0] = h_res[baseIdx]; - res[i][j][1] = h_res[baseIdx + 1]; - res[i][j][2] = h_res[baseIdx + 2]; + res[i][j].x = h_evaluations[baseIdx]; + res[i][j].y = h_evaluations[baseIdx + 1]; + res[i][j].z = h_evaluations[baseIdx + 2]; // printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0], res[i][j][1], res[i][j][2]); } } - - // 释放内存 - safeFree(h_points); - safeFree(h_knots_u); - safeFree(h_knots_v); - safeFree(h_res); - return res; } -__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) { - // 先完成evaluation - evaluate(sampleCnt_u, sampleCnt_v); +__host__ std::vector NurbsSurface::Surface::getDerivativeVec(int sampleCnt_u, int sampleCnt_v) const { + MeshPoints3 der_u(sampleCnt_u, LinePoints3(sampleCnt_v)); + MeshPoints3 der_v(sampleCnt_u, LinePoints3(sampleCnt_v)); + MeshPoints3 normal(sampleCnt_u, LinePoints3(sampleCnt_v)); + for (int i = 0; i < sampleCnt_u; i++) { + int baseIdx = i * sampleCnt_v * 6; + for (int j = 0; j < sampleCnt_v; j++) { + baseIdx += j * 6; + der_u[i][j].x = h_derivatives[baseIdx]; + der_u[i][j].y = h_derivatives[baseIdx + 1]; + der_u[i][j].z = h_derivatives[baseIdx + 2]; + der_v[i][j].x = h_derivatives[baseIdx + 3]; + der_v[i][j].y = h_derivatives[baseIdx + 4]; + der_v[i][j].z = h_derivatives[baseIdx + 5]; + auto baseIdxNorm = baseIdx / 2; + normal[i][j].x = h_normals[baseIdxNorm]; + normal[i][j].y = h_normals[baseIdxNorm + 1]; + normal[i][j].z = h_normals[baseIdxNorm + 2]; + // TODO normalize 归一化在gpu中实现! + normal[i][j] = glm::normalize(normal[i][j]); + } + } + return {der_u, der_v, normal}; +} +__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) { if (POINT_SIZE != controlPoints[0][0].size()) { printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); return; @@ -405,13 +430,13 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v // 构造切向量计算结果 safeCudaFree(d_derivatives); - cudaMalloc((void **) &d_derivatives, - sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采样所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 + size_t derBytes = sampleCnt_u * sampleCnt_v * 6 * sizeof(float); + cudaMalloc((void **) &d_derivatives, derBytes); // 每个采样所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 // 构造法向量计算结果 safeCudaFree(d_normals); - cudaMalloc((void **) &d_normals, - sampleCnt_v * sampleCnt_u * 3 * sizeof(float)); + size_t normalBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float); + cudaMalloc((void **) &d_normals, normalBytes); // 构造线程层级 dim3 block(32, 32); @@ -437,13 +462,22 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v, time_cost_device); } + + // 结果数组 + safeFree(h_normals); + h_normals = (float *) malloc(normalBytes); + cudaMemcpy(h_normals, d_normals, normalBytes, cudaMemcpyDeviceToHost); + + safeFree(h_derivatives); + h_derivatives = (float *) malloc(derBytes); + cudaMemcpy(h_derivatives, d_derivatives, derBytes, cudaMemcpyDeviceToHost); + cudaFree(d_derTexture_u); cudaFree(d_derTexture_v); } __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) { - // 先计算切向量 - derivative(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); return; @@ -491,17 +525,18 @@ NurbsSurface::Surface::~Surface() { safeCudaFree(d_normals); safeCudaFree(d_derivatives); safeFree(bvh.nodes); + safeFree(h_evaluations); + safeFree(h_normals); + safeFree(h_derivatives); cudaDeviceReset(); } -__host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) { +__host__ void NurbsSurface::Surface::buildBVH(int layerCnt, bool useK) { + // TODO 构造BVH的函数不应该出现在NURBS Surface中,应该是BVH类的事情! + // TODO NURBS Surface只需要一个函数去调用BVH对象的方法即可 int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u; - if (useK) { - // 先计算曲率,过程中也会计算曲面值 - curvature(sampleCnt_u, sampleCnt_v); - } else { - // 只做evaluation - evaluate(sampleCnt_u, sampleCnt_v); + if (!useK) { + // 必须safeFree一下,这样global函数中才能通过d_k = nullptr知道不需要再free safeCudaFree(d_k); } if (POINT_SIZE != controlPoints[0][0].size()) { @@ -521,14 +556,15 @@ __host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) { // 记录用时 double time_cost_device; if (recordTime) time_cost_device = get_time(); - buildBvh<<>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1], - knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh); + g_buildBvh<<>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh); cudaDeviceSynchronize(); if (recordTime) { time_cost_device = get_time() - time_cost_device; printf("GPU time cost of a %d-layer BVH building: %lf\n", bvh.maxLevel, time_cost_device); } + // 将bvh拷贝到cpu中 safeFree(bvh.nodes); bvh.nodes = (BVHNode *) malloc(bvhBytes); cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost); @@ -537,9 +573,9 @@ __host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) { } __host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) { + // TODO,构造GAUSS Map的函数不应该出现在NURBS Surface中,应该是GAUSS Map类的事情! int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u; - // 先计算法向量 - derivative(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); return; @@ -556,8 +592,8 @@ __host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) { // 记录用时 double time_cost_device; if (recordTime) time_cost_device = get_time(); - buildBvh<<>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1], - knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree); + g_buildBvh<<>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree); cudaDeviceSynchronize(); if (recordTime) { time_cost_device = get_time() - time_cost_device; @@ -603,16 +639,24 @@ __host__ void NurbsSurface::recursiveGetOverlapLeafNodes(const BVH &bvh1, const } } -__host__ std::vector> + +__host__ std::vector NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) { - std::vector> resPairs; + std::vector resPairs; // 记录用时 double time_cost_device = get_time(); + recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs); + std::vector boxPairsIdx2(resPairs.size()); + for (int i = 0; i < resPairs.size(); i++) { + boxPairsIdx2[i] = {{bvh1.nodes[resPairs[i].first].idx_u, bvh1.nodes[resPairs[i].first].idx_v}, + {bvh2.nodes[resPairs[i].second].idx_u, bvh2.nodes[resPairs[i].second].idx_v}}; + } + time_cost_device = get_time() - time_cost_device; printf("CPU time cost for recursively calculating the overlapped leaf nodes: %lf\n", time_cost_device); - return resPairs; + return boxPairsIdx2; } __host__ bool @@ -631,8 +675,10 @@ NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair &idxRange_u, const std::pair idxRange_v) { + // 获取某个范围的gauss map的aabb AABB bounding; for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) { for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) { @@ -670,3 +716,69 @@ __host__ bool NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2 auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt); return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2, idxRange_v2); } + +__host__ void +NurbsSurface::Surface::recursiveGetRayBVHIntersection(const glm::vec3 dir, const glm::vec3 startPoint, const int idx, + std::vector &intersectionLeafNodes) { + auto bvhNode = bvh.nodes[idx]; + // 射线与AABB判交 + auto isRayBoxIntersect = [&]() { + const auto &box = bvhNode.bounds; + float t; // 射线的参数,当t<=0,表示线面交点不在视平面前方,视为没有交点 + if (dir.x != 0.) { + // 当x分量不为0,则射线会与AABB中垂直于x轴的平面可能有交 + // 注意x的正负。x为正会先遇到较小点所在平面 + if (dir.x > 0) t = (box.pMin.x - startPoint.x) / dir.x; + else t = (box.pMax.x - startPoint.x) / dir.x; + if (t > 0.) { + // 射线与平面在前方有交点。但交点不一定在盒子上 + auto tmpPt = startPoint + t * dir; // 交点 + if (box.pMin.y <= tmpPt.y && box.pMin.z <= tmpPt.z && box.pMax.y >= tmpPt.y && box.pMax.z >= tmpPt.z) + return true; + } + } + if (dir.y != 0.) { + // 同上测试y方向 + if (dir.y > 0) t = (box.pMin.y - startPoint.y) / dir.y; + else t = (box.pMax.y - startPoint.y) / dir.y; + if (t > 0.) { + auto tmpPt = startPoint + t * dir; + if (box.pMin.x <= tmpPt.x && box.pMin.z <= tmpPt.z && box.pMax.x >= tmpPt.x && box.pMax.z >= tmpPt.z) + return true; + } + } + if (dir.z != 0.) { + // 同上测试z方向 + if (dir.z > 0) t = (box.pMin.z - startPoint.z) / dir.z; + else t = (box.pMax.z - startPoint.z) / dir.z; + if (t > 0.) { + auto tmpPt = startPoint + t * dir; + if (box.pMin.x <= tmpPt.x && box.pMin.y <= tmpPt.y && box.pMax.x >= tmpPt.x && box.pMax.y >= tmpPt.y) + return true; + } + } + return false; + }; + + // 不相交 + if (!isRayBoxIntersect()) return; + // 相交 + if (bvhNode.firstChild == -1) { + // 与叶节点相交 + intersectionLeafNodes.emplace_back(bvhNode); + } else { + // 与父节点相交 + for (int i = 0; i < 4; i++) + recursiveGetRayBVHIntersection(dir, startPoint, bvhNode.firstChild + i, intersectionLeafNodes); + } +} + +__host__ std::vector NurbsSurface::Surface::rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint) { + std::vector res; + recursiveGetRayBVHIntersection(dir, startPoint, 0, res); + //TODO sort res by t + +// res.emplace_back(BVHNode()); +// printf("res size: %lld\n", res.size()); + return res; +} \ No newline at end of file diff --git a/src/device/aabb.cu b/src/device/aabb.cu index c3b9038..68b5ccc 100644 --- a/src/device/aabb.cu +++ b/src/device/aabb.cu @@ -2,36 +2,34 @@ // Created by 14727 on 2022/12/11. // -#include "../../include/device/aabb.cuh" - -#define D_DBL_MAX 1.7976931348623158e+308 -#define D_FLT_MAX 3.402823466e+38F +#include "device/aabb.cuh" +#include "device/device_utils.cuh" __device__ __host__ AABB::AABB() { float minNum = -D_FLT_MAX; float maxNum = D_FLT_MAX; - pMax = FVec3(minNum, minNum, minNum); - pMin = FVec3(maxNum, maxNum, maxNum); + pMax = glm::vec3(minNum, minNum, minNum); + pMin = glm::vec3(maxNum, maxNum, maxNum); } -__device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {} +__device__ __host__ AABB::AABB(const glm::vec3 &p) : pMin(p), pMax(p) {} -__device__ __host__ AABB::AABB(const FVec3 &p1, const FVec3 &p2) { - pMin = FVec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z)); - pMax = FVec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z)); +__device__ __host__ AABB::AABB(const glm::vec3 &p1, const glm::vec3 &p2) { + pMin = glm::vec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z)); + pMax = glm::vec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z)); } -__device__ __host__ AABB AABB::Union(const FVec3 &p) { - return {FVec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)), - FVec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))}; +__device__ __host__ AABB AABB::Union(const glm::vec3 &p) const { + return {glm::vec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)), + glm::vec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))}; } -__device__ __host__ AABB AABB::Union(const AABB &b2) { - return {FVec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)), - FVec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))}; +__device__ __host__ AABB AABB::Union(const AABB &b2) const { + return {glm::vec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)), + glm::vec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))}; } -__device__ __host__ bool AABB::IsOverlap(const AABB& b2) { +__device__ __host__ bool AABB::IsOverlap(const AABB &b2) const { if (pMin.x > b2.pMax.x) return false; if (b2.pMin.x > pMax.x) return false; if (pMin.y > b2.pMax.y) return false; diff --git a/src/device/device_utils.cu b/src/device/device_utils.cu index 29b9aae..71388cb 100644 --- a/src/device/device_utils.cu +++ b/src/device/device_utils.cu @@ -2,7 +2,7 @@ // Created by 14727 on 2022/12/9. // -#include "../../include/device/device_utils.cuh" +#include "device/device_utils.cuh" __device__ void d_safeFree(float *&p) { if (p != nullptr) { diff --git a/src/device/srf_mesh.cu b/src/device/srf_mesh.cu new file mode 100644 index 0000000..f56a997 --- /dev/null +++ b/src/device/srf_mesh.cu @@ -0,0 +1,4 @@ +#include "device/srf_mesh.cuh" + +SrfMesh::SrfMesh(glm::vec3 *evalRes, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *nor, int edgeSampleCnt) : + evaluationRes(evalRes), tangent_u(tan_u), tangent_v(tan_v), normal(nor), edgeSampleCnt(edgeSampleCnt) {} diff --git a/src/device/vec.cu b/src/device/vec.cu index d353446..c8da897 100644 --- a/src/device/vec.cu +++ b/src/device/vec.cu @@ -2,7 +2,7 @@ // Created by 14727 on 2022/12/11. // -#include "../../include/device/vec.cuh" +#include "device/vec.cuh" __device__ __host__ FVec3::FVec3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} diff --git a/src/utils.cpp b/src/utils.cpp index ee5d15f..2ffdfaf 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,4 +1,4 @@ -#include "../include/utils.h" +#include "utils.h" #include "cuda_runtime.h" #if IN_UNIX diff --git a/tests/main.cpp b/tests/main.cpp index 29ee133..73ccbcd 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -1,6 +1,7 @@ #include -#include "../include/device/Nurbs/nurbs_curve.cuh" -#include "../include/device/Nurbs/nurbs_surface.cuh" +#include "device/Nurbs/nurbs_curve.cuh" +#include "device/Nurbs/nurbs_surface.cuh" +#include "device/Nurbs/loop_detection.cuh" int main() { // NurbsSurface::Surface nurbsSurfaceEvaluator({ @@ -28,28 +29,50 @@ int main() { // {{-0.61, 0.22, 0.09, 1}, {-0.21, 0.20, 0.09, 1}, {0.13, 0.20, 0.12, 1}, {0.49, 0.19, -0.03, 1}}, // {{-0.52, 0.51, 0.13, 1}, {-0.20, 0.32, 0.36, 1}, {0.18, 0.29, 0.28, 1}, {0.51, 0.53, 0.12, 1}} // }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); +// NurbsSurface::Surface nurbsSurfaceEvaluator({ +// {{2, -2, -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2, -0.5, 4}, {-2, -2, 1.5, 2}}, +// {{2, -1, -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1, -0.5, 1}, {-2, -1, 1.5, 1}}, +// {{3, 1.2, -2, 9}, {-2.5, 1.2, -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}}, +// {{2, 2, -2, 1}, {-2.5, 2, -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2, 1.5, 1}} +// }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); +// NurbsSurface::Surface nurbsSurfaceEvaluator({ +// {{-0.5, -0.5, -0.2, 1}, {-0.2, -0.5, -0.1, 1}, {0.2, -0.5, -0.1, 1}, {0.5, -0.5, -0.2, 1}}, +// {{-0.5, -0.2, -0.1, 1}, {-0.2, -0.2, 0., 1}, {0.2, -0.2, 0., 1}, {0.5, -0.2, -0.1, 1}}, +// {{-0.5, 0.2, -0.1, 1}, {-0.2, 0.2, 0., 1}, {0.2, 0.2, 0., 1}, {0.5, 0.2, -0.1, 1}}, +// {{-0.5, 0.5, -0.2, 1}, {-0.2, 0.5, -0.1, 1}, {0.2, 0.5, -0.1, 1}, {0.5, 0.5, -0.2, 1}} +// }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); NurbsSurface::Surface nurbsSurfaceEvaluator({ - {{2, -2, -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2, -0.5, 4}, {-2, -2, 1.5, 2}}, - {{2, -1, -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1, -0.5, 1}, {-2, -1, 1.5, 1}}, - {{3, 1.2, -2, 9}, {-2.5, 1.2, -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}}, - {{2, 2, -2, 1}, {-2.5, 2, -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2, 1.5, 1}} - }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); + {{0, 0, 0.2, 1.1}, {0, 0.2, 0.3, 0.9}, {0, 0.4, 0.25, 1.1}, {0, 0.6, 0.12, 0.9}, {0, 0.8, 0.22, 1}, {0, 1, 0.12, 1}}, + {{0.2, 0, 0.22, 1}, {0.2, 0.2, -0.3, 1}, {0.2, 0.4, 0.4, 1}, {0.2, 0.6, -0.1, 1.1}, {0.2, 0.8, -0.1, 1}, {0.2, 1, 0.2, 0.9}}, + {{0.4, 0, 0.18, 0.9}, {0.4, 0.2, -0.3, 1}, {0.4, 0.4, 0.4, 1.1}, {0.4, 0.6, -0.1, 1}, {0.4, 0.8, -0.1, 0.9}, {0.4, 1, 0.23, 1}}, + {{0.6, 0, 0.2, 0.9}, {0.6, 0.2, 0.4, 0.9}, {0.6, 0.4, 0.4, 1}, {0.6, 0.6, 0.4, 0.9}, {0.6, 0.8, 0.4, 1}, {0.6, 1, 0.2, 1}}, + {{0.8, 0, 0.19, 1.1}, {0.8, 0.2, -0.4, 1}, {0.8, 0.4, 0.4, 1.1}, {0.8, 0.6, -0.27, 1}, {0.8, 0.8, -0.27, 1}, {0.8, 1, 0.2, 0.9}}, + {{1, 0, 0.2, 1.1}, {1, 0.2, 0.12, 1}, {1, 0.4, 0.22, 1}, {1, 0.6, 0.32, 1}, {1, 0.8, 0.22, 1.2}, {1, 1, 0.12, 1.1}} + }, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1}); nurbsSurfaceEvaluator.setRecordTime(true); - nurbsSurfaceEvaluator.buildGaussMap(9); // 构建曲面的Gauss map +// nurbsSurfaceEvaluator.buildGaussMap(6); // 构建曲面的Gauss map // nurbsSurfaceEvaluator.gauss_map.printQuadTree(); // 打印整棵Gauss map树 - auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map, - nurbsSurfaceEvaluator.gauss_map); - printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size()); +// auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map, +// nurbsSurfaceEvaluator.gauss_map); +// printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size()); + //判定两个Surface构造的GaussMap在指定u、v范围内是否有重合 - auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map, - nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3}, - {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}); - printf("is guass map overlapped: %d\n", isRegionallyOverlapped); +// auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map, +// nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3}, +// {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1}, +// {0, 1}, {0, 1}, {0, 1}); +// printf("is guass map overlapped: %d\n", isRegionallyOverlapped); + + int level = 8; + int edgeSampleCnt = pow(2, level - 1) + 1; + nurbsSurfaceEvaluator.evaluate(edgeSampleCnt, edgeSampleCnt); + nurbsSurfaceEvaluator.derivative(edgeSampleCnt, edgeSampleCnt); + // 如果buildBVH需要曲率K,这里需要再先: nurbsSurfaceEvaluator.curvature(edgeSampleCnt, edgeSampleCnt); + // 同时buildBVH传入第二个参数 + nurbsSurfaceEvaluator.buildBVH(level); // 构建曲面的BVH + auto derInfo = nurbsSurfaceEvaluator.getDerivativeVec(edgeSampleCnt, edgeSampleCnt); // 求值 -// nurbsSurfaceEvaluator.buildBHV(9); // 构建曲面的BVH -// nurbsSurfaceEvaluator.evaluate(101, 101); // 求值 printf("==============================\n"); NurbsCurve::Curve nurbsCurveEvaluator( @@ -62,6 +85,8 @@ int main() { {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}); nurbsCurveEvaluator.setRecordTime(true); + + auto res = nurbsCurveEvaluator.evaluate(11); for (auto &re: res) { printf("%f, %f, %f\n", re[0], re[1], re[2]);