gauss map

4 years ago · b85f521791
11 changed files with 367 additions and 125 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,24 +3,27 @@ project(NurbsPerformer LANGUAGES CXX CUDA)

 set(CMAKE_CUDA_STANDARD 14)

-add_executable(NurbsPerformer src/main.cpp
-        src/utils.cpp include/utils.h
-        src/device/Nurbs/nurbs_common.cu include/device/Nurbs/nurbs_common.cuh
-        src/device/device_utils.cu include/device/device_utils.cuh
-        src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh
-        src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh
-        src/device/aabb.cu include/device/aabb.cuh
-        src/device/vec.cu include/device/vec.cuh
-        src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh
+set(PROJECT_SOURCES
+        tests/main.cpp
+        src/utils.cpp                       include/utils.h
+        src/device/aabb.cu                  include/device/aabb.cuh
+        src/device/vec.cu                   include/device/vec.cuh
+        src/device/device_utils.cu          include/device/device_utils.cuh
+        src/device/Nurbs/nurbs_common.cu    include/device/Nurbs/nurbs_common.cuh
+        src/device/Nurbs/nurbs_surface.cu   include/device/Nurbs/nurbs_surface.cuh
+        src/device/Nurbs/nurbs_curve.cu     include/device/Nurbs/nurbs_curve.cuh
+        src/device/Nurbs/bvh.cu             include/device/Nurbs/bvh.cuh
        )

+add_executable(NurbsPerformer ${PROJECT_SOURCES})
+
 #add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
 #add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

 # 指定静态库位置
 #set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 #生成静态库
-#add_library(NurbsPerformer NurbsPerformer.cu NurbsPerformer.cuh utils.cpp utils.h)
+#add_library(NurbsPerformer ${PROJECT_SOURCES})

 # 引用系统环境变量CUDA_PATH
 # linux
@ -28,6 +31,8 @@ add_executable(NurbsPerformer src/main.cpp
 # windows
 include_directories("$ENV{CUDA_PATH}/include")

+include_directories(include)
+
 #MESSAGE("CUDA PATH::: $ENV{CUDA_PATH}")
 #MESSAGE("CUDA PATH::: $ENV{LD_LIBRARY_PATH}")
 #MESSAGE("CUDA PATH::: $ENV{CPATH}")
--- a/README.md
+++ b/README.md
@ -17,13 +17,13 @@ A tool for performing NURBS curve/surface modeling operations in parallel using
 1. CMakeLists.txt中注释以下代码，不再生成可执行文件

 ```cmake
-add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h)
+add_executable(NurbsPerformer ${PROJECT_SOURCES})
 ```
 2. CMakeLists.txt中取消注释以下代码，表示需要生成静态库。构建项目。

 ```cmake
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
-add_library(NurbsEvaluator src/utils.cpp include/utils.h)
+add_library(NurbsPerformer ${PROJECT_SOURCES})
 ```
 3. 原项目连接到生成的静态库目录。如果是由CMake构建，可以用 target_link_libraries 指向依赖(.lib)，用 target_include_directories 指向头文件目录。
 4. 使用.cu文件调用NurbsEvaluator
--- a/include/device/Nurbs/bvh.cuh
+++ b/include/device/Nurbs/bvh.cuh
@ -5,25 +5,31 @@
 #ifndef NURBSEVALUATOR_BVH_CUH
 #define NURBSEVALUATOR_BVH_CUH

-#include "../aabb.cuh"
+#include "device/aabb.cuh"

-struct BVHNode {
-    AABB bounds;  // AABB包围盒
-    int firstChild, level, childNum = 4;  // 第一个孩子节点的下标，该结点所在层次，孩子个数
-    // 曲面片u, v的范围
-    float u0, u1, v0, v1;
-};
+/**
+ * 当前层的第一个节点的坐标
+ * @param layer 层号。根节点为第一层
+ */
+__device__ __host__ int getStartIdxOfLayerN(int layer);
+
+/**
+ * 根据u、v参数域中的小矩形的位置，判断它在BVH叶节点层（也就是最后一层）中的位置
+ */
+__device__ int d_getChildNodeIdx(int ix, int iy);
+
+__host__ int h_getChildNodeIdx(int ix, int iy);

 class BVH {
 public:
    int maxLevel, size;
-    BVHNode* nodes = nullptr;
+    BVHNode *nodes = nullptr;

    __host__ void printQuadTree();
 };

 __global__ void
-buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float d_lastKnot_u, float d_lastKnot_v,
-                int d_sampleCnt_u, int d_sampleCnt_v, BVHNode *BVH);
+buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
+         int sampleCnt_u, int sampleCnt_v, BVHNode *BVH);

 #endif //NURBSEVALUATOR_BVH_CUH
--- a/include/device/Nurbs/nurbs_surface.cuh
+++ b/include/device/Nurbs/nurbs_surface.cuh
@ -4,6 +4,7 @@

 #ifndef NURBSEVALUATOR_NURBS_SURFACE_CUH
 #define NURBSEVALUATOR_NURBS_SURFACE_CUH
+
 #include <vector>
 #include "cuda_runtime.h"
 #include "bvh.cuh"
@ -15,18 +16,19 @@ namespace NurbsSurface {
     * @param d_pointSize 点的大小(3: [x, y, z] | 4：[x, y, z, w])
     */
    __global__ static void
-    g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u,
-               int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
-               int d_sampleCnt_v);
+    g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points,
+               int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v,
+               int d_sampleCnt_u, int d_sampleCnt_v);

    __global__ static void
-    g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u,
-                 const float *nTexture_v,
-                 const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u,
-                 float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v);
+    g_derivative(float *derivatives, float *normals, const float *derTexture_u, const float *derTexture_v,
+                 const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u,
+                 int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
+                 int d_sampleCnt_v);

    __global__ static void
-    g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v, float *ms, float*k);
+    g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v,
+                float *ms, float *k);

    class Surface {
    private:
@ -45,11 +47,12 @@ namespace NurbsSurface {

        float *d_evaluationRes;  // evaluation结果
        float *d_derivatives;  // 一阶导计算结果
+        float *d_normals;
        float *d_k;  // 最大曲率

-        BVH bvh;
-
    public:
+        BVH bvh;
+        BVH gauss_map;
        /**
         * 构造函数
         * @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3]
@ -57,7 +60,7 @@ namespace NurbsSurface {
         * @param knots_v v方向knots
         */
        __host__ explicit Surface(std::vector<std::vector<std::vector<float>>> controlPoints,
-                                    std::vector<float> knots_u, std::vector<float> knots_v);
+                                  std::vector<float> knots_u, std::vector<float> knots_v);

        /**
         * 供外部CPU程序使用的、负责调用gpu并行计算的方法
@ -81,15 +84,55 @@ namespace NurbsSurface {
        /**
         * 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法
         */
-        __host__ void buildBHV(int sampleCnt_u, int sampleCnt_v);
+        __host__ void buildBHV(int layerCnt, bool useK = false);
+
+        /**
+         * 供外部CPU程序使用的、负责调用gpu并行计算Gauss Map Tree的方法
+         */
+        __host__ void buildGaussMap(int layerCnt);
+

        void setRecordTime(bool r);

        ~Surface();

    };
-}

+    __host__ void recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2,
+                                                      std::vector<std::pair<int, int>> &pairs);
+
+    __host__ std::vector<std::pair<int, int>> getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2);
+
+    /**
+     * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交
+     * @param idxRange_u1 第一个gauss map的参数u范围对应的u方向上的采样网格的格子下标范围
+     * @param idxRange_v1 第一个gauss map的参数v范围对应的u方向上的采样网格的格子下标范围
+     * @param idxRange_u2 第而个gauss map的参数u范围对应的u方向上的采样网格的格子下标范围
+     * @param idxRange_v2 第二个gauss map的参数v范围对应的u方向上的采样网格的格子下标范围
+     * @return true:gauss map的包围盒有交集，说明gauss map<可能>有重合；false: gauss map一定没有重合
+     */
+    __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<int, int> idxRange_u1,
+                                               std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
+                                               std::pair<int, int> idxRange_v2);
+    /**
+     * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交
+     * @param range_u1 第一个gauss map的参数u范围
+     * @param range_v1 第一个gauss map的参数v范围
+     * @param range_u2 第二个gauss map的参数u范围
+     * @param range_v2 第二个gauss map的参数v范围
+     * @param paramRange_u1 第一个gauss map的参数u定义域
+     * @param paramRange_v1 第一个gauss map的参数v定义域
+     * @param paramRange_u2 第二个gauss map的参数u定义域
+     * @param paramRange_v2 第二个gauss map的参数v定义域
+     * @return true:gauss map的包围盒有交集，说明gauss map<可能>有重合；false: gauss map一定没有重合
+     */
+    __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<float, float> range_u1,
+                                               std::pair<float, float> range_v1, std::pair<float, float> range_u2,
+                                               std::pair<float, float> range_v2, std::pair<float, float> paramRange_u1,
+                                               std::pair<float, float> paramRange_v1,
+                                               std::pair<float, float> paramRange_u2,
+                                               std::pair<float, float> paramRange_v2);
+}


 #endif //NURBSEVALUATOR_NURBS_SURFACE_CUH
--- a/include/device/aabb.cuh
+++ b/include/device/aabb.cuh
@ -20,9 +20,17 @@ public:
    __device__ __host__ AABB Union(const FVec3& p);
    // aabb包围盒合并操作，两个包围盒
    __device__ __host__ AABB Union(const AABB& b2);
+    // aabb包围盒沿各坐标轴政府方向分别扩张
+    __device__ __host__ void expand(float k);
    // 判断两个aabb包围盒是否重叠
    __device__ __host__ bool IsOverlap(const AABB& b2);
 };

+struct BVHNode {
+    AABB bounds;  // AABB包围盒
+    int firstChild = -1, level = 0;  // 第一个孩子节点的下标，该结点所在层次，孩子个数
+    // 曲面片u, v的范围
+    float u0 = 0., u1 = 0., v0 = 0., v1 = 0.;
+};

 #endif //NURBSEVALUATOR_AABB_CUH
--- a/include/device/vec.cuh
+++ b/include/device/vec.cuh
@ -11,7 +11,6 @@ public:
    float x, y, z;
    __device__ __host__ FVec3(float x_, float y_, float z_);
    __device__ __host__ FVec3(FVec3 const &fVec3);
-
    __device__ __host__ FVec3();
 };

--- a/src/device/Nurbs/bvh.cu
+++ b/src/device/Nurbs/bvh.cu
@ -7,7 +7,7 @@
 * 当前层的第一个节点的坐标
 * @param layer 层号。根节点为第一层
 */
-__device__ int getStartIdxOfLayerN(int layer) {
+__device__ __host__ int getStartIdxOfLayerN(int layer) {
    if (layer == 1) return 0;
    int num = 1;
    // 找规律得出
@ -17,13 +17,7 @@ __device__ int getStartIdxOfLayerN(int layer) {
    return num;
 }

-/**
- * 根据u、v参数域中的小矩形的位置，判断它在BVH叶节点层（也就是最后一层）中的位置
- * @param ix
- * @param iy
- * @return
- */
-__device__ int getChildNodeIdx(int ix, int iy) {
+__device__ int d_getChildNodeIdx(int ix, int iy) {
    int idx = 0;
    while(ix > 0) {
        int log2XCeil = int(__log2f(ix));
@ -38,9 +32,23 @@ __device__ int getChildNodeIdx(int ix, int iy) {
    return idx;
 }

+__host__ int h_getChildNodeIdx(int ix, int iy){
+    int idx = 0;
+    while(ix > 0) {
+        int log2XCeil = int(log2f(ix));
+        idx += int(pow(4, log2XCeil));
+        ix -= int(pow(2, log2XCeil));
+    }
+    while(iy > 0) {
+        int log2XCeil = int(log2f(iy));
+        idx += 2 * int(pow(4, log2XCeil));
+        iy -= int(pow(2, log2XCeil));
+    }
+    return idx;
+}

 __global__ void
-buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
+buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
                int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) {
    // 假设还是二维grid和二维的block
    int ix = blockIdx.x * blockDim.x + threadIdx.x;
@ -53,7 +61,7 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l

    float u = singleStepVal_u * ix, v = singleStepVal_v * iy;

-    int bias = getChildNodeIdx(ix, iy);  // 每层的节点在总数组中的编号相对该层第一个节点的偏置
+    int bias = d_getChildNodeIdx(ix, iy);  // 每层的节点在总数组中的编号相对该层第一个节点的偏置
    for (int step = 1; step <= sampleCnt_u - 1 || step <= sampleCnt_v - 1; step = step * 2, --level) {

        float step_u = min(step, sampleCnt_u - 1);
@ -63,43 +71,32 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l
        int idx = baseIdx + bias;

        int tmpIdx = (ix * sampleCnt_v + iy) * 3;
-//        AABB bound(FVec3(evaluatedPoints[tmpIdx] + *k, evaluatedPoints[tmpIdx + 1] + *k,
-//                         evaluatedPoints[tmpIdx + 2] + *k));  // 最初只包含u，v点
-        AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1],
-                         evaluatedPoints[tmpIdx + 2]));  // 最初只包含u，v点
-        if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]);
+
+//        if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]);
        if (step == 1) {
+            AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1],
+                             evaluatedPoints[tmpIdx + 2]));  // 最初只包含u，v点
            // 叶子节点
            BVH[idx].u0 = u;
            BVH[idx].u1 = u + step_u * singleStepVal_u;
            BVH[idx].v0 = v;
            BVH[idx].v1 = v + step_v * singleStepVal_v;
            BVH[idx].level = level;
-//            int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3;
-//            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1] + *k, evaluatedPoints[tmpIdx1 + 1] + *k,
-//                                      evaluatedPoints[tmpIdx1 + 2] + *k));
-//            int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
-//            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2] + *k, evaluatedPoints[tmpIdx2 + 1] + *k,
-//                                      evaluatedPoints[tmpIdx2 + 2] + *k));
-//            int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
-//            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3] + *k, evaluatedPoints[tmpIdx3 + 1] + *k,
-//                                      evaluatedPoints[tmpIdx3 + 2] + *k));
            int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3;
+            int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
+            int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1],
                                      evaluatedPoints[tmpIdx1 + 2]));
-            int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1],
                                      evaluatedPoints[tmpIdx2 + 2]));
-            int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1],
                                      evaluatedPoints[tmpIdx3 + 2]));
-
+            if(k != nullptr) bound.expand(*k);
            BVH[idx].bounds = bound;
-            BVH[idx].childNum = -1;
-
-
+            BVH[idx].firstChild = -1;
        } else {
            if (ix % step == 0 || iy % step == 0) {
+
                // 非叶子节点
                BVH[idx].u0 = u;
                BVH[idx].u1 = u + step_u * singleStepVal_u;
@ -108,7 +105,7 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l
                BVH[idx].level = level;

                int firstChild = 4 * idx + 1;
-                bound = bound.Union(BVH[firstChild].bounds);
+                AABB bound(BVH[firstChild].bounds);
                bound = bound.Union(BVH[firstChild + 1].bounds);
                bound = bound.Union(BVH[firstChild + 2].bounds);
                bound = bound.Union(BVH[firstChild + 3].bounds);
@ -123,6 +120,16 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l
    }
 }

+__global__ void getOverlapLeafNodes(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
+                                    int sampleCnt_u, int sampleCnt_v, BVHNode *BVH){
+    // 假设还是二维grid和二维的block
+//    int ix = blockIdx.x * blockDim.x + threadIdx.x;
+//    int iy = blockIdx.y * blockDim.y + threadIdx.y;
+//    if (ix >= sampleCnt_u - 1 || iy >= sampleCnt_v - 1) {
+//        return;
+//    }
+}
+
 __host__ void BVH::printQuadTree() {

    for (int l = 1, levelNodesCnt = 1, baseIdx = 0; l <= maxLevel; l++, levelNodesCnt *= 4, baseIdx = baseIdx * 4 + 1) {
--- a/src/device/Nurbs/nurbs_surface.cu
+++ b/src/device/Nurbs/nurbs_surface.cu
@ -2,17 +2,17 @@
 // Created by 14727 on 2022/12/9.
 //

-#include "../../../include/device/Nurbs/nurbs_surface.cuh"
-#include "../../../include/device/Nurbs/nurbs_common.cuh"
-#include "../../../include/utils.h"
-#include "../../../include/device/Nurbs/bvh.cuh"
+#include "device/Nurbs/nurbs_surface.cuh"
+#include "device/Nurbs/nurbs_common.cuh"
+#include "utils.h"
+#include "device/Nurbs/bvh.cuh"
+#include "device/device_utils.cuh"

 __global__ void
 NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points,
                         int d_pointsCnt_u,
                         int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
                         int d_sampleCnt_v) {
-
    // 二维grid和二维的block
    int ix = blockIdx.x * blockDim.x + threadIdx.x;
    int iy = blockIdx.y * blockDim.y + threadIdx.y;
@ -51,7 +51,7 @@ NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_n


 __global__ void
-NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v,
+NurbsSurface::g_derivative(float *derivatives, float *normals, const float *derTexture_u, const float *derTexture_v,
                           const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u,
                           int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v,
                           int d_sampleCnt_u, int d_sampleCnt_v) {
@ -118,12 +118,16 @@ NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const
    derivatives[baseIdx + 4] = pdy_v;
    derivatives[baseIdx + 5] = pdz_v;

-    float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n =
-            pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量
-    if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) ||
-        (ix == 8 && iy == 10))
-        printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v,
-               pdz_v, x_n, y_n, z_n);
+    // 叉乘得到法向量
+    baseIdx = (ix * d_sampleCnt_v + iy) * 3;
+    normals[baseIdx] = pdy_u * pdz_v - pdy_v * pdz_u;
+    normals[baseIdx + 1] = pdx_v * pdz_u - pdx_u * pdz_v;
+    normals[baseIdx + 2] = pdx_u * pdy_v - pdx_v * pdy_u;
+    normalization(normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]);
+//    if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) ||
+//        (ix == 8 && iy == 10))
+//        printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v,
+//               pdz_v, normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]);
 }

 __global__ void
@ -210,20 +214,18 @@ NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const
 //    normalization(uvx, uvy, uvz);
 //    normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv);

-
-    if (ix == 8 && iy == 9) {
-        printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu,
-               sndPdz_uu,
-               uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv);
-        printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu,
-               sndPdz_vu);
-    }
+//    if (ix == 8 && iy == 9) {
+//        printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu,
+//               sndPdz_uu,
+//               uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv);
+//        printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu,
+//               sndPdz_vu);
+//    }

    float m1 = max(max(sndPdx_uu, sndPdy_uu), sndPdz_uu);
    float m2 = max(max(uvx, uvy), uvz);
    float m3 = max(max(sndPdx_vv, sndPdy_vv), sndPdz_vv);

-
 //    __shared__ float ms[363];
    ms[(ix * sampleCnt_v + iy) * 3] = m1;
    ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2;
@ -280,6 +282,7 @@ __host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<floa
    d_evaluationRes = nullptr;
    d_derivatives = nullptr;
    d_k = nullptr;
+    d_normals = nullptr;
    bvh.nodes = nullptr;
 }

@ -342,6 +345,7 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
    // 记录用时
    double time_cost_device;
+    if (recordTime) time_cost_device = get_time();
    g_basisTexture<<<gridBasis_u, blockBasis>>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u,
                                                sampleCnt_u);
    cudaDeviceSynchronize();
@ -349,7 +353,6 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
                                                sampleCnt_v);
    cudaDeviceSynchronize();

-    if (recordTime) time_cost_device = get_time();
    g_evaluate <<<grid, block>>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v,
                                 POINT_SIZE,
                                 knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v);
@ -403,7 +406,12 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v
    // 构造切向量计算结果
    safeCudaFree(d_derivatives);
    cudaMalloc((void **) &d_derivatives,
-               sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量，前三位是对u的偏导、后三位是对v的偏导
+               sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采样所求的切向量是一个六元向量，前三位是对u的偏导、后三位是对v的偏导
+
+    // 构造法向量计算结果
+    safeCudaFree(d_normals);
+    cudaMalloc((void **) &d_normals,
+               sampleCnt_v * sampleCnt_u * 3 * sizeof(float));

    // 构造线程层级
    dim3 block(32, 32);
@ -420,11 +428,9 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v
    g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v,
                                          sampleCnt_v);
    cudaDeviceSynchronize();
-    g_derivative<<<grid, block>>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points,
-                                  pointsCnt_u,
-                                  pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1],
-                                  sampleCnt_u,
-                                  sampleCnt_v);
+    g_derivative<<<grid, block>>>(d_derivatives, d_normals, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v,
+                                  d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1],
+                                  knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v);
    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
    if (recordTime) {
        time_cost_device = get_time() - time_cost_device;
@ -462,9 +468,8 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v)
    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
    if (recordTime) {
        time_cost_device = get_time() - time_cost_device;
-        printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n",
-               sampleCnt_u * sampleCnt_v,
-               time_cost_device);
+        printf("GPU time cost of surface curvature calculating for %d samples: %lf\n",
+               sampleCnt_u * sampleCnt_v, time_cost_device);
    }
    safeCudaFree(ms);
 }
@ -483,13 +488,22 @@ NurbsSurface::Surface::~Surface() {
    safeCudaFree(d_knots_v);
    safeCudaFree(d_k);
    safeCudaFree(d_evaluationRes);
+    safeCudaFree(d_normals);
+    safeCudaFree(d_derivatives);
    safeFree(bvh.nodes);
    cudaDeviceReset();
 }

-__host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v) {
-    // 先计算曲率
-    curvature(sampleCnt_u, sampleCnt_v);
+__host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) {
+    int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
+    if (useK) {
+        // 先计算曲率，过程中也会计算曲面值
+        curvature(sampleCnt_u, sampleCnt_v);
+    } else {
+        // 只做evaluation
+        evaluate(sampleCnt_u, sampleCnt_v);
+        safeCudaFree(d_k);
+    }
    if (POINT_SIZE != controlPoints[0][0].size()) {
        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
        return;
@ -498,25 +512,161 @@ __host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v)
    dim3 block(32, 32);
    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);

-    bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1);
+//    bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1);
+    bvh.maxLevel = layerCnt;
    bvh.size = (pow(4, bvh.maxLevel) - 1) / 3;  // 等比数列求和公示求出数总的节点数
    size_t bvhBytes = sizeof(BVHNode) * bvh.size;
    BVHNode *d_bvh = nullptr;
    cudaMalloc((void **) &d_bvh, bvhBytes);
    // 记录用时
-    double time_cost_device = get_time();
-    buildSurfaceBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
-                                     knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
+    double time_cost_device;
+    if (recordTime) time_cost_device = get_time();
+    buildBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
+                              knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
    cudaDeviceSynchronize();
    if (recordTime) {
        time_cost_device = get_time() - time_cost_device;
-        printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n",
-               sampleCnt_u * sampleCnt_v,
-               time_cost_device);
+        printf("GPU time cost of a %d-layer BVH building: %lf\n",
+               bvh.maxLevel, time_cost_device);
    }
-    if (bvh.nodes == nullptr) bvh.nodes = (BVHNode *) malloc(bvhBytes);
+    safeFree(bvh.nodes);
+    bvh.nodes = (BVHNode *) malloc(bvhBytes);
    cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost);
    safeCudaFree(d_bvh);
-
 //    bvh.printQuadTree();
 }
+
+__host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) {
+    int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
+    // 先计算法向量
+    derivative(sampleCnt_u, sampleCnt_v);
+    if (POINT_SIZE != controlPoints[0][0].size()) {
+        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
+        return;
+    }
+    // 构造线程层级
+    dim3 block(32, 32);
+    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
+
+    gauss_map.maxLevel = layerCnt;
+    gauss_map.size = (pow(4, layerCnt) - 1) / 3;  // 等比数列求和公示求出数总的节点数
+    size_t gaussMapBytes = sizeof(BVHNode) * gauss_map.size;
+    BVHNode *d_gaussMapTree = nullptr;
+    cudaMalloc((void **) &d_gaussMapTree, gaussMapBytes);
+    // 记录用时
+    double time_cost_device;
+    if (recordTime) time_cost_device = get_time();
+    buildBvh<<<grid, block>>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1],
+                              knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree);
+    cudaDeviceSynchronize();
+    if (recordTime) {
+        time_cost_device = get_time() - time_cost_device;
+        printf("GPU time cost of a %d-layer Gauss Map building: %lf\n",
+               layerCnt, time_cost_device);
+    }
+    safeFree(gauss_map.nodes);
+    gauss_map.nodes = (BVHNode *) malloc(gaussMapBytes);
+    cudaMemcpy(gauss_map.nodes, d_gaussMapTree, gaussMapBytes, cudaMemcpyDeviceToHost);
+    safeCudaFree(d_gaussMapTree);
+}
+
+__host__ void NurbsSurface::recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2,
+                                                         std::vector<std::pair<int, int>> &pairs) {
+    auto A = bvh1.nodes[idx1];
+    auto B = bvh2.nodes[idx2];
+    auto AABBSize = [](const AABB &aabb) {
+        return (aabb.pMax.z - aabb.pMin.z) *
+               (aabb.pMax.y - aabb.pMin.y) *
+               (aabb.pMax.x - aabb.pMin.x);
+    };
+    // 两个包围盒不相交，返回
+    if (!A.bounds.IsOverlap(B.bounds)) return;
+    // 相交
+    if (A.firstChild == -1 && B.firstChild == -1) {
+        // 两者都是叶子节点
+        pairs.emplace_back(idx1, idx2);
+    } else if (A.firstChild != -1 && B.firstChild == -1) {
+        // A是中间结点，B是叶子结点
+        for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
+    } else if (A.firstChild == -1 && B.firstChild != -1) {
+        // A是叶子结点，B是中间结点
+        for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
+    } else {
+        // 都是中间结点
+        if (AABBSize(A.bounds) > AABBSize(B.bounds)) {
+            // A的包围盒更大
+            for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
+        } else {
+            // B的包围盒更大
+            for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
+        }
+    }
+}
+
+__host__ std::vector<std::pair<int, int>>
+NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) {
+    std::vector<std::pair<int, int>> resPairs;
+    // 记录用时
+    double time_cost_device = get_time();
+    recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs);
+    time_cost_device = get_time() - time_cost_device;
+    printf("CPU time cost for recursively calculating the overlapped leaf nodes: %lf\n", time_cost_device);
+
+    return resPairs;
+}
+
+__host__ bool
+NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<int, int> idxRange_u1,
+                                    std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
+                                    std::pair<int, int> idxRange_v2) {
+    if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
+        printf("BVH Layer error!\n");
+        return false;
+    }
+    int commonMaxLayer = gm1.maxLevel;
+    int edgeCellCnt = pow(2, commonMaxLayer - 1);
+    if (idxRange_u1.first < 0 || idxRange_u2.first < 0 || idxRange_v1.first < 0 || idxRange_v2.first < 0 ||
+        idxRange_u1.second >= edgeCellCnt || idxRange_u2.second >= edgeCellCnt ||
+        idxRange_v1.second >= edgeCellCnt || idxRange_v2.second >= edgeCellCnt) {
+        printf("Error when detecting overlapping: idx range invalid!\n");
+        return false;
+    }
+    auto getRangedBox = [&commonMaxLayer](const BVH &bvh, const std::pair<int, int> &idxRange_u,
+                                          const std::pair<int, int> idxRange_v) {
+        AABB bounding;
+        for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) {
+            for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) {
+                bounding = bounding.Union(
+                        bvh.nodes[getStartIdxOfLayerN(commonMaxLayer) + h_getChildNodeIdx(i, j)].bounds);
+            }
+        }
+        return bounding;
+    };
+    return getRangedBox(gm1, idxRange_u1, idxRange_v1)
+            .IsOverlap(getRangedBox(gm2, idxRange_u2, idxRange_v2));
+}
+
+__host__ bool NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<float, float> range_u1,
+                                                  std::pair<float, float> range_v1, std::pair<float, float> range_u2,
+                                                  std::pair<float, float> range_v2,
+                                                  std::pair<float, float> paramRange_u1,
+                                                  std::pair<float, float> paramRange_v1,
+                                                  std::pair<float, float> paramRange_u2,
+                                                  std::pair<float, float> paramRange_v2) {
+    if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
+        printf("BVH Layer error!\n");
+        return false;
+    }
+    int edgeCellCnt = pow(2, gm1.maxLevel - 1);
+    // 根据所给参数的范围和参数的定义域范围，获得对应的采样网格中的范围
+    auto getIdxRange = [](std::pair<float, float> range, std::pair<float, float> paramRange, int edgeCellCnt) {
+        float paramStep = (paramRange.second - paramRange.first) / edgeCellCnt;
+        return std::pair<int, int>({int((range.first - paramRange.first) / paramStep),
+                                    int((range.second - paramRange.first) / paramStep)});
+    };
+    auto idxRange_u1 = getIdxRange(range_u1, paramRange_u1, edgeCellCnt);
+    auto idxRange_v1 = getIdxRange(range_v1, paramRange_v1, edgeCellCnt);
+    auto idxRange_u2 = getIdxRange(range_u2, paramRange_u2, edgeCellCnt);
+    auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt);
+    return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2, idxRange_v2);
+}
--- a/src/device/aabb.cu
+++ b/src/device/aabb.cu
@ -10,8 +10,8 @@
 __device__ __host__ AABB::AABB() {
    float minNum = -D_FLT_MAX;
    float maxNum = D_FLT_MAX;
-    pMin = FVec3(minNum, minNum, minNum);
-    pMax = FVec3(maxNum, maxNum, maxNum);
+    pMax = FVec3(minNum, minNum, minNum);
+    pMin = FVec3(maxNum, maxNum, maxNum);
 }

 __device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {}
@ -40,3 +40,12 @@ __device__ __host__ bool AABB::IsOverlap(const AABB& b2) {
    if (b2.pMin.z > pMax.z) return false;
    return true;
 }
+
+__device__ __host__ void AABB::expand(float k) {
+    pMin.x -= k;
+    pMin.y -= k;
+    pMin.z -= k;
+    pMax.x += k;
+    pMax.y += k;
+    pMax.z += k;
+}
--- a/src/device/device_utils.cu
+++ b/src/device/device_utils.cu
@ -16,11 +16,8 @@ __device__ bool d_floatEqual(float a, float b) {
 }

 __device__ void normalization(float &a, float &b, float &c) {
-    float sumA = a * a;
-    float sumB = b * b;
-    float sumC = c * c;
-    float sum = sumA + sumB + sumC;
-    a = sqrt(sumA / sum);
-    b = sqrt(sumB / sum);
-    c = sqrt(sumC / sum);
+    float sqrtSum = sqrt(a * a + b * b + c * c);
+    a /= sqrtSum;
+    b /= sqrtSum;
+    c /= sqrtSum;
 }
--- a/tests/main.cpp
+++ b/tests/main.cpp
@ -22,16 +22,34 @@ int main() {
 //                                                  {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1},
 //                                                  {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1});

+//    NurbsSurface::Surface nurbsSurfaceEvaluator({
+//                                                        {{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3,  -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}},
+//                                                        {{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1},  {0.10, -0.18, 0.03, 1},  {0.49, -0.15, -0.03, 1}},
+//                                                        {{-0.61, 0.22,  0.09, 1},  {-0.21, 0.20,  0.09, 1},  {0.13, 0.20,  0.12, 1},  {0.49, 0.19,  -0.03, 1}},
+//                                                        {{-0.52, 0.51,  0.13, 1},  {-0.20, 0.32,  0.36, 1},  {0.18, 0.29,  0.28, 1},  {0.51, 0.53,  0.12, 1}}
+//                                                }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
    NurbsSurface::Surface nurbsSurfaceEvaluator({
-                                                        {{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3,  -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}},
-                                                        {{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1},  {0.10, -0.18, 0.03, 1},  {0.49, -0.15, -0.03, 1}},
-                                                        {{-0.61, 0.22,  0.09, 1},  {-0.21, 0.20,  0.09, 1},  {0.13, 0.20,  0.12, 1},  {0.49, 0.19,  -0.03, 1}},
-                                                        {{-0.52, 0.51,  0.13, 1},  {-0.20, 0.32,  0.36, 1},  {0.18, 0.29,  0.28, 1},  {0.51, 0.53,  0.12, 1}}
+                                                        {{2, -2,  -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2,  -0.5, 4}, {-2, -2,  1.5, 2}},
+                                                        {{2, -1,  -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1,  -0.5, 1}, {-2, -1,  1.5, 1}},
+                                                        {{3, 1.2, -2, 9}, {-2.5, 1.2,  -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}},
+                                                        {{2, 2,   -2, 1}, {-2.5, 2,    -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2,   1.5, 1}}
                                                }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
    nurbsSurfaceEvaluator.setRecordTime(true);

-    nurbsSurfaceEvaluator.buildBHV(17, 17);
-//    nurbsSurfaceEvaluator.evaluate(101, 101);
+    nurbsSurfaceEvaluator.buildGaussMap(9); // 构建曲面的Gauss map
+//    nurbsSurfaceEvaluator.gauss_map.printQuadTree(); // 打印整棵Gauss map树
+    auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map,
+                                                                    nurbsSurfaceEvaluator.gauss_map);
+    printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size());
+    //判定两个Surface构造的GaussMap在指定u、v范围内是否有重合
+    auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map,
+                                                                      nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3},
+                                                                      {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1},
+                                                                      {0, 1}, {0, 1}, {0, 1});
+    printf("is guass map overlapped: %d\n", isRegionallyOverlapped);
+
+//    nurbsSurfaceEvaluator.buildBHV(9); // 构建曲面的BVH
+//    nurbsSurfaceEvaluator.evaluate(101, 101); // 求值
    printf("==============================\n");

    NurbsCurve::Curve nurbsCurveEvaluator(