From e736268cc1d445c0e6c5534e7ba1caaaea11b24b Mon Sep 17 00:00:00 2001
From: Dtouch <dtouch@foxmail.com>
Date: Mon, 27 Mar 2023 23:52:12 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E5=85=A5=E4=BA=86GPU=E7=89=88?=
 =?UTF-8?q?=E6=9C=AC=E7=9A=84LoopDetection=EF=BC=8C=E9=80=90=E6=AD=A5?=
 =?UTF-8?q?=E7=94=A8glm::vec=E4=BB=A3=E6=9B=BFstd::vec<float>=E6=9D=A5?=
 =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E4=B8=80=E4=B8=AA=E7=82=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                          |   5 +-
 include/device/Nurbs/bvh.cuh            |  10 +-
 include/device/Nurbs/loop_detection.cuh |  42 +++++
 include/device/Nurbs/nurbs_curve.cuh    |   8 +-
 include/device/Nurbs/nurbs_surface.cuh  |  61 +++++--
 include/device/aabb.cuh                 |  20 +--
 include/device/device_utils.cuh         |   2 +
 include/device/srf_mesh.cuh             |  17 ++
 include/utils.h                         |  23 ++-
 src/device/Nurbs/bvh.cu                 |  19 +-
 src/device/Nurbs/loop_detection.cu      | 229 ++++++++++++++++++++++++
 src/device/Nurbs/nurbs_common.cu        |   4 +-
 src/device/Nurbs/nurbs_curve.cu         |  38 ++--
 src/device/Nurbs/nurbs_surface.cu       | 220 +++++++++++++++++------
 src/device/aabb.cu                      |  32 ++--
 src/device/device_utils.cu              |   2 +-
 src/device/srf_mesh.cu                  |   4 +
 src/device/vec.cu                       |   2 +-
 src/utils.cpp                           |   2 +-
 tests/main.cpp                          |  61 +++++--
 20 files changed, 643 insertions(+), 158 deletions(-)
 create mode 100644 include/device/Nurbs/loop_detection.cuh
 create mode 100644 include/device/srf_mesh.cuh
 create mode 100644 src/device/Nurbs/loop_detection.cu
 create mode 100644 src/device/srf_mesh.cu
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 230989a..cb44c0a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,7 @@ set(PROJECT_SOURCES
         src/device/Nurbs/nurbs_surface.cu   include/device/Nurbs/nurbs_surface.cuh
         src/device/Nurbs/nurbs_curve.cu     include/device/Nurbs/nurbs_curve.cuh
         src/device/Nurbs/bvh.cu             include/device/Nurbs/bvh.cuh
-        )
+        src/device/Nurbs/loop_detection.cu include/device/Nurbs/loop_detection.cuh src/device/srf_mesh.cu include/device/srf_mesh.cuh)
 
 add_executable(NurbsPerformer ${PROJECT_SOURCES})
 
@@ -30,8 +30,9 @@ add_executable(NurbsPerformer ${PROJECT_SOURCES})
 #include_directories("/usr/local/device-11.8/targets/x86_64-linux/include")
 # windows
 include_directories("$ENV{CUDA_PATH}/include")
-
+include_directories("E:/CLib/glm")
 include_directories(include)
+include_directories("E:/CLib/tinynurbs/include")
 
 #MESSAGE("CUDA PATH::: $ENV{CUDA_PATH}")
 #MESSAGE("CUDA PATH::: $ENV{LD_LIBRARY_PATH}")
diff --git a/include/device/Nurbs/bvh.cuh b/include/device/Nurbs/bvh.cuh
index 5c7831a..5a09ad0 100644
--- a/include/device/Nurbs/bvh.cuh
+++ b/include/device/Nurbs/bvh.cuh
@@ -7,6 +7,14 @@
 
 #include "device/aabb.cuh"
 
+struct BVHNode {
+    AABB bounds;  // AABB包围盒
+    int firstChild = -1, level = 0;  // 第一个孩子节点的下标，该结点所在层次
+    // 曲面片u, v的范围
+    float u0 = 0., u1 = 0., v0 = 0., v1 = 0.;
+    int idx_u = -1, idx_v = -1;
+};
+
 /**
  * 当前层的第一个节点的坐标
  * @param layer 层号。根节点为第一层
@@ -29,7 +37,7 @@ public:
 };
 
 __global__ void
-buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
+g_buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
          int sampleCnt_u, int sampleCnt_v, BVHNode *BVH);
 
 #endif //NURBSEVALUATOR_BVH_CUH
diff --git a/include/device/Nurbs/loop_detection.cuh b/include/device/Nurbs/loop_detection.cuh
new file mode 100644
index 0000000..48c4067
--- /dev/null
+++ b/include/device/Nurbs/loop_detection.cuh
@@ -0,0 +1,42 @@
+#ifndef NURBSPERFORMER_LOOP_DETECTION_CUH
+#define NURBSPERFORMER_LOOP_DETECTION_CUH
+
+#include "device/srf_mesh.cuh"
+#include "map"
+#include "set"
+
+class LoopDetection {
+public:
+    SrfMesh *d_s_srfMesh;
+    SrfMesh *d_f_srfMesh;
+
+    float s_paramRegionSize_u;
+    float s_paramRegionSize_v;
+
+    int edgeSampleCnt;
+
+    glm::vec<2, int> *d_selectedPointsIdx;
+    glm::vec2 *d_vectorFields;
+    int *d_rotationNumbers;
+
+    LoopDetection(glm::vec3 *s_eval, glm::vec3 *f_eval, glm::vec3 *s_tan_u, glm::vec3 *f_tan_u, glm::vec3 *s_tan_v,
+                  glm::vec3 *f_tan_v, glm::vec3 *s_norm, glm::vec3 *f_norm, int edgeSampleCnt,
+                  float s_paramRegionSize_u, float s_paramRegionSize_v);
+
+    __host__ void h_initOrientedDisAndVecFields(
+            const std::map<std::pair<int, int>, std::set<std::pair<int, int>>> &intersectBoxPairs);
+    __host__ void h_getRotationNumber();
+
+    ~LoopDetection();
+};
+
+__global__ void
+g_initOrientedDisAndVecFields(SrfMesh *s_srfMesh, SrfMesh *f_srfMesh, int s_cellCnt, glm::vec<2, int> *s_cells,
+                              glm::vec<2, int> **f_cells, const int *f_cellCnt, glm::vec<2, int> *selectedPointsIdx,
+                              glm::vec2 *vectorFields);
+
+__global__ void
+g_getRotationNumber(SrfMesh *s_srfMesh, float paramRegionSize_u, float paramRegionSize_v, glm::vec2 *vectorFields,
+                    int *rotationNumbers);
+
+#endif //NURBSPERFORMER_LOOP_DETECTION_CUH
diff --git a/include/device/Nurbs/nurbs_curve.cuh b/include/device/Nurbs/nurbs_curve.cuh
index fac7c6c..28a4e5c 100644
--- a/include/device/Nurbs/nurbs_curve.cuh
+++ b/include/device/Nurbs/nurbs_curve.cuh
@@ -1,13 +1,9 @@
-//
-// Created by 14727 on 2022/12/9.
-//
-
 #ifndef NURBSEVALUATOR_NURBS_CURVE_CUH
 #define NURBSEVALUATOR_NURBS_CURVE_CUH
 
 #include <cuda_runtime.h>
 #include <vector>
-//#include <map>
+#include <glm/glm.hpp>
 
 namespace NurbsCurve {
     const int POINT_SIZE = 4;
@@ -51,7 +47,7 @@ namespace NurbsCurve {
          * @param sampleCnt_ 在参数域内均匀采样的采样数，它会更新成员变量中的sampleCnt
          * @return 由 map 组成的vector{<u, {x, y, z}>}
          */
-        std::vector<std::vector<float>> evaluate(int sampleCnt_);
+        std::vector<glm::vec3> evaluate(int sampleCnt_);
 
         /**
          * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法
diff --git a/include/device/Nurbs/nurbs_surface.cuh b/include/device/Nurbs/nurbs_surface.cuh
index 8c170cf..5caa346 100644
--- a/include/device/Nurbs/nurbs_surface.cuh
+++ b/include/device/Nurbs/nurbs_surface.cuh
@@ -8,9 +8,16 @@
 #include <vector>
 #include "cuda_runtime.h"
 #include "bvh.cuh"
+#include "glm/glm.hpp"
+
+typedef std::vector<glm::vec3> LinePoints3;
+typedef std::vector<LinePoints3> MeshPoints3;
+typedef std::vector<std::vector<glm::vec4>> MeshPoints4;
+typedef std::pair<int, int> idx2;
+typedef std::pair<idx2, idx2> boxPair;
 
 namespace NurbsSurface {
-    const int POINT_SIZE = 4;
+//    const int POINT_SIZE = 4;
     /**
      * 曲线计算的核函数
      * @param d_pointSize 点的大小(3: [x, y, z] | 4：[x, y, z, w])
@@ -32,7 +39,7 @@ namespace NurbsSurface {
 
     class Surface {
     private:
-        std::vector<std::vector<std::vector<float>>> controlPoints;
+        MeshPoints4 controlPoints;
         float *d_points;
         std::vector<float> knots_u;
         std::vector<float> knots_v;
@@ -50,7 +57,13 @@ namespace NurbsSurface {
         float *d_normals;
         float *d_k;  // 最大曲率
 
+        __host__ void recursiveGetRayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint, int idx,
+                                                     std::vector<BVHNode> &intersectionLeafNodes);
+
     public:
+        float *h_evaluations;
+        float *h_derivatives;
+        float *h_normals;
         BVH bvh;
         BVH gauss_map;
         /**
@@ -59,17 +72,26 @@ namespace NurbsSurface {
          * @param knots_u u方向knots
          * @param knots_v v方向knots
          */
-        __host__ explicit Surface(std::vector<std::vector<std::vector<float>>> controlPoints,
-                                  std::vector<float> knots_u, std::vector<float> knots_v);
+        __host__ explicit Surface(MeshPoints4 controlPoints, std::vector<float> knots_u, std::vector<float> knots_v);
 
         /**
-         * 供外部CPU程序使用的、负责调用gpu并行计算的方法
+         * 供外部CPU程序使用的、负责调用gpu并行计算采样点xyz值的方法
          * @param sampleCnt_u u方向采样数目
          * @param sampleCnt_v v方向采样数目
          * @return 由 map 组成的vector{<<u, v>, {x, y, z}>}
          */
-        __host__ std::vector<std::vector<std::vector<float>>>
-        evaluate(int sampleCnt_u_, int sampleCnt_v_);
+        __host__ std::vector<std::vector<glm::vec3>> getEvaluateVec(int sampleCnt_u, int sampleCnt_v) const;
+
+        __host__ std::vector<MeshPoints3> getDerivativeVec(int sampleCnt_u, int sampleCnt_v) const;
+
+
+        /**
+         * 供外部CPU程序使用的、负责调用gpu并行计算采样点xyz值的方法
+         * @param sampleCnt_u u方向采样数目
+         * @param sampleCnt_v v方向采样数目
+         * @return 表示为vertices形式的采样点值数组
+         */
+        __host__ void evaluate(int sampleCnt_u, int sampleCnt_v);
 
         /**
          * 供外部CPU程序使用的、负责调用gpu并行计算切向量的方法
@@ -84,13 +106,18 @@ namespace NurbsSurface {
         /**
          * 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法
          */
-        __host__ void buildBHV(int layerCnt, bool useK = false);
+        __host__ void buildBVH(int layerCnt, bool useK = false);
 
         /**
          * 供外部CPU程序使用的、负责调用gpu并行计算Gauss Map Tree的方法
          */
         __host__ void buildGaussMap(int layerCnt);
 
+        /**
+         * 供外部CPU程序使用的、负责调用递归函数求解BVH与ray交点的方法
+         */
+        __host__ std::vector<BVHNode> rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint);
+
 
         void setRecordTime(bool r);
 
@@ -99,9 +126,9 @@ namespace NurbsSurface {
     };
 
     __host__ void recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2,
-                                                      std::vector<std::pair<int, int>> &pairs);
+                                               std::vector<std::pair<int, int>> &pairs);
 
-    __host__ std::vector<std::pair<int, int>> getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2);
+    __host__ std::vector<boxPair> getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2);
 
     /**
      * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交
@@ -112,8 +139,8 @@ namespace NurbsSurface {
      * @return true:gauss map的包围盒有交集，说明gauss map<可能>有重合；false: gauss map一定没有重合
      */
     __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<int, int> idxRange_u1,
-                                               std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
-                                               std::pair<int, int> idxRange_v2);
+                                        std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
+                                        std::pair<int, int> idxRange_v2);
     /**
      * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交
      * @param range_u1 第一个gauss map的参数u范围
@@ -127,11 +154,11 @@ namespace NurbsSurface {
      * @return true:gauss map的包围盒有交集，说明gauss map<可能>有重合；false: gauss map一定没有重合
      */
     __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<float, float> range_u1,
-                                               std::pair<float, float> range_v1, std::pair<float, float> range_u2,
-                                               std::pair<float, float> range_v2, std::pair<float, float> paramRange_u1,
-                                               std::pair<float, float> paramRange_v1,
-                                               std::pair<float, float> paramRange_u2,
-                                               std::pair<float, float> paramRange_v2);
+                                        std::pair<float, float> range_v1, std::pair<float, float> range_u2,
+                                        std::pair<float, float> range_v2, std::pair<float, float> paramRange_u1,
+                                        std::pair<float, float> paramRange_v1,
+                                        std::pair<float, float> paramRange_u2,
+                                        std::pair<float, float> paramRange_v2);
 }
 
 
diff --git a/include/device/aabb.cuh b/include/device/aabb.cuh
index d6ee233..4d3980d 100644
--- a/include/device/aabb.cuh
+++ b/include/device/aabb.cuh
@@ -6,31 +6,25 @@
 #define NURBSEVALUATOR_AABB_CUH
 
 #include "vec.cuh"
+#include "glm/glm.hpp"
 #include "cstdio"
 
 class AABB {
 public:
     // 边界
-    FVec3 pMin, pMax;
+    glm::vec3 pMin, pMax;
     __device__ __host__ AABB();
-    __device__ __host__ explicit AABB(const FVec3& p);
-    __device__ __host__ AABB(const FVec3& p1, const FVec3& p2);
+    __device__ __host__ explicit AABB(const glm::vec3& p);
+    __device__ __host__ AABB(const glm::vec3& p1, const glm::vec3& p2);
 
     // aabb包围盒合并操作，包围盒和点
-    __device__ __host__ AABB Union(const FVec3& p);
+    __device__ __host__ AABB Union(const glm::vec3& p) const;
     // aabb包围盒合并操作，两个包围盒
-    __device__ __host__ AABB Union(const AABB& b2);
+    __device__ __host__ AABB Union(const AABB& b2) const;
     // aabb包围盒沿各坐标轴政府方向分别扩张
     __device__ __host__ void expand(float k);
     // 判断两个aabb包围盒是否重叠
-    __device__ __host__ bool IsOverlap(const AABB& b2);
-};
-
-struct BVHNode {
-    AABB bounds;  // AABB包围盒
-    int firstChild = -1, level = 0;  // 第一个孩子节点的下标，该结点所在层次，孩子个数
-    // 曲面片u, v的范围
-    float u0 = 0., u1 = 0., v0 = 0., v1 = 0.;
+    __device__ __host__ bool IsOverlap(const AABB& b2) const;
 };
 
 #endif //NURBSEVALUATOR_AABB_CUH
diff --git a/include/device/device_utils.cuh b/include/device/device_utils.cuh
index 3c50dfc..cbeb0a0 100644
--- a/include/device/device_utils.cuh
+++ b/include/device/device_utils.cuh
@@ -5,6 +5,8 @@
 #ifndef NURBSEVALUATOR_DEVICE_UTILS_CUH
 #define NURBSEVALUATOR_DEVICE_UTILS_CUH
 
+#define D_DBL_MAX 1.7976931348623158e+308
+#define D_FLT_MAX 3.402823466e+38F
 
 __device__ void d_safeFree(float * &p);
 
diff --git a/include/device/srf_mesh.cuh b/include/device/srf_mesh.cuh
new file mode 100644
index 0000000..8d06a1b
--- /dev/null
+++ b/include/device/srf_mesh.cuh
@@ -0,0 +1,17 @@
+#ifndef NURBSPERFORMER_SRF_MESH_CUH
+#define NURBSPERFORMER_SRF_MESH_CUH
+
+#include "glm/glm.hpp"
+
+class SrfMesh {
+public:
+    int edgeSampleCnt;
+    glm::vec3 *evaluationRes;
+    glm::vec3 *tangent_u;
+    glm::vec3 *tangent_v;
+    glm::vec3 *normal;
+    SrfMesh(glm::vec3 *evalRes, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *nor, int edgeSampleCnt);
+};
+
+
+#endif //NURBSPERFORMER_SRF_MESH_CUH
diff --git a/include/utils.h b/include/utils.h
index dbcb8b6..8bb2c18 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -8,10 +8,13 @@
 #include <ctime>
 double get_time();
 #else
+
 #include <windows.h>
 #include "device/Nurbs/bvh.cuh"
+#include "device/srf_mesh.cuh"
 
 double get_time();
+
 #endif
 
 /**
@@ -19,9 +22,27 @@ double get_time();
  *  注意指针是引用传参，因为要把指针本身置空
  */
 void safeCudaFree(float *&p);
+
 void safeCudaFree(BVHNode *&p);
-//template<typename T>
+
+template<typename T>
+void safeCudaFree(T *&p) {
+    if (p != nullptr) {
+        cudaFree(p);
+        p = nullptr;
+    }
+}
+
+template<typename T>
+void safeFree(T *&p) {
+    if (p != nullptr) {
+        free(p);
+        p = nullptr;
+    }
+}
+
 void safeFree(float *&p);
+
 void safeFree(BVHNode *&p);
 
 
diff --git a/src/device/Nurbs/bvh.cu b/src/device/Nurbs/bvh.cu
index bce1cc9..f8cd2f6 100644
--- a/src/device/Nurbs/bvh.cu
+++ b/src/device/Nurbs/bvh.cu
@@ -2,11 +2,8 @@
 // Created by 14727 on 2022/12/11.
 //
 
-#include "../../../include/device/Nurbs/bvh.cuh"
-/**
- * 当前层的第一个节点的坐标
- * @param layer 层号。根节点为第一层
- */
+#include "device/Nurbs/bvh.cuh"
+
 __device__ __host__ int getStartIdxOfLayerN(int layer) {
     if (layer == 1) return 0;
     int num = 1;
@@ -48,7 +45,7 @@ __host__ int h_getChildNodeIdx(int ix, int iy){
 }
 
 __global__ void
-buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
+g_buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v,
                 int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) {
     // 假设还是二维grid和二维的block
     int ix = blockIdx.x * blockDim.x + threadIdx.x;
@@ -74,22 +71,24 @@ buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot
 
 //        if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]);
         if (step == 1) {
-            AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1],
+            AABB bound(glm::vec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1],
                              evaluatedPoints[tmpIdx + 2]));  // 最初只包含u，v点
             // 叶子节点
             BVH[idx].u0 = u;
             BVH[idx].u1 = u + step_u * singleStepVal_u;
             BVH[idx].v0 = v;
             BVH[idx].v1 = v + step_v * singleStepVal_v;
+            BVH[idx].idx_u = ix;
+            BVH[idx].idx_v = iy;
             BVH[idx].level = level;
             int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3;
             int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3;
             int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3;
-            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1],
+            bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1],
                                       evaluatedPoints[tmpIdx1 + 2]));
-            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1],
+            bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1],
                                       evaluatedPoints[tmpIdx2 + 2]));
-            bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1],
+            bound = bound.Union(glm::vec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1],
                                       evaluatedPoints[tmpIdx3 + 2]));
             if(k != nullptr) bound.expand(*k);
             BVH[idx].bounds = bound;
diff --git a/src/device/Nurbs/loop_detection.cu b/src/device/Nurbs/loop_detection.cu
new file mode 100644
index 0000000..467ab66
--- /dev/null
+++ b/src/device/Nurbs/loop_detection.cu
@@ -0,0 +1,229 @@
+#include <vector>
+#include "device/Nurbs/loop_detection.cuh"
+#include "utils.h"
+#include "cuda_runtime.h"
+#include "device/device_utils.cuh"
+
+LoopDetection::LoopDetection(glm::vec3 *const s_eval, glm::vec3 *const f_eval, glm::vec3 *const s_tan_u,
+                             glm::vec3 *const f_tan_u, glm::vec3 *const s_tan_v, glm::vec3 *const f_tan_v,
+                             glm::vec3 *const s_norm, glm::vec3 *const f_norm, int edgeSampleCnt,
+                             float s_paramRegionSize_u, float s_paramRegionSize_v) :
+        edgeSampleCnt(edgeSampleCnt),
+        s_paramRegionSize_u(s_paramRegionSize_u),
+        s_paramRegionSize_v(s_paramRegionSize_v) {
+    auto initSrfMesh = [](SrfMesh *&d_srfMesh, glm::vec3 *eval, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *norm,
+                          int edgeSampleCnt) {
+        auto h_srfMesh = new SrfMesh(eval, tan_u, tan_v, norm, edgeSampleCnt);
+        safeCudaFree(d_srfMesh);
+        auto srfMeshSize = sizeof(SrfMesh);
+        cudaMalloc((void **) &d_srfMesh, srfMeshSize);
+        cudaMemcpy(d_srfMesh, h_srfMesh, srfMeshSize, cudaMemcpyHostToDevice);
+        safeFree(h_srfMesh);
+    };
+    initSrfMesh(d_s_srfMesh, s_eval, s_tan_u, s_tan_v, s_norm, edgeSampleCnt);
+    initSrfMesh(d_f_srfMesh, f_eval, f_tan_u, f_tan_v, f_norm, edgeSampleCnt);
+}
+
+
+LoopDetection::~LoopDetection() {
+    safeCudaFree(d_selectedPointsIdx);
+    safeCudaFree(d_vectorFields);
+    safeCudaFree(d_s_srfMesh);
+    safeCudaFree(d_f_srfMesh);
+    safeCudaFree(d_rotationNumbers);
+}
+
+__host__ void LoopDetection::h_initOrientedDisAndVecFields(
+        const std::map<std::pair<int, int>, std::set<std::pair<int, int>>> &intersectBoxPairs) {
+    int sampleCnt = edgeSampleCnt * edgeSampleCnt;
+
+    safeCudaFree(d_selectedPointsIdx);
+    auto selectedPointsIdxVec = std::vector<glm::vec2>(sampleCnt, glm::vec<2, int>(-1, -1));
+    auto h_selectedPointsIdx = new glm::vec<2, int>[sampleCnt];
+    std::copy(selectedPointsIdxVec.begin(), selectedPointsIdxVec.end(), h_selectedPointsIdx);
+    int selectedPtsIdxBytes = sizeof(glm::vec<2, int>) * sampleCnt;
+    cudaMalloc((void**)&d_selectedPointsIdx, selectedPtsIdxBytes);
+    cudaMemcpy(d_selectedPointsIdx, h_selectedPointsIdx, selectedPtsIdxBytes, cudaMemcpyHostToDevice);
+
+    safeCudaFree(d_vectorFields);
+    cudaMalloc((void**)&d_vectorFields, sizeof(glm::vec2) * sampleCnt);
+
+    int mapCnt = intersectBoxPairs.size();
+
+    auto *h_s_cells = new glm::vec<2, int>[mapCnt];
+    glm::vec<2, int> *d_s_cells;
+    auto **h_f_cells = new glm::vec<2, int> *[mapCnt];
+    glm::vec<2, int> **d_f_cells;
+    auto *h_f_cellsCnt = new int[mapCnt];
+    int *d_f_cellsCnt;
+    int i = 0;
+    for (const auto &boxPair: intersectBoxPairs) {
+        h_s_cells[i] = glm::vec<2, int>(boxPair.first.first, boxPair.first.second);
+        h_f_cellsCnt[i] = boxPair.second.size();
+        auto h_f_cellsTmp = new glm::vec<2, int>[h_f_cellsCnt[i]];
+        int j = 0;
+        for (auto box: boxPair.second) {
+            h_f_cellsTmp[j] = glm::vec<2, int>(box.first, box.second);
+            j++;
+        }
+        auto f_relatedCellsBytes = sizeof(glm::vec<2, int>) * h_f_cellsCnt[i];
+        cudaMalloc((void **) &h_f_cells[i], f_relatedCellsBytes);
+        cudaMemcpy(h_f_cells[i], h_f_cellsTmp, f_relatedCellsBytes, cudaMemcpyHostToDevice);
+        free(h_f_cellsTmp);
+        i++;
+    }
+
+    int s_cellsBytes = sizeof(glm::vec<2, int>) * mapCnt;
+    cudaMalloc((void **) &d_s_cells, s_cellsBytes);
+    cudaMemcpy(d_s_cells, h_s_cells, s_cellsBytes, cudaMemcpyHostToDevice);
+
+    int f_cellsPtrBytes = sizeof(glm::vec<2, int>) * mapCnt;
+    cudaMalloc((void **) &d_f_cells, f_cellsPtrBytes);
+    cudaMemcpy(d_f_cells, h_f_cells, f_cellsPtrBytes, cudaMemcpyHostToDevice);
+
+    int f_cellsCntBytes = sizeof(int) * mapCnt;
+    cudaMalloc((void **) &d_f_cellsCnt, f_cellsCntBytes);
+    cudaMemcpy(d_f_cellsCnt, h_f_cellsCnt, f_cellsCntBytes, cudaMemcpyHostToDevice);
+
+    dim3 block(64, 64);
+    dim3 grid((mapCnt + block.x * block.y - 1) / (block.x * block.y));
+    g_initOrientedDisAndVecFields <<< grid, block >>>(d_s_srfMesh, d_f_srfMesh, mapCnt, d_s_cells, d_f_cells,
+                                                      d_f_cellsCnt, d_selectedPointsIdx, d_vectorFields);
+
+    cudaFree(d_s_cells);
+    cudaFree(d_f_cells);
+    cudaFree(d_f_cellsCnt);
+    for (int k = 0; k < mapCnt; k++) {
+        cudaFree(h_f_cells[k]);
+    }
+    free(h_s_cells);
+    free(h_f_cells);
+    free(h_f_cellsCnt);
+    free(h_selectedPointsIdx);
+}
+
+__host__ void LoopDetection::h_getRotationNumber() {
+    safeCudaFree(d_rotationNumbers);
+    cudaMalloc((void**)&d_rotationNumbers, sizeof(int) * edgeSampleCnt * edgeSampleCnt);
+
+    dim3 block(32, 32);
+    dim3 grid((edgeSampleCnt + block.x - 1) / block.x, (edgeSampleCnt + block.y - 1) / block.y);
+    g_getRotationNumber<<<grid, block>>>(d_s_srfMesh, s_paramRegionSize_u, s_paramRegionSize_v, d_vectorFields,
+                                  d_rotationNumbers);
+
+}
+
+__device__ const int neighborIdxes[4][2] = {{1,  0},
+                                            {0,  1},
+                                            {-1, 0},
+                                            {0,  -1}};
+
+__device__ const int dirs[2][2][2] = {{{-1, 1},  {1, 1}},
+                                      {{-1, -1}, {1, -1}}}; // 必须是左上，右上，左下，右下的顺序
+
+__global__ void
+g_initOrientedDisAndVecFields(SrfMesh *s_srfMesh, SrfMesh *f_srfMesh, int s_cellCnt, glm::vec<2, int> *s_cells,
+                              glm::vec<2, int> **f_cells, const int *f_cellCnt, glm::vec<2, int> *selectedPointsIdx,
+                              glm::vec2 *vectorFields) {
+//    // 二维的grid和一维的block
+//    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x * blockDim.y + threadIdx.x;
+    // 一维的grid和二维的block
+    int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
+    if (idx >= s_cellCnt) return;
+//    printf("idx: %d\n", idx);
+    if (idx == 0) printf("<%d, %d>, %f\n", neighborIdxes[0][0], neighborIdxes[0][1], -D_FLT_MAX);
+    int s_edgeSampleCnt = s_srfMesh->edgeSampleCnt;
+    int f_edgeSampleCnt = f_srfMesh->edgeSampleCnt;
+    for (auto neighborIdx: neighborIdxes) {
+        for (int i = 0; i < 2; i++) {
+            for (int j = 0; j < 2; j++) {
+                float minDis = -D_FLT_MAX;
+                int minDisFIdx_u = -1, minDisFIdx_v = -1;
+                int idxI = s_cells[idx].x + i + neighborIdx[0];
+                int idxJ = s_cells[idx].y + j + neighborIdx[1];
+                if (idxI < 0 || idxI >= s_edgeSampleCnt || idxJ < 0 || idxJ >= s_edgeSampleCnt ||
+                    selectedPointsIdx[idxI * s_edgeSampleCnt + idxJ] != glm::vec<2, int>(2, 2)) {
+                    // 尽量不重复计算
+                    // 但在多线程情况下，这样还是可以能重复计算
+                    continue;
+                }
+                for (int fi = 0; fi < f_cellCnt[idx]; fi++) {
+                    for (int k = 0; k < 2; k++) {
+                        for (int l = 0; l < 2; l++) {
+                            for (int step_u = 0; step_u <= 2; step_u++) {
+                                for (int step_v = 0; step_v + step_u <= 2; step_v++) {
+                                    int fIdx_u = f_cells[idx][fi].x + k + step_u * dirs[k][l][0];
+                                    int fIdx_v = f_cells[idx][fi].y + l + step_v * dirs[k][l][1];
+                                    if (fIdx_u < 0 || fIdx_u >= f_edgeSampleCnt || fIdx_v < 0 ||
+                                        fIdx_v >= f_edgeSampleCnt)
+                                        continue;
+                                    auto dis = glm::distance(s_srfMesh->evaluationRes[idxI][idxJ],
+                                                             f_srfMesh->evaluationRes[fIdx_u][fIdx_v]);
+                                    if (dis < minDis) {
+                                        minDis = dis;
+                                        minDisFIdx_u = fIdx_u;
+                                        minDisFIdx_v = fIdx_v;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                selectedPointsIdx[idxI * s_edgeSampleCnt + idxJ] = glm::vec<2, int>(minDisFIdx_u, minDisFIdx_v);
+                auto n2 = f_srfMesh->normal[minDisFIdx_u * f_edgeSampleCnt + minDisFIdx_v];
+                auto ps_pu = s_srfMesh->tangent_u[idxI * s_edgeSampleCnt + idxJ];
+                auto ps_pv = s_srfMesh->tangent_v[idxI * s_edgeSampleCnt + idxJ];
+                vectorFields[idxI * s_edgeSampleCnt + idxJ] = glm::vec2(glm::dot(n2, ps_pu), glm::dot(n2, ps_pv));
+            }
+        }
+    }
+}
+
+__global__ void
+g_getRotationNumber(SrfMesh *s_srfMesh, float paramRegionSize_u, float paramRegionSize_v, glm::vec2 *vectorFields,
+                    int *rotationNumbers) {
+    // 二维grid和二维的block
+    int ix = blockIdx.x * blockDim.x + threadIdx.x;
+    int iy = blockIdx.y * blockDim.y + threadIdx.y;
+    int s_edgeSampleCnt = s_srfMesh->edgeSampleCnt;
+    float F[2][2], G[2][2];
+    for (int biasI = 0; biasI < 2; biasI++) {
+        for (int biasJ = 0; biasJ < 2; biasJ++) {
+            auto idxI = ix + biasI;
+            auto idxJ = iy + biasJ;
+            auto v = vectorFields[idxI * s_edgeSampleCnt + idxJ];
+            auto vSquareSum = v.x * v.x + v.y * v.y;
+            auto pTheta_pChi = -v.y / vSquareSum;
+            auto pTheta_pPhi = v.x / vSquareSum;
+//                    auto pChi_pu =
+//                            (vectorFields[idxI + 1][idxJ].x - vectorFields[idxI - 1][idxJ].x) / (2 * uParamCellSize);
+//                    auto pChi_pv =
+//                            (vectorFields[idxI][idxJ + 1].x - vectorFields[idxI][idxJ - 1].x) / (2 * vParamCellSize);
+            glm::vec2 pV_pu;
+            glm::vec2 pV_pv;
+            if (idxI == 0)
+                pV_pu = (vectorFields[(idxI + 1) * s_edgeSampleCnt + idxJ] -
+                         vectorFields[idxI * s_edgeSampleCnt + idxJ]) / paramRegionSize_u;
+            else if (idxI == s_edgeSampleCnt - 1)
+                pV_pu = (vectorFields[idxI * s_edgeSampleCnt + idxJ] -
+                         vectorFields[(idxI - 1) * s_edgeSampleCnt + idxJ]) / paramRegionSize_u;
+            else
+                pV_pu = (vectorFields[(idxI + 1) * s_edgeSampleCnt + idxJ] -
+                         vectorFields[(idxI - 1) * s_edgeSampleCnt + idxJ]) / (2 * paramRegionSize_u);
+            if (idxJ == 0)
+                pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ + 1] -
+                         vectorFields[idxI * s_edgeSampleCnt + idxJ]) / paramRegionSize_v;
+            else if (idxJ == s_edgeSampleCnt - 1)
+                pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ] -
+                         vectorFields[idxI * s_edgeSampleCnt + idxJ - 1]) / paramRegionSize_v;
+            else
+                pV_pv = (vectorFields[idxI * s_edgeSampleCnt + idxJ + 1] -
+                         vectorFields[idxI * s_edgeSampleCnt + idxJ - 1]) / (2 * paramRegionSize_v);
+            F[biasI][biasJ] = pTheta_pChi * pV_pu.x + pTheta_pPhi * pV_pu.y;
+            G[biasI][biasJ] = pTheta_pChi * pV_pv.x + pTheta_pPhi * pV_pv.y;
+        }
+    }
+    rotationNumbers[ix * s_edgeSampleCnt + iy] = int(
+            round(((F[0][1] + F[1][1] - F[0][0] - F[1][0]) * paramRegionSize_u +
+                   (G[0][0] + G[0][1] - G[1][0] - G[1][1]) * paramRegionSize_v)) / 2.);
+}
diff --git a/src/device/Nurbs/nurbs_common.cu b/src/device/Nurbs/nurbs_common.cu
index b49a8d2..89380e0 100644
--- a/src/device/Nurbs/nurbs_common.cu
+++ b/src/device/Nurbs/nurbs_common.cu
@@ -2,8 +2,8 @@
 // Created by 14727 on 2022/11/19.
 //
 
-#include "../../../include/device/Nurbs/nurbs_common.cuh"
-#include "../../../include/device/device_utils.cuh"
+#include "device/Nurbs/nurbs_common.cuh"
+#include "device/device_utils.cuh"
 #include <cstdio>
 
 __device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {
diff --git a/src/device/Nurbs/nurbs_curve.cu b/src/device/Nurbs/nurbs_curve.cu
index 8bc74f4..8c0be64 100644
--- a/src/device/Nurbs/nurbs_curve.cu
+++ b/src/device/Nurbs/nurbs_curve.cu
@@ -1,9 +1,10 @@
 //
 // Created by 14727 on 2022/12/9.
 //
-#include "../../../include/device/Nurbs/nurbs_common.cuh"
-#include "../../../include/device/Nurbs/nurbs_curve.cuh"
-#include "../../../include/utils.h"
+#include "device/Nurbs/nurbs_common.cuh"
+#include "device/Nurbs/nurbs_curve.cuh"
+#include "utils.h"
+#include "device/Nurbs/loop_detection.cuh"
 
 __global__ void
 NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt,
@@ -107,7 +108,7 @@ __global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt,
 }
 
 NurbsCurve::Curve::Curve(std::vector<std::vector<float>> controlPoints,
-                                          std::vector<float> knots) {
+                         std::vector<float> knots) {
     this->knots = std::move(knots);
     this->controlPoints = std::move(controlPoints);
     recordTime = false;
@@ -130,7 +131,7 @@ NurbsCurve::Curve::~Curve() {
     cudaDeviceReset();
 }
 
-std::vector<std::vector<float>>
+std::vector<glm::vec3>
 NurbsCurve::Curve::evaluate(int sampleCnt) {
 
     if (POINT_SIZE != controlPoints[0].size()) {
@@ -166,13 +167,14 @@ NurbsCurve::Curve::evaluate(int sampleCnt) {
 
     // 分配nTexture1的内存。只需要GPU内存
     safeCudaFree(d_nTexture1);  // 注意内存管理
-    cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小，在算梯度时用得到i=pointsCnt + 1的基函数值
+    cudaMalloc((void **) &d_nTexture1,
+               sampleCnt * (pointsCnt + 1) * sizeof(float)); // 注意nTexture的大小，在算梯度时用得到i=pointsCnt + 1的基函数值
 
     // 结果数组
     size_t resBytes = sampleCnt * 3 * sizeof(float);
     float *d_res;
-    cudaMalloc((void **)&d_res, resBytes);
-    auto *h_res = (float *)malloc(resBytes);
+    cudaMalloc((void **) &d_res, resBytes);
+    auto *h_res = (float *) malloc(resBytes);
 
 
     // 构造g_basisTexture线程层级
@@ -198,19 +200,27 @@ NurbsCurve::Curve::evaluate(int sampleCnt) {
         printf("GPU time cost of curve evaluation for %d samples: %lf\n", sampleCnt, time_cost_device);
     }
 
+    // TODO 应当有一个evaluate函数单独地只用于计算d_res
+    // TODO 因为不是所有调用evaluate的操作都需要一个CPU上的结果，因此这一部分数据拷贝可能是多余的
     cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost);
-    std::vector<std::vector<float>> res(sampleCnt, std::vector<float>(3, 0.));
-    for(int i = 0; i < sampleCnt; i++) {
+    std::vector<glm::vec3> res(sampleCnt, glm::vec3());
+    for (int i = 0; i < sampleCnt; i++) {
         int baseIdx = i * 3;
-        res[i][0] = h_res[baseIdx];
-        res[i][1] = h_res[baseIdx + 1];
-        res[i][2] = h_res[baseIdx + 2];
+        res[i].x = h_res[baseIdx];
+        res[i].y = h_res[baseIdx + 1];
+        res[i].z = h_res[baseIdx + 2];
     }
-
     safeFree(h_points);
     safeFree(h_knots);
     safeCudaFree(d_res);
     safeFree(h_res);
+
+    int cnt = 68;
+    dim3 block1(8, 8);
+    dim3 grid1((cnt + block1.x * block1.y - 1) / (block1.x * block1.y));
+
+//    g_initOrientedDisAndVecFields <<< grid1, block1 >>>(nullptr, nullptr, cnt, nullptr, nullptr, nullptr, nullptr, nullptr);
+
     return res;
 }
 
diff --git a/src/device/Nurbs/nurbs_surface.cu b/src/device/Nurbs/nurbs_surface.cu
index 0cd8e93..141882b 100644
--- a/src/device/Nurbs/nurbs_surface.cu
+++ b/src/device/Nurbs/nurbs_surface.cu
@@ -7,6 +7,7 @@
 #include "utils.h"
 #include "device/Nurbs/bvh.cuh"
 #include "device/device_utils.cuh"
+#include "tinynurbs/tinynurbs.h"
 
 __global__ void
 NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points,
@@ -266,8 +267,8 @@ NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const
 }
 
 
-__host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<float>>> controlPoints,
-                                        std::vector<float> knots_u, std::vector<float> knots_v) {
+__host__ NurbsSurface::Surface::Surface(MeshPoints4 controlPoints, std::vector<float> knots_u,
+                                        std::vector<float> knots_v) {
     this->knots_u = std::move(knots_u);
     this->knots_v = std::move(knots_v);
     this->controlPoints = std::move(controlPoints);
@@ -284,24 +285,22 @@ __host__ NurbsSurface::Surface::Surface(std::vector<std::vector<std::vector<floa
     d_k = nullptr;
     d_normals = nullptr;
     bvh.nodes = nullptr;
+    h_evaluations = nullptr;
+    h_derivatives = nullptr;
+    h_normals = nullptr;
 }
 
-__host__ std::vector<std::vector<std::vector<float>>>
-NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
-    if (POINT_SIZE != controlPoints[0][0].size()) {
-        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
-        return {};
-    }
-
+__host__ void NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
     // 构造指向device的controlPoints
     const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size();
-    const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float);
+    const int pointsBytes = pointsCnt_u * pointsCnt_v * sizeof(glm::vec4);
     auto *h_points = (float *) malloc(pointsBytes);
     for (int i = 0; i < pointsCnt_u; i++) {
         for (int j = 0; j < pointsCnt_v; j++) {
             for (int k = 0; k < POINT_SIZE; k++) {
                 h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k];
             }
+//            printf("%f/ %f/ %f/ %f     ", controlPoints[i][j][0], controlPoints[i][j][1], controlPoints[i][j][2], controlPoints[i][j][3]);
         }
     }
     cudaMalloc((void **) &d_points, pointsBytes);
@@ -314,6 +313,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
     for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i];
     for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i];
 
+    safeCudaFree(d_knots_u);
+    safeCudaFree(d_knots_v);
     cudaMalloc((void **) &d_knots_u, knotsBytes_u);
     cudaMalloc((void **) &d_knots_v, knotsBytes_v);
     cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice);
@@ -330,10 +331,10 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
 
     // 结果数组
     size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float);
-
+    safeCudaFree(d_evaluationRes);
     cudaMalloc((void **) &d_evaluationRes, resBytes);
-    auto *h_res = (float *) malloc(resBytes);
-
+    safeFree(h_evaluations);
+    h_evaluations = (float *) malloc(resBytes);
 
     // 构造g_basisTexture线程层级
     dim3 blockBasis(512);
@@ -354,8 +355,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
     cudaDeviceSynchronize();
 
     g_evaluate <<<grid, block>>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v,
-                                 POINT_SIZE,
-                                 knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v);
+                                 POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u,
+                                 sampleCnt_v);
     cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
     if (recordTime) {
         time_cost_device = get_time() - time_cost_device;
@@ -363,34 +364,58 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
                time_cost_device);
     }
 
-    cudaMemcpy(h_res, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost);
-    std::vector<std::vector<std::vector<float>>> res(sampleCnt_u, std::vector<std::vector<float>>(sampleCnt_v,
-                                                                                                  std::vector<float>(3,
-                                                                                                                     0.)));
+    cudaMemcpy(h_evaluations, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost);
+
+
+    // 释放内存
+    safeFree(h_points);
+    safeFree(h_knots_u);
+    safeFree(h_knots_v);
+}
+
+
+__host__ std::vector<std::vector<glm::vec3>>
+NurbsSurface::Surface::getEvaluateVec(int sampleCnt_u, int sampleCnt_v) const {
+    std::vector<std::vector<glm::vec3>> res(sampleCnt_u, std::vector<glm::vec3>(sampleCnt_v, glm::vec3()));
     for (int i = 0; i < sampleCnt_u; i++) {
         int baseIdx = i * sampleCnt_v * 3;
         for (int j = 0; j < sampleCnt_v; j++) {
             baseIdx += j * 3;
-            res[i][j][0] = h_res[baseIdx];
-            res[i][j][1] = h_res[baseIdx + 1];
-            res[i][j][2] = h_res[baseIdx + 2];
+            res[i][j].x = h_evaluations[baseIdx];
+            res[i][j].y = h_evaluations[baseIdx + 1];
+            res[i][j].z = h_evaluations[baseIdx + 2];
 //            printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0], res[i][j][1], res[i][j][2]);
         }
     }
-
-    // 释放内存
-    safeFree(h_points);
-    safeFree(h_knots_u);
-    safeFree(h_knots_v);
-    safeFree(h_res);
-
     return res;
 }
 
-__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) {
-    // 先完成evaluation
-    evaluate(sampleCnt_u, sampleCnt_v);
+__host__ std::vector<MeshPoints3> NurbsSurface::Surface::getDerivativeVec(int sampleCnt_u, int sampleCnt_v) const {
+    MeshPoints3 der_u(sampleCnt_u, LinePoints3(sampleCnt_v));
+    MeshPoints3 der_v(sampleCnt_u, LinePoints3(sampleCnt_v));
+    MeshPoints3 normal(sampleCnt_u, LinePoints3(sampleCnt_v));
+    for (int i = 0; i < sampleCnt_u; i++) {
+        int baseIdx = i * sampleCnt_v * 6;
+        for (int j = 0; j < sampleCnt_v; j++) {
+            baseIdx += j * 6;
+            der_u[i][j].x = h_derivatives[baseIdx];
+            der_u[i][j].y = h_derivatives[baseIdx + 1];
+            der_u[i][j].z = h_derivatives[baseIdx + 2];
+            der_v[i][j].x = h_derivatives[baseIdx + 3];
+            der_v[i][j].y = h_derivatives[baseIdx + 4];
+            der_v[i][j].z = h_derivatives[baseIdx + 5];
+            auto baseIdxNorm = baseIdx / 2;
+            normal[i][j].x = h_normals[baseIdxNorm];
+            normal[i][j].y = h_normals[baseIdxNorm + 1];
+            normal[i][j].z = h_normals[baseIdxNorm + 2];
+            // TODO normalize 归一化在gpu中实现！
+            normal[i][j] = glm::normalize(normal[i][j]);
+        }
+    }
+    return {der_u, der_v, normal};
+}
 
+__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) {
     if (POINT_SIZE != controlPoints[0][0].size()) {
         printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
         return;
@@ -405,13 +430,13 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v
 
     // 构造切向量计算结果
     safeCudaFree(d_derivatives);
-    cudaMalloc((void **) &d_derivatives,
-               sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采样所求的切向量是一个六元向量，前三位是对u的偏导、后三位是对v的偏导
+    size_t derBytes = sampleCnt_u * sampleCnt_v * 6 * sizeof(float);
+    cudaMalloc((void **) &d_derivatives, derBytes); // 每个采样所求的切向量是一个六元向量，前三位是对u的偏导、后三位是对v的偏导
 
     // 构造法向量计算结果
     safeCudaFree(d_normals);
-    cudaMalloc((void **) &d_normals,
-               sampleCnt_v * sampleCnt_u * 3 * sizeof(float));
+    size_t normalBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float);
+    cudaMalloc((void **) &d_normals, normalBytes);
 
     // 构造线程层级
     dim3 block(32, 32);
@@ -437,13 +462,22 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v
         printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v,
                time_cost_device);
     }
+
+    // 结果数组
+    safeFree(h_normals);
+    h_normals = (float *) malloc(normalBytes);
+    cudaMemcpy(h_normals, d_normals, normalBytes, cudaMemcpyDeviceToHost);
+
+    safeFree(h_derivatives);
+    h_derivatives = (float *) malloc(derBytes);
+    cudaMemcpy(h_derivatives, d_derivatives, derBytes, cudaMemcpyDeviceToHost);
+
     cudaFree(d_derTexture_u);
     cudaFree(d_derTexture_v);
 }
 
 __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) {
-    // 先计算切向量
-    derivative(sampleCnt_u, sampleCnt_v);
+
     if (POINT_SIZE != controlPoints[0][0].size()) {
         printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
         return;
@@ -491,17 +525,18 @@ NurbsSurface::Surface::~Surface() {
     safeCudaFree(d_normals);
     safeCudaFree(d_derivatives);
     safeFree(bvh.nodes);
+    safeFree(h_evaluations);
+    safeFree(h_normals);
+    safeFree(h_derivatives);
     cudaDeviceReset();
 }
 
-__host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) {
+__host__ void NurbsSurface::Surface::buildBVH(int layerCnt, bool useK) {
+    // TODO 构造BVH的函数不应该出现在NURBS Surface中，应该是BVH类的事情！
+    // TODO NURBS Surface只需要一个函数去调用BVH对象的方法即可
     int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
-    if (useK) {
-        // 先计算曲率，过程中也会计算曲面值
-        curvature(sampleCnt_u, sampleCnt_v);
-    } else {
-        // 只做evaluation
-        evaluate(sampleCnt_u, sampleCnt_v);
+    if (!useK) {
+        // 必须safeFree一下，这样global函数中才能通过d_k = nullptr知道不需要再free
         safeCudaFree(d_k);
     }
     if (POINT_SIZE != controlPoints[0][0].size()) {
@@ -521,14 +556,15 @@ __host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) {
     // 记录用时
     double time_cost_device;
     if (recordTime) time_cost_device = get_time();
-    buildBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
-                              knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
+    g_buildBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
+                                knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
     cudaDeviceSynchronize();
     if (recordTime) {
         time_cost_device = get_time() - time_cost_device;
         printf("GPU time cost of a %d-layer BVH building: %lf\n",
                bvh.maxLevel, time_cost_device);
     }
+    // 将bvh拷贝到cpu中
     safeFree(bvh.nodes);
     bvh.nodes = (BVHNode *) malloc(bvhBytes);
     cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost);
@@ -537,9 +573,9 @@ __host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) {
 }
 
 __host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) {
+    // TODO，构造GAUSS Map的函数不应该出现在NURBS Surface中，应该是GAUSS Map类的事情！
     int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
-    // 先计算法向量
-    derivative(sampleCnt_u, sampleCnt_v);
+
     if (POINT_SIZE != controlPoints[0][0].size()) {
         printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
         return;
@@ -556,8 +592,8 @@ __host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) {
     // 记录用时
     double time_cost_device;
     if (recordTime) time_cost_device = get_time();
-    buildBvh<<<grid, block>>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1],
-                              knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree);
+    g_buildBvh<<<grid, block>>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1],
+                                knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree);
     cudaDeviceSynchronize();
     if (recordTime) {
         time_cost_device = get_time() - time_cost_device;
@@ -603,16 +639,24 @@ __host__ void NurbsSurface::recursiveGetOverlapLeafNodes(const BVH &bvh1, const
     }
 }
 
-__host__ std::vector<std::pair<int, int>>
+
+__host__ std::vector<boxPair>
 NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) {
-    std::vector<std::pair<int, int>> resPairs;
+    std::vector<idx2> resPairs;
     // 记录用时
     double time_cost_device = get_time();
+
     recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs);
+    std::vector<boxPair> boxPairsIdx2(resPairs.size());
+    for (int i = 0; i < resPairs.size(); i++) {
+        boxPairsIdx2[i] = {{bvh1.nodes[resPairs[i].first].idx_u,  bvh1.nodes[resPairs[i].first].idx_v},
+                           {bvh2.nodes[resPairs[i].second].idx_u, bvh2.nodes[resPairs[i].second].idx_v}};
+    }
+
     time_cost_device = get_time() - time_cost_device;
     printf("CPU time cost for recursively calculating the overlapped leaf nodes: %lf\n", time_cost_device);
 
-    return resPairs;
+    return boxPairsIdx2;
 }
 
 __host__ bool
@@ -631,8 +675,10 @@ NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<in
         printf("Error when detecting overlapping: idx range invalid!\n");
         return false;
     }
+
     auto getRangedBox = [&commonMaxLayer](const BVH &bvh, const std::pair<int, int> &idxRange_u,
                                           const std::pair<int, int> idxRange_v) {
+        // 获取某个范围的gauss map的aabb
         AABB bounding;
         for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) {
             for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) {
@@ -670,3 +716,69 @@ __host__ bool NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2
     auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt);
     return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2, idxRange_v2);
 }
+
+__host__ void
+NurbsSurface::Surface::recursiveGetRayBVHIntersection(const glm::vec3 dir, const glm::vec3 startPoint, const int idx,
+                                                      std::vector<BVHNode> &intersectionLeafNodes) {
+    auto bvhNode = bvh.nodes[idx];
+    // 射线与AABB判交
+    auto isRayBoxIntersect = [&]() {
+        const auto &box = bvhNode.bounds;
+        float t; // 射线的参数，当t<=0，表示线面交点不在视平面前方，视为没有交点
+        if (dir.x != 0.) {
+            // 当x分量不为0，则射线会与AABB中垂直于x轴的平面可能有交
+            // 注意x的正负。x为正会先遇到较小点所在平面
+            if (dir.x > 0) t = (box.pMin.x - startPoint.x) / dir.x;
+            else t = (box.pMax.x - startPoint.x) / dir.x;
+            if (t > 0.) {
+                // 射线与平面在前方有交点。但交点不一定在盒子上
+                auto tmpPt = startPoint + t * dir; // 交点
+                if (box.pMin.y <= tmpPt.y && box.pMin.z <= tmpPt.z && box.pMax.y >= tmpPt.y && box.pMax.z >= tmpPt.z)
+                    return true;
+            }
+        }
+        if (dir.y != 0.) {
+            // 同上测试y方向
+            if (dir.y > 0) t = (box.pMin.y - startPoint.y) / dir.y;
+            else t = (box.pMax.y - startPoint.y) / dir.y;
+            if (t > 0.) {
+                auto tmpPt = startPoint + t * dir;
+                if (box.pMin.x <= tmpPt.x && box.pMin.z <= tmpPt.z && box.pMax.x >= tmpPt.x && box.pMax.z >= tmpPt.z)
+                    return true;
+            }
+        }
+        if (dir.z != 0.) {
+            // 同上测试z方向
+            if (dir.z > 0) t = (box.pMin.z - startPoint.z) / dir.z;
+            else t = (box.pMax.z - startPoint.z) / dir.z;
+            if (t > 0.) {
+                auto tmpPt = startPoint + t * dir;
+                if (box.pMin.x <= tmpPt.x && box.pMin.y <= tmpPt.y && box.pMax.x >= tmpPt.x && box.pMax.y >= tmpPt.y)
+                    return true;
+            }
+        }
+        return false;
+    };
+
+    // 不相交
+    if (!isRayBoxIntersect()) return;
+    // 相交
+    if (bvhNode.firstChild == -1) {
+        // 与叶节点相交
+        intersectionLeafNodes.emplace_back(bvhNode);
+    } else {
+        // 与父节点相交
+        for (int i = 0; i < 4; i++)
+            recursiveGetRayBVHIntersection(dir, startPoint, bvhNode.firstChild + i, intersectionLeafNodes);
+    }
+}
+
+__host__ std::vector<BVHNode> NurbsSurface::Surface::rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint) {
+    std::vector<BVHNode> res;
+    recursiveGetRayBVHIntersection(dir, startPoint, 0, res);
+    //TODO sort res by t
+
+//    res.emplace_back(BVHNode());
+//    printf("res size: %lld\n", res.size());
+    return res;
+}
\ No newline at end of file
diff --git a/src/device/aabb.cu b/src/device/aabb.cu
index c3b9038..68b5ccc 100644
--- a/src/device/aabb.cu
+++ b/src/device/aabb.cu
@@ -2,36 +2,34 @@
 // Created by 14727 on 2022/12/11.
 //
 
-#include "../../include/device/aabb.cuh"
-
-#define D_DBL_MAX 1.7976931348623158e+308
-#define D_FLT_MAX 3.402823466e+38F
+#include "device/aabb.cuh"
+#include "device/device_utils.cuh"
 
 __device__ __host__ AABB::AABB() {
     float minNum = -D_FLT_MAX;
     float maxNum = D_FLT_MAX;
-    pMax = FVec3(minNum, minNum, minNum);
-    pMin = FVec3(maxNum, maxNum, maxNum);
+    pMax = glm::vec3(minNum, minNum, minNum);
+    pMin = glm::vec3(maxNum, maxNum, maxNum);
 }
 
-__device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {}
+__device__ __host__ AABB::AABB(const glm::vec3 &p) : pMin(p), pMax(p) {}
 
-__device__ __host__ AABB::AABB(const FVec3 &p1, const FVec3 &p2) {
-    pMin = FVec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z));
-    pMax = FVec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z));
+__device__ __host__ AABB::AABB(const glm::vec3 &p1, const glm::vec3 &p2) {
+    pMin = glm::vec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z));
+    pMax = glm::vec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z));
 }
 
-__device__ __host__ AABB AABB::Union(const FVec3 &p) {
-    return {FVec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)),
-            FVec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))};
+__device__ __host__ AABB AABB::Union(const glm::vec3 &p) const {
+    return {glm::vec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)),
+            glm::vec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))};
 }
 
-__device__ __host__ AABB AABB::Union(const AABB &b2) {
-    return {FVec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)),
-            FVec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))};
+__device__ __host__ AABB AABB::Union(const AABB &b2) const {
+    return {glm::vec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)),
+            glm::vec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))};
 }
 
-__device__ __host__ bool AABB::IsOverlap(const AABB& b2) {
+__device__ __host__ bool AABB::IsOverlap(const AABB &b2) const {
     if (pMin.x > b2.pMax.x) return false;
     if (b2.pMin.x > pMax.x) return false;
     if (pMin.y > b2.pMax.y) return false;
diff --git a/src/device/device_utils.cu b/src/device/device_utils.cu
index 29b9aae..71388cb 100644
--- a/src/device/device_utils.cu
+++ b/src/device/device_utils.cu
@@ -2,7 +2,7 @@
 // Created by 14727 on 2022/12/9.
 //
 
-#include "../../include/device/device_utils.cuh"
+#include "device/device_utils.cuh"
 
 __device__ void d_safeFree(float *&p) {
     if (p != nullptr) {
diff --git a/src/device/srf_mesh.cu b/src/device/srf_mesh.cu
new file mode 100644
index 0000000..f56a997
--- /dev/null
+++ b/src/device/srf_mesh.cu
@@ -0,0 +1,4 @@
+#include "device/srf_mesh.cuh"
+
+SrfMesh::SrfMesh(glm::vec3 *evalRes, glm::vec3 *tan_u, glm::vec3 *tan_v, glm::vec3 *nor, int edgeSampleCnt) :
+        evaluationRes(evalRes), tangent_u(tan_u), tangent_v(tan_v), normal(nor), edgeSampleCnt(edgeSampleCnt) {}
diff --git a/src/device/vec.cu b/src/device/vec.cu
index d353446..c8da897 100644
--- a/src/device/vec.cu
+++ b/src/device/vec.cu
@@ -2,7 +2,7 @@
 // Created by 14727 on 2022/12/11.
 //
 
-#include "../../include/device/vec.cuh"
+#include "device/vec.cuh"
 
 __device__ __host__ FVec3::FVec3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
 
diff --git a/src/utils.cpp b/src/utils.cpp
index ee5d15f..2ffdfaf 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,4 +1,4 @@
-#include "../include/utils.h"
+#include "utils.h"
 #include "cuda_runtime.h"
 
 #if IN_UNIX
diff --git a/tests/main.cpp b/tests/main.cpp
index 29ee133..73ccbcd 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,6 +1,7 @@
 #include <cstdio>
-#include "../include/device/Nurbs/nurbs_curve.cuh"
-#include "../include/device/Nurbs/nurbs_surface.cuh"
+#include "device/Nurbs/nurbs_curve.cuh"
+#include "device/Nurbs/nurbs_surface.cuh"
+#include "device/Nurbs/loop_detection.cuh"
 
 int main() {
 //    NurbsSurface::Surface nurbsSurfaceEvaluator({
@@ -28,28 +29,50 @@ int main() {
 //                                                        {{-0.61, 0.22,  0.09, 1},  {-0.21, 0.20,  0.09, 1},  {0.13, 0.20,  0.12, 1},  {0.49, 0.19,  -0.03, 1}},
 //                                                        {{-0.52, 0.51,  0.13, 1},  {-0.20, 0.32,  0.36, 1},  {0.18, 0.29,  0.28, 1},  {0.51, 0.53,  0.12, 1}}
 //                                                }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
+//    NurbsSurface::Surface nurbsSurfaceEvaluator({
+//                                                        {{2, -2,  -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2,  -0.5, 4}, {-2, -2,  1.5, 2}},
+//                                                        {{2, -1,  -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1,  -0.5, 1}, {-2, -1,  1.5, 1}},
+//                                                        {{3, 1.2, -2, 9}, {-2.5, 1.2,  -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}},
+//                                                        {{2, 2,   -2, 1}, {-2.5, 2,    -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2,   1.5, 1}}
+//                                                }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
+//    NurbsSurface::Surface nurbsSurfaceEvaluator({
+//                                                               {{-0.5, -0.5, -0.2, 1}, {-0.2, -0.5, -0.1, 1}, {0.2, -0.5, -0.1, 1}, {0.5, -0.5, -0.2, 1}},
+//                                                               {{-0.5, -0.2, -0.1, 1}, {-0.2, -0.2, 0., 1},   {0.2, -0.2, 0., 1},   {0.5, -0.2, -0.1, 1}},
+//                                                               {{-0.5, 0.2, -0.1, 1},  {-0.2, 0.2, 0., 1},    {0.2, 0.2, 0., 1},    {0.5, 0.2, -0.1, 1}},
+//                                                               {{-0.5, 0.5, -0.2, 1},  {-0.2, 0.5, -0.1, 1},  {0.2, 0.5, -0.1, 1},  {0.5, 0.5, -0.2, 1}}
+//                                                       }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
     NurbsSurface::Surface nurbsSurfaceEvaluator({
-                                                        {{2, -2,  -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2,  -0.5, 4}, {-2, -2,  1.5, 2}},
-                                                        {{2, -1,  -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1,  -0.5, 1}, {-2, -1,  1.5, 1}},
-                                                        {{3, 1.2, -2, 9}, {-2.5, 1.2,  -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}},
-                                                        {{2, 2,   -2, 1}, {-2.5, 2,    -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2,   1.5, 1}}
-                                                }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1});
+                                                        {{0,   0, 0.2,  1.1}, {0,   0.2, 0.3,  0.9}, {0,   0.4, 0.25, 1.1}, {0,   0.6, 0.12,  0.9}, {0,   0.8, 0.22,  1},   {0,   1, 0.12, 1}},
+                                                        {{0.2, 0, 0.22, 1},   {0.2, 0.2, -0.3, 1},   {0.2, 0.4, 0.4,  1},   {0.2, 0.6, -0.1,  1.1}, {0.2, 0.8, -0.1,  1},   {0.2, 1, 0.2,  0.9}},
+                                                        {{0.4, 0, 0.18, 0.9}, {0.4, 0.2, -0.3, 1},   {0.4, 0.4, 0.4,  1.1}, {0.4, 0.6, -0.1,  1},   {0.4, 0.8, -0.1,  0.9}, {0.4, 1, 0.23, 1}},
+                                                        {{0.6, 0, 0.2,  0.9}, {0.6, 0.2, 0.4,  0.9}, {0.6, 0.4, 0.4,  1},   {0.6, 0.6, 0.4,   0.9}, {0.6, 0.8, 0.4,   1},   {0.6, 1, 0.2,  1}},
+                                                        {{0.8, 0, 0.19, 1.1}, {0.8, 0.2, -0.4, 1},   {0.8, 0.4, 0.4,  1.1}, {0.8, 0.6, -0.27, 1},   {0.8, 0.8, -0.27, 1},   {0.8, 1, 0.2,  0.9}},
+                                                        {{1,   0, 0.2,  1.1}, {1,   0.2, 0.12, 1},   {1,   0.4, 0.22, 1},   {1,   0.6, 0.32,  1},   {1,   0.8, 0.22,  1.2}, {1,   1, 0.12, 1.1}}
+                                                }, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1});
     nurbsSurfaceEvaluator.setRecordTime(true);
 
-    nurbsSurfaceEvaluator.buildGaussMap(9); // 构建曲面的Gauss map
+//    nurbsSurfaceEvaluator.buildGaussMap(6); // 构建曲面的Gauss map
 //    nurbsSurfaceEvaluator.gauss_map.printQuadTree(); // 打印整棵Gauss map树
-    auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map,
-                                                                    nurbsSurfaceEvaluator.gauss_map);
-    printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size());
+//    auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map,
+//                                                                    nurbsSurfaceEvaluator.gauss_map);
+//    printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size());
+
     //判定两个Surface构造的GaussMap在指定u、v范围内是否有重合
-    auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map,
-                                                                      nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3},
-                                                                      {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1},
-                                                                      {0, 1}, {0, 1}, {0, 1});
-    printf("is guass map overlapped: %d\n", isRegionallyOverlapped);
+//    auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map,
+//                                                                      nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3},
+//                                                                      {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1},
+//                                                                      {0, 1}, {0, 1}, {0, 1});
+//    printf("is guass map overlapped: %d\n", isRegionallyOverlapped);
+
+    int level = 8;
+    int edgeSampleCnt = pow(2, level - 1) + 1;
+    nurbsSurfaceEvaluator.evaluate(edgeSampleCnt, edgeSampleCnt);
+    nurbsSurfaceEvaluator.derivative(edgeSampleCnt, edgeSampleCnt);
+    // 如果buildBVH需要曲率K，这里需要再先: nurbsSurfaceEvaluator.curvature(edgeSampleCnt, edgeSampleCnt);
+    // 同时buildBVH传入第二个参数
+    nurbsSurfaceEvaluator.buildBVH(level); // 构建曲面的BVH
+    auto derInfo = nurbsSurfaceEvaluator.getDerivativeVec(edgeSampleCnt, edgeSampleCnt); // 求值
 
-//    nurbsSurfaceEvaluator.buildBHV(9); // 构建曲面的BVH
-//    nurbsSurfaceEvaluator.evaluate(101, 101); // 求值
     printf("==============================\n");
 
     NurbsCurve::Curve nurbsCurveEvaluator(
@@ -62,6 +85,8 @@ int main() {
             {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1});
     nurbsCurveEvaluator.setRecordTime(true);
 
+
+
     auto res = nurbsCurveEvaluator.evaluate(11);
     for (auto &re: res) {
         printf("%f, %f, %f\n", re[0], re[1], re[2]);