diff --git a/CMakeLists.txt b/CMakeLists.txt index 90c4ef9..230989a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,24 +3,27 @@ project(NurbsPerformer LANGUAGES CXX CUDA) set(CMAKE_CUDA_STANDARD 14) -add_executable(NurbsPerformer src/main.cpp - src/utils.cpp include/utils.h - src/device/Nurbs/nurbs_common.cu include/device/Nurbs/nurbs_common.cuh - src/device/device_utils.cu include/device/device_utils.cuh - src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh - src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh - src/device/aabb.cu include/device/aabb.cuh - src/device/vec.cu include/device/vec.cuh - src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh +set(PROJECT_SOURCES + tests/main.cpp + src/utils.cpp include/utils.h + src/device/aabb.cu include/device/aabb.cuh + src/device/vec.cu include/device/vec.cuh + src/device/device_utils.cu include/device/device_utils.cuh + src/device/Nurbs/nurbs_common.cu include/device/Nurbs/nurbs_common.cuh + src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh + src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh + src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh ) +add_executable(NurbsPerformer ${PROJECT_SOURCES}) + #add_compile_options("$<$:/utf-8>") #add_compile_options("$<$:/utf-8>") # 指定静态库位置 #set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) #生成静态库 -#add_library(NurbsPerformer NurbsPerformer.cu NurbsPerformer.cuh utils.cpp utils.h) +#add_library(NurbsPerformer ${PROJECT_SOURCES}) # 引用系统环境变量CUDA_PATH # linux @@ -28,6 +31,8 @@ add_executable(NurbsPerformer src/main.cpp # windows include_directories("$ENV{CUDA_PATH}/include") +include_directories(include) + #MESSAGE("CUDA PATH::: $ENV{CUDA_PATH}") #MESSAGE("CUDA PATH::: $ENV{LD_LIBRARY_PATH}") #MESSAGE("CUDA PATH::: $ENV{CPATH}") diff --git a/README.md b/README.md index 9fc7e10..ebcb1dc 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ A tool for performing NURBS curve/surface modeling operations in parallel using 1. CMakeLists.txt中注释以下代码,不再生成可执行文件 ```cmake -add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h) +add_executable(NurbsPerformer ${PROJECT_SOURCES}) ``` 2. CMakeLists.txt中取消注释以下代码,表示需要生成静态库。构建项目。 ```cmake set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) -add_library(NurbsEvaluator src/utils.cpp include/utils.h) +add_library(NurbsPerformer ${PROJECT_SOURCES}) ``` 3. 原项目连接到生成的静态库目录。如果是由CMake构建,可以用 target_link_libraries 指向依赖(.lib),用 target_include_directories 指向头文件目录。 4. 使用.cu文件调用NurbsEvaluator diff --git a/include/device/Nurbs/bvh.cuh b/include/device/Nurbs/bvh.cuh index 7d01728..5c7831a 100644 --- a/include/device/Nurbs/bvh.cuh +++ b/include/device/Nurbs/bvh.cuh @@ -5,25 +5,31 @@ #ifndef NURBSEVALUATOR_BVH_CUH #define NURBSEVALUATOR_BVH_CUH -#include "../aabb.cuh" +#include "device/aabb.cuh" -struct BVHNode { - AABB bounds; // AABB包围盒 - int firstChild, level, childNum = 4; // 第一个孩子节点的下标,该结点所在层次,孩子个数 - // 曲面片u, v的范围 - float u0, u1, v0, v1; -}; +/** + * 当前层的第一个节点的坐标 + * @param layer 层号。根节点为第一层 + */ +__device__ __host__ int getStartIdxOfLayerN(int layer); + +/** + * 根据u、v参数域中的小矩形的位置,判断它在BVH叶节点层(也就是最后一层)中的位置 + */ +__device__ int d_getChildNodeIdx(int ix, int iy); + +__host__ int h_getChildNodeIdx(int ix, int iy); class BVH { public: int maxLevel, size; - BVHNode* nodes = nullptr; + BVHNode *nodes = nullptr; __host__ void printQuadTree(); }; __global__ void -buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float d_lastKnot_u, float d_lastKnot_v, - int d_sampleCnt_u, int d_sampleCnt_v, BVHNode *BVH); +buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, + int sampleCnt_u, int sampleCnt_v, BVHNode *BVH); #endif //NURBSEVALUATOR_BVH_CUH diff --git a/include/device/Nurbs/nurbs_surface.cuh b/include/device/Nurbs/nurbs_surface.cuh index af92497..8c170cf 100644 --- a/include/device/Nurbs/nurbs_surface.cuh +++ b/include/device/Nurbs/nurbs_surface.cuh @@ -4,6 +4,7 @@ #ifndef NURBSEVALUATOR_NURBS_SURFACE_CUH #define NURBSEVALUATOR_NURBS_SURFACE_CUH + #include #include "cuda_runtime.h" #include "bvh.cuh" @@ -15,18 +16,19 @@ namespace NurbsSurface { * @param d_pointSize 点的大小(3: [x, y, z] | 4:[x, y, z, w]) */ __global__ static void - g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, - int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, - int d_sampleCnt_v); + g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, + int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, + int d_sampleCnt_u, int d_sampleCnt_v); __global__ static void - g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, - const float *nTexture_v, - const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, - float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); + g_derivative(float *derivatives, float *normals, const float *derTexture_u, const float *derTexture_v, + const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, + int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, + int d_sampleCnt_v); __global__ static void - g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v, float *ms, float*k); + g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v, + float *ms, float *k); class Surface { private: @@ -45,11 +47,12 @@ namespace NurbsSurface { float *d_evaluationRes; // evaluation结果 float *d_derivatives; // 一阶导计算结果 + float *d_normals; float *d_k; // 最大曲率 - BVH bvh; - public: + BVH bvh; + BVH gauss_map; /** * 构造函数 * @param controlPoints 控制点矩阵[pointsCnt_u][pointsCnt_v][3] @@ -57,7 +60,7 @@ namespace NurbsSurface { * @param knots_v v方向knots */ __host__ explicit Surface(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v); + std::vector knots_u, std::vector knots_v); /** * 供外部CPU程序使用的、负责调用gpu并行计算的方法 @@ -81,15 +84,55 @@ namespace NurbsSurface { /** * 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法 */ - __host__ void buildBHV(int sampleCnt_u, int sampleCnt_v); + __host__ void buildBHV(int layerCnt, bool useK = false); + + /** + * 供外部CPU程序使用的、负责调用gpu并行计算Gauss Map Tree的方法 + */ + __host__ void buildGaussMap(int layerCnt); + void setRecordTime(bool r); ~Surface(); }; -} + __host__ void recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2, + std::vector> &pairs); + + __host__ std::vector> getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2); + + /** + * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交 + * @param idxRange_u1 第一个gauss map的参数u范围对应的u方向上的采样网格的格子下标范围 + * @param idxRange_v1 第一个gauss map的参数v范围对应的u方向上的采样网格的格子下标范围 + * @param idxRange_u2 第而个gauss map的参数u范围对应的u方向上的采样网格的格子下标范围 + * @param idxRange_v2 第二个gauss map的参数v范围对应的u方向上的采样网格的格子下标范围 + * @return true:gauss map的包围盒有交集,说明gauss map<可能>有重合;false: gauss map一定没有重合 + */ + __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair idxRange_u1, + std::pair idxRange_v1, std::pair idxRange_u2, + std::pair idxRange_v2); + /** + * 判断两个曲面的Gauss Map在指定的、各自的参数范围内有没有交 + * @param range_u1 第一个gauss map的参数u范围 + * @param range_v1 第一个gauss map的参数v范围 + * @param range_u2 第二个gauss map的参数u范围 + * @param range_v2 第二个gauss map的参数v范围 + * @param paramRange_u1 第一个gauss map的参数u定义域 + * @param paramRange_v1 第一个gauss map的参数v定义域 + * @param paramRange_u2 第二个gauss map的参数u定义域 + * @param paramRange_v2 第二个gauss map的参数v定义域 + * @return true:gauss map的包围盒有交集,说明gauss map<可能>有重合;false: gauss map一定没有重合 + */ + __host__ bool isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair range_u1, + std::pair range_v1, std::pair range_u2, + std::pair range_v2, std::pair paramRange_u1, + std::pair paramRange_v1, + std::pair paramRange_u2, + std::pair paramRange_v2); +} #endif //NURBSEVALUATOR_NURBS_SURFACE_CUH \ No newline at end of file diff --git a/include/device/aabb.cuh b/include/device/aabb.cuh index 36b476f..d6ee233 100644 --- a/include/device/aabb.cuh +++ b/include/device/aabb.cuh @@ -20,9 +20,17 @@ public: __device__ __host__ AABB Union(const FVec3& p); // aabb包围盒合并操作,两个包围盒 __device__ __host__ AABB Union(const AABB& b2); + // aabb包围盒沿各坐标轴政府方向分别扩张 + __device__ __host__ void expand(float k); // 判断两个aabb包围盒是否重叠 __device__ __host__ bool IsOverlap(const AABB& b2); }; +struct BVHNode { + AABB bounds; // AABB包围盒 + int firstChild = -1, level = 0; // 第一个孩子节点的下标,该结点所在层次,孩子个数 + // 曲面片u, v的范围 + float u0 = 0., u1 = 0., v0 = 0., v1 = 0.; +}; #endif //NURBSEVALUATOR_AABB_CUH diff --git a/include/device/vec.cuh b/include/device/vec.cuh index 5bd99d5..e11f349 100644 --- a/include/device/vec.cuh +++ b/include/device/vec.cuh @@ -11,7 +11,6 @@ public: float x, y, z; __device__ __host__ FVec3(float x_, float y_, float z_); __device__ __host__ FVec3(FVec3 const &fVec3); - __device__ __host__ FVec3(); }; diff --git a/src/device/Nurbs/bvh.cu b/src/device/Nurbs/bvh.cu index 874c6ba..bce1cc9 100644 --- a/src/device/Nurbs/bvh.cu +++ b/src/device/Nurbs/bvh.cu @@ -7,7 +7,7 @@ * 当前层的第一个节点的坐标 * @param layer 层号。根节点为第一层 */ -__device__ int getStartIdxOfLayerN(int layer) { +__device__ __host__ int getStartIdxOfLayerN(int layer) { if (layer == 1) return 0; int num = 1; // 找规律得出 @@ -17,13 +17,7 @@ __device__ int getStartIdxOfLayerN(int layer) { return num; } -/** - * 根据u、v参数域中的小矩形的位置,判断它在BVH叶节点层(也就是最后一层)中的位置 - * @param ix - * @param iy - * @return - */ -__device__ int getChildNodeIdx(int ix, int iy) { +__device__ int d_getChildNodeIdx(int ix, int iy) { int idx = 0; while(ix > 0) { int log2XCeil = int(__log2f(ix)); @@ -38,9 +32,23 @@ __device__ int getChildNodeIdx(int ix, int iy) { return idx; } +__host__ int h_getChildNodeIdx(int ix, int iy){ + int idx = 0; + while(ix > 0) { + int log2XCeil = int(log2f(ix)); + idx += int(pow(4, log2XCeil)); + ix -= int(pow(2, log2XCeil)); + } + while(iy > 0) { + int log2XCeil = int(log2f(iy)); + idx += 2 * int(pow(4, log2XCeil)); + iy -= int(pow(2, log2XCeil)); + } + return idx; +} __global__ void -buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, +buildBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) { // 假设还是二维grid和二维的block int ix = blockIdx.x * blockDim.x + threadIdx.x; @@ -53,7 +61,7 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l float u = singleStepVal_u * ix, v = singleStepVal_v * iy; - int bias = getChildNodeIdx(ix, iy); // 每层的节点在总数组中的编号相对该层第一个节点的偏置 + int bias = d_getChildNodeIdx(ix, iy); // 每层的节点在总数组中的编号相对该层第一个节点的偏置 for (int step = 1; step <= sampleCnt_u - 1 || step <= sampleCnt_v - 1; step = step * 2, --level) { float step_u = min(step, sampleCnt_u - 1); @@ -63,43 +71,32 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l int idx = baseIdx + bias; int tmpIdx = (ix * sampleCnt_v + iy) * 3; -// AABB bound(FVec3(evaluatedPoints[tmpIdx] + *k, evaluatedPoints[tmpIdx + 1] + *k, -// evaluatedPoints[tmpIdx + 2] + *k)); // 最初只包含u,v点 - AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], - evaluatedPoints[tmpIdx + 2])); // 最初只包含u,v点 - if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]); + +// if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]); if (step == 1) { + AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], + evaluatedPoints[tmpIdx + 2])); // 最初只包含u,v点 // 叶子节点 BVH[idx].u0 = u; BVH[idx].u1 = u + step_u * singleStepVal_u; BVH[idx].v0 = v; BVH[idx].v1 = v + step_v * singleStepVal_v; BVH[idx].level = level; -// int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3; -// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1] + *k, evaluatedPoints[tmpIdx1 + 1] + *k, -// evaluatedPoints[tmpIdx1 + 2] + *k)); -// int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; -// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2] + *k, evaluatedPoints[tmpIdx2 + 1] + *k, -// evaluatedPoints[tmpIdx2 + 2] + *k)); -// int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; -// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3] + *k, evaluatedPoints[tmpIdx3 + 1] + *k, -// evaluatedPoints[tmpIdx3 + 2] + *k)); int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3; + int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; + int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1], evaluatedPoints[tmpIdx1 + 2])); - int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1], evaluatedPoints[tmpIdx2 + 2])); - int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1], evaluatedPoints[tmpIdx3 + 2])); - + if(k != nullptr) bound.expand(*k); BVH[idx].bounds = bound; - BVH[idx].childNum = -1; - - + BVH[idx].firstChild = -1; } else { if (ix % step == 0 || iy % step == 0) { + // 非叶子节点 BVH[idx].u0 = u; BVH[idx].u1 = u + step_u * singleStepVal_u; @@ -108,7 +105,7 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l BVH[idx].level = level; int firstChild = 4 * idx + 1; - bound = bound.Union(BVH[firstChild].bounds); + AABB bound(BVH[firstChild].bounds); bound = bound.Union(BVH[firstChild + 1].bounds); bound = bound.Union(BVH[firstChild + 2].bounds); bound = bound.Union(BVH[firstChild + 3].bounds); @@ -123,6 +120,16 @@ buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float l } } +__global__ void getOverlapLeafNodes(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, + int sampleCnt_u, int sampleCnt_v, BVHNode *BVH){ + // 假设还是二维grid和二维的block +// int ix = blockIdx.x * blockDim.x + threadIdx.x; +// int iy = blockIdx.y * blockDim.y + threadIdx.y; +// if (ix >= sampleCnt_u - 1 || iy >= sampleCnt_v - 1) { +// return; +// } +} + __host__ void BVH::printQuadTree() { for (int l = 1, levelNodesCnt = 1, baseIdx = 0; l <= maxLevel; l++, levelNodesCnt *= 4, baseIdx = baseIdx * 4 + 1) { diff --git a/src/device/Nurbs/nurbs_surface.cu b/src/device/Nurbs/nurbs_surface.cu index 7e35a9c..0cd8e93 100644 --- a/src/device/Nurbs/nurbs_surface.cu +++ b/src/device/Nurbs/nurbs_surface.cu @@ -2,17 +2,17 @@ // Created by 14727 on 2022/12/9. // -#include "../../../include/device/Nurbs/nurbs_surface.cuh" -#include "../../../include/device/Nurbs/nurbs_common.cuh" -#include "../../../include/utils.h" -#include "../../../include/device/Nurbs/bvh.cuh" +#include "device/Nurbs/nurbs_surface.cuh" +#include "device/Nurbs/nurbs_common.cuh" +#include "utils.h" +#include "device/Nurbs/bvh.cuh" +#include "device/device_utils.cuh" __global__ void NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v) { - // 二维grid和二维的block int ix = blockIdx.x * blockDim.x + threadIdx.x; int iy = blockIdx.y * blockDim.y + threadIdx.y; @@ -51,7 +51,7 @@ NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_n __global__ void -NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, +NurbsSurface::g_derivative(float *derivatives, float *normals, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v) { @@ -118,12 +118,16 @@ NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const derivatives[baseIdx + 4] = pdy_v; derivatives[baseIdx + 5] = pdz_v; - float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = - pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 - if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || - (ix == 8 && iy == 10)) - printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, - pdz_v, x_n, y_n, z_n); + // 叉乘得到法向量 + baseIdx = (ix * d_sampleCnt_v + iy) * 3; + normals[baseIdx] = pdy_u * pdz_v - pdy_v * pdz_u; + normals[baseIdx + 1] = pdx_v * pdz_u - pdx_u * pdz_v; + normals[baseIdx + 2] = pdx_u * pdy_v - pdx_v * pdy_u; + normalization(normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]); +// if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || +// (ix == 8 && iy == 10)) +// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, +// pdz_v, normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]); } __global__ void @@ -210,20 +214,18 @@ NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const // normalization(uvx, uvy, uvz); // normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); - - if (ix == 8 && iy == 9) { - printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, - sndPdz_uu, - uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); - printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, - sndPdz_vu); - } +// if (ix == 8 && iy == 9) { +// printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, +// sndPdz_uu, +// uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); +// printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, +// sndPdz_vu); +// } float m1 = max(max(sndPdx_uu, sndPdy_uu), sndPdz_uu); float m2 = max(max(uvx, uvy), uvz); float m3 = max(max(sndPdx_vv, sndPdy_vv), sndPdz_vv); - // __shared__ float ms[363]; ms[(ix * sampleCnt_v + iy) * 3] = m1; ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2; @@ -280,6 +282,7 @@ __host__ NurbsSurface::Surface::Surface(std::vector>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u, sampleCnt_u); cudaDeviceSynchronize(); @@ -349,7 +353,6 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { sampleCnt_v); cudaDeviceSynchronize(); - if (recordTime) time_cost_device = get_time(); g_evaluate <<>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); @@ -403,7 +406,12 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v // 构造切向量计算结果 safeCudaFree(d_derivatives); cudaMalloc((void **) &d_derivatives, - sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 + sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采样所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 + + // 构造法向量计算结果 + safeCudaFree(d_normals); + cudaMalloc((void **) &d_normals, + sampleCnt_v * sampleCnt_u * 3 * sizeof(float)); // 构造线程层级 dim3 block(32, 32); @@ -420,11 +428,9 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v g_derTexture<<>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, sampleCnt_v); cudaDeviceSynchronize(); - g_derivative<<>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, - pointsCnt_u, - pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], - sampleCnt_u, - sampleCnt_v); + g_derivative<<>>(d_derivatives, d_normals, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, + d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], + knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { time_cost_device = get_time() - time_cost_device; @@ -462,9 +468,8 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { time_cost_device = get_time() - time_cost_device; - printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", - sampleCnt_u * sampleCnt_v, - time_cost_device); + printf("GPU time cost of surface curvature calculating for %d samples: %lf\n", + sampleCnt_u * sampleCnt_v, time_cost_device); } safeCudaFree(ms); } @@ -483,13 +488,22 @@ NurbsSurface::Surface::~Surface() { safeCudaFree(d_knots_v); safeCudaFree(d_k); safeCudaFree(d_evaluationRes); + safeCudaFree(d_normals); + safeCudaFree(d_derivatives); safeFree(bvh.nodes); cudaDeviceReset(); } -__host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v) { - // 先计算曲率 - curvature(sampleCnt_u, sampleCnt_v); +__host__ void NurbsSurface::Surface::buildBHV(int layerCnt, bool useK) { + int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u; + if (useK) { + // 先计算曲率,过程中也会计算曲面值 + curvature(sampleCnt_u, sampleCnt_v); + } else { + // 只做evaluation + evaluate(sampleCnt_u, sampleCnt_v); + safeCudaFree(d_k); + } if (POINT_SIZE != controlPoints[0][0].size()) { printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); return; @@ -498,25 +512,161 @@ __host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v) dim3 block(32, 32); dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); - bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1); +// bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1); + bvh.maxLevel = layerCnt; bvh.size = (pow(4, bvh.maxLevel) - 1) / 3; // 等比数列求和公示求出数总的节点数 size_t bvhBytes = sizeof(BVHNode) * bvh.size; BVHNode *d_bvh = nullptr; cudaMalloc((void **) &d_bvh, bvhBytes); // 记录用时 - double time_cost_device = get_time(); - buildSurfaceBvh<<>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1], - knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh); + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + buildBvh<<>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh); cudaDeviceSynchronize(); if (recordTime) { time_cost_device = get_time() - time_cost_device; - printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", - sampleCnt_u * sampleCnt_v, - time_cost_device); + printf("GPU time cost of a %d-layer BVH building: %lf\n", + bvh.maxLevel, time_cost_device); } - if (bvh.nodes == nullptr) bvh.nodes = (BVHNode *) malloc(bvhBytes); + safeFree(bvh.nodes); + bvh.nodes = (BVHNode *) malloc(bvhBytes); cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost); safeCudaFree(d_bvh); - // bvh.printQuadTree(); } + +__host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) { + int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u; + // 先计算法向量 + derivative(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + + gauss_map.maxLevel = layerCnt; + gauss_map.size = (pow(4, layerCnt) - 1) / 3; // 等比数列求和公示求出数总的节点数 + size_t gaussMapBytes = sizeof(BVHNode) * gauss_map.size; + BVHNode *d_gaussMapTree = nullptr; + cudaMalloc((void **) &d_gaussMapTree, gaussMapBytes); + // 记录用时 + double time_cost_device; + if (recordTime) time_cost_device = get_time(); + buildBvh<<>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree); + cudaDeviceSynchronize(); + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of a %d-layer Gauss Map building: %lf\n", + layerCnt, time_cost_device); + } + safeFree(gauss_map.nodes); + gauss_map.nodes = (BVHNode *) malloc(gaussMapBytes); + cudaMemcpy(gauss_map.nodes, d_gaussMapTree, gaussMapBytes, cudaMemcpyDeviceToHost); + safeCudaFree(d_gaussMapTree); +} + +__host__ void NurbsSurface::recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2, + std::vector> &pairs) { + auto A = bvh1.nodes[idx1]; + auto B = bvh2.nodes[idx2]; + auto AABBSize = [](const AABB &aabb) { + return (aabb.pMax.z - aabb.pMin.z) * + (aabb.pMax.y - aabb.pMin.y) * + (aabb.pMax.x - aabb.pMin.x); + }; + // 两个包围盒不相交,返回 + if (!A.bounds.IsOverlap(B.bounds)) return; + // 相交 + if (A.firstChild == -1 && B.firstChild == -1) { + // 两者都是叶子节点 + pairs.emplace_back(idx1, idx2); + } else if (A.firstChild != -1 && B.firstChild == -1) { + // A是中间结点,B是叶子结点 + for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs); + } else if (A.firstChild == -1 && B.firstChild != -1) { + // A是叶子结点,B是中间结点 + for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs); + } else { + // 都是中间结点 + if (AABBSize(A.bounds) > AABBSize(B.bounds)) { + // A的包围盒更大 + for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs); + } else { + // B的包围盒更大 + for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs); + } + } +} + +__host__ std::vector> +NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) { + std::vector> resPairs; + // 记录用时 + double time_cost_device = get_time(); + recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs); + time_cost_device = get_time() - time_cost_device; + printf("CPU time cost for recursively calculating the overlapped leaf nodes: %lf\n", time_cost_device); + + return resPairs; +} + +__host__ bool +NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair idxRange_u1, + std::pair idxRange_v1, std::pair idxRange_u2, + std::pair idxRange_v2) { + if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) { + printf("BVH Layer error!\n"); + return false; + } + int commonMaxLayer = gm1.maxLevel; + int edgeCellCnt = pow(2, commonMaxLayer - 1); + if (idxRange_u1.first < 0 || idxRange_u2.first < 0 || idxRange_v1.first < 0 || idxRange_v2.first < 0 || + idxRange_u1.second >= edgeCellCnt || idxRange_u2.second >= edgeCellCnt || + idxRange_v1.second >= edgeCellCnt || idxRange_v2.second >= edgeCellCnt) { + printf("Error when detecting overlapping: idx range invalid!\n"); + return false; + } + auto getRangedBox = [&commonMaxLayer](const BVH &bvh, const std::pair &idxRange_u, + const std::pair idxRange_v) { + AABB bounding; + for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) { + for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) { + bounding = bounding.Union( + bvh.nodes[getStartIdxOfLayerN(commonMaxLayer) + h_getChildNodeIdx(i, j)].bounds); + } + } + return bounding; + }; + return getRangedBox(gm1, idxRange_u1, idxRange_v1) + .IsOverlap(getRangedBox(gm2, idxRange_u2, idxRange_v2)); +} + +__host__ bool NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair range_u1, + std::pair range_v1, std::pair range_u2, + std::pair range_v2, + std::pair paramRange_u1, + std::pair paramRange_v1, + std::pair paramRange_u2, + std::pair paramRange_v2) { + if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) { + printf("BVH Layer error!\n"); + return false; + } + int edgeCellCnt = pow(2, gm1.maxLevel - 1); + // 根据所给参数的范围和参数的定义域范围,获得对应的采样网格中的范围 + auto getIdxRange = [](std::pair range, std::pair paramRange, int edgeCellCnt) { + float paramStep = (paramRange.second - paramRange.first) / edgeCellCnt; + return std::pair({int((range.first - paramRange.first) / paramStep), + int((range.second - paramRange.first) / paramStep)}); + }; + auto idxRange_u1 = getIdxRange(range_u1, paramRange_u1, edgeCellCnt); + auto idxRange_v1 = getIdxRange(range_v1, paramRange_v1, edgeCellCnt); + auto idxRange_u2 = getIdxRange(range_u2, paramRange_u2, edgeCellCnt); + auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt); + return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2, idxRange_v2); +} diff --git a/src/device/aabb.cu b/src/device/aabb.cu index f516e91..c3b9038 100644 --- a/src/device/aabb.cu +++ b/src/device/aabb.cu @@ -10,8 +10,8 @@ __device__ __host__ AABB::AABB() { float minNum = -D_FLT_MAX; float maxNum = D_FLT_MAX; - pMin = FVec3(minNum, minNum, minNum); - pMax = FVec3(maxNum, maxNum, maxNum); + pMax = FVec3(minNum, minNum, minNum); + pMin = FVec3(maxNum, maxNum, maxNum); } __device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {} @@ -40,3 +40,12 @@ __device__ __host__ bool AABB::IsOverlap(const AABB& b2) { if (b2.pMin.z > pMax.z) return false; return true; } + +__device__ __host__ void AABB::expand(float k) { + pMin.x -= k; + pMin.y -= k; + pMin.z -= k; + pMax.x += k; + pMax.y += k; + pMax.z += k; +} diff --git a/src/device/device_utils.cu b/src/device/device_utils.cu index 37969d2..29b9aae 100644 --- a/src/device/device_utils.cu +++ b/src/device/device_utils.cu @@ -16,11 +16,8 @@ __device__ bool d_floatEqual(float a, float b) { } __device__ void normalization(float &a, float &b, float &c) { - float sumA = a * a; - float sumB = b * b; - float sumC = c * c; - float sum = sumA + sumB + sumC; - a = sqrt(sumA / sum); - b = sqrt(sumB / sum); - c = sqrt(sumC / sum); + float sqrtSum = sqrt(a * a + b * b + c * c); + a /= sqrtSum; + b /= sqrtSum; + c /= sqrtSum; } \ No newline at end of file diff --git a/src/main.cpp b/tests/main.cpp similarity index 59% rename from src/main.cpp rename to tests/main.cpp index 61f004d..29ee133 100644 --- a/src/main.cpp +++ b/tests/main.cpp @@ -22,16 +22,34 @@ int main() { // {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, // {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); +// NurbsSurface::Surface nurbsSurfaceEvaluator({ +// {{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3, -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}}, +// {{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1}, {0.10, -0.18, 0.03, 1}, {0.49, -0.15, -0.03, 1}}, +// {{-0.61, 0.22, 0.09, 1}, {-0.21, 0.20, 0.09, 1}, {0.13, 0.20, 0.12, 1}, {0.49, 0.19, -0.03, 1}}, +// {{-0.52, 0.51, 0.13, 1}, {-0.20, 0.32, 0.36, 1}, {0.18, 0.29, 0.28, 1}, {0.51, 0.53, 0.12, 1}} +// }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); NurbsSurface::Surface nurbsSurfaceEvaluator({ - {{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3, -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}}, - {{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1}, {0.10, -0.18, 0.03, 1}, {0.49, -0.15, -0.03, 1}}, - {{-0.61, 0.22, 0.09, 1}, {-0.21, 0.20, 0.09, 1}, {0.13, 0.20, 0.12, 1}, {0.49, 0.19, -0.03, 1}}, - {{-0.52, 0.51, 0.13, 1}, {-0.20, 0.32, 0.36, 1}, {0.18, 0.29, 0.28, 1}, {0.51, 0.53, 0.12, 1}} + {{2, -2, -2, 1}, {-2.5, -2.2, -1.5, 3}, {-2, -2, -0.5, 4}, {-2, -2, 1.5, 2}}, + {{2, -1, -2, 2}, {-2.5, -1.2, -1.5, 1}, {-2, -1, -0.5, 1}, {-2, -1, 1.5, 1}}, + {{3, 1.2, -2, 9}, {-2.5, 1.2, -1.5, 1}, {-2, 1.5, -0.5, 7}, {-2, 1.5, 1.5, 4}}, + {{2, 2, -2, 1}, {-2.5, 2, -1.5, 1}, {-2, 2.5, -0.5, 7}, {-2, 2, 1.5, 1}} }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); nurbsSurfaceEvaluator.setRecordTime(true); - nurbsSurfaceEvaluator.buildBHV(17, 17); -// nurbsSurfaceEvaluator.evaluate(101, 101); + nurbsSurfaceEvaluator.buildGaussMap(9); // 构建曲面的Gauss map +// nurbsSurfaceEvaluator.gauss_map.printQuadTree(); // 打印整棵Gauss map树 + auto overlappedLeafNodes = NurbsSurface::getOverlappedLeafNodes(nurbsSurfaceEvaluator.gauss_map, + nurbsSurfaceEvaluator.gauss_map); + printf("overlapped leafNodes cnt: %llu\n", overlappedLeafNodes.size()); + //判定两个Surface构造的GaussMap在指定u、v范围内是否有重合 + auto isRegionallyOverlapped = NurbsSurface::isGaussMapsOverlapped(nurbsSurfaceEvaluator.gauss_map, + nurbsSurfaceEvaluator.gauss_map, {0.2, 0.3}, + {0.2, 0.3}, {0.28, 0.38}, {0.28, 0.38}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}); + printf("is guass map overlapped: %d\n", isRegionallyOverlapped); + +// nurbsSurfaceEvaluator.buildBHV(9); // 构建曲面的BVH +// nurbsSurfaceEvaluator.evaluate(101, 101); // 求值 printf("==============================\n"); NurbsCurve::Curve nurbsCurveEvaluator(