From 0b882462e4c664beba97424d6c876fff39f68fa6 Mon Sep 17 00:00:00 2001 From: Dtouch Date: Fri, 16 Dec 2022 16:54:31 +0800 Subject: [PATCH] correct BVH --- CMakeLists.txt | 11 +- include/device/Nurbs/bvh.cuh | 29 ++++ .../nurbs_common.cuh} | 6 +- .../{NurbsCurve.cuh => Nurbs/nurbs_curve.cuh} | 6 +- .../nurbs_surface.cuh} | 18 +- include/device/aabb.cuh | 28 ++++ .../{DeviceUtils.cuh => device_utils.cuh} | 6 +- include/device/vec.cuh | 19 +++ include/utils.h | 6 + src/device/Nurbs/bvh.cu | 137 +++++++++++++++ .../{NurbsCommon.cu => Nurbs/nurbs_common.cu} | 7 +- .../{NurbsCurve.cu => Nurbs/nurbs_curve.cu} | 6 +- .../nurbs_surface.cu} | 158 ++++++++++++++---- src/device/aabb.cu | 42 +++++ .../{DeviceUtils.cu => device_utils.cu} | 2 +- src/device/vec.cu | 13 ++ src/main.cpp | 38 +++-- src/utils.cpp | 16 +- 18 files changed, 480 insertions(+), 68 deletions(-) create mode 100644 include/device/Nurbs/bvh.cuh rename include/device/{NurbsCommon.cuh => Nurbs/nurbs_common.cuh} (90%) rename include/device/{NurbsCurve.cuh => Nurbs/nurbs_curve.cuh} (94%) rename include/device/{NurbsSurface.cuh => Nurbs/nurbs_surface.cuh} (85%) create mode 100644 include/device/aabb.cuh rename include/device/{DeviceUtils.cuh => device_utils.cuh} (79%) create mode 100644 include/device/vec.cuh create mode 100644 src/device/Nurbs/bvh.cu rename src/device/{NurbsCommon.cu => Nurbs/nurbs_common.cu} (94%) rename src/device/{NurbsCurve.cu => Nurbs/nurbs_curve.cu} (98%) rename src/device/{NurbsSurface.cu => Nurbs/nurbs_surface.cu} (75%) create mode 100644 src/device/aabb.cu rename src/device/{DeviceUtils.cu => device_utils.cu} (90%) create mode 100644 src/device/vec.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index de4778f..450246e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,16 @@ project(NurbsEvaluator LANGUAGES CXX CUDA) set(CMAKE_CUDA_STANDARD 14) -add_executable(NurbsEvaluator src/main.cpp src/utils.cpp include/utils.h src/device/NurbsCommon.cu include/device/NurbsCommon.cuh src/device/DeviceUtils.cu include/device/DeviceUtils.cuh src/device/NurbsSurface.cu include/device/NurbsSurface.cuh src/device/NurbsCurve.cu include/device/NurbsCurve.cuh) +add_executable(NurbsEvaluator src/main.cpp + src/utils.cpp include/utils.h + src/device/Nurbs/nurbs_common.cu include/device/Nurbs/nurbs_common.cuh + src/device/device_utils.cu include/device/device_utils.cuh + src/device/Nurbs/nurbs_surface.cu include/device/Nurbs/nurbs_surface.cuh + src/device/Nurbs/nurbs_curve.cu include/device/Nurbs/nurbs_curve.cuh + src/device/aabb.cu include/device/aabb.cuh + src/device/vec.cu include/device/vec.cuh + src/device/Nurbs/bvh.cu include/device/Nurbs/bvh.cuh + ) #add_compile_options("$<$:/utf-8>") #add_compile_options("$<$:/utf-8>") diff --git a/include/device/Nurbs/bvh.cuh b/include/device/Nurbs/bvh.cuh new file mode 100644 index 0000000..7d01728 --- /dev/null +++ b/include/device/Nurbs/bvh.cuh @@ -0,0 +1,29 @@ +// +// Created by 14727 on 2022/12/11. +// + +#ifndef NURBSEVALUATOR_BVH_CUH +#define NURBSEVALUATOR_BVH_CUH + +#include "../aabb.cuh" + +struct BVHNode { + AABB bounds; // AABB包围盒 + int firstChild, level, childNum = 4; // 第一个孩子节点的下标,该结点所在层次,孩子个数 + // 曲面片u, v的范围 + float u0, u1, v0, v1; +}; + +class BVH { +public: + int maxLevel, size; + BVHNode* nodes = nullptr; + + __host__ void printQuadTree(); +}; + +__global__ void +buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float d_lastKnot_u, float d_lastKnot_v, + int d_sampleCnt_u, int d_sampleCnt_v, BVHNode *BVH); + +#endif //NURBSEVALUATOR_BVH_CUH diff --git a/include/device/NurbsCommon.cuh b/include/device/Nurbs/nurbs_common.cuh similarity index 90% rename from include/device/NurbsCommon.cuh rename to include/device/Nurbs/nurbs_common.cuh index dd3bd2a..ef5b1bc 100644 --- a/include/device/NurbsCommon.cuh +++ b/include/device/Nurbs/nurbs_common.cuh @@ -2,8 +2,8 @@ // Created by 14727 on 2022/11/19. // -#ifndef NURBSEVALUATOR_NURBSCOMMON_CUH -#define NURBSEVALUATOR_NURBSCOMMON_CUH +#ifndef NURBSEVALUATOR_NURBS_COMMON_CUH +#define NURBSEVALUATOR_NURBS_COMMON_CUH /** * 当u值已知时,根据基函数N的递推表达式,采用动态规划的方式求解N值 @@ -29,4 +29,4 @@ __global__ void g_derTexture(float *derTexture, const float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, int d_sampleCnt); -#endif //NURBSEVALUATOR_NURBSCOMMON_CUH +#endif //NURBSEVALUATOR_NURBS_COMMON_CUH diff --git a/include/device/NurbsCurve.cuh b/include/device/Nurbs/nurbs_curve.cuh similarity index 94% rename from include/device/NurbsCurve.cuh rename to include/device/Nurbs/nurbs_curve.cuh index 17e105e..fac7c6c 100644 --- a/include/device/NurbsCurve.cuh +++ b/include/device/Nurbs/nurbs_curve.cuh @@ -2,8 +2,8 @@ // Created by 14727 on 2022/12/9. // -#ifndef NURBSEVALUATOR_NURBSCURVE_CUH -#define NURBSEVALUATOR_NURBSCURVE_CUH +#ifndef NURBSEVALUATOR_NURBS_CURVE_CUH +#define NURBSEVALUATOR_NURBS_CURVE_CUH #include #include @@ -70,4 +70,4 @@ namespace NurbsCurve { } -#endif //NURBSEVALUATOR_NURBSCURVE_CUH +#endif //NURBSEVALUATOR_NURBS_CURVE_CUH diff --git a/include/device/NurbsSurface.cuh b/include/device/Nurbs/nurbs_surface.cuh similarity index 85% rename from include/device/NurbsSurface.cuh rename to include/device/Nurbs/nurbs_surface.cuh index feb2f21..af92497 100644 --- a/include/device/NurbsSurface.cuh +++ b/include/device/Nurbs/nurbs_surface.cuh @@ -2,10 +2,11 @@ // Created by 14727 on 2022/12/9. // -#ifndef NURBSEVALUATOR_NURBSSURFACE_CUH -#define NURBSEVALUATOR_NURBSSURFACE_CUH +#ifndef NURBSEVALUATOR_NURBS_SURFACE_CUH +#define NURBSEVALUATOR_NURBS_SURFACE_CUH #include #include "cuda_runtime.h" +#include "bvh.cuh" namespace NurbsSurface { const int POINT_SIZE = 4; @@ -25,7 +26,7 @@ namespace NurbsSurface { float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v); __global__ static void - g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v); + g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, float lastKnot_v, float *ms, float*k); class Surface { private: @@ -42,7 +43,11 @@ namespace NurbsSurface { float *d_nTexture1_u; // u方向指向度为p-1时的device中的nurbs基函数矩阵 float *d_nTexture1_v; // v方向指向度为p-1时的device中的nurbs基函数矩阵 + float *d_evaluationRes; // evaluation结果 float *d_derivatives; // 一阶导计算结果 + float *d_k; // 最大曲率 + + BVH bvh; public: /** @@ -73,6 +78,11 @@ namespace NurbsSurface { */ __host__ void curvature(int sampleCnt_u, int sampleCnt_v); + /** + * 供外部CPU程序使用的、负责调用gpu并行计算BVH的方法 + */ + __host__ void buildBHV(int sampleCnt_u, int sampleCnt_v); + void setRecordTime(bool r); ~Surface(); @@ -82,4 +92,4 @@ namespace NurbsSurface { -#endif //NURBSEVALUATOR_NURBSSURFACE_CUH \ No newline at end of file +#endif //NURBSEVALUATOR_NURBS_SURFACE_CUH \ No newline at end of file diff --git a/include/device/aabb.cuh b/include/device/aabb.cuh new file mode 100644 index 0000000..36b476f --- /dev/null +++ b/include/device/aabb.cuh @@ -0,0 +1,28 @@ +// +// Created by 14727 on 2022/12/11. +// + +#ifndef NURBSEVALUATOR_AABB_CUH +#define NURBSEVALUATOR_AABB_CUH + +#include "vec.cuh" +#include "cstdio" + +class AABB { +public: + // 边界 + FVec3 pMin, pMax; + __device__ __host__ AABB(); + __device__ __host__ explicit AABB(const FVec3& p); + __device__ __host__ AABB(const FVec3& p1, const FVec3& p2); + + // aabb包围盒合并操作,包围盒和点 + __device__ __host__ AABB Union(const FVec3& p); + // aabb包围盒合并操作,两个包围盒 + __device__ __host__ AABB Union(const AABB& b2); + // 判断两个aabb包围盒是否重叠 + __device__ __host__ bool IsOverlap(const AABB& b2); +}; + + +#endif //NURBSEVALUATOR_AABB_CUH diff --git a/include/device/DeviceUtils.cuh b/include/device/device_utils.cuh similarity index 79% rename from include/device/DeviceUtils.cuh rename to include/device/device_utils.cuh index 5ea9fa6..3c50dfc 100644 --- a/include/device/DeviceUtils.cuh +++ b/include/device/device_utils.cuh @@ -2,8 +2,8 @@ // Created by 14727 on 2022/12/9. // -#ifndef NURBSEVALUATOR_DEVICEUTILS_CUH -#define NURBSEVALUATOR_DEVICEUTILS_CUH +#ifndef NURBSEVALUATOR_DEVICE_UTILS_CUH +#define NURBSEVALUATOR_DEVICE_UTILS_CUH __device__ void d_safeFree(float * &p); @@ -20,4 +20,4 @@ __device__ bool d_floatEqual(float a, float b); __device__ void normalization(float &a, float &b, float &c); -#endif //NURBSEVALUATOR_DEVICEUTILS_CUH +#endif //NURBSEVALUATOR_DEVICE_UTILS_CUH diff --git a/include/device/vec.cuh b/include/device/vec.cuh new file mode 100644 index 0000000..5bd99d5 --- /dev/null +++ b/include/device/vec.cuh @@ -0,0 +1,19 @@ +// +// Created by 14727 on 2022/12/11. +// + +#ifndef NURBSEVALUATOR_VEC_CUH +#define NURBSEVALUATOR_VEC_CUH +#include "cuda_runtime.h" + +class FVec3 { +public: + float x, y, z; + __device__ __host__ FVec3(float x_, float y_, float z_); + __device__ __host__ FVec3(FVec3 const &fVec3); + + __device__ __host__ FVec3(); +}; + + +#endif //NURBSEVALUATOR_VEC_CUH diff --git a/include/utils.h b/include/utils.h index c9c24dc..dbcb8b6 100644 --- a/include/utils.h +++ b/include/utils.h @@ -9,6 +9,8 @@ double get_time(); #else #include +#include "device/Nurbs/bvh.cuh" + double get_time(); #endif @@ -17,6 +19,10 @@ double get_time(); * 注意指针是引用传参,因为要把指针本身置空 */ void safeCudaFree(float *&p); +void safeCudaFree(BVHNode *&p); +//template void safeFree(float *&p); +void safeFree(BVHNode *&p); + #endif //UNTITLED1_UTILS_H diff --git a/src/device/Nurbs/bvh.cu b/src/device/Nurbs/bvh.cu new file mode 100644 index 0000000..874c6ba --- /dev/null +++ b/src/device/Nurbs/bvh.cu @@ -0,0 +1,137 @@ +// +// Created by 14727 on 2022/12/11. +// + +#include "../../../include/device/Nurbs/bvh.cuh" +/** + * 当前层的第一个节点的坐标 + * @param layer 层号。根节点为第一层 + */ +__device__ int getStartIdxOfLayerN(int layer) { + if (layer == 1) return 0; + int num = 1; + // 找规律得出 + for (int i = 2; i < layer; i++) { + num = num * 4 + 1; + } + return num; +} + +/** + * 根据u、v参数域中的小矩形的位置,判断它在BVH叶节点层(也就是最后一层)中的位置 + * @param ix + * @param iy + * @return + */ +__device__ int getChildNodeIdx(int ix, int iy) { + int idx = 0; + while(ix > 0) { + int log2XCeil = int(__log2f(ix)); + idx += int(pow(4, log2XCeil)); + ix -= int(pow(2, log2XCeil)); + } + while(iy > 0) { + int log2XCeil = int(__log2f(iy)); + idx += 2 * int(pow(4, log2XCeil)); + iy -= int(pow(2, log2XCeil)); + } + return idx; +} + + +__global__ void +buildSurfaceBvh(const float *k, int level, const float *evaluatedPoints, float lastKnot_u, float lastKnot_v, + int sampleCnt_u, int sampleCnt_v, BVHNode *BVH) { + // 假设还是二维grid和二维的block + int ix = blockIdx.x * blockDim.x + threadIdx.x; + int iy = blockIdx.y * blockDim.y + threadIdx.y; + if (ix >= sampleCnt_u - 1 || iy >= sampleCnt_v - 1) { + return; + } + float singleStepVal_u = lastKnot_u / (sampleCnt_u - 1); + float singleStepVal_v = lastKnot_v / (sampleCnt_v - 1); + + float u = singleStepVal_u * ix, v = singleStepVal_v * iy; + + int bias = getChildNodeIdx(ix, iy); // 每层的节点在总数组中的编号相对该层第一个节点的偏置 + for (int step = 1; step <= sampleCnt_u - 1 || step <= sampleCnt_v - 1; step = step * 2, --level) { + + float step_u = min(step, sampleCnt_u - 1); + float step_v = min(step, sampleCnt_v - 1); + + int baseIdx = getStartIdxOfLayerN(level); // 当前层的第一个节点的坐标 + int idx = baseIdx + bias; + + int tmpIdx = (ix * sampleCnt_v + iy) * 3; +// AABB bound(FVec3(evaluatedPoints[tmpIdx] + *k, evaluatedPoints[tmpIdx + 1] + *k, +// evaluatedPoints[tmpIdx + 2] + *k)); // 最初只包含u,v点 + AABB bound(FVec3(evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], + evaluatedPoints[tmpIdx + 2])); // 最初只包含u,v点 + if(idx == 75) printf("75: %g, %g, %g\n", evaluatedPoints[tmpIdx], evaluatedPoints[tmpIdx + 1], evaluatedPoints[tmpIdx + 2]); + if (step == 1) { + // 叶子节点 + BVH[idx].u0 = u; + BVH[idx].u1 = u + step_u * singleStepVal_u; + BVH[idx].v0 = v; + BVH[idx].v1 = v + step_v * singleStepVal_v; + BVH[idx].level = level; +// int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3; +// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1] + *k, evaluatedPoints[tmpIdx1 + 1] + *k, +// evaluatedPoints[tmpIdx1 + 2] + *k)); +// int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; +// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2] + *k, evaluatedPoints[tmpIdx2 + 1] + *k, +// evaluatedPoints[tmpIdx2 + 2] + *k)); +// int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; +// bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3] + *k, evaluatedPoints[tmpIdx3 + 1] + *k, +// evaluatedPoints[tmpIdx3 + 2] + *k)); + int tmpIdx1 = ((ix + 1) * sampleCnt_v + iy) * 3; + bound = bound.Union(FVec3(evaluatedPoints[tmpIdx1], evaluatedPoints[tmpIdx1 + 1], + evaluatedPoints[tmpIdx1 + 2])); + int tmpIdx2 = (ix * sampleCnt_v + iy + 1) * 3; + bound = bound.Union(FVec3(evaluatedPoints[tmpIdx2], evaluatedPoints[tmpIdx2 + 1], + evaluatedPoints[tmpIdx2 + 2])); + int tmpIdx3 = ((ix + 1) * sampleCnt_v + iy + 1) * 3; + bound = bound.Union(FVec3(evaluatedPoints[tmpIdx3], evaluatedPoints[tmpIdx3 + 1], + evaluatedPoints[tmpIdx3 + 2])); + + BVH[idx].bounds = bound; + BVH[idx].childNum = -1; + + + } else { + if (ix % step == 0 || iy % step == 0) { + // 非叶子节点 + BVH[idx].u0 = u; + BVH[idx].u1 = u + step_u * singleStepVal_u; + BVH[idx].v0 = v; + BVH[idx].v1 = v + step_v * singleStepVal_v; + BVH[idx].level = level; + + int firstChild = 4 * idx + 1; + bound = bound.Union(BVH[firstChild].bounds); + bound = bound.Union(BVH[firstChild + 1].bounds); + bound = bound.Union(BVH[firstChild + 2].bounds); + bound = bound.Union(BVH[firstChild + 3].bounds); + BVH[idx].firstChild = firstChild; + BVH[idx].bounds = bound; + } + } + + bias /= 4; + + __syncthreads(); + } +} + +__host__ void BVH::printQuadTree() { + + for (int l = 1, levelNodesCnt = 1, baseIdx = 0; l <= maxLevel; l++, levelNodesCnt *= 4, baseIdx = baseIdx * 4 + 1) { + for (int biasIdx = 0; biasIdx < levelNodesCnt; biasIdx++) { + int idx = baseIdx + biasIdx; + auto pMax = nodes[idx].bounds.pMax; + auto pMin = nodes[idx].bounds.pMin; + printf("<<%g, %g, %g>,<%g, %g, %g>> ", pMin.x, pMin.y, pMin.z, pMax.x, pMax.y, pMax.z); + } + printf("\n =============$$$$$$$============= \n"); + } +} diff --git a/src/device/NurbsCommon.cu b/src/device/Nurbs/nurbs_common.cu similarity index 94% rename from src/device/NurbsCommon.cu rename to src/device/Nurbs/nurbs_common.cu index 6c15ee1..b49a8d2 100644 --- a/src/device/NurbsCommon.cu +++ b/src/device/Nurbs/nurbs_common.cu @@ -2,8 +2,9 @@ // Created by 14727 on 2022/11/19. // -#include "../../include/device/NurbsCommon.cuh" -#include "../../include/device/DeviceUtils.cuh" +#include "../../../include/device/Nurbs/nurbs_common.cuh" +#include "../../../include/device/device_utils.cuh" +#include __device__ void d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) { int m = d_knotsCnt - 1; @@ -45,7 +46,7 @@ __global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d for (int i = 0; i < d_pointsCnt; i++) { nTexture[idx * d_pointsCnt + i] = N_dp[d_degree * (d_knotsCnt - 1) + i]; nTexture1[idx * (d_pointsCnt + 1) + i] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + i]; -// printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); +// if(idx == 3)printf("nTexture1: %g ", nTexture1[idx * (d_pointsCnt + 1) + i]); } nTexture1[idx * (d_pointsCnt + 1) + d_pointsCnt] = N_dp[(d_degree - 1) * (d_knotsCnt - 1) + d_pointsCnt]; // nTexture1多记录一列数据 diff --git a/src/device/NurbsCurve.cu b/src/device/Nurbs/nurbs_curve.cu similarity index 98% rename from src/device/NurbsCurve.cu rename to src/device/Nurbs/nurbs_curve.cu index 71c92df..8bc74f4 100644 --- a/src/device/NurbsCurve.cu +++ b/src/device/Nurbs/nurbs_curve.cu @@ -1,9 +1,9 @@ // // Created by 14727 on 2022/12/9. // -#include "../../include/device/NurbsCommon.cuh" -#include "../../include/device/NurbsCurve.cuh" -#include "../../include/utils.h" +#include "../../../include/device/Nurbs/nurbs_common.cuh" +#include "../../../include/device/Nurbs/nurbs_curve.cuh" +#include "../../../include/utils.h" __global__ void NurbsCurve::g_evaluate(float *res, const float *NTexture, const float *d_points, const int d_pointsCnt, diff --git a/src/device/NurbsSurface.cu b/src/device/Nurbs/nurbs_surface.cu similarity index 75% rename from src/device/NurbsSurface.cu rename to src/device/Nurbs/nurbs_surface.cu index d39b8ad..7e35a9c 100644 --- a/src/device/NurbsSurface.cu +++ b/src/device/Nurbs/nurbs_surface.cu @@ -2,13 +2,14 @@ // Created by 14727 on 2022/12/9. // -#include "../../include/device/NurbsSurface.cuh" -#include "../../include/device/NurbsCommon.cuh" -#include "../../include/device/DeviceUtils.cuh" -#include "../../include/utils.h" +#include "../../../include/device/Nurbs/nurbs_surface.cuh" +#include "../../../include/device/Nurbs/nurbs_common.cuh" +#include "../../../include/utils.h" +#include "../../../include/device/Nurbs/bvh.cuh" __global__ void -NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, +NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, + int d_pointsCnt_u, int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, int d_sampleCnt_v) { @@ -40,14 +41,15 @@ NurbsSurface::g_evaluate(float* res, const float *d_nTexture_u, const float *d_n y = y / sumW; z = z / sumW; -// int baseIdx = (ix * d_sampleCnt_v + iy) * 3; -// res[baseIdx] = x; -// res[baseIdx + 1] = y; -// res[baseIdx + 2] = z; + int baseIdx = (ix * d_sampleCnt_v + iy) * 3; + res[baseIdx] = x; + res[baseIdx + 1] = y; + res[baseIdx + 2] = z; // printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, x, y, z); // %g输出,舍弃无意义的0 } + __global__ void NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, @@ -125,8 +127,8 @@ NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const } __global__ void -NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, - float lastKnot_v) { +NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const int sampleCnt_v, float lastKnot_u, + float lastKnot_v, float *ms, float *k) { // 二维grid和二维的block int ix = blockIdx.x * blockDim.x + threadIdx.x; int iy = blockIdx.y * blockDim.y + threadIdx.y; @@ -202,20 +204,68 @@ NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleC } float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; - normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); - normalization(uvx, uvy, uvz); - normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); +// normalization(sndPdx_uv, sndPdy_uv, sndPdz_uv); +// normalization(sndPdx_vu, sndPdy_vu, sndPdz_vu); +// normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); +// normalization(uvx, uvy, uvz); +// normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); - if (ix == 8 && iy == 9) + if (ix == 8 && iy == 9) { printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, sndPdz_uu, uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); + printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, + sndPdz_vu); + } + + float m1 = max(max(sndPdx_uu, sndPdy_uu), sndPdz_uu); + float m2 = max(max(uvx, uvy), uvz); + float m3 = max(max(sndPdx_vv, sndPdy_vv), sndPdz_vv); + + +// __shared__ float ms[363]; + ms[(ix * sampleCnt_v + iy) * 3] = m1; + ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2; + ms[(ix * sampleCnt_v + iy) * 3 + 2] = m3; + __syncthreads(); +// if(ix == 1 && iy == 1) { +// for(int i = 0; i < sampleCnt_u; i++) { +// for(int j = 0; j < sampleCnt_v; j++) { +// printf("%g ", ms[(i * sampleCnt_v + j) * 3]); +// } +// printf("\n"); +// } +// } + + // 规约求最大值 + for (int step = (sampleCnt_u + 1) / 2; step > 1; step = (step + 1) / 2) { + // step 表示现在参与计算最大值的数据的长度的一半 + if (ix < step && ix + step < sampleCnt_u) { + ms[(ix * sampleCnt_v + iy) * 3] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3]); + ms[(ix * sampleCnt_v + iy) * 3 + 1] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 1]); + ms[(ix * sampleCnt_v + iy) * 3 + 2] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 2]); + } + } + + for (int step = (sampleCnt_v + 1) / 2; step > 1; step = (step + 1) / 2) { + // step 表示现在参与计算最大值的数据的长度的一半 + if (iy < step && iy + step < sampleCnt_v) { + ms[iy * 3] = max(ms[iy * 3], ms[(iy + step) * 3]); + ms[iy * 3 + 1] = max(ms[iy * 3 + 1], ms[(iy + step) * 3 + 1]); + ms[iy * 3 + 2] = max(ms[iy * 3 + 2], ms[(iy + step) * 3 + 2]); + } + } + __syncthreads(); + int n = sampleCnt_u - 1; + int m = sampleCnt_v - 1; + *k = (ms[0] / (n * n) + 2 * ms[1] / (n * m) + ms[2] / (m * m)) / 8; +// if(ix == 1 && iy == 1)printf("%g gggg\n", ms[0]); } __host__ NurbsSurface::Surface::Surface(std::vector>> controlPoints, - std::vector knots_u, std::vector knots_v) { + std::vector knots_u, std::vector knots_v) { this->knots_u = std::move(knots_u); this->knots_v = std::move(knots_v); this->controlPoints = std::move(controlPoints); @@ -227,7 +277,10 @@ __host__ NurbsSurface::Surface::Surface(std::vector>> @@ -274,9 +327,9 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { // 结果数组 size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float); - float *d_res; - cudaMalloc((void **)&d_res, resBytes); - auto *h_res = (float *)malloc(resBytes); + + cudaMalloc((void **) &d_evaluationRes, resBytes); + auto *h_res = (float *) malloc(resBytes); // 构造g_basisTexture线程层级 @@ -297,7 +350,8 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { cudaDeviceSynchronize(); if (recordTime) time_cost_device = get_time(); - g_evaluate <<>>(d_res, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, + g_evaluate <<>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, + POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { @@ -306,11 +360,13 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { time_cost_device); } - cudaMemcpy(h_res, d_res, resBytes, cudaMemcpyDeviceToHost); - std::vector>> res(sampleCnt_u, std::vector>(sampleCnt_v, std::vector(3, 0.))); - for(int i = 0; i < sampleCnt_u; i++) { + cudaMemcpy(h_res, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost); + std::vector>> res(sampleCnt_u, std::vector>(sampleCnt_v, + std::vector(3, + 0.))); + for (int i = 0; i < sampleCnt_u; i++) { int baseIdx = i * sampleCnt_v * 3; - for(int j = 0; j < sampleCnt_v; j++) { + for (int j = 0; j < sampleCnt_v; j++) { baseIdx += j * 3; res[i][j][0] = h_res[baseIdx]; res[i][j][1] = h_res[baseIdx + 1]; @@ -323,7 +379,6 @@ NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) { safeFree(h_points); safeFree(h_knots_u); safeFree(h_knots_v); - safeCudaFree(d_res); safeFree(h_res); return res; @@ -380,8 +435,6 @@ __host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v cudaFree(d_derTexture_v); } - - __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) { // 先计算切向量 derivative(sampleCnt_u, sampleCnt_v); @@ -390,15 +443,22 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) return; } + // 构造记录每个采样点中的最大M1、M2、M3的数组 + // 这里用共享内存会更好,但使用共享内存动态分配长度总是出错(好像是因为长度过长),后续需要思考解决这个问题 + float *ms = nullptr; + cudaMalloc((void **) &ms, sampleCnt_u * sampleCnt_v * 3 * sizeof(float)); + + cudaMalloc((void **) &d_k, sizeof(float)); + // 构造线程层级 dim3 block(32, 32); - dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + dim3 grid((sampleCnt_u - 1 + block.x - 1) / block.x, (sampleCnt_v - 1 + block.y - 1) / block.y); // 记录用时 double time_cost_device; if (recordTime) time_cost_device = get_time(); g_curvature<<>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], - knots_v[knots_v.size() - 1]); + knots_v[knots_v.size() - 1], ms, d_k); cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 if (recordTime) { time_cost_device = get_time() - time_cost_device; @@ -406,6 +466,7 @@ __host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) sampleCnt_u * sampleCnt_v, time_cost_device); } + safeCudaFree(ms); } void NurbsSurface::Surface::setRecordTime(bool r) { @@ -420,5 +481,42 @@ NurbsSurface::Surface::~Surface() { safeCudaFree(d_points); safeCudaFree(d_knots_u); safeCudaFree(d_knots_v); + safeCudaFree(d_k); + safeCudaFree(d_evaluationRes); + safeFree(bvh.nodes); cudaDeviceReset(); -} \ No newline at end of file +} + +__host__ void NurbsSurface::Surface::buildBHV(int sampleCnt_u, int sampleCnt_v) { + // 先计算曲率 + curvature(sampleCnt_u, sampleCnt_v); + if (POINT_SIZE != controlPoints[0][0].size()) { + printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); + return; + } + // 构造线程层级 + dim3 block(32, 32); + dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); + + bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1); + bvh.size = (pow(4, bvh.maxLevel) - 1) / 3; // 等比数列求和公示求出数总的节点数 + size_t bvhBytes = sizeof(BVHNode) * bvh.size; + BVHNode *d_bvh = nullptr; + cudaMalloc((void **) &d_bvh, bvhBytes); + // 记录用时 + double time_cost_device = get_time(); + buildSurfaceBvh<<>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1], + knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh); + cudaDeviceSynchronize(); + if (recordTime) { + time_cost_device = get_time() - time_cost_device; + printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", + sampleCnt_u * sampleCnt_v, + time_cost_device); + } + if (bvh.nodes == nullptr) bvh.nodes = (BVHNode *) malloc(bvhBytes); + cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost); + safeCudaFree(d_bvh); + +// bvh.printQuadTree(); +} diff --git a/src/device/aabb.cu b/src/device/aabb.cu new file mode 100644 index 0000000..f516e91 --- /dev/null +++ b/src/device/aabb.cu @@ -0,0 +1,42 @@ +// +// Created by 14727 on 2022/12/11. +// + +#include "../../include/device/aabb.cuh" + +#define D_DBL_MAX 1.7976931348623158e+308 +#define D_FLT_MAX 3.402823466e+38F + +__device__ __host__ AABB::AABB() { + float minNum = -D_FLT_MAX; + float maxNum = D_FLT_MAX; + pMin = FVec3(minNum, minNum, minNum); + pMax = FVec3(maxNum, maxNum, maxNum); +} + +__device__ __host__ AABB::AABB(const FVec3 &p) : pMin(p), pMax(p) {} + +__device__ __host__ AABB::AABB(const FVec3 &p1, const FVec3 &p2) { + pMin = FVec3(fmin(p1.x, p2.x), fmin(p1.y, p2.y), fmin(p1.z, p2.z)); + pMax = FVec3(fmax(p1.x, p2.x), fmax(p1.y, p2.y), fmax(p1.z, p2.z)); +} + +__device__ __host__ AABB AABB::Union(const FVec3 &p) { + return {FVec3(fmin(pMin.x, p.x), fmin(pMin.y, p.y), fmin(pMin.z, p.z)), + FVec3(fmax(pMax.x, p.x), fmax(pMax.y, p.y), fmax(pMax.z, p.z))}; +} + +__device__ __host__ AABB AABB::Union(const AABB &b2) { + return {FVec3(fmin(pMin.x, b2.pMin.x), fmin(pMin.y, b2.pMin.y), fmin(pMin.z, b2.pMin.z)), + FVec3(fmax(pMax.x, b2.pMax.x), fmax(pMax.y, b2.pMax.y), fmax(pMax.z, b2.pMax.z))}; +} + +__device__ __host__ bool AABB::IsOverlap(const AABB& b2) { + if (pMin.x > b2.pMax.x) return false; + if (b2.pMin.x > pMax.x) return false; + if (pMin.y > b2.pMax.y) return false; + if (b2.pMin.y > pMax.y) return false; + if (pMin.z > b2.pMax.z) return false; + if (b2.pMin.z > pMax.z) return false; + return true; +} diff --git a/src/device/DeviceUtils.cu b/src/device/device_utils.cu similarity index 90% rename from src/device/DeviceUtils.cu rename to src/device/device_utils.cu index 06f75ae..37969d2 100644 --- a/src/device/DeviceUtils.cu +++ b/src/device/device_utils.cu @@ -2,7 +2,7 @@ // Created by 14727 on 2022/12/9. // -#include "../../include/device/DeviceUtils.cuh" +#include "../../include/device/device_utils.cuh" __device__ void d_safeFree(float *&p) { if (p != nullptr) { diff --git a/src/device/vec.cu b/src/device/vec.cu new file mode 100644 index 0000000..d353446 --- /dev/null +++ b/src/device/vec.cu @@ -0,0 +1,13 @@ +// +// Created by 14727 on 2022/12/11. +// + +#include "../../include/device/vec.cuh" + +__device__ __host__ FVec3::FVec3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {} + +__device__ __host__ FVec3::FVec3(const FVec3 &fVec3) : x(static_cast(fVec3.x)), + y(static_cast(fVec3.y)), + z(static_cast(fVec3.z)) {} + +__device__ __host__ FVec3::FVec3():x(0.), y(0.), z(0.) {} diff --git a/src/main.cpp b/src/main.cpp index 81e8bc6..e3df294 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,17 +1,17 @@ #include -#include "../include/device/NurbsCurve.cuh" -#include "../include/device/NurbsSurface.cuh" +#include "../include/device/Nurbs/nurbs_curve.cuh" +#include "../include/device/Nurbs/nurbs_surface.cuh" int main() { - NurbsSurface::Surface nurbsSurfaceEvaluator({ - {{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}}, - {{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}}, - {{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}}, - {{-1.5, 3.2, 1, 0.5}, {2.6, 7, -2, 0.7}, {5, 0.8, 4.2, 0.8}, {-4, 1, 4, 0.7}, {2.1, 4, -2, 0.3}, {11, -6, 4, 0.6}}, - {{-0.2, 2, 0, 0.7}, {5, 3, 2, 0.4}, {5, 1.5, 1.4, 0.6}, {-3, 2, 5, 0.8}, {0.8, 1.3, 0, 0.5}, {15, -2, 0.9, 0.6}}, - {{3, 1.4, -1, 0.4}, {6, 2, 4, 0.6}, {-1, 0, -2, 0.4}, {0, 2.8, 2, 0.6}, {-0.5, 2, 1.2, 0.9}, {7, -3, -2, 0.3}},}, - {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, - {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); +// NurbsSurface::Surface nurbsSurfaceEvaluator({ +// {{-1, 0, 0, 0.3}, {0, 1, 6, 0.8}, {1, 0, 4, 0.5}, {2, 0.5, 3, 0.8}, {3, 3, 1, 0.6}, {4, -5, 0, 0.7}}, +// {{-2, 1, 1.2, 0.2}, {1, 2, 3, 0.3}, {2, 2, 3, 0.6}, {-1, -0.3, 2, 0.4}, {-1, 2, 0, 0.9}, {7, -8, 2, 0.3}}, +// {{-3.4, 2, 3, 0.8}, {2, 3, 0, 0.6}, {4, 3, 7, 0.3}, {-2, 0, -0.2, 0.4}, {1, 1.7, 5, 0.6}, {9, -10.3, 6, 0.7}}, +// {{-1.5, 3.2, 1, 0.5}, {2.6, 7, -2, 0.7}, {5, 0.8, 4.2, 0.8}, {-4, 1, 4, 0.7}, {2.1, 4, -2, 0.3}, {11, -6, 4, 0.6}}, +// {{-0.2, 2, 0, 0.7}, {5, 3, 2, 0.4}, {5, 1.5, 1.4, 0.6}, {-3, 2, 5, 0.8}, {0.8, 1.3, 0, 0.5}, {15, -2, 0.9, 0.6}}, +// {{3, 1.4, -1, 0.4}, {6, 2, 4, 0.6}, {-1, 0, -2, 0.4}, {0, 2.8, 2, 0.6}, {-0.5, 2, 1.2, 0.9}, {7, -3, -2, 0.3}},}, +// {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, +// {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); // NurbsSurface::Evaluator nurbsSurfaceEvaluator({ // {{-1, 0, 0, 1}, {0, 1, 6, 1}, {1, 0, 4, 1}, {2, 0.5, 3, 1}, {3, 3, 1, 1}, {4, -5, 0, 1}}, // {{-2, 1, 1.2, 1}, {1, 2, 3, 1}, {2, 2, 3, 1}, {-1, -0.3, 2, 1}, {-1, 2, 0, 1}, {7, -8, 2, 1}}, @@ -21,9 +21,16 @@ int main() { // {{3, 1.4, -1, 1}, {6, 2, 4, 1}, {-1, 0, -2, 1}, {0, 2.8, 2, 1}, {-0.5, 2, 1.2, 1}, {7, -3, -2, 1}},}, // {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}, // {0, 0, 0, 0.2, 0.7, 0.8, 1, 1, 1}); + + NurbsSurface::Surface nurbsSurfaceEvaluator({ + {{-0.57, -0.48, -0.31, 1}, {-0.17, -0.49, -0.43, 1}, {0.3, -0.53, -0.27, 1}, {0.66, -0.42, -0.36, 1}}, + {{-0.53, -0.15, -0.03, 1}, {-0.21, -0.12, 0.09, 1}, {0.10, -0.18, 0.03, 1}, {0.49, -0.15, -0.03, 1}}, + {{-0.61, 0.22, 0.09, 1}, {-0.21, 0.20, 0.09, 1}, {0.13, 0.20, 0.12, 1}, {0.49, 0.19, -0.03, 1}}, + {{-0.52, 0.51, 0.13, 1}, {-0.20, 0.32, 0.36, 1}, {0.18, 0.29, 0.28, 1}, {0.51, 0.53, 0.12, 1}} + }, {0, 0, 0, 0, 1, 1, 1, 1}, {0, 0, 0, 0, 1, 1, 1, 1}); nurbsSurfaceEvaluator.setRecordTime(true); - nurbsSurfaceEvaluator.curvature(101, 101); + nurbsSurfaceEvaluator.buildBHV(17, 17); // nurbsSurfaceEvaluator.evaluate(101, 101); printf("==============================\n"); @@ -34,15 +41,14 @@ int main() { {2, 0.5, 3, 0.4}, {3, 3, 1, 0.5}, {4, -5, 0, 0.7}}, - {0, 0, 0, 0.1, 0.5, 0.8, 1, 1, 1}); + {0, 0, 0.1, 0.2, 0.5, 0.8, 1, 1, 1}); nurbsCurveEvaluator.setRecordTime(true); -// nurbsCurveEvaluator.curvature(11); + auto res = nurbsCurveEvaluator.evaluate(11); - for(auto & re : res) { + for (auto &re: res) { printf("%f, %f, %f\n", re[0], re[1], re[2]); } printf("\n"); -// nurbsCurveEvaluator.derivative(); return 0; } \ No newline at end of file diff --git a/src/utils.cpp b/src/utils.cpp index fa4ebfa..ee5d15f 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -38,9 +38,23 @@ void safeCudaFree(float *&p) { } } +void safeCudaFree(BVHNode *&p){ + if (p != nullptr) { + cudaFree(p); + p = nullptr; + } +} +//template void safeFree(float *&p) { if (p != nullptr) { free(p); p = nullptr; } -} \ No newline at end of file +} + +void safeFree(BVHNode *&p) { + if (p != nullptr) { + free(p); + p = nullptr; + } +}